Codebase list asmjit / e644e03
New upstream version 0.0~git20200429.9057aa3 Mo Zhou 4 years ago
128 changed file(s) with 74274 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 # Editor configuration, see https://editorconfig.org for more details.
1 root = true
2
3 [*.{cpp,h,natvis}]
4 charset = utf-8
5 end_of_line = lf
6 indent_style = space
7 indent_size = 2
8 insert_final_newline = true
9 trim_trailing_whitespace = true
0 .vscode
1 .kdev4
2 *.kdev4
3 build
4 build_*
5 tools/asmdb
0 language: cpp
1
2 git:
3 depth: false
4
5 env:
6 global:
7 - BUILD_TOOLCHAIN="Unix Makefiles"
8 - MAKEFLAGS="-j2"
9
10 dist: bionic
11
12 matrix:
13 include:
14 - name: "Linux Clang Default [64-bit] [DBG]"
15 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=clang-9 && CXX=clang++-9"
16 os: linux
17 addons:
18 apt:
19 sources:
20 - sourceline: "ppa:ubuntu-toolchain-r/test"
21 packages: [clang++-9]
22
23 - name: "Linux Clang Default [64-bit] [REL]"
24 env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9"
25 os: linux
26 addons:
27 apt:
28 sources:
29 - sourceline: "ppa:ubuntu-toolchain-r/test"
30 packages: [clang++-9]
31
32 - name: "Linux Clang Default [64-bit] [REL] [Sanitize=Address]"
33 env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" SANITIZE=address
34 os: linux
35 addons:
36 apt:
37 sources:
38 - sourceline: "ppa:ubuntu-toolchain-r/test"
39 packages: [clang++-9]
40
41 - name: "Linux Clang Default [64-bit] [REL] [Sanitize=Undefined]"
42 env: BUILD_MATRIX="BUILD_TYPE=Release && CC=clang-9 && CXX=clang++-9" SANITIZE=undefined
43 os: linux
44 addons:
45 apt:
46 sources:
47 - sourceline: "ppa:ubuntu-toolchain-r/test"
48 packages: [clang++-9]
49
50 - name: "Linux GCC Default [64-bit] [DBG + Valgrind]"
51 env: BUILD_MATRIX="BUILD_TYPE=Debug" USE_VALGRIND=1
52 os: linux
53 addons:
54 apt:
55 packages: [valgrind]
56
57 - name: "Linux GCC Default [64-bit] [REL + Valgrind]"
58 env: BUILD_MATRIX="BUILD_TYPE=Release" USE_VALGRIND=1
59 os: linux
60 addons:
61 apt:
62 packages: [valgrind]
63
64 - name: "Linux GCC 4.8 [32-bit] [DBG]"
65 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-4.8 && CXX=g++-4.8" CXXFLAGS=-m32 LDFLAGS=-m32
66 os: linux
67 addons:
68 apt:
69 packages: [g++-4.8, g++-4.8-multilib, "linux-libc-dev:i386"]
70
71 - name: "Linux GCC 4.8 [64-bit] [DBG]"
72 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-4.8 && CXX=g++-4.8"
73 os: linux
74 addons:
75 apt:
76 packages: [g++-4.8]
77
78 - name: "Linux GCC 5.X [32-bit] [DBG]"
79 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-5 && CXX=g++-5" CXXFLAGS=-m32 LDFLAGS=-m32
80 os: linux
81 addons:
82 apt:
83 packages: [g++-5, g++-5-multilib, "linux-libc-dev:i386"]
84
85 - name: "Linux GCC 5.X [64-bit] [DBG]"
86 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-5 && CXX=g++-5"
87 os: linux
88 addons:
89 apt:
90 packages: [g++-5]
91
92 - name: "Linux GCC 6.X [32-bit] [DBG]"
93 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-6 && CXX=g++-6" CXXFLAGS=-m32 LDFLAGS=-m32
94 os: linux
95 addons:
96 apt:
97 packages: [g++-6, g++-6-multilib, "linux-libc-dev:i386"]
98
99 - name: "Linux GCC 6.X [64-bit] [DBG]"
100 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-6 && CXX=g++-6"
101 os: linux
102 addons:
103 apt:
104 packages: [g++-6]
105
106 - name: "Linux GCC 7.X [32-bit] [DBG]"
107 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-7 && CXX=g++-7" CXXFLAGS=-m32 LDFLAGS=-m32
108 os: linux
109 addons:
110 apt:
111 packages: [g++-7, g++-7-multilib, "linux-libc-dev:i386"]
112
113 - name: "Linux GCC 7.X [64-bit] [DBG]"
114 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-7 && CXX=g++-7"
115 os: linux
116 addons:
117 apt:
118 packages: [g++-7]
119
120 - name: "Linux GCC 8.X [32-bit] [DBG]"
121 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-8 && CXX=g++-8" CXXFLAGS=-m32 LDFLAGS=-m32
122 os: linux
123 addons:
124 apt:
125 packages: [g++-8, g++-8-multilib, "linux-libc-dev:i386"]
126
127 - name: "Linux GCC 8.X [32-bit] [REL]"
128 env: BUILD_MATRIX="BUILD_TYPE=Release && CC=gcc-8 && CXX=g++-8" CXXFLAGS=-m32 LDFLAGS=-m32
129 os: linux
130 addons:
131 apt:
132 packages: [g++-8, g++-8-multilib, "linux-libc-dev:i386"]
133
134 - name: "Linux GCC 8.X [64-bit] [DBG]"
135 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-8 && CXX=g++-8"
136 os: linux
137 addons:
138 apt:
139 packages: [g++-8]
140
141 - name: "Linux GCC 8.X [64-bit] [REL]"
142 env: BUILD_MATRIX="BUILD_TYPE=Release && CC=gcc-8 && CXX=g++-8"
143 os: linux
144 addons:
145 apt:
146 packages: [g++-8]
147
148 - name: "Linux GCC 9.X [32-bit] [DBG]"
149 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-9 && CXX=g++-9" CXXFLAGS=-m32 LDFLAGS=-m32
150 os: linux
151 addons:
152 apt:
153 sources:
154 - sourceline: "ppa:ubuntu-toolchain-r/test"
155 packages: [g++-9, g++-9-multilib, "linux-libc-dev:i386"]
156
157 - name: "Linux GCC 9.X [32-bit] [REL]"
158 env: BUILD_MATRIX="BUILD_TYPE=Release && CC=gcc-9 && CXX=g++-9" CXXFLAGS=-m32 LDFLAGS=-m32
159 os: linux
160 addons:
161 apt:
162 sources:
163 - sourceline: "ppa:ubuntu-toolchain-r/test"
164 packages: [g++-9, g++-9-multilib, "linux-libc-dev:i386"]
165
166 - name: "Linux GCC 9.X [64-bit] [DBG]"
167 env: BUILD_MATRIX="BUILD_TYPE=Debug && CC=gcc-9 && CXX=g++-9"
168 os: linux
169 addons:
170 apt:
171 sources:
172 - sourceline: "ppa:ubuntu-toolchain-r/test"
173 packages: [g++-9]
174
175 - name: "Linux GCC 9.X [64-bit] [REL]"
176 env: BUILD_MATRIX="BUILD_TYPE=Release && CC=gcc-9 && CXX=g++-9"
177 os: linux
178 addons:
179 apt:
180 sources:
181 - sourceline: "ppa:ubuntu-toolchain-r/test"
182 packages: [g++-9]
183
184 - name: "OSX Clang XCode 9.4 [32-bit] [DBG]"
185 env: BUILD_MATRIX="BUILD_TYPE=Debug" CXXFLAGS=-m32 LDFLAGS=-m32
186 os: osx
187 osx_image: xcode9.4
188
189 - name: "OSX Clang XCode 9.4 [32-bit] [REL]"
190 env: BUILD_MATRIX="BUILD_TYPE=Release" CXXFLAGS=-m32 LDFLAGS=-m32
191 os: osx
192 osx_image: xcode9.4
193
194 - name: "OSX Clang XCode 9.4 [64-bit] [DBG]"
195 env: BUILD_MATRIX="BUILD_TYPE=Debug"
196 os: osx
197 osx_image: xcode9.4
198
199 - name: "OSX Clang XCode 9.4 [64-bit] [REL]"
200 env: BUILD_MATRIX="BUILD_TYPE=Release"
201 os: osx
202 osx_image: xcode9.4
203
204 - name: "OSX Clang XCode 10.2 [64-bit] [DBG]"
205 env: BUILD_MATRIX="BUILD_TYPE=Debug"
206 os: osx
207 osx_image: xcode10.2
208
209 - name: "OSX Clang XCode 10.2 [64-bit] [REL]"
210 env: BUILD_MATRIX="BUILD_TYPE=Release"
211 os: osx
212 osx_image: xcode10.2
213
214 - name: "OSX Clang XCode 11 [64-bit] [DBG]"
215 env: BUILD_MATRIX="BUILD_TYPE=Debug"
216 os: osx
217 osx_image: xcode11
218
219 - name: "OSX Clang XCode 11 [64-bit] [REL]"
220 env: BUILD_MATRIX="BUILD_TYPE=Release"
221 os: osx
222 osx_image: xcode11
223
224 - name: "Windows VS2017 [32-bit] [DBG]"
225 env: BUILD_MATRIX="BUILD_TYPE=Debug" BUILD_TOOLCHAIN="Visual Studio 15 2017"
226 os: windows
227
228 - name: "Windows VS2017 [32-bit] [REL]"
229 env: BUILD_MATRIX="BUILD_TYPE=Release" BUILD_TOOLCHAIN="Visual Studio 15 2017"
230 os: windows
231
232 - name: "Windows VS2017 [64-bit] [DBG]"
233 env: BUILD_MATRIX="BUILD_TYPE=Debug" BUILD_TOOLCHAIN="Visual Studio 15 2017 Win64"
234 os: windows
235
236 - name: "Windows VS2017 [64-bit] [REL]"
237 env: BUILD_MATRIX="BUILD_TYPE=Release" BUILD_TOOLCHAIN="Visual Studio 15 2017 Win64"
238 os: windows
239
240 before_install:
241 - eval "$BUILD_MATRIX"
242
243 before_script:
244 - mkdir build
245 - cd build
246 - |
247 if [[ "$BUILD_TOOLCHAIN" =~ ^Visual\ Studio ]]; then
248 cmake .. -G"${BUILD_TOOLCHAIN}" -DASMJIT_TEST=1 -DASMJIT_SANITIZE="${SANITIZE}"
249 else
250 cmake .. -G"${BUILD_TOOLCHAIN}" -DASMJIT_TEST=1 -DASMJIT_SANITIZE="${SANITIZE}" -DCMAKE_PREFIX_PATH="${MINGW_PATH}" -DCMAKE_BUILD_TYPE="${BUILD_TYPE}"
251 fi
252 - cd ..
253
254 script:
255 - cd build
256 - |
257 if [[ "$BUILD_TOOLCHAIN" =~ ^Visual\ Studio ]]; then
258 cmake --build . --config ${BUILD_TYPE} -- -nologo -v:minimal
259 cd ${BUILD_TYPE}
260 else
261 cmake --build .
262 fi
263
264 - |
265 if [ "$USE_VALGRIND" = "1" ]; then
266 RUN_CMD="valgrind --leak-check=full --show-reachable=yes --track-origins=yes"
267 fi
268
269 - eval "$RUN_CMD ./asmjit_test_unit --quick"
270 - eval "$RUN_CMD ./asmjit_test_opcode > /dev/null"
271 - eval "$RUN_CMD ./asmjit_test_x86_asm"
272 - eval "$RUN_CMD ./asmjit_test_x86_cc"
273 - eval "$RUN_CMD ./asmjit_test_x86_sections"
0 cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
1
2 cmake_policy(PUSH)
3 cmake_policy(SET CMP0063 NEW) # Honor visibility properties.
4
5 include(CheckCXXCompilerFlag)
6
7 # Don't create a project if it was already created by another CMakeLists.txt.
8 # This allows one library to embed another library without making a collision.
9 if (NOT CMAKE_PROJECT_NAME OR "${CMAKE_PROJECT_NAME}" STREQUAL "asmjit")
10 project(asmjit CXX)
11 endif()
12
13 # =============================================================================
14 # [AsmJit - Deprecated]
15 # =============================================================================
16
17 if (DEFINED ASMJIT_BUILD_EMBED)
18 message(DEPRECATION "ASMJIT_BUILD_EMBED is deprecated, use ASMJIT_EMBED")
19 set(ASMJIT_EMBED "${ASMJIT_BUILD_EMBED}")
20 endif()
21
22 if (DEFINED ASMJIT_BUILD_STATIC)
23 message(DEPRECATION "ASMJIT_BUILD_STATIC is deprecated, use ASMJIT_STATIC")
24 set(ASMJIT_STATIC "${ASMJIT_BUILD_STATIC}")
25 endif()
26
27 # =============================================================================
28 # [AsmJit - Configuration]
29 # =============================================================================
30
31 if (NOT DEFINED ASMJIT_EMBED)
32 set(ASMJIT_EMBED FALSE)
33 endif()
34
35 if (NOT DEFINED ASMJIT_STATIC)
36 set(ASMJIT_STATIC ${ASMJIT_EMBED})
37 endif()
38
39 if (NOT DEFINED ASMJIT_BUILD_ARM)
40 set(ASMJIT_BUILD_ARM FALSE)
41 endif()
42
43 if (NOT DEFINED ASMJIT_BUILD_X86)
44 set(ASMJIT_BUILD_X86 FALSE)
45 endif()
46
47 if (NOT DEFINED ASMJIT_TEST)
48 set(ASMJIT_TEST FALSE)
49 endif()
50
51 # EMBED implies STATIC.
52 if (ASMJIT_EMBED AND NOT ASMJIT_STATIC)
53 set(ASMJIT_STATIC TRUE)
54 endif()
55
56 set(ASMJIT_DIR "${CMAKE_CURRENT_LIST_DIR}" CACHE PATH "Location of 'asmjit'")
57 set(ASMJIT_TEST ${ASMJIT_TEST} CACHE BOOL "Build 'asmjit' test applications")
58 set(ASMJIT_EMBED ${ASMJIT_EMBED} CACHE BOOL "Embed 'asmjit' library (no targets)")
59 set(ASMJIT_STATIC ${ASMJIT_STATIC} CACHE BOOL "Build 'asmjit' library as static")
60 set(ASMJIT_SANITIZE ${ASMJIT_SANITIZE} CACHE BOOL "Build with C/C++ sanitizers enabled")
61 set(ASMJIT_BUILD_X86 ${ASMJIT_BUILD_X86} CACHE BOOL "Build X86 backends (X86 and X86_64)")
62 set(ASMJIT_BUILD_ARM ${ASMJIT_BUILD_ARM} CACHE BOOL "Build ARM backends")
63
64 # =============================================================================
65 # [AsmJit - Project]
66 # =============================================================================
67
68 set(ASMJIT_INCLUDE_DIRS "${ASMJIT_DIR}/src") # Include directory is the same as source dir.
69 set(ASMJIT_DEPS "") # AsmJit dependencies (libraries) for the linker.
70 set(ASMJIT_LIBS "") # Dependencies of libs/apps that want to use AsmJit.
71 set(ASMJIT_CFLAGS "") # Public compiler flags.
72 set(ASMJIT_PRIVATE_CFLAGS "") # Private compiler flags independent of build type.
73 set(ASMJIT_PRIVATE_CFLAGS_DBG "") # Private compiler flags used by debug builds.
74 set(ASMJIT_PRIVATE_CFLAGS_REL "") # Private compiler flags used by release builds.
75 set(ASMJIT_SANITIZE_CFLAGS "") # Compiler flags required by currently enabled sanitizers.
76 set(ASMJIT_SANITIZE_LFLAGS "") # Linker flags required by currently enabled sanitizers.
77
78 # =============================================================================
79 # [AsmJit - Utilities]
80 # =============================================================================
81
# Appends to the list variable named by `out` every compiler flag from ARGN
# that the active C++ compiler accepts (probed via check_cxx_compiler_flag).
function(asmjit_detect_cflags out)
  set(accepted ${${out}})

  foreach(candidate ${ARGN})
    # Derive a cache-variable-safe signature from the flag text; the probe
    # result is cached as __CxxFlag_<signature>.
    string(REGEX REPLACE "[+]" "x" sig "${candidate}")
    string(REGEX REPLACE "[-=:;/.\]" "_" sig "${sig}")

    check_cxx_compiler_flag(${candidate} "__CxxFlag_${sig}")
    if (${__CxxFlag_${sig}})
      list(APPEND accepted "${candidate}")
    endif()
  endforeach()

  set(${out} "${accepted}" PARENT_SCOPE)
endfunction()
94
95 # Support for various sanitizers provided by C/C++ compilers.
# Checks which sanitizers given in ARGN (names may be comma-separated) are
# supported by the C/C++ compiler and appends the matching `-fsanitize=...`
# flags to the list variable named by `out`.
function(asmjit_detect_sanitizers out)
  set(found ${${out}})
  set(requested "")

  # Each argument may itself be a comma-separated list of sanitizer names.
  foreach(item ${ARGN})
    string(REPLACE "," ";" item "${item}")
    list(APPEND requested ${item})
  endforeach()

  foreach(san ${requested})
    # Accept either a bare name ("address") or a full flag ("-fsanitize=address").
    if (NOT "${san}" MATCHES "^-fsanitize=")
      set(san "-fsanitize=${san}")
    endif()

    # Sanitizers also require link flags, see CMAKE_REQUIRED_FLAGS.
    set(CMAKE_REQUIRED_FLAGS "${san}")
    asmjit_detect_cflags(found ${san})
    unset(CMAKE_REQUIRED_FLAGS)
  endforeach()

  set(${out} "${found}" PARENT_SCOPE)
endfunction()
118
# Creates an executable or library target and applies the project's common
# settings: dependencies, sanitizer link flags, C++11 (no extensions, hidden
# visibility), and per-configuration compile options.
function(asmjit_add_target target target_type src deps cflags cflags_dbg cflags_rel)
  if ("${target_type}" STREQUAL "EXECUTABLE")
    add_executable(${target} ${src})
  else()
    add_library(${target} ${target_type} ${src})
  endif()

  target_link_libraries(${target} PRIVATE ${deps})

  # target_link_options was added in cmake 3.13, which doesn't work for us.
  # target_link_options(${target} PRIVATE ${flags})
  foreach(lflag ${ASMJIT_SANITIZE_LFLAGS})
    set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " ${lflag}")
  endforeach()

  # Require C++11; target_compile_features with meta-features needs CMake 3.8+.
  if (NOT ${CMAKE_VERSION} VERSION_LESS "3.8.0")
    target_compile_features(${target} PUBLIC cxx_std_11)
  else()
    set_property(TARGET ${target} PROPERTY CXX_STANDARD 11)
  endif()

  set_property(TARGET ${target} PROPERTY CXX_EXTENSIONS NO)
  set_property(TARGET ${target} PROPERTY CXX_VISIBILITY_PRESET hidden)
  target_compile_options(${target} PRIVATE ${cflags} ${ASMJIT_SANITIZE_CFLAGS} $<$<CONFIG:Debug>:${cflags_dbg}> $<$<NOT:$<CONFIG:Debug>>:${cflags_rel}>)
endfunction()
143
144 # =============================================================================
145 # [AsmJit - Compiler Support]
146 # =============================================================================
147
# NOTE(review): this section previously re-initialized ASMJIT_INCLUDE_DIRS,
# ASMJIT_DEPS, ASMJIT_LIBS, ASMJIT_CFLAGS, ASMJIT_PRIVATE_CFLAGS,
# ASMJIT_PRIVATE_CFLAGS_DBG, ASMJIT_PRIVATE_CFLAGS_REL,
# ASMJIT_SANITIZE_CFLAGS and ASMJIT_SANITIZE_LFLAGS, duplicating the
# identical initialization in the [AsmJit - Project] section above. Nothing
# modifies these variables in between (only functions are defined there), so
# the duplicated block was removed; only the backward-compatibility alias is
# kept.

# TODO: Backward compatibility.
set(ASMJIT_INCLUDE_DIR "${ASMJIT_INCLUDE_DIRS}")
160
161 if (NOT ASMJIT_NO_CUSTOM_FLAGS)
162 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
163 list(APPEND ASMJIT_PRIVATE_CFLAGS
164 -MP # [+] Multi-Process Compilation.
165 -GR- # [-] Runtime type information.
166 -GF # [+] Eliminate duplicate strings.
167 -Zc:inline # [+] Remove unreferenced COMDAT.
168 -Zc:strictStrings # [+] Strict const qualification of string literals.
169 -Zc:threadSafeInit- # [-] Thread-safe statics.
170 -W4) # [+] Warning level 4.
171
172 list(APPEND ASMJIT_PRIVATE_CFLAGS_DBG
173 -GS) # [+] Buffer security-check.
174
175 list(APPEND ASMJIT_PRIVATE_CFLAGS_REL
176 -GS- # [-] Buffer security-check.
177 -O2 # [+] Favor speed over size.
178 -Oi) # [+] Generate intrinsic functions.
179 elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "^(GNU|Clang|AppleClang)$")
180 list(APPEND ASMJIT_PRIVATE_CFLAGS -Wall -Wextra)
181 list(APPEND ASMJIT_PRIVATE_CFLAGS -fno-math-errno)
182 list(APPEND ASMJIT_PRIVATE_CFLAGS_REL -O2)
183
184 asmjit_detect_cflags(ASMJIT_PRIVATE_CFLAGS
185 -fno-threadsafe-statics
186 -fno-semantic-interposition)
187
188 asmjit_detect_cflags(ASMJIT_PRIVATE_CFLAGS_REL
189 -fmerge-all-constants)
190 endif()
191 endif()
192
# Support for sanitizers. ASMJIT_SANITIZE holds sanitizer names (possibly
# comma-separated); only the ones the compiler accepts are enabled.
if (ASMJIT_SANITIZE)
  # Consistency fix: call the function with its declared (lowercase) name;
  # CMake command names are case-insensitive, but the rest of the file uses
  # lowercase calls.
  asmjit_detect_sanitizers(ASMJIT_SANITIZE_CFLAGS ${ASMJIT_SANITIZE})
  if (ASMJIT_SANITIZE_CFLAGS)
    message("-- Enabling sanitizers: '${ASMJIT_SANITIZE_CFLAGS}'")

    # Linker must receive the same flags as the compiler when it comes to sanitizers.
    set(ASMJIT_SANITIZE_LFLAGS ${ASMJIT_SANITIZE_CFLAGS})

    # Don't omit frame pointer if sanitizers are enabled.
    if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" OR "x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
      list(APPEND ASMJIT_SANITIZE_CFLAGS -Oy-)
    else()
      list(APPEND ASMJIT_SANITIZE_CFLAGS -fno-omit-frame-pointer -g)
    endif()

    list(APPEND ASMJIT_PRIVATE_CFLAGS ${ASMJIT_SANITIZE_CFLAGS})
    # NOTE(review): ASMJIT_PRIVATE_LFLAGS is appended here but never consumed
    # anywhere in this file (targets read ASMJIT_SANITIZE_LFLAGS directly in
    # asmjit_add_target) — verify whether this variable is still needed.
    list(APPEND ASMJIT_PRIVATE_LFLAGS ${ASMJIT_SANITIZE_LFLAGS})
  endif()
endif()
213
214 if (NOT WIN32)
215 list(APPEND ASMJIT_DEPS pthread)
216 endif()
217
218 if ("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
219 list(APPEND ASMJIT_DEPS rt)
220 endif()
221
222 set(ASMJIT_LIBS ${ASMJIT_DEPS})
223 if (NOT ASMJIT_EMBED)
224 list(INSERT ASMJIT_LIBS 0 asmjit)
225 endif()
226
227 if (ASMJIT_EMBED)
228 set(ASMJIT_TARGET_TYPE "EMBED")
229 elseif (ASMJIT_STATIC)
230 set(ASMJIT_TARGET_TYPE "STATIC")
231 else()
232 set(ASMJIT_TARGET_TYPE "SHARED")
233 endif()
234
# Propagate enabled build options as -D definitions to both the public
# (ASMJIT_CFLAGS) and private (ASMJIT_PRIVATE_CFLAGS) compiler flags.
# NOTE(review): ASMJIT_BUILD_A64 is listed while ASMJIT_BUILD_ARM is commented
# out — presumably intentional while the ARM backend is work-in-progress;
# confirm against the backend sources.
foreach(build_option ASMJIT_STATIC
                     ASMJIT_BUILD_X86
                     #ASMJIT_BUILD_ARM
                     ASMJIT_BUILD_A64
                     ASMJIT_NO_JIT
                     ASMJIT_NO_LOGGING
                     ASMJIT_NO_BUILDER
                     ASMJIT_NO_COMPILER
                     ASMJIT_NO_TEXT
                     ASMJIT_NO_VALIDATION
                     ASMJIT_NO_INTROSPECTION)
  if (${build_option})
    # Consistency fix: lowercase list() like every other command in this file.
    list(APPEND ASMJIT_CFLAGS "-D${build_option}")
    list(APPEND ASMJIT_PRIVATE_CFLAGS "-D${build_option}")
  endif()
endforeach()
251
252 # =============================================================================
253 # [AsmJit - Source]
254 # =============================================================================
255
256 set(ASMJIT_SRC_LIST
257 asmjit/asmjit.h
258
259 asmjit/core.h
260 asmjit/core/api-build_p.h
261 asmjit/core/api-config.h
262 asmjit/core/arch.cpp
263 asmjit/core/arch.h
264 asmjit/core/assembler.cpp
265 asmjit/core/assembler.h
266 asmjit/core/builder.cpp
267 asmjit/core/builder.h
268 asmjit/core/callconv.cpp
269 asmjit/core/callconv.h
270 asmjit/core/codebufferwriter_p.h
271 asmjit/core/codeholder.cpp
272 asmjit/core/codeholder.h
273 asmjit/core/compiler.cpp
274 asmjit/core/compiler.h
275 asmjit/core/constpool.cpp
276 asmjit/core/constpool.h
277 asmjit/core/cpuinfo.cpp
278 asmjit/core/cpuinfo.h
279 asmjit/core/datatypes.h
280 asmjit/core/emitter.cpp
281 asmjit/core/emitter.h
282 asmjit/core/features.h
283 asmjit/core/func.cpp
284 asmjit/core/func.h
285 asmjit/core/globals.cpp
286 asmjit/core/globals.h
287 asmjit/core/inst.cpp
288 asmjit/core/inst.h
289 asmjit/core/jitallocator.cpp
290 asmjit/core/jitallocator.h
291 asmjit/core/jitruntime.cpp
292 asmjit/core/jitruntime.h
293 asmjit/core/logging.cpp
294 asmjit/core/logging.h
295 asmjit/core/misc_p.h
296 asmjit/core/operand.cpp
297 asmjit/core/operand.h
298 asmjit/core/osutils.cpp
299 asmjit/core/osutils.h
300 asmjit/core/raassignment_p.h
301 asmjit/core/rabuilders_p.h
302 asmjit/core/radefs_p.h
303 asmjit/core/ralocal.cpp
304 asmjit/core/ralocal_p.h
305 asmjit/core/rapass.cpp
306 asmjit/core/rapass_p.h
307 asmjit/core/rastack.cpp
308 asmjit/core/rastack_p.h
309 asmjit/core/string.cpp
310 asmjit/core/string.h
311 asmjit/core/support.cpp
312 asmjit/core/support.h
313 asmjit/core/target.cpp
314 asmjit/core/target.h
315 asmjit/core/type.cpp
316 asmjit/core/type.h
317 asmjit/core/virtmem.cpp
318 asmjit/core/virtmem.h
319 asmjit/core/zone.cpp
320 asmjit/core/zone.h
321 asmjit/core/zonehash.cpp
322 asmjit/core/zonehash.h
323 asmjit/core/zonelist.cpp
324 asmjit/core/zonelist.h
325 asmjit/core/zonestack.cpp
326 asmjit/core/zonestack.h
327 asmjit/core/zonestring.h
328 asmjit/core/zonetree.cpp
329 asmjit/core/zonetree.h
330 asmjit/core/zonevector.cpp
331 asmjit/core/zonevector.h
332
333 asmjit/x86.h
334 asmjit/x86/x86assembler.cpp
335 asmjit/x86/x86assembler.h
336 asmjit/x86/x86builder.cpp
337 asmjit/x86/x86builder.h
338 asmjit/x86/x86callconv.cpp
339 asmjit/x86/x86callconv_p.h
340 asmjit/x86/x86compiler.cpp
341 asmjit/x86/x86compiler.h
342 asmjit/x86/x86emitter.h
343 asmjit/x86/x86features.cpp
344 asmjit/x86/x86features.h
345 asmjit/x86/x86globals.h
346 asmjit/x86/x86internal.cpp
347 asmjit/x86/x86internal_p.h
348 asmjit/x86/x86instdb.cpp
349 asmjit/x86/x86instdb.h
350 asmjit/x86/x86instdb_p.h
351 asmjit/x86/x86instapi.cpp
352 asmjit/x86/x86instapi_p.h
353 asmjit/x86/x86logging.cpp
354 asmjit/x86/x86logging_p.h
355 asmjit/x86/x86operand.cpp
356 asmjit/x86/x86operand.h
357 asmjit/x86/x86rapass.cpp
358 asmjit/x86/x86rapass_p.h
359 )
360
361 #if (MSVC)
362 # list(APPEND ASMJIT_SRC_LIST asmjit.natvis)
363 #endif()
364
365 set(ASMJIT_SRC "")
366 foreach(_src_file ${ASMJIT_SRC_LIST})
367 list(APPEND ASMJIT_SRC "${ASMJIT_DIR}/src/${_src_file}")
368 endforeach()
369 if (NOT ${CMAKE_VERSION} VERSION_LESS "3.8.0")
370 source_group(TREE "${ASMJIT_DIR}" FILES ${ASMJIT_SRC})
371 endif()
372
373 # =============================================================================
374 # [AsmJit - Summary]
375 # =============================================================================
376
377 message("** AsmJit Summary **")
378 message(" ASMJIT_DIR=${ASMJIT_DIR}")
379 message(" ASMJIT_TEST=${ASMJIT_TEST}")
380 message(" ASMJIT_TARGET_TYPE=${ASMJIT_TARGET_TYPE}")
381 message(" ASMJIT_DEPS=${ASMJIT_DEPS}")
382 message(" ASMJIT_LIBS=${ASMJIT_LIBS}")
383 message(" ASMJIT_CFLAGS=${ASMJIT_CFLAGS}")
384 message(" ASMJIT_PRIVATE_CFLAGS=${ASMJIT_PRIVATE_CFLAGS}")
385 message(" ASMJIT_PRIVATE_CFLAGS_DBG=${ASMJIT_PRIVATE_CFLAGS_DBG}")
386 message(" ASMJIT_PRIVATE_CFLAGS_REL=${ASMJIT_PRIVATE_CFLAGS_REL}")
387
388 # =============================================================================
389 # [AsmJit - Targets]
390 # =============================================================================
391
392 if (NOT ASMJIT_EMBED)
393 # Add 'asmjit' target.
394 asmjit_add_target(asmjit "${ASMJIT_TARGET_TYPE}"
395 "${ASMJIT_SRC}"
396 "${ASMJIT_DEPS}"
397 "${ASMJIT_PRIVATE_CFLAGS}"
398 "${ASMJIT_PRIVATE_CFLAGS_DBG}"
399 "${ASMJIT_PRIVATE_CFLAGS_REL}")
400 target_include_directories(asmjit BEFORE INTERFACE ${ASMJIT_INCLUDE_DIRS})
401 target_compile_options(asmjit INTERFACE ${ASMJIT_CFLAGS})
402
403 # Add AsmJit::AsmJit target (alias to asmjit).
404 add_library(AsmJit::AsmJit ALIAS asmjit)
405
406 # Install 'asmjit' target (shared or static).
407 install(TARGETS asmjit RUNTIME DESTINATION "bin"
408 LIBRARY DESTINATION "lib${LIB_SUFFIX}"
409 ARCHIVE DESTINATION "lib${LIB_SUFFIX}")
410
411 # Install 'asmjit' header files (private headers are filtered out).
412 foreach(_src_file ${ASMJIT_SRC_LIST})
413 if ("${_src_file}" MATCHES "\\.h$" AND NOT "${_src_file}" MATCHES "_p\\.h$")
414 get_filename_component(_src_dir ${_src_file} PATH)
415 install(FILES "${ASMJIT_DIR}/src/${_src_file}" DESTINATION "include/${_src_dir}")
416 endif()
417 endforeach()
418
419 # Add 'asmjit' tests.
420 if (ASMJIT_TEST)
421 set(ASMJIT_TEST_SRC test/asmjit_test_unit.cpp test/broken.cpp test/broken.h)
422
423 asmjit_add_target(asmjit_test_unit EXECUTABLE
424 "${ASMJIT_SRC};${ASMJIT_TEST_SRC}"
425 "${ASMJIT_DEPS}"
426 "${ASMJIT_PRIVATE_CFLAGS}"
427 "${ASMJIT_PRIVATE_CFLAGS_DBG}"
428 "${ASMJIT_PRIVATE_CFLAGS_REL}")
429 target_compile_definitions(asmjit_test_unit PRIVATE ASMJIT_TEST ASMJIT_STATIC)
430
431 foreach(_target asmjit_bench_x86
432 asmjit_test_opcode
433 asmjit_test_x86_asm
434 asmjit_test_x86_cc
435 asmjit_test_x86_sections)
436 asmjit_add_target(${_target} EXECUTABLE
437 "test/${_target}.cpp"
438 "${ASMJIT_LIBS}"
439 "${ASMJIT_PRIVATE_CFLAGS}"
440 "${ASMJIT_PRIVATE_CFLAGS_DBG}"
441 "${ASMJIT_PRIVATE_CFLAGS_REL}")
442 endforeach()
443 endif()
444 endif()
445
446 cmake_policy(POP)
0 Copyright (c) 2008-2019, Petr Kobalicek
1
2 This software is provided 'as-is', without any express or implied
3 warranty. In no event will the authors be held liable for any damages
4 arising from the use of this software.
5
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it
8 freely, subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not
11 claim that you wrote the original software. If you use this software
12 in a product, an acknowledgment in the product documentation would be
13 appreciated but is not required.
14 2. Altered source versions must be plainly marked as such, and must not be
15 misrepresented as being the original software.
16 3. This notice may not be removed or altered from any source distribution.
0 AsmJit
1 ------
2
3 Complete x86/x64 JIT and AOT Assembler for C++.
4
5 * [Official Repository (asmjit/asmjit)](https://github.com/asmjit/asmjit)
6 * [Official Blog (asmbits)](https://asmbits.blogspot.com/ncr)
7 * [Official Chat (gitter)](https://gitter.im/asmjit/asmjit)
8 * [Permissive ZLIB license](./LICENSE.md)
9
10
11 Introduction
12 ------------
13
14 AsmJit is a complete JIT and AOT assembler for C++ language. It can generate native code for x86 and x64 architectures and supports the whole x86/x64 instruction set - from legacy MMX to the newest AVX512. It has a type-safe API that allows C++ compiler to do semantic checks at compile-time even before the assembled code is generated and/or executed.
15
16 AsmJit, as the name implies, started as a project that provided JIT code-generation and execution. However, AsmJit evolved and it now contains features that are far beyond the scope of a simple JIT compilation. To keep the library small and lightweight the functionality not strictly related to JIT is provided by a sister project called [asmtk](https://github.com/asmjit/asmtk).
17
18
19 Minimal Example
20 ---------------
21
22 ```c++
23 #include <asmjit/asmjit.h>
24 #include <stdio.h>
25
26 using namespace asmjit;
27
28 // Signature of the generated function.
29 typedef int (*Func)(void);
30
31 int main(int argc, char* argv[]) {
32 JitRuntime rt; // Runtime specialized for JIT code execution.
33
34 CodeHolder code; // Holds code and relocation information.
35 code.init(rt.codeInfo()); // Initialize to the same arch as JIT runtime.
36
37 x86::Assembler a(&code); // Create and attach x86::Assembler to `code`.
38 a.mov(x86::eax, 1); // Move one to 'eax' register.
39 a.ret(); // Return from function.
40 // ----> x86::Assembler is no longer needed from here and can be destroyed <----
41
42 Func fn;
43 Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
44 if (err) return 1; // Handle a possible error returned by AsmJit.
45 // ----> CodeHolder is no longer needed from here and can be destroyed <----
46
47 int result = fn(); // Execute the generated code.
48 printf("%d\n", result); // Print the resulting "1".
49
50 // All classes use RAII, all resources will be released before `main()` returns,
51 // the generated function can be, however, released explicitly if you intend to
52 // reuse or keep the runtime alive, which you should in a production-ready code.
53 rt.release(fn);
54
55 return 0;
56 }
57 ```
58
59
60 AsmJit Summary
61 --------------
62
63 * Complete x86/x64 instruction set - MMX, SSE+, BMI+, ADX, TBM, XOP, AVX+, FMA+, and AVX512+.
64 * Different emitters providing various abstraction levels (Assembler, Builder, Compiler).
65 * Support for sections for separating code and data.
66 * Built-in CPU vendor and features detection.
67 * Advanced logging, formatting, and error handling.
68 * JIT memory allocator - interface similar to malloc/free for JIT code-generation and execution.
69 * Lightweight and easily embeddable - ~300kB compiled with all built-in features.
70 * Modular design - unneeded features can be disabled at compile-time to make the library smaller.
71 * Zero dependencies - no external libraries, no STL/RTTI - easy to embed and/or link statically.
* Doesn't use exceptions internally, but allows attaching a "throwable" error handler of your choice.
73
74
75 Advanced Features
76 -----------------
77
78 * AsmJit contains a highly compressed instruction database:
  * Instruction names - allows converting an instruction id to its name and vice versa.
  * Instruction metadata - access (read|write) of all operand combinations of all instructions.
  * Instruction signatures - allows strictly validating whether an instruction (with all its operands) is valid.
* AsmJit allows precise control over how instructions are encoded if there are multiple variations.
83 * AsmJit is highly dynamic, constructing operands at runtime is a common practice.
84 * Multiple emitters with the same interface - emit machine code directly or to a representation that can be post-processed.
85
86
87 Important
88 ---------
89
90 Breaking the official API is sometimes inevitable, what to do?
91 * See asmjit tests, they always compile and provide an implementation of a lot of use-cases:
92 * [asmjit_test_x86_asm.cpp](./test/asmjit_test_x86_asm.cpp) - Tests that demonstrate the purpose of emitters.
93 * [asmjit_test_x86_cc.cpp](./test/asmjit_test_x86_cc.cpp) - A lot of tests targeting Compiler infrastructure.
94 * [asmjit_test_x86_sections.cpp](./test/asmjit_test_x86_sections.cpp) - Multiple sections test.
* Visit our [Official Chat](https://gitter.im/asmjit/asmjit) if you need quick help.
96
97
98 TODO
99 ----
100
101 * [ ] Add support for user external buffers in CodeHolder.
102
103
104 Supported Environments
105 ----------------------
106
107 ### C++ Compilers:
108
109 * Requirements:
  * AsmJit won't build without C++11 enabled. If you use an older GCC or Clang, you will have to enable at least C++11 through compiler flags.
111 * Tested:
112 * **Clang** - tested by Travis-CI - Clang 3.9+ (with C++11 enabled) is officially supported (older Clang versions having C++11 support are probably fine, but are not regularly tested).
113 * **GNU** - tested by Travis-CI - GCC 4.8+ (with C++11 enabled) is officially supported.
114 * **MINGW** - tested by Travis-CI - Use the latest version, if possible.
115 * **MSVC** - tested by Travis-CI - VS2017+ is officially supported, VS2015 is reported to work.
116 * Untested:
117 * **Intel** - no maintainers and no CI environment to regularly test this compiler.
  * Other C++ compilers would require basic support in [core/build.h](./src/asmjit/core/build.h).
119
120 ### Operating Systems:
121
122 * Tested:
123 * **Linux** - tested by Travis-CI - any distribution is generally supported.
124 * **OSX** - tested by Travis-CI - any version is supported.
125 * **Windows** - tested by Travis-CI - Windows 7+ is officially supported.
126 * Untested:
127 * **BSDs** - no maintainers, no CI environment to regularly test these OSes.
128 * **Haiku** - not regularly tested, but reported to work.
129 * Other operating systems would require some testing and support in [core/build.h](./src/asmjit/core/build.h), [core/osutils.cpp](./src/asmjit/core/osutils.cpp), and [core/virtmem.cpp](./src/asmjit/core/virtmem.cpp).
130
131 ### Backends:
132
* **X86** - tested by Travis-CI - both 32-bit and 64-bit backends are fully functional.
134 * **ARM** - work-in-progress (not public at the moment).
135
136
137 Project Organization
138 --------------------
139
140 * **`/`** - Project root.
141 * **src** - Source code.
142 * **asmjit** - Source code and headers (always point include path in here).
143 * **core** - Core API, backend independent except relocations.
144 * **arm** - ARM specific API, used only by ARM and AArch64 backends.
145 * **x86** - X86 specific API, used only by X86 and X64 backends.
146 * **test** - Unit and integration tests (don't embed in your project).
147 * **tools** - Tools used for configuring, documenting and generating data files.
148
149
150 Configuring & Feature Selection
151 -------------------------------
152
153 AsmJit is designed to be easily embeddable in any project. However, it depends on some compile-time macros that can be used to build a specific version of AsmJit that includes or excludes certain features. A typical way of building AsmJit is to use [cmake](https://www.cmake.org), but it's also possible to just include AsmJit source code in your project and just build it. The easiest way to include AsmJit in your project is to just include **src** directory in your project and to define `ASMJIT_STATIC`. AsmJit can be just updated from time to time without any changes to this integration process. Do not embed AsmJit's [/test](./test) files in such case as these are used for testing.
154
155 ### Build Type:
156
157 * `ASMJIT_BUILD_DEBUG` - Define to always turn debugging on (regardless of compile-time options detected).
158 * `ASMJIT_BUILD_RELEASE` - Define to always turn debugging off (regardless of compile-time options detected).
159
160 By default none of these is defined, AsmJit detects build-type based on compile-time macros and supports most IDE and compiler settings out of box. By default AsmJit switches to release mode when `NDEBUG` is defined.
161
162 ### Build Mode:
163
164 * `ASMJIT_STATIC` - Define to build AsmJit statically - either as a static library or as a part of another project. No symbols are exported in such case.
165
166 By default AsmJit build is configured to be built as a shared library, this means `ASMJIT_STATIC` must be explicitly enabled if you want to compile AsmJit statically.
167
168 ### Build Backends:
169
170 * `ASMJIT_BUILD_ARM` - Build ARM backends (not ready, work-in-progress).
171 * `ASMJIT_BUILD_X86` - Build X86 backends (X86 and X86_64).
172 * `ASMJIT_BUILD_HOST` - Build only the host backend (default).
173
174 If none of `ASMJIT_BUILD_...` is defined AsmJit falls back to `ASMJIT_BUILD_HOST`, which will detect the target architecture at compile-time. Each backend automatically supports 32-bit and 64-bit targets, so for example AsmJit with X86 support can generate both 32-bit and 64-bit code.
175
176 ### Disabling Features:
177
178 * `ASMJIT_NO_BUILDER` - Disables both `Builder` and `Compiler` emitters (only `Assembler` will be available). Ideal for users that don't use `Builder` concept and want to have AsmJit a bit smaller.
179 * `ASMJIT_NO_COMPILER` - Disables `Compiler` emitter. For users that use `Builder`, but not `Compiler`.
180 * `ASMJIT_NO_JIT` - Disables JIT execution engine, which includes `JitUtils`, `JitAllocator`, and `JitRuntime`.
181 * `ASMJIT_NO_LOGGING` - Disables logging (`Logger` and all classes that inherit it) and instruction formatting.
182 * `ASMJIT_NO_TEXT` - Disables everything that uses text-representation and that causes certain strings to be stored in the resulting binary. For example when this flag is set all instruction and error names (and related APIs) will not be available. This flag can only be used together with `ASMJIT_NO_LOGGING`. This option is suitable for deployment builds or builds that don't want to reveal the use of AsmJit.
183 * `ASMJIT_NO_INST_API` - Disables instruction query features, strict validation, read/write information, and all additional data and APIs that can output information about instructions.
184
185 NOTE: Please don't disable any features if you plan to build AsmJit as a shared library that will be used by multiple projects that you don't control (for example asmjit in a Linux distribution). The possibility to disable certain features exists mainly for customized builds of AsmJit.
186
187
188 Using AsmJit
189 ------------
190
191 AsmJit library uses one global namespace called `asmjit` that provides the whole functionality. Architecture specific code is prefixed by the architecture name and architecture specific registers and operand builders have their own namespace. For example API targeting both X86 and X64 architectures is prefixed with `X86` and registers & operand builders are accessible through `x86` namespace. This design is very different from the initial version of AsmJit and it seems now as the most convenient one.
192
193 ### CodeHolder & Emitters
194
195 AsmJit provides two classes that are used together for code generation:
196
197 * `CodeHolder` - Provides functionality to hold generated code and stores all necessary information about code sections, labels, symbols, and possible relocations.
198 * `BaseEmitter` - Provides functionality to emit code into `CodeHolder`. `BaseEmitter` is abstract and provides just basic building blocks that are then implemented by `BaseAssembler`, `BaseBuilder`, `BaseCompiler`, and their architecture-specific implementations like `x86::Assembler`, `x86::Builder`, and `x86::Compiler`.
199
200 Code emitters:
201
202 * `[Base]Assembler` - Emitter designed to emit machine code directly into a `CodeBuffer` held by `CodeHolder`.
203 * `[Base]Builder` - Emitter designed to emit code into a representation that can be processed afterwards. It stores the whole code in a double linked list consisting of nodes (`BaseNode` and all derived classes). There are nodes that represent instructions (`InstNode`), labels (`LabelNode`), and other building blocks (`AlignNode`, `DataNode`, ...). Some nodes are used as markers (`SentinelNode`) and comments (`CommentNode`).
204 * `[Base]Compiler` - High-level code emitter that uses virtual registers and contains high-level function building features. Compiler extends `[Base]Builder` functionality and introduces new nodes like `FuncNode`, `FuncRetNode`, and `FuncCallNode`. Compiler is the simplest way to start with AsmJit as it abstracts lots of details required to generate a function that can be called from a C/C++ language.
205
206 ### Targets and JitRuntime
207
208 AsmJit's `Target` class is an interface that provides basic target abstraction. At the moment only one implementation called `JitRuntime` is provided, which as the name suggests provides JIT code target and execution runtime. `JitRuntime` provides all the necessary functionality to implement a simple JIT functionality with basic memory management. It only provides `add()` and `release()` functions that are used to either add code to the runtime or release it. The `JitRuntime` doesn't do any decisions on when the code should be released. Once you add new code into it you must decide when that code is no longer needed and should be released.
209
210 ### Instructions & Operands
211
212 Instructions specify operations performed by the CPU, and operands specify the operation's input(s) and output(s). Each AsmJit's instruction has its own unique id (`Inst::Id` for example) and platform specific code emitters always provide a type safe intrinsic (or multiple overloads) to emit such instruction. There are two ways of emitting an instruction:
213
214 * Using `BaseEmitter::inst(operands...)` - A type-safe way provided by platform specific emitters - for example `x86::Assembler` provides `x86::Assembler::mov(x86::Gp, x86::Gp)`.
215 * Using `BaseEmitter::emit(instId, operands...)` - Allows to emit an instruction in a dynamic way - you just need to know instruction's id and provide its operands.
216
217 AsmJit's operands all inherit from a base class called `Operand` and then specialize its type to:
218
219 * **None** (not used or uninitialized operand).
220 * **Register** (`BaseReg`) - Describes either physical or virtual register. Physical registers have id that matches the target's machine id directly whereas virtual registers must be allocated into physical registers by a register allocator pass. Register operand provides:
221 * **Register Type** - Unique id that describes each possible register provided by the target architecture - for example X86 backend provides `x86::Reg::RegType`, which defines all variations of general purpose registers (GPB-LO, GPB-HI, GPW, GPD, and GPQ) and all types of other registers like K, MM, BND, XMM, YMM, and ZMM.
222 * **Register Group** - Groups multiple register types under a single group - for example all general-purpose registers (of all sizes) on X86 are `x86::Reg::kGroupGp`, all SIMD registers (XMM, YMM, ZMM) are `x86::Reg::kGroupVec`, etc.
223 * **Register Size** - Contains the size of the register in bytes. If the size depends on the mode (32-bit vs 64-bit) then generally the higher size is used (for example RIP register has size 8 by default).
224 * **Register ID** - Contains physical or virtual id of the register.
225 * Each architecture provides its own register that adds an architecture-specific API to `BaseReg`.
226 * **Memory Address** (`BaseMem`) - Used to reference a memory location. Memory operand provides:
227 * **Base Register** - A base register type and id (physical or virtual).
228 * **Index Register** - An index register type and id (physical or virtual).
229 * **Offset** - Displacement or absolute address to be referenced (32-bit if base register is used and 64-bit if base register is not used).
230 * **Flags** that can describe various architecture dependent information (like scale and segment-override on X86).
231 * Each architecture provides its own register that adds an architecture-specific API to `BaseMem`.
232 * **Immediate Value** (`Imm`) - Immediate values are usually part of instructions (encoded within the instruction itself) or data.
233 * **Label** - used to reference a location in code or data. Labels must be created by the `BaseEmitter` or by `CodeHolder`. Each label has its unique id per `CodeHolder` instance.
234
235 AsmJit allows to construct operands dynamically, to store them, and to query a complete information about them at run-time. Operands are small (always 16 bytes per `Operand`) and should be always copied (by value) if you intend to store them (don't create operands by using `new` keyword, it's not recommended). Operands are safe to be `memcpy()`ed and `memset()`ed if you need to work with arrays of operands.
236
237 Small example of manipulating and using operands:
238
239 ```c++
240 #include <asmjit/asmjit.h>
241
242 using namespace asmjit;
243
244 x86::Gp dstRegByValue() { return x86::ecx; }
245
246 void usingOperandsExample(x86::Assembler& a) {
247 // Create some operands.
248 x86::Gp dst = dstRegByValue(); // Get `ecx` register returned by a function.
249 x86::Gp src = x86::rax; // Get `rax` register directly from the provided `x86` namespace.
250 x86::Gp idx = x86::gpq(10); // Construct `r10` dynamically.
251 x86::Mem m = x86::ptr(src, idx); // Construct [src + idx] memory address - referencing [rax + r10].
252
253 // Examine `m`:
254 m.indexType(); // Returns `x86::Reg::kTypeGpq`.
255 m.indexId(); // Returns 10 (`r10`).
256
257 // Reconstruct `idx` stored in mem:
258 x86::Gp idx_2 = x86::Gp::fromTypeAndId(m.indexType(), m.indexId());
259 idx == idx_2; // True, `idx` and `idx_2` are identical.
260
261 Operand op = m; // Possible.
262 op.isMem(); // True (can be casted to BaseMem or architecture-specific Mem).
263
264 m == op; // True, `op` is just a copy of `m`.
265 static_cast<BaseMem&>(op).addOffset(1); // Static cast is fine and valid here.
266 op.as<BaseMem>().addOffset(1); // However, using `as<T>()` to cast to a derived type is preferred.
267 m == op; // False, `op` now points to [rax + r10 + 1], which is not [rax + r10].
268
269 // Emitting 'mov'
270 a.mov(dst, m); // Type-safe way.
271 a.mov(dst, op); // Not possible, `mov` doesn't provide `mov(x86::Gp, Operand)` overload.
272
273 a.emit(x86::Inst::kIdMov, dst, m); // Type-unsafe, but possible.
274 a.emit(x86::Inst::kIdMov, dst, op); // Also possible, `emit()` is typeless and can be used with raw `Operand`s.
275 }
276 ```
277
278 Some operands have to be created explicitly by `BaseEmitter`. For example labels must be created by `newLabel()` before they are used.
279
280 ### Assembler Example
281
282 `x86::Assembler` is a code emitter that emits machine code into a CodeBuffer directly. It's capable of targeting both 32-bit and 64-bit instruction sets and it's possible to target both instruction sets within the same code-base. The following example shows how to generate a function that works in both 32-bit and 64-bit modes, and how to use JitRuntime, `CodeHolder`, and `x86::Assembler` together.
283
284 The example handles 3 calling conventions manually just to show how it could be done, however, AsmJit contains utilities that can be used to create function prologs and epilogs automatically, but these concepts will be explained later.
285
286 ```c++
287 #include <asmjit/asmjit.h>
288 #include <stdio.h>
289
290 using namespace asmjit;
291
292 // Signature of the generated function.
293 typedef int (*SumFunc)(const int* arr, size_t count);
294
295 int main(int argc, char* argv[]) {
296 JitRuntime jit; // Create a runtime specialized for JIT.
297 CodeHolder code; // Create a CodeHolder.
298
299 code.init(jit.codeInfo()); // Initialize it to be compatible with `jit`.
300 x86::Assembler a(&code); // Create and attach x86::Assembler to `code`.
301
302 // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions:
303 // 32-BIT - passed all arguments by stack.
304 // WIN64 - passes first 4 arguments by RCX, RDX, R8, and R9.
305 // UNIX64 - passes first 6 arguments by RDI, RSI, RCX, RDX, R8, and R9.
306 x86::Gp arr, cnt;
307 x86::Gp sum = x86::eax; // Use EAX as 'sum' as it's a return register.
308
309 if (ASMJIT_ARCH_BITS == 64) {
310 #if defined(_WIN32)
311 arr = x86::rcx; // First argument (array ptr).
312 cnt = x86::rdx; // Second argument (number of elements)
313 #else
314 arr = x86::rdi; // First argument (array ptr).
315 cnt = x86::rsi; // Second argument (number of elements)
316 #endif
317 }
318 else {
319 arr = x86::edx; // Use EDX to hold the array pointer.
320 cnt = x86::ecx; // Use ECX to hold the counter.
321 a.mov(arr, x86::ptr(x86::esp, 4)); // Fetch first argument from [ESP + 4].
322 a.mov(cnt, x86::ptr(x86::esp, 8)); // Fetch second argument from [ESP + 8].
323 }
324
325 Label Loop = a.newLabel(); // To construct the loop, we need some labels.
326 Label Exit = a.newLabel();
327
328 a.xor_(sum, sum); // Clear 'sum' register (shorter than 'mov').
329 a.test(cnt, cnt); // Border case:
330 a.jz(Exit); // If 'cnt' is zero jump to 'Exit' now.
331
332 a.bind(Loop); // Start of a loop iteration.
333 a.add(sum, x86::dword_ptr(arr)); // Add int at [arr] to 'sum'.
334 a.add(arr, 4); // Increment 'arr' pointer.
335 a.dec(cnt); // Decrease 'cnt'.
336 a.jnz(Loop); // If not zero jump to 'Loop'.
337
338 a.bind(Exit); // Exit to handle the border case.
339 a.ret(); // Return from function ('sum' == 'eax').
340 // ----> x86::Assembler is no longer needed from here and can be destroyed <----
341
342 SumFunc fn;
343 Error err = jit.add(&fn, &code); // Add the generated code to the runtime.
344
345 if (err) return 1; // Handle a possible error returned by AsmJit.
346 // ----> CodeHolder is no longer needed from here and can be destroyed <----
347
348 static const int array[6] = { 4, 8, 15, 16, 23, 42 };
349
350 int result = fn(array, 6); // Execute the generated code.
351 printf("%d\n", result); // Print sum of array (108).
352
353 jit.release(fn); // Remove the function from the runtime.
354 return 0;
355 }
356 ```
357
358 The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64, and SysV64 calling conventions and will work on most X86 environments.
359
360 ### More About Memory Addresses
361
362 X86 provides a complex memory addressing model that allows to encode addresses having a BASE register, INDEX register with a possible scale (left shift), and displacement (called offset in AsmJit). Memory address can also specify memory segment (segment-override in X86 terminology) and some instructions (gather / scatter) require INDEX to be a VECTOR register instead of a general-purpose register. AsmJit allows to encode and work with all forms of addresses mentioned and implemented by X86. It also allows to construct a 64-bit memory address, which is only allowed in one form of 'mov' instruction.
363
364 ```c++
365 #include <asmjit/asmjit.h>
366
367 // Memory operand construction is provided by x86 namespace.
368 using namespace asmjit;
369 using namespace asmjit::x86; // Easier to access x86 regs.
370
371 // BASE + OFFSET.
372 x86::Mem a = ptr(rax); // a = [rax]
373 x86::Mem b = ptr(rax, 15); // b = [rax + 15]
374
375 // BASE + INDEX << SHIFT - Shift is in BITS as used by X86!
376 x86::Mem c = ptr(rax, rbx); // c = [rax + rbx]
377 x86::Mem d = ptr(rax, rbx, 2); // d = [rax + rbx << 2]
378 x86::Mem e = ptr(rax, rbx, 2, 15); // e = [rax + rbx << 2 + 15]
379
380 // BASE + VM (Vector Index) (encoded as MOD+VSIB).
381 x86::Mem f = ptr(rax, xmm1); // f = [rax + xmm1]
382 x86::Mem g = ptr(rax, xmm1, 2); // g = [rax + xmm1 << 2]
383 x86::Mem h = ptr(rax, xmm1, 2, 15); // h = [rax + xmm1 << 2 + 15]
384
385 // WITHOUT BASE:
386 uint64_t ADDR = (uint64_t)0x1234;
387 x86::Mem i = ptr(ADDR); // i = [0x1234]
388 x86::Mem j = ptr(ADDR, rbx); // j = [0x1234 + rbx]
389 x86::Mem k = ptr(ADDR, rbx, 2); // k = [0x1234 + rbx << 2]
390
391 // LABEL - Will be encoded as RIP (64-bit) or absolute address (32-bit).
392 Label L = ...;
393 x86::Mem m = ptr(L); // m = [L]
394 x86::Mem n = ptr(L, rbx); // n = [L + rbx]
395 x86::Mem o = ptr(L, rbx, 2); // o = [L + rbx << 2]
396 x86::Mem p = ptr(L, rbx, 2, 15); // p = [L + rbx << 2 + 15]
397
398 // RIP - 64-bit only (RIP can't use INDEX).
399 x86::Mem q = ptr(rip, 24); // q = [rip + 24]
400 ```
401
402 Memory operands can optionally contain memory size. This is required by instructions where the memory size cannot be deduced from other operands, like `inc` and `dec`:
403
404 ```c++
405 x86::Mem a = x86::dword_ptr(rax, rbx); // dword ptr [rax + rbx].
406 x86::Mem b = x86::qword_ptr(rdx, rsi, 0, 1); // qword ptr [rdx + rsi << 0 + 1].
407 ```
408
409 Memory operands provide API that can be used to work with them:
410
411 ```c++
412 x86::Mem mem = x86::dword_ptr(rax, 12); // dword ptr [rax + 12].
413
414 mem.hasBase(); // true.
415 mem.hasIndex(); // false.
416 mem.size(); // 4.
417 mem.offset(); // 12.
418
419 mem.setSize(0); // Sets the size to 0 (makes it sizeless).
420 mem.addOffset(-1); // Adds -1 to the offset and makes it 11.
421 mem.setOffset(0); // Sets the offset to 0.
422 mem.setBase(rcx); // Changes BASE to RCX.
423 mem.setIndex(rax); // Changes INDEX to RAX.
424 mem.hasIndex(); // true.
425
426 // ...
427 ```
428
429 Making changes to memory operand is very comfortable when emitting loads and stores:
430
431 ```c++
432 #include <asmjit/asmjit.h>
433
434 using namespace asmjit;
435
436 x86::Assembler a(...); // Your initialized x86::Assembler.
437 x86::Mem m = x86::ptr(eax); // Construct [eax] memory operand.
438
439 // One way of emitting bunch of loads is to use `mem.adjusted()`. It returns
440 // a new memory operand and keeps the source operand unchanged.
441 a.movaps(x86::xmm0, m); // No adjustment needed to load [eax].
442 a.movaps(x86::xmm1, m.adjusted(16)); // Loads from [eax + 16].
443 a.movaps(x86::xmm2, m.adjusted(32)); // Loads from [eax + 32].
444 a.movaps(x86::xmm3, m.adjusted(48)); // Loads from [eax + 48].
445
446 // ... do something with xmm0-3 ...
447
448 // Another way of adjusting memory is to change the operand in-place. If you
449 // want to keep the original operand you can simply clone it.
450 x86::Mem mx = m.clone();
451 a.movaps(mx, x86::xmm0); mx.addOffset(16);// Stores to [eax] (and adds 16 to mx).
452 a.movaps(mx, x86::xmm1); mx.addOffset(16);// Stores to [eax + 16] (and adds 16 to mx).
453 a.movaps(mx, x86::xmm2); mx.addOffset(16);// Stores to [eax + 32] (and adds 16 to mx).
454 a.movaps(mx, x86::xmm3); // Stores to [eax + 48].
455 ```
456
457 You can explore the possibilities by taking a look at:
458
459 * [core/operand.h](./src/asmjit/core/operand.h)
460 * [x86/x86operand.h](./src/asmjit/x86/x86operand.h).
461
462 ### More About CodeInfo
463
464 In the first complete example the `CodeInfo` is retrieved from `JitRuntime`. It's logical as `JitRuntime` will always return a `CodeInfo` that is compatible with the runtime environment. For example if your application runs in 64-bit mode the `CodeInfo` will use `ArchInfo::kIdX64` architecture in contrast to `ArchInfo::kIdX86`, which will be used in 32-bit mode. AsmJit also allows to setup `CodeInfo` manually, and to select a different architecture when needed. So let's do something else this time, let's always generate 32-bit code and print its binary representation. To do that, we create our own `CodeInfo` and initialize it to `ArchInfo::kIdX86` architecture. CodeInfo will populate all basic fields just based on the architecture we provide, so it's super-easy:
465
466 ```c++
467 #include <asmjit/asmjit.h>
468 #include <stdio.h>
469
470 using namespace asmjit;
471
472 int main(int argc, char* argv[]) {
473 using namespace asmjit::x86; // Easier access to x86/x64 registers.
474
475 CodeHolder code; // Create a CodeHolder.
476 code.init(CodeInfo(ArchInfo::kIdX86));// Initialize it for a 32-bit X86 target.
477
478 // Generate a 32-bit function that sums 4 floats and looks like:
479 // void func(float* dst, const float* a, const float* b)
480 x86::Assembler a(&code); // Create and attach x86::Assembler to `code`.
481
482 a.mov(eax, dword_ptr(esp, 4)); // Load the destination pointer.
483 a.mov(ecx, dword_ptr(esp, 8)); // Load the first source pointer.
484 a.mov(edx, dword_ptr(esp, 12)); // Load the second source pointer.
485
486 a.movups(xmm0, ptr(ecx)); // Load 4 floats from [ecx] to XMM0.
487 a.movups(xmm1, ptr(edx)); // Load 4 floats from [edx] to XMM1.
488 a.addps(xmm0, xmm1); // Add 4 floats in XMM1 to XMM0.
489 a.movups(ptr(eax), xmm0); // Store the result to [eax].
490 a.ret(); // Return from function.
491
492 // We have no Runtime this time, it's on us what we do with the code.
493 // CodeHolder stores code in `Section`, which provides some basic properties
494 // and CodeBuffer structure. We are interested in section's CodeBuffer only.
495 //
496 // NOTE: The first section is always '.text', so it's safe to just use 0 index.
497 // Get it by using either `code.sectionById(0)` or `code.textSection()`.
498 CodeBuffer& buffer = code.sectionById(0)->buffer();
499
500 // Print the machine-code generated or do something more interesting with it?
501 // 8B4424048B4C24088B54240C0F10010F100A0F58C10F1100C3
502 for (size_t i = 0; i < buffer.length; i++)
503 printf("%02X", buffer.data[i]);
504
505 return 0;
506 }
507 ```
508
509 ### Explicit Code Relocation
510
511 CodeInfo contains much more information than just the target architecture. It can be configured to specify a base-address (or a virtual base-address in a linker terminology), which could be static (useful when you know the location of the target's machine code) or dynamic. AsmJit assumes dynamic base-address by default and relocates the code held by `CodeHolder` to a user-provided address on-demand. To be able to relocate to a user-provided address it needs to store some information about relocations, which is represented by `RelocEntry`. Relocation entries are only required if you call external functions from the generated code that cannot be encoded by using a 32-bit displacement (X64 architecture doesn't provide an encodable 64-bit displacement).
512
513 There is also a concept called `LabelLink` - label links are lightweight structs that don't have any identifier and are stored per label in a single-linked list. Label links represent either unbound yet used labels (that are valid in cases in which label was not bound but was already referenced by an instruction) or links that cross sections (only relevant to code that uses multiple sections). Since crossing sections is something that cannot be resolved immediately these links persist until offsets of these sections are assigned and `CodeHolder::resolveUnresolvedLinks()` is called. It's an error if you end up with code that has unresolved label links after flattening. You can verify it by calling `CodeHolder::hasUnresolvedLinks()` and `CodeHolder::unresolvedLinkCount()`.
514
515 AsmJit can flatten code that uses multiple sections by assigning each section an incrementing offset that respects its alignment. Use `CodeHolder::flatten()` to do that. After the sections are flattened their offsets and virtual-sizes are adjusted to respect section's buffer size and alignment. You must call `CodeHolder::resolveUnresolvedLinks()` before relocating the code held by it. You can also flatten your code manually by iterating over all sections and calculating their offsets (relative to base) by your own algorithm. In that case you don't have to call `CodeHolder::flatten()`, but you must still call `CodeHolder::resolveUnresolvedLinks()`.
516
517 Next example shows how to use a built-in virtual memory allocator `JitAllocator` instead of using `JitRuntime` (just in case you want to use your own memory management) and how to relocate the generated code into your own memory block - you can use your own virtual memory allocator if you prefer that, but that's OS specific and it's already provided by AsmJit, so we will use what AsmJit offers instead of going deep into OS specific APIs.
518
519 The following code is similar to the previous one, but implements a function working in both 32-bit and 64-bit environments:
520
521 ```c++
522 #include <asmjit/asmjit.h>
523 #include <stdio.h>
524
525 using namespace asmjit;
526
527 typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
528
529 int main(int argc, char* argv[]) {
530 CodeHolder code; // Create a CodeHolder.
531 code.init(CodeInfo(ArchInfo::kIdHost)); // Initialize it for the host architecture.
532
533 x86::Assembler a(&code); // Create and attach x86::Assembler to `code`.
534
535 // Generate a function runnable in both 32-bit and 64-bit architectures:
536 bool isX86 = ASMJIT_ARCH_X86 == 32;
537
538 // Signature: 'void func(int* dst, const int* a, const int* b)'.
539 x86::Gp dst;
540 x86::Gp src_a;
541 x86::Gp src_b;
542
543 // Handle the difference between 32-bit and 64-bit calling convention.
544 // (arguments passed through stack vs. arguments passed by registers).
545 if (isX86) {
546 dst = x86::eax;
547 src_a = x86::ecx;
548 src_b = x86::edx;
549 a.mov(dst , x86::dword_ptr(x86::esp, 4)); // Load the destination pointer.
550 a.mov(src_a, x86::dword_ptr(x86::esp, 8)); // Load the first source pointer.
551 a.mov(src_b, x86::dword_ptr(x86::esp, 12)); // Load the second source pointer.
552 }
553 else {
554 #if defined(_WIN32)
555 dst = x86::rcx; // First argument (destination pointer).
556 src_a = x86::rdx; // Second argument (source 'a' pointer).
557 src_b = x86::r8; // Third argument (source 'b' pointer).
558 #else
559 dst = x86::rdi; // First argument (destination pointer).
560 src_a = x86::rsi; // Second argument (source 'a' pointer).
561 src_b = x86::rdx; // Third argument (source 'b' pointer).
562 #endif
563 }
564
565 a.movdqu(x86::xmm0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
566 a.movdqu(x86::xmm1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
567 a.paddd(x86::xmm0, x86::xmm1); // Add 4 ints in XMM1 to XMM0.
568 a.movdqu(x86::ptr(dst), x86::xmm0); // Store the result to [dst].
569 a.ret(); // Return from function.
570
571 // Even when we didn't use multiple sections AsmJit could insert one section
572 // called '.addrtab' (address table section), which would be filled by data
573 // required by relocations (absolute jumps and calls). You can omit this code
574 // if you are 100% sure your code doesn't contain multiple sections and
575 // such relocations. You can use `CodeHolder::hasAddressTable()` to verify
576 // whether the address table section does exist.
577 code.flatten();
578 code.resolveUnresolvedLinks();
579
580 // After the code was generated it can be relocated manually to any memory
581 // location, however, we need to know its size before we perform memory
582 // allocation. `CodeHolder::codeSize()` returns the worst estimated code
583 // size in case that relocations are not possible without trampolines (in
584 // that case some extra code at the end of the current code buffer is
585 // generated during relocation).
586 size_t estimatedSize = code.codeSize();
587
588 // Instead of rolling up our own memory allocator we can use the one AsmJit
589 // provides. It's decoupled so you don't need to use `JitRuntime` for that.
590 JitAllocator allocator;
591
592 // Allocate an executable virtual memory and handle a possible failure.
593 void* p = allocator.alloc(estimatedSize);
594 if (!p) return 0;
595
596 // Now relocate the code to the address provided by the memory allocator.
597 // Please note that this DOESN'T COPY anything to `p`. This function will
598 // store the address in CodeInfo and use relocation entries to patch the
599 // existing code in all sections to respect the base address provided.
600 code.relocateToBase((uint64_t)p);
601
602 // This is purely optional. There are cases in which the relocation can
603 // omit unneeded data, which would shrink the size of address table. If
604 // that happened the `codeSize` returned after `relocateToBase()` would
605 // be smaller than the original `estimatedSize`.
606 size_t codeSize = code.codeSize();
607
608 // This will copy code from all sections to `p`. Iterating over all
609 // sections and calling `memcpy()` would work as well, however, this
610 // function supports additional options that can be used to also zero
611 // pad sections' virtual size, etc.
612 //
613 // With some additional features, copyFlattenData() does roughly this:
614 // for (Section* section : code.sections())
615 // memcpy((uint8_t*)p + section->offset(),
616 // section->data(),
617 // section->bufferSize());
618 code.copyFlattenedData(p, codeSize, CodeHolder::kCopyWithPadding);
619
620 // Execute the generated function.
621 int inA[4] = { 4, 3, 2, 1 };
622 int inB[4] = { 1, 5, 2, 8 };
623 int out[4];
624
625 // This code uses AsmJit's ptr_as_func<> to cast between void* and SumIntsFunc.
626 ptr_as_func<SumIntsFunc>(p)(out, inA, inB);
627
628 // Prints {5 8 4 9}
629 printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
630
631 // Release 'p' as it's no longer needed. It will be destroyed with 'vm'
632 // instance anyway, but it's a good practice to release it explicitly
633 // when you know that the function will not be needed anymore.
634 allocator.release(p);
635
636 return 0;
637 }
638 ```
639
640 If you know your base-address in advance (before code generation) you can use `CodeInfo::setBaseAddress()` to setup its initial value. In that case Assembler will know the absolute position of each instruction and would be able to use it during instruction encoding and prevent relocations in case the instruction is encodable. The following example shows how to configure the base address:
641
642 ```c++
643 // Configure CodeInfo with base address.
644 CodeInfo ci(...);
645 ci.setBaseAddress(uint64_t(0x1234));
646
647 // Then initialize CodeHolder with it.
648 CodeHolder code;
649 code.init(ci);
650 ```
651
652 ### Using Native Registers - zax, zbx, zcx, ...
653
654 AsmJit's X86 code emitters always provide functions to construct machine-size registers depending on the target. This feature is for people that want to write code targeting both 32-bit and 64-bit at the same time. In AsmJit terminology these registers are named **zax**, **zcx**, **zdx**, **zbx**, **zsp**, **zbp**, **zsi**, and **zdi** (they are defined in this exact order by X86). They are accessible through `x86::Assembler`, `x86::Builder`, and `x86::Compiler`. The following example illustrates how to use this feature:
655
656 ```c++
657 #include <asmjit/asmjit.h>
658 #include <stdio.h>
659
660 using namespace asmjit;
661
662 typedef int (*Func)(void);
663
664 int main(int argc, char* argv[]) {
665 JitRuntime jit; // Create a runtime specialized for JIT.
666 CodeHolder code; // Create a CodeHolder.
667
668 code.init(jit.codeInfo()); // Initialize it to be compatible with `jit`.
669 x86::Assembler a(&code); // Create and attach x86::Assembler to `code`.
670
671 // Let's get these registers from x86::Assembler.
672 x86::Gp zbp = a.zbp();
673 x86::Gp zsp = a.zsp();
674
675 int stackSize = 32;
676
677 // Function prolog.
678 a.push(zbp);
679 a.mov(zbp, zsp);
680 a.sub(zsp, stackSize);
681
682 // ... emit some code (this just sets return value to zero) ...
683 a.xor_(x86::eax, x86::eax);
684
685 // Function epilog and return.
686 a.mov(zsp, zbp);
687 a.pop(zbp);
688 a.ret();
689
690 // To make the example complete let's call it.
691 Func fn;
692 Error err = jit.add(&fn, &code); // Add the generated code to the runtime.
693 if (err) return 1; // Handle a possible error returned by AsmJit.
694
695 int result = fn(); // Execute the generated code.
696 printf("%d\n", result); // Print the resulting "0".
697
698 jit.release(fn); // Remove the function from the runtime.
699 return 0;
700 }
701 ```
702
703 The example just returns `0`, but the function generated contains a standard prolog and epilog sequence and the function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible:
704
705 ```c++
706 void example(x86::Assembler& a) {
707 x86::Gp zax = a.gpz(x86::Gp::kIdAx);
708 x86::Gp zbx = a.gpz(x86::Gp::kIdBx);
709 x86::Gp zcx = a.gpz(x86::Gp::kIdCx);
710 x86::Gp zdx = a.gpz(x86::Gp::kIdDx);
711
712 // You can also change register's id easily.
713 x86::Gp zsp = zax;
714 zsp.setId(4); // or x86::Gp::kIdSp.
715 }
716 ```
717
718 Cloning existing registers and changing their IDs is fine in AsmJit; this technique is used internally in many places.
719
720 ### Using Assembler as Code-Patcher
721
722 This is an advanced topic that is sometimes unavoidable. AsmJit by default appends machine-code it generates into a `CodeBuffer`, however, it also allows to set the offset in `CodeBuffer` explicitly and to overwrite its content. This technique is extremely dangerous for asm beginners as X86 instructions have variable length (see below), so you should in general only patch code to change instruction's offset or some other basic details you didn't know about the first time you emitted it. A typical scenario that requires code-patching is when you start emitting a function and you don't know how much stack you want to reserve for it.
723
724 Before we go further it's important to introduce instruction options, because they can help with code-patching (and not only patching, but that will be explained in AVX-512 section):
725
726 * Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit this is usually called 'short form' and 'long form'.
727 * AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always good - this decision is used by majority of assemblers out there.
728 * AsmJit allows to override the default decision by using `short_()` and `long_()` instruction options to force short or long form, respectively. The most useful is `long_()` as it basically forces AsmJit to always emit the long form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note the underscore after each function name as it avoids collision with built-in C++ types.
729
730 To illustrate what short form and long form means in binary let's assume we want to emit `add esp, 16` instruction, which has two possible binary encodings:
731
732 * `83C410` - This is a short form aka `short add esp, 16` - You can see opcode byte (0x83), MOD/RM byte (0xC4) and an 8-bit immediate value representing `16`.
733 * `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the same Mod/RM byte (0xC4) and a 32-bit immediate in little-endian representing `16`.
734
735 If you generate an instruction in a short form and then patch it in a long form or vice-versa then something really bad will happen when you try to execute such code. The following example illustrates how to patch the code properly (it just extends the previous example):
736
737 ```c++
738 #include <asmjit/asmjit.h>
739 #include <stdio.h>
740
741 using namespace asmjit;
742
743 typedef int (*Func)(void);
744
745 int main(int argc, char* argv[]) {
746 JitRuntime jit; // Create a runtime specialized for JIT.
747 CodeHolder code; // Create a CodeHolder.
748
749 code.init(jit.codeInfo()); // Initialize it to be compatible with `jit`.
750 x86::Assembler a(&code); // Create and attach x86::Assembler to `code`.
751
752 // Let's get these registers from x86::Assembler.
753 x86::Gp zbp = a.zbp();
754 x86::Gp zsp = a.zsp();
755
756 // Function prolog.
757 a.push(zbp);
758 a.mov(zbp, zsp);
759
760 // This is where we are gonna patch the code later, so let's get the offset
761 // (the current location) from the beginning of the code-buffer.
762 size_t patchOffset = a.offset();
763 // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form.
764 a.long_().sub(zsp, 0);
765
766 // ... emit some code (this just sets return value to zero) ...
767 a.xor_(x86::eax, x86::eax);
768
769 // Function epilog and return.
770 a.mov(zsp, zbp);
771 a.pop(zbp);
772 a.ret();
773
774 // Now we know how much stack size we want to reserve. I have chosen 128
775 // bytes on purpose as it's encodable only in long form that we have used.
776
777 int stackSize = 128; // Number of bytes to reserve on the stack.
778 a.setOffset(patchOffset); // Move the current cursor to `patchOffset`.
779 a.long_().sub(zsp, stackSize); // Patch the code; don't forget to use LONG form.
780
781 // Now the code is ready to be called
782 Func fn;
783 Error err = jit.add(&fn, &code); // Add the generated code to the runtime.
784 if (err) return 1; // Handle a possible error returned by AsmJit.
785
786 int result = fn(); // Execute the generated code.
787 printf("%d\n", result); // Print the resulting "0".
788
789 jit.release(fn); // Remove the function from the runtime.
790 return 0;
791 }
792 ```
793
794 If you run the example it would just work. As an experiment you can try removing `long_()` form to see what happens when wrong code is generated.
795
796 ### Code Patching and REX Prefix
797
798 In 64-bit mode there is one more thing to worry about when patching code - REX prefix. It's a single byte prefix designed to address registers with ids from 9 to 15 and to override the default width of operation from 32 to 64 bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation width then it's important to take care of REX prefix as well.
799
800 AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the instruction emitted will always use REX prefix even when it's encodable without it. The following list contains some instructions and their binary representations to illustrate when it's emitted:
801
802 * `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix.
803 * `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40).
804 * `4883C410` - `add rsp, 16` - 64-bit operation in 64-bit mode requires REX prefix (0x48).
805 * `4183C410` - `add r12d, 16` - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41).
806 * `4983C410` - `add r12, 16` - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49).
807
808 ### Generic Function API
809
810 So far all examples shown above handled creating function prologs and epilogs manually. While it's possible to do it that way it's much better to automate such process as function calling conventions vary across architectures and also across operating systems.
811
812 AsmJit contains a functionality that can be used to define function signatures and to calculate automatically optimal function frame that can be used directly by a prolog and epilog inserter. This feature was exclusive to AsmJit's Compiler for a very long time, but was abstracted out and is now available for all users regardless of BaseEmitter they use. The design of handling functions prologs and epilogs allows generally two use cases:
813
814 * Calculate function frame before the function is generated - this is the only way if you use pure `Assembler` emitter and shown in the next example.
815 * Calculate function frame after the function is generated - this way is generally used by `Builder` and `Compiler` emitters (will be described together with `x86::Compiler`).
816
817 The following concepts are used to describe and create functions in AsmJit:
818
819 * `Type` - Type is an 8-bit value that describes a platform independent type as we know from C/C++. It provides abstractions for most common types like `int8_t`, `uint32_t`, `uintptr_t`, `float`, `double`, and all possible vector types to match ISAs up to AVX512. `Type::Id` was introduced originally to be used with the Compiler infrastructure, but is now used by `FuncSignature` as well.
820
821 * `CallConv` - Describes a calling convention - this class contains instructions to assign registers and stack addresses to function arguments and return value(s), but doesn't specify any function signature. Calling conventions are architecture and OS dependent.
822
823 * `FuncSignature` - Describes a function signature, for example `int func(int, int)`. `FuncSignature` contains a function calling convention id, return value type, and function arguments. The signature itself is platform independent and uses `Type::Id` to describe types of function arguments and its return value(s).
824
825 * `FuncDetail` - Architecture and ABI dependent information that describes `CallConv` and expanded `FuncSignature`. Each function argument and return value is represented as `FuncValue` that contains the original `Type::Id` enriched by additional information that specifies if the value is passed/returned by register (and which register) or by stack. Each value also contains some other metadata that provide additional information required to handle it properly (for example if a vector value is passed indirectly by a pointer as required by WIN64 calling convention, etc...).
826
827 * `FuncFrame` - Contains information about the function frame that can be used by prolog/epilog inserter (PEI). Holds call stack size and alignment, local stack size and alignment, and various attributes that describe how prolog and epilog should be constructed. `FuncFrame` doesn't know anything about function's arguments or return values, it holds only information necessary to create valid and ABI-conforming function prologs and epilogs.
828
829 * `FuncArgsAssignment` - A helper class that can be used to reassign function arguments into user specified registers. It's architecture and ABI dependent mapping from function arguments described by CallConv and FuncDetail into registers specified by the user.
830
831 It's a lot of concepts where each represents one step in the function frame calculation. In addition, the whole machinery can also be used to create function calls, instead of function prologs and epilogs. The next example shows how AsmJit can be used to create functions for both 32-bit and 64-bit targets and various calling conventions:
832
833 ```c++
834 #include <asmjit/asmjit.h>
835 #include <stdio.h>
836
837 using namespace asmjit;
838
839 typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
840
841 int main(int argc, char* argv[]) {
842 JitRuntime jit; // Create JIT Runtime.
843 CodeHolder code; // Create a CodeHolder.
844
845 code.init(jit.codeInfo()); // Initialize it to match `jit`.
846 x86::Assembler a(&code); // Create and attach x86::Assembler to `code`.
847
848 // Decide which registers will be mapped to function arguments. Try changing
849 // registers of `dst`, `src_a`, and `src_b` and see what happens in function's
850 // prolog and epilog.
851 x86::Gp dst = a.zax();
852 x86::Gp src_a = a.zcx();
853 x86::Gp src_b = a.zdx();
854
855 x86::Xmm vec0 = x86::xmm0;
856 x86::Xmm vec1 = x86::xmm1;
857
858 // Create and initialize `FuncDetail` and `FuncFrame`.
859 FuncDetail func;
860 func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConv::kIdHost));
861
862 FuncFrame frame;
863 frame.init(func);
864
865 // Make XMM0 and XMM1 dirty; `kGroupVec` describes XMM|YMM|ZMM registers.
866 frame.setDirtyRegs(x86::Reg::kGroupVec, Support::bitMask(0, 1));
867
868 // Alternatively, if you don't want to use register masks you can pass `BaseReg`
869 // to `addDirtyRegs()`. The following code would add both `xmm0` and `xmm1`.
870 frame.addDirtyRegs(x86::xmm0, x86::xmm1);
871
872 FuncArgsAssignment args(&func); // Create arguments assignment context.
873 args.assignAll(dst, src_a, src_b); // Assign our registers to arguments.
874 args.updateFuncFrame(frame); // Reflect our args in FuncFrame.
875 frame.finalize(); // Finalize the FuncFrame (updates it).
876
877 a.emitProlog(frame); // Emit function prolog.
878 a.emitArgsAssignment(frame, args); // Assign arguments to registers.
879 a.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
880 a.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
881 a.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
882 a.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
883 a.emitEpilog(frame); // Emit function epilog and return.
884
885 SumIntsFunc fn;
886 Error err = jit.add(&fn, &code); // Add the generated code to the runtime.
887 if (err) return 1; // Handle a possible error case.
888
889 // Execute the generated function.
890 int inA[4] = { 4, 3, 2, 1 };
891 int inB[4] = { 1, 5, 2, 8 };
892 int out[4];
893 fn(out, inA, inB);
894
895 // Prints {5 8 4 9}
896 printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
897
898 jit.release(fn); // Remove the function from the runtime.
899 return 0;
900 }
901 ```
902
903
904 Builder Interface
905 -----------------
906
907 Both `Builder` and `Compiler` are emitters that emit everything to a representation that allows further processing. The code stored in such representation is completely safe to be patched, simplified, reordered, obfuscated, removed, injected, analyzed, and 'think-of-anything-else'. Each instruction, label, directive, etc... is stored in `BaseNode` (or derived class like `InstNode` or `LabelNode`) and contains all the information required to pass it later to the `Assembler`.
908
909 There is a huge difference between `Builder` and `Compiler`:
910
911 * `Builder` (low-level):
912 * Maximum compatibility with `Assembler`, easy to switch from `Assembler` to `Builder` and vice versa.
913 * Doesn't generate machine code directly, allows to serialize to `Assembler` when the whole code is ready to be encoded.
914
915 * `Compiler` (high-level):
916 * Virtual registers - allows to use unlimited number of virtual registers which are allocated into physical registers by a built-in register allocator.
917 * Function nodes - allows to create functions by specifying their signatures and assigning virtual registers to function arguments and return value(s).
918 * Function calls - allows to call other functions within the generated code by using the same interface that is used to create functions.
919
920 There are multiple node types used by both `Builder` and `Compiler`:
921
922 * Basic nodes:
923 * `BaseNode` - Base class for all nodes.
924 * `InstNode` - Instruction node.
925 * `AlignNode` - Alignment directive (.align).
926 * `LabelNode` - Label (location where to bind it).
927
928 * Data nodes:
929 * `DataNode` - Data embedded into the code.
930 * `ConstPoolNode` - Constant pool data.
931 * `LabelDataNode` - Label address embedded as data.
932
933 * Informative nodes:
934 * `CommentNode` - Contains a comment string, doesn't affect code generation.
935 * `SentinelNode` - A marker that can be used to remember certain position, doesn't affect code generation.
936
937 * Compiler-only nodes:
938 * `FuncNode` - Start of a function.
939 * `FuncRetNode` - Return from a function.
940 * `FuncCallNode` - Function call.
941
942 ### Using Builder
943
944 The Builder interface was designed to be used as an `Assembler` replacement in case that post-processing of the generated code is required. The code can be modified during or after code generation. The post processing can be done manually or through `Pass` (Code-Builder Pass) object. Builder stores the emitted code as a double-linked list, which allows O(1) insertion and removal.
945
946 The code representation used by `Builder` is compatible with everything AsmJit provides. Each instruction is stored as `InstNode`, which contains instruction id, options, and operands. Each instruction emitted will create a new `InstNode` instance and add it to the current cursor in the double-linked list of nodes. Since the instruction stream used by `Builder` can be manipulated, we can rewrite the **SumInts** example into the following:
947
948 ```c++
949 #include <asmjit/asmjit.h>
950 #include <stdio.h>
951
952 using namespace asmjit;
953
954 typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
955
956 // Small helper function to print the current content of `cb`.
957 static void dumpCode(BaseBuilder& cb, const char* phase) {
958 StringBuilder sb;
959 cb.dump(sb);
960 printf("%s:\n%s\n", phase, sb.data());
961 }
962
963 int main(int argc, char* argv[]) {
964 JitRuntime jit; // Create JIT Runtime.
965 CodeHolder code; // Create a CodeHolder.
966
967 code.init(jit.codeInfo()); // Initialize it to match `jit`.
968 x86::Builder cb(&code); // Create and attach x86::Builder to `code`.
969
970 // Decide which registers will be mapped to function arguments. Try changing
971 // registers of `dst`, `src_a`, and `src_b` and see what happens in function's
972 // prolog and epilog.
973 x86::Gp dst = cb.zax();
974 x86::Gp src_a = cb.zcx();
975 x86::Gp src_b = cb.zdx();
976
977 x86::Xmm vec0 = x86::xmm0;
978 x86::Xmm vec1 = x86::xmm1;
979
980 // Create and initialize `FuncDetail`.
981 FuncDetail func;
982 func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConv::kIdHost));
983
984 // Remember prolog insertion point.
985 BaseNode* prologInsertionPoint = cb.cursor();
986
987 // Emit function body:
988 cb.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
989 cb.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
990 cb.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
991 cb.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
992
993 // Remember epilog insertion point.
994 BaseNode* epilogInsertionPoint = cb.cursor();
995
996 // Let's see what we have now.
997 dumpCode(cb, "Raw Function");
998
999 // Now, after we emitted the function body, we can insert the prolog, arguments
1000 // allocation, and epilog. This is not possible with using pure x86::Assembler.
1001 FuncFrame frame;
1002 frame.init(func);
1003
1004 // Make XMM0 and XMM1 dirty; `kGroupVec` describes XMM|YMM|ZMM registers.
1005 frame.setDirtyRegs(x86::Reg::kGroupVec, Support::bitMask(0, 1));
1006
1007 FuncArgsAssignment args(&func); // Create arguments assignment context.
1008 args.assignAll(dst, src_a, src_b); // Assign our registers to arguments.
1009 args.updateFuncFrame(frame); // Reflect our args in FuncFrame.
1010 frame.finalize(); // Finalize the FuncFrame (updates it).
1011
1012 // Insert function prolog and allocate arguments to registers.
1013 cb.setCursor(prologInsertionPoint);
1014 cb.emitProlog(frame);
1015 cb.emitArgsAssignment(frame, args);
1016
1017 // Insert function epilog.
1018 cb.setCursor(epilogInsertionPoint);
1019 cb.emitEpilog(frame);
1020
1021 // Let's see how the function's prolog and epilog looks.
1022 dumpCode(cb, "Prolog & Epilog");
1023
1024 // IMPORTANT: Builder requires `finalize()` to be called to serialize the code
1025 // to the Assembler (it automatically creates one if not attached).
1026 cb.finalize();
1027
1028 SumIntsFunc fn;
1029 Error err = jit.add(&fn, &code); // Add the generated code to the runtime.
1030 if (err) return 1; // Handle a possible error case.
1031
1032 // Execute the generated function.
1033 int inA[4] = { 4, 3, 2, 1 };
1034 int inB[4] = { 1, 5, 2, 8 };
1035 int out[4];
1036 fn(out, inA, inB);
1037
1038 // Prints {5 8 4 9}
1039 printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
1040
1041 jit.release(fn); // Remove the function from the runtime.
1042 return 0;
1043 }
1044 ```
1045
1046 When the example is executed it should output the following (this one using AMD64-SystemV ABI):
1047
1048 ```
1049 Raw Function:
1050 movdqu xmm0, [rcx]
1051 movdqu xmm1, [rdx]
1052 paddd xmm0, xmm1
1053 movdqu [rax], xmm0
1054
1055 Prolog & Epilog:
1056 mov rax, rdi
1057 mov rcx, rsi
1058 movdqu xmm0, [rcx]
1059 movdqu xmm1, [rdx]
1060 paddd xmm0, xmm1
1061 movdqu [rax], xmm0
1062 ret
1063
1064 {5 8 4 9}
1065 ```
1066
1067 The number of use-cases of **x86::Builder** is not limited and highly depends on your creativity and experience. The previous example can be easily improved to collect all dirty registers inside the function programmatically and to pass them to `frame.setDirtyRegs()`:
1068
1069 ```c++
1070 #include <asmjit/asmjit.h>
1071
1072 using namespace asmjit;
1073
1074 // NOTE: This function doesn't cover all possible constructs. It ignores
1075 // instructions that write to implicit registers that are not part of the
1076 // operand list. It also counts read-only registers. Real implementation
1077 // would be a bit more complicated, but still relatively easy to implement.
1078 static void collectDirtyRegs(const BaseNode* first, const BaseNode* last, uint32_t regMask[BaseReg::kGroupVirt]) {
1079 const BaseNode* node = first;
1080 while (node) {
1081 if (node->actsAsInst()) {
1082 const InstNode* inst = node->as<InstNode>();
1083 const Operand* opArray = inst->operands();
1084
1085 for (uint32_t i = 0, opCount = inst->opCount(); i < opCount; i++) {
1086 const Operand& op = opArray[i];
1087 if (op.isReg()) {
1088 const x86::Reg& reg = op.as<x86::Reg>();
1089 if (reg.group() < BaseReg::kGroupVirt)
1090 regMask[reg.group()] |= 1u << reg.id();
1091 }
1092 }
1093 }
1094
1095 if (node == last) break;
1096 node = node->next();
1097   }
1097 }
1098
1099 static void setDirtyRegsOfFuncFrame(const x86::Builder& cb, FuncFrame& frame) {
1100 uint32_t regMask[BaseReg::kGroupVirt] = { 0 };
1101 collectDirtyRegs(cb.firstNode(), cb.lastNode(), regMask);
1102
1103 // X86/X64 ABIs only require to save GP/XMM registers:
1104 frame.setDirtyRegs(x86::Reg::kGroupGp , regMask[x86::Reg::kGroupGp ]);
1105 frame.setDirtyRegs(x86::Reg::kGroupVec, regMask[x86::Reg::kGroupVec]);
1106 }
1107 ```
1108
1109 ### Using x86::Assembler or x86::Builder through x86::Emitter
1110
1111 Even when **Assembler** and **Builder** provide the same interface as defined by **BaseEmitter** their platform dependent variants (**x86::Assembler** and **x86::Builder**, respective) cannot be interchanged or casted to each other by using C++'s `static_cast<>`. The main reason is the inheritance graph of these classes is different and cast-incompatible, as illustrated in the following graph:
1112
1113 ```
1114 +--------------+ +=========================+
1115 +----------------------->| x86::Emitter |<--+--# x86::EmitterImplicitT<> #<--+
1116 | +--------------+ | +=========================+ |
1117 | (abstract) | (mixin) |
1118 | +--------------+ +~~~~~~~~~~~~~~+ | |
1119 +-->| BaseAssembler|---->|x86::Assembler|<--+ |
1120 | +--------------+ +~~~~~~~~~~~~~~+ | |
1121 | (abstract) (final) | |
1122 +===============+ | +--------------+ +~~~~~~~~~~~~~~+ | |
1123 # BaseEmitter #--+-->| BaseBuilder |--+->| x86::Builder |<--+ |
1124 +===============+ +--------------+ | +~~~~~~~~~~~~~~+ |
1125 (abstract) (abstract) | (final) |
1126 +---------------------+ |
1127 | |
1128 | +--------------+ +~~~~~~~~~~~~~~+ +=========================+ |
1129 +-->| BaseCompiler |---->| x86::Compiler|<-----# x86::EmitterExplicitT<> #---+
1130 +--------------+ +~~~~~~~~~~~~~~+ +=========================+
1131 (abstract) (final) (mixin)
1132 ```
1133
1134 The graph basically shows that it's not possible to cast `x86::Assembler` to `x86::Builder` and vice versa. However, since both `x86::Assembler` and `x86::Builder` share the same interface defined by both `BaseEmitter` and `x86::EmitterImplicitT` a class called `x86::Emitter` was introduced to make it possible to write a function that can emit to both `x86::Assembler` and `x86::Builder`. Note that `x86::Emitter` cannot be created, it's abstract and has private constructors and destructors; it was only designed to be casted to and used as an interface.
1135
1136 Each X86 emitter implements a member function called `as<x86::Emitter>()`, which casts the instance to the `x86::Emitter`, as illustrated on the next example:
1137
1138 ```c++
1139 #include <asmjit/asmjit.h>
1140
1141 using namespace asmjit;
1142
1143 static void emitSomething(x86::Emitter* e) {
1144 e->mov(x86::eax, x86::ebx);
1145 }
1146
1147 static void assemble(CodeHolder& code, bool useAsm) {
1148 if (useAsm) {
1149 x86::Assembler a(&code);
1150 emitSomething(a.as<x86::Emitter>());
1151 }
1152 else {
1153 x86::Builder cb(&code);
1154 emitSomething(cb.as<x86::Emitter>());
1155
1156 // IMPORTANT: Builder requires `finalize()` to be called to serialize the
1157 // code to the Assembler (it automatically creates one if not attached).
1158 cb.finalize();
1159 }
1160 }
1161 ```
1162
1163 The example above shows how to create a function that can emit code to either **x86::Assembler** or **x86::Builder** through **x86::Emitter**, which provides emitter-neutral functionality. **x86::Emitter**, however, doesn't provide any emitter **x86::Assembler** or **x86::Builder** specific functionality like **setCursor()**.
1164
1165
1166 Compiler Interface
1167 ------------------
1168
1169 **Compiler** is a high-level code emitter that provides virtual registers and automatically handles function calling conventions. It's still architecture dependent, but makes the code generation much easier by offering a built-in register allocator and function builder. Functions are essential; the first-step to generate some code is to define the signature of the function you want to generate (before generating the function body). Function arguments and return value(s) are handled by assigning virtual registers to them. Similarly, function calls are handled the same way.
1170
1171 **Compiler** also makes the use of passes (introduced by **Builder**) and automatically adds an architecture-dependent register allocator pass to the list of passes when attached to **CodeHolder**.
1172
1173 ### Compiler Basics
1174
1175 The first **Compiler** example shows how to generate a function that simply returns an integer value. It's an analogy to the very first example:
1176
1177 ```c++
1178 #include <asmjit/asmjit.h>
1179 #include <stdio.h>
1180
1181 using namespace asmjit;
1182
1183 // Signature of the generated function.
1184 typedef int (*Func)(void);
1185
1186 int main(int argc, char* argv[]) {
1187 JitRuntime jit; // Runtime specialized for JIT code execution.
1188 CodeHolder code; // Holds code and relocation information.
1189
1190 code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime.
1191 x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`.
1192
1193 cc.addFunc(FuncSignatureT<int>()); // Begin a function of `int fn(void)` signature.
1194
1195 x86::Gp vReg = cc.newGpd(); // Create a 32-bit general purpose register.
1196 cc.mov(vReg, 1); // Move one to our virtual register `vReg`.
1197 cc.ret(vReg); // Return `vReg` from the function.
1198
1199 cc.endFunc(); // End of the function body.
1200 cc.finalize(); // Translate and assemble the whole `cc` content.
1201 // ----> x86::Compiler is no longer needed from here and can be destroyed <----
1202
1203 Func fn;
1204 Error err = jit.add(&fn, &code); // Add the generated code to the runtime.
1205 if (err) return 1; // Handle a possible error returned by AsmJit.
1206 // ----> CodeHolder is no longer needed from here and can be destroyed <----
1207
1208 int result = fn(); // Execute the generated code.
1209 printf("%d\n", result); // Print the resulting "1".
1210
1211 jit.release(fn); // RAII, but let's make it explicit.
1212 return 0;
1213 }
1214 ```
1215
1216 The **addFunc()** and **endFunc()** methods define the body of the function. Both functions must be called per function, but the body doesn't have to be generated in sequence. An example of generating two functions will be shown later. The next example shows more complicated code that contains a loop and generates a **memcpy32()** function:
1217
1218 ```c++
1219 #include <asmjit/asmjit.h>
1220 #include <stdio.h>
1221
1222 using namespace asmjit;
1223
1224 // Signature of the generated function.
1225 typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count);
1226
1227 int main(int argc, char* argv[]) {
1228 JitRuntime jit; // Runtime specialized for JIT code execution.
1229 CodeHolder code; // Holds code and relocation information.
1230
1231 code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime.
1232 x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`.
1233
1234 cc.addFunc( // Begin the function of the following signature:
1235 FuncSignatureT<void, // Return value - void (no return value).
1236 uint32_t*, // 1st argument - uint32_t* (machine reg-size).
1237 const uint32_t*, // 2nd argument - uint32_t* (machine reg-size).
1238 size_t>()); // 3rd argument - size_t (machine reg-size).
1239
1240 Label L_Loop = cc.newLabel(); // Start of the loop.
1241 Label L_Exit = cc.newLabel(); // Used to exit early.
1242
1243 x86::Gp dst = cc.newIntPtr("dst"); // Create `dst` register (destination pointer).
1244 x86::Gp src = cc.newIntPtr("src"); // Create `src` register (source pointer).
1245 x86::Gp cnt = cc.newUIntPtr("cnt"); // Create `cnt` register (loop counter).
1246
1247 cc.setArg(0, dst); // Assign `dst` argument.
1248 cc.setArg(1, src); // Assign `src` argument.
1249 cc.setArg(2, cnt); // Assign `cnt` argument.
1250
1251 cc.test(cnt, cnt); // Early exit if length is zero.
1252 cc.jz(L_Exit);
1253
1254 cc.bind(L_Loop); // Bind the beginning of the loop here.
1255
1256 x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes).
1257 cc.mov(tmp, x86::dword_ptr(src)); // Load DWORD from [src] address.
1258 cc.mov(x86::dword_ptr(dst), tmp); // Store DWORD to [dst] address.
1259
1260 cc.add(src, 4); // Increment `src`.
1261 cc.add(dst, 4); // Increment `dst`.
1262
1263 cc.dec(cnt); // Loop until `cnt` is non-zero.
1264 cc.jnz(L_Loop);
1265
1266 cc.bind(L_Exit); // Label used by early exit.
1267 cc.endFunc(); // End of the function body.
1268
1269 cc.finalize(); // Translate and assemble the whole `cc` content.
1270 // ----> x86::Compiler is no longer needed from here and can be destroyed <----
1271
1272 MemCpy32 memcpy32;
1273 Error err = jit.add(&memcpy32, &code); // Add the generated code to the runtime.
1274 if (err) return 1; // Handle a possible error returned by AsmJit.
1275 // ----> CodeHolder is no longer needed from here and can be destroyed <----
1276
1277 // Test the generated code.
1278 uint32_t input[6] = { 1, 2, 3, 5, 8, 13 };
1279 uint32_t output[6];
1280 memcpy32(output, input, 6);
1281
1282 for (uint32_t i = 0; i < 6; i++)
1283 printf("%d\n", output[i]);
1284
1285 jit.release(memcpy32); // RAII, but let's make it explicit.
1286 return 0;
1287 }
1288 ```
1289
1290 ### Recursive Functions
1291
1292 It's possible to create more functions by using the same `x86::Compiler` instance and make links between them. In such case it's important to keep the pointer to the `FuncNode` node. The first example creates a simple Fibonacci function that calls itself recursively:
1293
1294 ```c++
1295 #include <asmjit/asmjit.h>
1296 #include <stdio.h>
1297
1298 using namespace asmjit;
1299
1300 // Signature of the generated function.
1301 typedef uint32_t (*Fibonacci)(uint32_t x);
1302
1303 int main(int argc, char* argv[]) {
1304 JitRuntime jit; // Runtime specialized for JIT code execution.
1305 CodeHolder code; // Holds code and relocation information.
1306
1307 code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime.
1308 x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`.
1309
1310 FuncNode* func = cc.addFunc( // Begin of the Fibonacci function, `addFunc()`
1311 FuncSignatureT<int, int>()); // Returns a pointer to the `FuncNode` node.
1312
1313 Label L_Exit = cc.newLabel(); // Exit label.
1314 x86::Gp x = cc.newU32(); // Function `x` argument.
1315 x86::Gp y = cc.newU32(); // Temporary.
1316
1317 cc.setArg(0, x);
1318
1319 cc.cmp(x, 3); // Return `x` if less than 3.
1320 cc.jb(L_Exit);
1321
1322 cc.mov(y, x); // Make copy of the original `x`.
1323 cc.dec(x); // Decrease `x`.
1324
1325 FuncCallNode* call = cc.call( // Function call:
1326 func->label(), // Function address or Label.
1327 FuncSignatureT<int, int>()); // Function signature.
1328
1329 call->setArg(0, x); // Assign `x` as the first argument and
1330 call->setRet(0, x); // assign `x` as a return value as well.
1331
1332 cc.add(x, y); // Combine the return value with `y`.
1333
1334 cc.bind(L_Exit);
1335 cc.ret(x); // Return `x`.
1336 cc.endFunc(); // End of the function body.
1337
1338 cc.finalize(); // Translate and assemble the whole `cc` content.
1339 // ----> x86::Compiler is no longer needed from here and can be destroyed <----
1340
1341 Fibonacci fib;
1342 Error err = jit.add(&fib, &code); // Add the generated code to the runtime.
1343 if (err) return 1; // Handle a possible error returned by AsmJit.
1344 // ----> CodeHolder is no longer needed from here and can be destroyed <----
1345
1346 printf("Fib(%u) -> %u\n", 8, fib(8)); // Test the generated code.
1347
1348 jit.release(fib); // RAII, but let's make it explicit.
1349 return 0;
1350 }
1351 ```
1352
1353 ### Stack Management
1354
1355 **Compiler** manages function's stack-frame, which is used by the register allocator to spill virtual registers. It also provides an interface to allocate user-defined block of the stack, which can be used as a temporary storage by the generated function. In the following example a stack of 256 bytes size is allocated, filled by bytes starting from 0 to 255 and then iterated again to sum all the values.
1356
1357 ```c++
1358 #include <asmjit/asmjit.h>
1359 #include <stdio.h>
1360
1361 using namespace asmjit;
1362
1363 // Signature of the generated function.
1364 typedef int (*Func)(void);
1365
1366 int main(int argc, char* argv[]) {
1367 JitRuntime jit; // Runtime specialized for JIT code execution.
1368 CodeHolder code; // Holds code and relocation information.
1369
1370 code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime.
1371 x86::Compiler cc(&code); // Create and attach x86::Compiler to `code`.
1372
1373 cc.addFunc(FuncSignatureT<int>()); // Create a function that returns 'int'.
1374
1375 x86::Gp p = cc.newIntPtr("p");
1376 x86::Gp i = cc.newIntPtr("i");
1377
1378 x86::Mem stack = cc.newStack(256, 4); // Allocate 256 bytes on the stack aligned to 4 bytes.
1379 x86::Mem stackIdx(stack); // Copy of `stack` with `i` added.
1380 stackIdx.setIndex(i); // stackIdx <- stack[i].
1381 stackIdx.setSize(1); // stackIdx <- byte ptr stack[i].
1382
1383 // Load a stack address to `p`. This step is purely optional and shows
1384 // that `lea` is useful to load a memory operands address (even absolute)
1385 // to a general purpose register.
1386 cc.lea(p, stack);
1387
1388 // Clear `i` (`xor` as it's C++ keyword, hence `xor_` is used instead).
1389 cc.xor_(i, i);
1390
1391 Label L1 = cc.newLabel();
1392 Label L2 = cc.newLabel();
1393
1394 cc.bind(L1); // First loop, fill the stack.
1395 cc.mov(stackIdx, i.r8()); // stack[i] = uint8_t(i).
1396
1397 cc.inc(i); // i++;
1398 cc.cmp(i, 256); // if (i < 256)
1399 cc.jb(L1); // goto L1;
1400
1401 // Second loop, sum all bytes stored in `stack`.
1402 x86::Gp sum = cc.newI32("sum");
1403 x86::Gp val = cc.newI32("val");
1404
1405 cc.xor_(i, i);
1406 cc.xor_(sum, sum);
1407
1408 cc.bind(L2);
1409
1410 cc.movzx(val, stackIdx); // val = uint32_t(stack[i]);
1411 cc.add(sum, val); // sum += val;
1412
1413 cc.inc(i); // i++;
1414 cc.cmp(i, 256); // if (i < 256)
1415 cc.jb(L2); // goto L2;
1416
1417 cc.ret(sum); // Return the `sum` of all values.
1418 cc.endFunc(); // End of the function body.
1419
1420 cc.finalize(); // Translate and assemble the whole `cc` content.
1421 // ----> x86::Compiler is no longer needed from here and can be destroyed <----
1422
1423 Func func;
1424 Error err = jit.add(&func, &code); // Add the generated code to the runtime.
1425 if (err) return 1; // Handle a possible error returned by AsmJit.
1426 // ----> CodeHolder is no longer needed from here and can be destroyed <----
1427
1428 printf("Func() -> %d\n", func()); // Test the generated code.
1429
1430 jit.release(func); // RAII, but let's make it explicit.
1431 return 0;
1432 }
1433 ```
1434
1435 ### Constant Pool
1436
1437 **Compiler** provides two constant pools for a general purpose code generation - local and global. Local constant pool is related to a single **FuncNode** node and is generally flushed after the function body, and global constant pool is flushed at the end of the generated code by **Compiler::finalize()**.
1438
1439 ```c++
1440 #include <asmjit/asmjit.h>
1441
1442 using namespace asmjit;
1443
1444 static void exampleUseOfConstPool(x86::Compiler& cc) {
1445 cc.addFunc(FuncSignatureT<int>());
1446
1447 x86::Gp v0 = cc.newGpd("v0");
1448 x86::Gp v1 = cc.newGpd("v1");
1449
1450 x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeLocal, 200);
1451 x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeLocal, 33);
1452
1453 cc.mov(v0, c0);
1454 cc.mov(v1, c1);
1455 cc.add(v0, v1);
1456
1457 cc.ret(v0);
1458 cc.endFunc();
1459 }
1460 ```
1461
1462
1463 Advanced Features
1464 -----------------
1465
1466 ### Logging
1467
1468 The initial phase of any project that generates machine code is not always smooth. Failure cases are common especially at the beginning of the project and AsmJit provides a logging functionality to address this issue. AsmJit already does a good job with function overloading to prevent emitting semantically incorrect instructions, but it can't prevent you from emitting machine code that is semantically correct, but doesn't work when it's executed. Logging has always been an important part of AsmJit's infrastructure and looking at logs can sometimes reveal code generation issues quickly.
1469
1470 AsmJit provides API for logging and formatting:
1471 * `Logger` - A logger that you can pass to `CodeHolder` and all emitters that inherit `BaseEmitter`.
1472 * `FormatOptions` - Formatting options that can change how instructions and operands are formatted.
1473
1474 AsmJit's `Logger` serves the following purposes:
1475 * Provides a basic foundation for logging.
1476 * Abstract class leaving the implementation (destination) on users. Two backends are built-in for simplicity:
1477 * `FileLogger` implements logging into a standard `std::FILE` stream.
1478 * `StringLogger` stores the logged text in `StringBuilder` instance.
1479
1480 AsmJit's `FormatOptions` provides the following to customize the formatting of instructions and operands:
1481 * Flags:
1482 * `FormatOptions::kFlagMachineCode` - Show a machine code of each encoded instruction.
1483 * `FormatOptions::kFlagExplainConsts` - Show a text explanation of some immediate values that are used as predicates.
1484 * `FormatOptions::kFlagHexImms` - Use hexadecimal notation to output immediates.
1485 * `FormatOptions::kFlagHexOffsets` - Use hexadecimal notation to output offsets.
1486 * `FormatOptions::kFlagRegCasts` - Show casts between various register types (compiler).
1487 * `FormatOptions::kFlagPositions` - Show positions associated with nodes (compiler).
1488 * Indentation:
1489 * `FormatOptions::kIndentationCode` - Indentation of instructions and directives.
1490 * `FormatOptions::kIndentationLabel` - Indentation of labels.
1491 * `FormatOptions::kIndentationComment` - Indentation of whole-line comments.
1492
1493 **Logger** is typically attached to **CodeHolder** and all attached code emitters automatically use it:
1494
1495 ```c++
1496 #include <asmjit/asmjit.h>
1497 #include <stdio.h>
1498
1499 using namespace asmjit;
1500
1501 int main(int argc, char* argv[]) {
1502 JitRuntime jit; // Runtime specialized for JIT code execution.
1503 FileLogger logger(stdout); // Logger should always survive the CodeHolder.
1504
1505 CodeHolder code; // Holds code and relocation information.
1506 code.init(jit.codeInfo()); // Initialize to the same arch as JIT runtime.
1507 code.setLogger(&logger); // Attach the `logger` to `code` holder.
1508
1509 // ... code as usual, everything you emit will be logged to `stdout` ...
1510
1511 return 0;
1512 }
1513 ```
1514
1515 ### Error Handling
1516
1517 AsmJit uses error codes to represent and return errors. Every function where error can occur returns **Error**. Exceptions are never thrown by AsmJit even in extreme conditions like out-of-memory. Errors should never be ignored, however, checking errors after each asmjit API call would simply overcomplicate the whole code generation experience. To make life simpler AsmJit provides **ErrorHandler**, which provides **handleError()** function:
1518
1519 `virtual void handleError(Error err, const char* message, BaseEmitter* origin) = 0;`
1520
1521 That can be overridden by AsmJit users and do the following:
1522
1523 * 1. Record the error and continue (the way how the error is user-implemented).
1524 * 2. Throw an exception. AsmJit doesn't use exceptions and is completely exception-safe, but it's perfectly legal to throw an exception from the error handler.
1525 * 3. Use plain old C's `setjmp()` and `longjmp()`. AsmJit always puts `Assembler` and `Compiler` to a consistent state before calling the `handleError()` so `longjmp()` can be used without issues to cancel the code-generation if an error occurred. This method can be used if exception handling in your project is turned off and you still want some comfort. In most cases it should be safe as AsmJit uses Zone memory and the ownership of memory it allocates always ends with the instance that allocated it. If using this approach please never jump outside the life-time of **CodeHolder** and **BaseEmitter**.
1526
1527 **ErrorHandler** can be attached to **CodeHolder** and/or **BaseEmitter** (which has a priority). The first example uses error handler that just prints the error, but lets AsmJit continue:
1528
1529 ```c++
1530 // Error handling #1:
1531 #include <asmjit/asmjit.h>
1532
1533 #include <stdio.h>
1534
1535 // Error handler that just prints the error and lets AsmJit ignore it.
1536 class SimpleErrorHandler : public asmjit::ErrorHandler {
1537 public:
1538   inline SimpleErrorHandler() : err(asmjit::kErrorOk) {}
1539
1540 void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
1541 this->err = err;
1542 fprintf(stderr, "ERROR: %s\n", message);
1543 }
1544
1545   asmjit::Error err;
1546 };
1547
1548 int main(int argc, char* argv[]) {
1549 using namespace asmjit;
1550
1551 JitRuntime jit;
1552 SimpleErrorHandler eh;
1553
1554 CodeHolder code;
1555 code.init(jit.codeInfo());
1556 code.setErrorHandler(&eh);
1557
1558 // Try to emit instruction that doesn't exist.
1559 x86::Assembler a(&code);
1560 a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
1561
1562 if (eh.err) {
1563 // Assembler failed!
1564 }
1565
1566 return 0;
1567 }
1568 ```
1569
1570 If an error happens during instruction emitting / encoding the assembler behaves transactionally - the output buffer won't advance if encoding failed, thus either a fully encoded instruction or nothing is emitted. The error handling shown above is useful, but it's still not the best way of dealing with errors in AsmJit. The following example shows how to use exception handling to handle errors in a more C++ way:
1571
1572 ```c++
1573 // Error handling #2:
1574 #include <asmjit/asmjit.h>
1575
1576 #include <exception>
1577 #include <string>
1578 #include <stdio.h>
1579
1580 // Error handler that throws a user-defined `AsmJitException`.
1581 class AsmJitException : public std::exception {
1582 public:
1583 AsmJitException(asmjit::Error err, const char* message) noexcept
1584 : err(err),
1585 message(message) {}
1586
1587 const char* what() const noexcept override { return message.c_str(); }
1588
1589 asmjit::Error err;
1590 std::string message;
1591 };
1592
1593 class ThrowableErrorHandler : public asmjit::ErrorHandler {
1594 public:
1595 // Throw is possible, functions that use ErrorHandler are never 'noexcept'.
1596 void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
1597 throw AsmJitException(err, message);
1598 }
1599 };
1600
1601 int main(int argc, char* argv[]) {
1602 using namespace asmjit;
1603
1604 JitRuntime jit;
1605 ThrowableErrorHandler eh;
1606
1607 CodeHolder code;
1608 code.init(jit.codeInfo());
1609 code.setErrorHandler(&eh);
1610
1611 x86::Assembler a(&code);
1612
1613 // Try to emit instruction that doesn't exist.
1614 try {
1615 a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
1616 }
1617 catch (const AsmJitException& ex) {
1618 printf("EXCEPTION THROWN: %s\n", ex.what());
1619 }
1620
1621 return 0;
1622 }
1623 ```
1624
1625 If C++ exceptions are not what you like or your project turns off them completely there is still a way of reducing the error handling to a minimum by using a standard `setjmp/longjmp` approach. AsmJit is exception-safe and cleans up everything before calling the **ErrorHandler**, so any approach is safe. You can simply jump from the error handler without causing any side-effects or memory leaks. The following example demonstrates how it could be done:
1626
1627 ```c++
1628 // Error handling #3:
1629 #include <asmjit/asmjit.h>
1630
1631 #include <setjmp.h>
1632 #include <stdio.h>
1633
1634 class LongJmpErrorHandler : public asmjit::ErrorHandler {
1635 public:
1636 inline LongJmpErrorHandler() : err(asmjit::kErrorOk) {}
1637
1638 void handleError(asmjit::Error err, const char* message, asmjit::BaseEmitter* origin) override {
1639 this->err = err;
1640 longjmp(state, 1);
1641 }
1642
1643 jmp_buf state;
1644 asmjit::Error err;
1645 };
1646
1647 int main(int argc, char* argv[]) {
1648 using namespace asmjit;
1649
1650 JitRuntime jit;
1651 LongJmpErrorHandler eh;
1652
1653 CodeHolder code;
1654 code.init(jit.codeInfo());
1655 code.setErrorHandler(&eh);
1656
1657 x86::Assembler a(&code);
1658
1659 if (!setjmp(eh.state)) {
1660 // Try to emit instruction that doesn't exist.
1661 a.emit(x86::Inst::kIdMov, x86::xmm0, x86::xmm1);
1662 }
1663 else {
1664 Error err = eh.err;
1665 printf("ASMJIT ERROR: 0x%08X [%s]\n", err, DebugUtils::errorAsString(err));
1666 }
1667
1668 return 0;
1669 }
1670 ```
1671
1672 ### Code Injection
1673
1674 Both `Builder` and `Compiler` emitters store their nodes in a double-linked list, which makes it easy to manipulate that list during the code generation or after. Each node is always emitted next to the current `cursor` and the cursor is changed to that newly emitted node. The cursor can be explicitly retrieved and changed by `cursor()` and `setCursor()`, respectively.
1675
1676 The following example shows how to inject code at the beginning of the function by implementing an `XmmConstInjector` helper class.
1677
1678 ```c++
1679 ```
1680
1681 ### TODO
1682
1683 ...More documentation...
1684
1685
1686
1687 Other Topics
1688 ------------
1689
1690 This section provides quick answers to some recurring questions and topics.
1691
1692 ### Instruction Validation
1693
1694 AsmJit by default prefers performance when it comes to instruction encoding. The Assembler implementation would only validate operands that must be validated to select a proper encoding of the instruction. This means that by default it would accept instructions that do not really exist like `mov rax, ebx`. This is great in release mode as it makes the assembler faster, however, it's not that great for development as it allows incorrect instructions to pass silently even when the instruction's operands are wrong. To fix this, AsmJit contains a feature called **Strict Validation**, which allows validating each instruction before the Assembler tries to encode it. This feature can also be used without an Assembler instance through `BaseInst::validate()` API.
1695
1696 Emitter options are configured through CodeHolder:
1697
1698 ```c++
1699 CodeHolder code;
1700
1701 // Enables strict instruction validation for all emitters attached to `code`.
1702 code.addEmitterOptions(BaseEmitter::kOptionStrictValidation);
1703
1704 // Use either ErrorHandler attached to CodeHolder or Error code returned by
1705 // the Assembler.
1706 x86::Assembler a(&code);
1707 Error err = a.emit(x86::Inst::kIdMov, x86::eax, x86::al);
1708 if (err) { /* failed */ }
1709 ```
1710
1711 ### Label Offsets and Links
1712
1713 When you use a label that is not yet bound the Assembler would create a `LabelLink`, which is then added to CodeHolder's `LabelEntry`. These links are also created for labels that are bound but reference some location in a different section. Firstly, here are some functions that can be used to check some basics:
1714
1715 ```c++
1716 CodeHolder code = ...;
1717 Label L = ...;
1718
1719 // Returns whether the Label `L` is bound.
1720 bool bound = code.isLabelBound(L or L.id());
1721
1722 // Returns true if the code contains either referenced, but unbound labels,
1723 // or cross-section label links that are not resolved yet.
1724 bool value = code.hasUnresolvedLinks(); // Boolean answer.
1725 size_t count = code.unresolvedLinkCount(); // Count of links.
1726 ```
1727
1728 Please note that there is no API to return a count of unbound labels, as this is completely unimportant from CodeHolder's perspective. If a label is not used then it doesn't matter whether it's bound or not; only used labels matter. After a Label is bound you can query its offset relative to the start of the section where it was bound:
1729
1730 ```c++
1731 CodeHolder code = ...;
1732 Label L = ...;
1733
1734 // After you are done you can check the offset. The offset provided
1735 // is relative to the start of the section, see below for alternative.
1736 // If the given label is not bound then the offset returned will be zero.
1737 uint64_t offset = code.labelOffset(L or L.id());
1738
1739 // If you use multiple sections and want the offset relative to the base.
1740 // NOTE: This function expects that the section has already an offset and
1741 // the label-link was resolved (if this is not true you will still get an
1742 // offset relative to the start of the section).
1743 uint64_t offset = code.labelOffsetFromBase(L or L.id());
1744 ```
1745
1746 ### Sections
1747
1748 Sections are a relatively new feature that allows creating multiple sections. It's supported by Assembler, Builder, and Compiler. Please note that using multiple sections is advanced and requires more understanding about how AsmJit works. There is a test-case [asmjit_test_x86_sections.cpp](./test/asmjit_test_x86_sections.cpp) that shows how sections can be used.
1749
1750 ```c++
1751 CodeHolder code = ...;
1752
1753 // Text section is always provided as the first section.
1754 Section* text = code.textSection(); // or code.sectionById(0);
1755
1756 // To create another section use `code.newSection()`.
1757 Section* data;
1758 Error err = code.newSection(&data,
1759 ".data", // Section name
1760 SIZE_MAX, // Name length if the name is not null terminated (or SIZE_MAX).
1761 0, // Section flags, see Section::Flags.
1762 8); // Section alignment, must be power of 2.
1763
1764 // When you switch sections in Assembler, Builder, or Compiler the cursor
1765 // will always move to the end of that section. When you create an Assembler
1766 // the cursor would be placed at the end of the first (.text) section, which
1767 // is initially empty.
1768 x86::Assembler a(&code);
1769 Label L_Data = a.newLabel();
1770
1771 a.mov(x86::eax, x86::ebx); // Emits in .text section.
1772
1773 a.section(data); // Switches to the end of .data section.
1774 a.bind(L_Data); // Binds label in this .data section
1775 a.db(0x01); // Emits byte in .data section.
1776
1777 a.section(text); // Switches to the end of .text section.
1778 a.add(x86::ebx, x86::eax); // Emits in .text section.
1779
1780 // References a label bound in .data section in .text section. This
1781 // would create a LabelLink even when the L_Data is already bound,
1782 // because the reference crosses sections. See below...
1783 a.lea(x86::rsi, x86::ptr(L_Data));
1784 ```
1785
1786 The last line in the example above shows that a LabelLink would be created even for bound labels that cross sections. In this case a referenced label was bound in another section, which means that the link couldn't be resolved at that moment. If your code uses sections, but you wish AsmJit to flatten these sections (you don't plan to flatten them manually) then there is an API for that.
1787
1788 ```c++
1789 // ... (continuing the previous example) ...
1790 CodeHolder code = ...;
1791
1792 // Suppose we have some code that contains multiple sections and
1793 // we would like to flatten them by using AsmJit's built-in API:
1794 Error err = code.flatten();
1795 if (err) { /* Error handling is necessary. */ }
1796
1797 // After flattening all sections would contain assigned offsets
1798 // relative to base. Offsets are 64-bit unsigned integers so we
1799 // cast them to `size_t` for simplicity. On 32-bit targets it's
1800 // guaranteed that the offset cannot be greater than `2^32 - 1`.
1801 printf("Data section offset %zu", size_t(data->offset()));
1802
1803 // The flattening doesn't resolve unresolved label links, this
1804 // has to be done manually as flattening can be done separately.
1805 err = code.resolveUnresolvedLinks();
1806 if (err) { /* Error handling is necessary. */ }
1807
1808 if (code.hasUnresolvedLinks()) {
1809 // This would mean either unbound label or some other issue.
1810   printf("FAILED: UnresolvedLinkCount=%zu\n", code.unresolvedLinkCount());
1811 }
1812 ```
1813
1814 ### Using AsmJit Data Structures
1815
1816 AsmJit stores its data in data structures allocated by `ZoneAllocator`. It's a fast allocator that allows AsmJit to allocate a lot of small data structures fast and without `malloc()` overhead. The most common data structure that you will probably inspect is `ZoneVector<T>`. It's like C++'s `std::vector`, but the implementation doesn't use exceptions and uses the mentioned `ZoneAllocator` for performance reasons. You don't have to worry about allocations as you should not need to add items to data structures that are managed by `CodeHolder` or advanced emitters like Builder/Compiler.
1817
1818 APIs that return `ZoneVector`:
1819
1820 ```c++
1821 CodeHolder code = ...;
1822
1823 // Contains all emitters attached to CodeHolder.
1824 const ZoneVector<BaseEmitter*>& emitters = code.emitters();
1825
1826 // Contains all sections managed by CodeHolder.
1827 const ZoneVector<Section*>& sections = code.sections();
1828
1829 // Contains all LabelEntry records associated with created Labels.
1830 const ZoneVector<LabelEntry*>& labelEntries = code.labelEntries();
1831
1832 // Contains all RelocEntry records that describe relocations.
1833 const ZoneVector<RelocEntry*>& relocEntries = code.relocEntries();
1834 ```
1835
1836 AsmJit's `ZoneVector<T>` has overloaded array access operator to make it possible accessing its elements through operator[]. Some standard functions like `empty()`, `size()`, and `data()` are provided as well. Vectors are also iterable through range-based for loop:
1837
1838 ```c++
1839 CodeHolder code = ...;
1840
1841 for (LabelEntry* le : code.labelEntries()) {
1842 printf("Label #%u {Bound=%s Offset=%llu}",
1843 le->id(),
1844 le->isBound() ? "true" : "false",
1845 (unsigned long long)le->offset());
1846 }
1847 ```
1848
1849
1850 Support
1851 -------
1852
1853 AsmJit is an open-source library released under a permissive ZLIB license, which makes it possible to use it freely in any open-source or commercial product. Free support is available through issues and gitter channel, which is very active. Commercial support is currently individual and can be negotiated on demand. It includes consultation, priority bug fixing, review of code that uses AsmJit, porting code to the latest AsmJit, and implementation of new AsmJit features.
1854
1855 If you use AsmJit in a non-commercial project and would like to appreciate the library in the form of a donation you are welcome to support us. Donations are anonymous unless the donor lets us know otherwise. The order and format of listed donors is not guaranteed and may change in the future. Additionally, donations should be considered as an appreciation of past work and not used to gain special privileges in terms of future development. AsmJit authors reserve the right to remove a donor from the list in extreme cases of disruptive behavior against other community members. Diversity of opinions and constructive criticism will always be welcome in the AsmJit community.
1856
1857 Donation Addresses:
1858
1859 * BTC: 14dEp5h8jYSxgXB9vcjE8eh78uweD76o7W
1860 * ETH: 0xd4f0b9424cF31DF5a5359D029CF3A65c500a581E
1861 * Please contact us if you would like to donate through a different channel or to use a different crypto-currency. Wire transfers and SEPA payments are both possible.
1862
1863 Donors:
1864
1865 * [ZehMatt](https://github.com/ZehMatt)
1866
1867
1868
1869 Authors & Maintainers
1870 ---------------------
1871
1872 * Petr Kobalicek <kobalicek.petr@gmail.com>
0 <?xml version="1.0" encoding="utf-8"?>
1
2 <!-- asmjit visualizer for Visual Studio (natvis) -->
3
4 <AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
5 <Type Name="asmjit::String">
6 <Intrinsic Name="isSmall" Expression="(_type &lt; 0x1F)"/>
7 <DisplayString Condition="isSmall()">{_small.data, s8}</DisplayString>
8 <DisplayString Condition="!isSmall()">{_large.data, s8}</DisplayString>
9 <Expand HideRawView="true">
10 <Synthetic Name="_type">
11 <DisplayString Condition="(_type &lt; 0x1F)">Small</DisplayString>
12 <DisplayString Condition="(_type == 0x1F)">Large</DisplayString>
13 <DisplayString Condition="(_type &gt; 0x1F)">External</DisplayString>
14 </Synthetic>
15 <Item Name="_size" Condition="isSmall()" ExcludeView="simple">(int)_small.type, d</Item>
16 <Item Name="_size" Condition="!isSmall()" ExcludeView="simple">_large.size, d</Item>
17 <Item Name="_capacity" Condition="isSmall()" ExcludeView="simple">asmjit::String::kSSOCapacity, d</Item>
18 <Item Name="_capacity" Condition="!isSmall()" ExcludeView="simple">_large.capacity, d</Item>
19 <Item Name="_data" Condition="isSmall()" ExcludeView="simple">_small.data, s8</Item>
20 <Item Name="_data" Condition="!isSmall()" ExcludeView="simple">_large.data, s8</Item>
21 </Expand>
22 </Type>
23
24 <Type Name="asmjit::ZoneVector&lt;*&gt;">
25 <DisplayString>{{ [size={_size, d} capacity={_capacity, d}] }}</DisplayString>
26 <Expand>
27 <Item Name="_size" ExcludeView="simple">_size, d</Item>
28 <Item Name="_capacity" ExcludeView="simple">_capacity, d</Item>
29 <ArrayItems>
30 <Size>_size</Size>
31 <ValuePointer>(($T1*)_data)</ValuePointer>
32 </ArrayItems>
33 </Expand>
34 </Type>
35
36 <Type Name="asmjit::Operand_">
37 <Intrinsic Name="opType" Expression="(unsigned int)(_any.signature &amp; 0x7)"/>
38 <DisplayString Condition="opType() == 0">[None]</DisplayString>
39 <DisplayString Condition="opType() == 1">[Reg] {{ id={_reg.id, d} group={(_reg.signature &gt;&gt; 8) &amp; 0xF, d} type={(_reg.signature &gt;&gt; 4) &amp; 0x1F, d} size={(_reg.signature &gt;&gt; 24) &amp; 0xFF, d} }}</DisplayString>
40 <DisplayString Condition="opType() == 2">[Mem] {{ }}</DisplayString>
41 <DisplayString Condition="opType() == 3">[Imm] {{ val={_imm.value.i64, d} hex={_imm.value.u64, X} f64={_imm.value.f64} }}</DisplayString>
42 <DisplayString Condition="opType() == 4">[Label] {{ id={_label.id} }}</DisplayString>
43 <DisplayString Condition="opType() &gt; 4">[Unknown]</DisplayString>
44 <Expand HideRawView="true">
45 <ExpandedItem Condition="opType() == 0">_any</ExpandedItem>
46 <ExpandedItem Condition="opType() == 1">_reg</ExpandedItem>
47 <ExpandedItem Condition="opType() == 2">_mem</ExpandedItem>
48 <ExpandedItem Condition="opType() == 3">_imm</ExpandedItem>
49 <ExpandedItem Condition="opType() == 4">_label</ExpandedItem>
50 <ExpandedItem Condition="opType() &gt; 4">_any</ExpandedItem>
51 </Expand>
52 </Type>
53 </AutoVisualizer>
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_ASMJIT_H
7 #define _ASMJIT_ASMJIT_H
8
9 //! \mainpage API Reference
10 //!
11 //! AsmJit C++ API reference documentation generated by Doxygen.
12 //!
13 //! Introduction provided by the project page at https://github.com/asmjit/asmjit.
14 //!
15 //! \section main_groups Groups
16 //!
17 //! The documentation is split into the following groups:
18 //!
19 //! $$DOCS_GROUP_OVERVIEW$$
20 //!
21 //! \section main_other Other Pages
22 //!
23 //! - <a href="annotated.html">Class List</a> - List of classes sorted alphabetically
24 //! - <a href="namespaceasmjit.html">AsmJit Namespace</a> - List of symbols provided by `asmjit` namespace
25
26 //! \namespace asmjit
27 //!
28 //! Root namespace used by AsmJit.
29
30 #include "./core.h"
31
32 #ifdef ASMJIT_BUILD_X86
33 #include "./x86.h"
34 #endif
35
36 #ifdef ASMJIT_BUILD_ARM
37 #include "./arm.h"
38 #endif
39
40 #endif // _ASMJIT_ASMJIT_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_API_BUILD_P_H
7 #define _ASMJIT_CORE_API_BUILD_P_H
8
9 #define ASMJIT_EXPORTS
10
11 // Dependencies only required for asmjit build, but never exposed through public headers.
12 #ifdef _WIN32
13 #ifndef WIN32_LEAN_AND_MEAN
14 #define WIN32_LEAN_AND_MEAN
15 #endif
16 #ifndef NOMINMAX
17 #define NOMINMAX
18 #endif
19 #include <windows.h>
20 #endif
21
22 #include "./api-config.h"
23
24 #endif // _ASMJIT_CORE_API_BUILD_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_API_CONFIG_H
7 #define _ASMJIT_CORE_API_CONFIG_H
8
9 // ============================================================================
10 // [asmjit::Version]
11 // ============================================================================
12
13 #define ASMJIT_LIBRARY_VERSION 0x010200 /* 1.2.0 */
14
15 // ============================================================================
16 // [asmjit::Options]
17 // ============================================================================
18
19 // AsmJit Static Builds and Embedding
20 // ----------------------------------
21 //
22 // These definitions can be used to enable static library build. Embed is used
23 // when AsmJit's source code is embedded directly in another project, implies
24 // static build as well.
25 //
26 // #define ASMJIT_EMBED // Asmjit is embedded (implies ASMJIT_BUILD_STATIC).
27 // #define ASMJIT_STATIC // Enable static-library build.
28
29 // AsmJit Build Mode
30 // -----------------
31 //
32 // These definitions control the build mode and tracing support. The build mode
33 // should be auto-detected at compile time, but it's possible to override it in
34 // case that the auto-detection fails.
35 //
36 // Tracing is a feature that is never compiled by default and it's only used to
37 // debug AsmJit itself.
38 //
39 // #define ASMJIT_BUILD_DEBUG // Always use debug-mode (ASMJIT_ASSERT enabled).
40 // #define ASMJIT_BUILD_RELEASE // Always use release-mode (ASMJIT_ASSERT disabled).
41
42 // AsmJit Build Backends
43 // ---------------------
44 //
45 // These definitions control which backends to compile. If none of these is
46 // defined AsmJit will use host architecture by default (for JIT code generation).
47 //
48 // #define ASMJIT_BUILD_X86 // Enable X86 targets (X86 and X86_64).
49 // #define ASMJIT_BUILD_ARM // Enable ARM targets (ARM and AArch64).
50 // #define ASMJIT_BUILD_HOST // Enable targets based on target arch (default).
51
52 // AsmJit Build Options
53 // --------------------
54 //
55 // Flags can be defined to disable standard features. These are handy especially
56 // when building AsmJit statically and some features are not needed or unwanted
57 // (like BaseCompiler).
58 //
59 // AsmJit features are enabled by default.
60 // #define ASMJIT_NO_BUILDER // Disable Builder (completely).
61 // #define ASMJIT_NO_COMPILER // Disable Compiler (completely).
62 // #define ASMJIT_NO_JIT // Disable JIT memory manager and JitRuntime.
63 // #define ASMJIT_NO_LOGGING // Disable logging and formatting (completely).
64 // #define ASMJIT_NO_TEXT // Disable everything that contains text
65 // // representation (instructions, errors, ...).
66 // #define ASMJIT_NO_VALIDATION // Disable validation API and options.
67 // #define ASMJIT_NO_INTROSPECTION // Disable API related to instruction database
68 // // (validation, cpu features, rw-info, etc).
69
// Prevent compile-time errors caused by misconfiguration.
#if defined(ASMJIT_NO_TEXT) && !defined(ASMJIT_NO_LOGGING)
  // FIX: the original used a bare `#pragma "..."`, which is not a diagnostic -
  // unknown pragmas are silently ignored, so the warning never appeared. Use
  // `#pragma message(...)` like the ASMJIT_NO_INTROSPECTION guard below.
  #pragma message("ASMJIT_NO_TEXT can only be defined when ASMJIT_NO_LOGGING is defined")
  #undef ASMJIT_NO_TEXT
#endif
75
76 #if defined(ASMJIT_NO_INTROSPECTION) && !defined(ASMJIT_NO_COMPILER)
77 #pragma message("ASMJIT_NO_INTROSPECTION can only be defined when ASMJIT_NO_COMPILER is defined")
78 #undef ASMJIT_NO_INTROSPECTION
79 #endif
80
81 // ============================================================================
82 // [asmjit::Dependencies]
83 // ============================================================================
84
85 // We really want std-types as globals.
86 #include <stdarg.h>
87 #include <stddef.h>
88 #include <stdint.h>
89 #include <stdio.h>
90 #include <stdlib.h>
91 #include <string.h>
92
93 #include <new>
94 #include <limits>
95 #include <type_traits>
96 #include <utility>
97
98 #if !defined(_WIN32) && !defined(__EMSCRIPTEN__)
99 #include <pthread.h>
100 #endif
101
102 // ============================================================================
103 // [asmjit::Build - Globals - Deprecated]
104 // ============================================================================
105
106 // DEPRECATED: Will be removed in the future.
107 #if defined(ASMJIT_BUILD_EMBED) || defined(ASMJIT_BUILD_STATIC)
108 #if defined(ASMJIT_BUILD_EMBED)
109 #pragma message("'ASMJIT_BUILD_EMBED' is deprecated, use 'ASMJIT_STATIC'")
110 #endif
111 #if defined(ASMJIT_BUILD_STATIC)
112 #pragma message("'ASMJIT_BUILD_STATIC' is deprecated, use 'ASMJIT_STATIC'")
113 #endif
114
115 #if !defined(ASMJIT_STATIC)
116 #define ASMJIT_STATIC
117 #endif
118 #endif
119
120 // ============================================================================
121 // [asmjit::Build - Globals - Build Mode]
122 // ============================================================================
123
124 // Detect ASMJIT_BUILD_DEBUG and ASMJIT_BUILD_RELEASE if not defined.
125 #if !defined(ASMJIT_BUILD_DEBUG) && !defined(ASMJIT_BUILD_RELEASE)
126 #if !defined(NDEBUG)
127 #define ASMJIT_BUILD_DEBUG
128 #else
129 #define ASMJIT_BUILD_RELEASE
130 #endif
131 #endif
132
133 // ============================================================================
134 // [asmjit::Build - Globals - Target Architecture]
135 // ============================================================================
136
137 #if defined(_M_X64) || defined(__x86_64__)
138 #define ASMJIT_ARCH_X86 64
139 #elif defined(_M_IX86) || defined(__X86__) || defined(__i386__)
140 #define ASMJIT_ARCH_X86 32
141 #else
142 #define ASMJIT_ARCH_X86 0
143 #endif
144
145 #if defined(__arm64__) || defined(__aarch64__)
146 # define ASMJIT_ARCH_ARM 64
147 #elif defined(_M_ARM) || defined(_M_ARMT) || defined(__arm__) || defined(__thumb__) || defined(__thumb2__)
148 #define ASMJIT_ARCH_ARM 32
149 #else
150 #define ASMJIT_ARCH_ARM 0
151 #endif
152
153 #if defined(_MIPS_ARCH_MIPS64) || defined(__mips64)
154 #define ASMJIT_ARCH_MIPS 64
155 #elif defined(_MIPS_ARCH_MIPS32) || defined(_M_MRX000) || defined(__mips__)
156 #define ASMJIT_ARCH_MIPS 32
157 #else
158 #define ASMJIT_ARCH_MIPS 0
159 #endif
160
161 #define ASMJIT_ARCH_BITS (ASMJIT_ARCH_X86 | ASMJIT_ARCH_ARM | ASMJIT_ARCH_MIPS)
162 #if ASMJIT_ARCH_BITS == 0
163 #undef ASMJIT_ARCH_BITS
164 #if defined (__LP64__) || defined(_LP64)
165 #define ASMJIT_ARCH_BITS 64
166 #else
167 #define ASMJIT_ARCH_BITS 32
168 #endif
169 #endif
170
171 #if (defined(__ARMEB__)) || \
172 (defined(__MIPSEB__)) || \
173 (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
174 #define ASMJIT_ARCH_LE 0
175 #define ASMJIT_ARCH_BE 1
176 #else
177 #define ASMJIT_ARCH_LE 1
178 #define ASMJIT_ARCH_BE 0
179 #endif
180
181 // Build host architecture if no architecture is selected.
182 #if !defined(ASMJIT_BUILD_HOST) && \
183 !defined(ASMJIT_BUILD_X86) && \
184 !defined(ASMJIT_BUILD_ARM)
185 #define ASMJIT_BUILD_HOST
186 #endif
187
188 // Detect host architecture if building only for host.
189 #if ASMJIT_ARCH_X86 && defined(ASMJIT_BUILD_HOST) && !defined(ASMJIT_BUILD_X86)
190 #define ASMJIT_BUILD_X86
191 #endif
192
193 #if ASMJIT_ARCH_ARM && defined(ASMJIT_BUILD_HOST) && !defined(ASMJIT_BUILD_ARM)
194 #define ASMJIT_BUILD_ARM
195 #endif
196
197 // ============================================================================
198 // [asmjit::Build - Globals - C++ Compiler and Features Detection]
199 // ============================================================================
200
201 #define ASMJIT_CXX_CLANG 0
202 #define ASMJIT_CXX_GNU 0
203 #define ASMJIT_CXX_INTEL 0
204 #define ASMJIT_CXX_MSC 0
205 #define ASMJIT_CXX_MAKE_VER(MAJOR, MINOR, PATCH) ((MAJOR) * 10000000 + (MINOR) * 100000 + (PATCH))
206
207 // Intel Compiler [pretends to be GNU or MSC, so it must be checked first]:
208 // - https://software.intel.com/en-us/articles/c0x-features-supported-by-intel-c-compiler
209 // - https://software.intel.com/en-us/articles/c14-features-supported-by-intel-c-compiler
210 // - https://software.intel.com/en-us/articles/c17-features-supported-by-intel-c-compiler
211 #if defined(__INTEL_COMPILER)
212
213 #undef ASMJIT_CXX_INTEL
214 #define ASMJIT_CXX_INTEL ASMJIT_CXX_MAKE_VER(__INTEL_COMPILER / 100, (__INTEL_COMPILER / 10) % 10, __INTEL_COMPILER % 10)
215
216 // MSC Compiler:
217 // - https://msdn.microsoft.com/en-us/library/hh567368.aspx
218 //
219 // Version List:
220 // - 16.00.0 == VS2010
221 // - 17.00.0 == VS2012
222 // - 18.00.0 == VS2013
223 // - 19.00.0 == VS2015
224 // - 19.10.0 == VS2017
225 #elif defined(_MSC_VER) && defined(_MSC_FULL_VER)
226
227 #undef ASMJIT_CXX_MSC
228 #if _MSC_VER == _MSC_FULL_VER / 10000
229 #define ASMJIT_CXX_MSC ASMJIT_CXX_MAKE_VER(_MSC_VER / 100, _MSC_VER % 100, _MSC_FULL_VER % 10000)
230 #else
231 #define ASMJIT_CXX_MSC ASMJIT_CXX_MAKE_VER(_MSC_VER / 100, (_MSC_FULL_VER / 100000) % 100, _MSC_FULL_VER % 100000)
232 #endif
233
234 // Clang Compiler [Pretends to be GNU, so it must be checked before]:
235 // - https://clang.llvm.org/cxx_status.html
236 #elif defined(__clang_major__) && defined(__clang_minor__) && defined(__clang_patchlevel__)
237
238 #undef ASMJIT_CXX_CLANG
239 #define ASMJIT_CXX_CLANG ASMJIT_CXX_MAKE_VER(__clang_major__, __clang_minor__, __clang_patchlevel__)
240
241 // GNU Compiler:
242 // - https://gcc.gnu.org/projects/cxx-status.html
243 #elif defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
244
245 #undef ASMJIT_CXX_GNU
246 #define ASMJIT_CXX_GNU ASMJIT_CXX_MAKE_VER(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
247
248 #endif
249
250 // Compiler features detection macros.
251 #if ASMJIT_CXX_CLANG && defined(__has_builtin)
252 #define ASMJIT_CXX_HAS_BUILTIN(NAME, CHECK) (__has_builtin(NAME))
253 #else
254 #define ASMJIT_CXX_HAS_BUILTIN(NAME, CHECK) (!(!(CHECK)))
255 #endif
256
257 #if ASMJIT_CXX_CLANG && defined(__has_extension)
258 #define ASMJIT_CXX_HAS_FEATURE(NAME, CHECK) (__has_extension(NAME))
259 #elif ASMJIT_CXX_CLANG && defined(__has_feature)
260 #define ASMJIT_CXX_HAS_FEATURE(NAME, CHECK) (__has_feature(NAME))
261 #else
262 #define ASMJIT_CXX_HAS_FEATURE(NAME, CHECK) (!(!(CHECK)))
263 #endif
264
265 #if ASMJIT_CXX_CLANG && defined(__has_attribute)
266 #define ASMJIT_CXX_HAS_ATTRIBUTE(NAME, CHECK) (__has_attribute(NAME))
267 #else
268 #define ASMJIT_CXX_HAS_ATTRIBUTE(NAME, CHECK) (!(!(CHECK)))
269 #endif
270
271 #if ASMJIT_CXX_CLANG && defined(__has_cpp_attribute)
272 #define ASMJIT_CXX_HAS_CPP_ATTRIBUTE(NAME, CHECK) (__has_cpp_attribute(NAME))
273 #else
274 #define ASMJIT_CXX_HAS_CPP_ATTRIBUTE(NAME, CHECK) (!(!(CHECK)))
275 #endif
276
277 // Compiler features by vendor.
278 #if defined(_MSC_VER) && !defined(_NATIVE_WCHAR_T_DEFINED)
279 #define ASMJIT_CXX_HAS_NATIVE_WCHAR_T 0
280 #else
281 #define ASMJIT_CXX_HAS_NATIVE_WCHAR_T 1
282 #endif
283
284 #if ASMJIT_CXX_HAS_FEATURE(cxx_unicode_literals, ( \
285 (ASMJIT_CXX_INTEL >= ASMJIT_CXX_MAKE_VER(14, 0, 0)) || \
286 (ASMJIT_CXX_MSC >= ASMJIT_CXX_MAKE_VER(19, 0, 0)) || \
287 (ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4 , 5, 0) && __cplusplus >= 201103L) ))
288 #define ASMJIT_CXX_HAS_UNICODE_LITERALS 1
289 #else
290 #define ASMJIT_CXX_HAS_UNICODE_LITERALS 0
291 #endif
292
293 // ============================================================================
294 // [asmjit::Build - Globals - API Decorators & Language Extensions]
295 // ============================================================================
296
297 // API (Export / Import).
298 #if !defined(ASMJIT_STATIC)
299 #if defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__))
300 #ifdef ASMJIT_EXPORTS
301 #define ASMJIT_API __declspec(dllexport)
302 #else
303 #define ASMJIT_API __declspec(dllimport)
304 #endif
305 #elif defined(_WIN32) && defined(__GNUC__)
306 #ifdef ASMJIT_EXPORTS
307 #define ASMJIT_API __attribute__((__dllexport__))
308 #else
309 #define ASMJIT_API __attribute__((__dllimport__))
310 #endif
311 #elif defined(__GNUC__)
312 #define ASMJIT_API __attribute__((__visibility__("default")))
313 #endif
314 #endif
315
316 #if !defined(ASMJIT_API)
317 #define ASMJIT_API
318 #endif
319
320 #if !defined(ASMJIT_VARAPI)
321 #define ASMJIT_VARAPI extern ASMJIT_API
322 #endif
323
324 // This is basically a workaround. When using MSVC and marking class as DLL
325 // export everything gets exported, which is unwanted in most projects. MSVC
326 // automatically exports typeinfo and vtable if at least one symbol of the
327 // class is exported. However, GCC has some strange behavior that even if
328 // one or more symbol is exported it doesn't export typeinfo unless the
329 // class itself is decorated with "visibility(default)" (i.e. ASMJIT_API).
330 #if !defined(_WIN32) && defined(__GNUC__)
331 #define ASMJIT_VIRTAPI ASMJIT_API
332 #else
333 #define ASMJIT_VIRTAPI
334 #endif
335
336 // Function attributes.
337 #if !defined(ASMJIT_BUILD_DEBUG) && defined(__GNUC__)
338 #define ASMJIT_INLINE inline __attribute__((__always_inline__))
339 #elif !defined(ASMJIT_BUILD_DEBUG) && defined(_MSC_VER)
340 #define ASMJIT_INLINE __forceinline
341 #else
342 #define ASMJIT_INLINE inline
343 #endif
344
345 #if defined(__GNUC__)
346 #define ASMJIT_NOINLINE __attribute__((__noinline__))
347 #define ASMJIT_NORETURN __attribute__((__noreturn__))
348 #elif defined(_MSC_VER)
349 #define ASMJIT_NOINLINE __declspec(noinline)
350 #define ASMJIT_NORETURN __declspec(noreturn)
351 #else
352 #define ASMJIT_NOINLINE
353 #define ASMJIT_NORETURN
354 #endif
355
356 // Calling conventions.
357 #if ASMJIT_ARCH_X86 == 32 && defined(__GNUC__)
358 #define ASMJIT_CDECL __attribute__((__cdecl__))
359 #define ASMJIT_STDCALL __attribute__((__stdcall__))
360 #define ASMJIT_FASTCALL __attribute__((__fastcall__))
361 #define ASMJIT_REGPARM(N) __attribute__((__regparm__(N)))
362 #elif ASMJIT_ARCH_X86 == 32 && defined(_MSC_VER)
363 #define ASMJIT_CDECL __cdecl
364 #define ASMJIT_STDCALL __stdcall
365 #define ASMJIT_FASTCALL __fastcall
366 #define ASMJIT_REGPARM(N)
367 #else
368 #define ASMJIT_CDECL
369 #define ASMJIT_STDCALL
370 #define ASMJIT_FASTCALL
371 #define ASMJIT_REGPARM(N)
372 #endif
373
374 // Type alignment (not allowed by C++11 'alignas' keyword).
375 #if defined(__GNUC__)
376 #define ASMJIT_ALIGN_TYPE(TYPE, N) __attribute__((__aligned__(N))) TYPE
377 #elif defined(_MSC_VER)
378 #define ASMJIT_ALIGN_TYPE(TYPE, N) __declspec(align(N)) TYPE
379 #else
380 #define ASMJIT_ALIGN_TYPE(TYPE, N) TYPE
381 #endif
382
383 #if defined(__GNUC__)
384 #define ASMJIT_MAY_ALIAS __attribute__((__may_alias__))
385 #else
386 #define ASMJIT_MAY_ALIAS
387 #endif
388
389 // Annotations.
390 #if defined(__GNUC__)
391 #define ASMJIT_LIKELY(...) __builtin_expect(!!(__VA_ARGS__), 1)
392 #define ASMJIT_UNLIKELY(...) __builtin_expect(!!(__VA_ARGS__), 0)
393 #else
394 #define ASMJIT_LIKELY(...) (__VA_ARGS__)
395 #define ASMJIT_UNLIKELY(...) (__VA_ARGS__)
396 #endif
397
398 #if defined(__clang__) && __cplusplus >= 201103L
399 #define ASMJIT_FALLTHROUGH [[clang::fallthrough]]
400 #elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(7, 0, 0)
401 #define ASMJIT_FALLTHROUGH __attribute__((__fallthrough__))
402 #else
403 #define ASMJIT_FALLTHROUGH ((void)0) /* fallthrough */
404 #endif
405
406 #define ASMJIT_UNUSED(X) (void)(X)
407
408 // Utilities.
409 #define ASMJIT_OFFSET_OF(STRUCT, MEMBER) ((int)(intptr_t)((const char*)&((const STRUCT*)0x100)->MEMBER) - 0x100)
410 #define ASMJIT_ARRAY_SIZE(X) uint32_t(sizeof(X) / sizeof(X[0]))
411
412 #if ASMJIT_CXX_HAS_ATTRIBUTE(attribute_deprecated_with_message, ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 5, 0))
413 #define ASMJIT_DEPRECATED(DECL, MESSAGE) DECL __attribute__((__deprecated__(MESSAGE)))
414 #elif ASMJIT_MSC
415 #define ASMJIT_DEPRECATED(DECL, MESSAGE) __declspec(deprecated(MESSAGE)) DECL
416 #else
417 #define ASMJIT_DEPRECATED(DECL, MESSAGE) DECL
418 #endif
419
420 #if ASMJIT_CXX_HAS_ATTRIBUTE(no_sanitize, 0)
421 #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF __attribute__((__no_sanitize__("undefined")))
422 #elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 9, 0)
423 #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF __attribute__((__no_sanitize_undefined__))
424 #else
425 #define ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF
426 #endif
427
428 // ============================================================================
429 // [asmjit::Build - Globals - Begin-Namespace / End-Namespace]
430 // ============================================================================
431
432 #if defined(__clang__)
433 #define ASMJIT_BEGIN_NAMESPACE \
434 namespace asmjit { \
435 _Pragma("clang diagnostic push") \
436 _Pragma("clang diagnostic ignored \"-Wconstant-logical-operand\"") \
437 _Pragma("clang diagnostic ignored \"-Wunnamed-type-template-args\"")
438 #define ASMJIT_END_NAMESPACE \
439 _Pragma("clang diagnostic pop") \
440 }
441 #elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 0, 0) && \
442 ASMJIT_CXX_GNU < ASMJIT_CXX_MAKE_VER(5, 0, 0)
443 #define ASMJIT_BEGIN_NAMESPACE \
444 namespace asmjit { \
445 _Pragma("GCC diagnostic push") \
446 _Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"")
447 #define ASMJIT_END_NAMESPACE \
448 _Pragma("GCC diagnostic pop") \
449 }
450 #elif ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(8, 0, 0)
451 #define ASMJIT_BEGIN_NAMESPACE \
452 namespace asmjit { \
453 _Pragma("GCC diagnostic push") \
454 _Pragma("GCC diagnostic ignored \"-Wclass-memaccess\"")
455 #define ASMJIT_END_NAMESPACE \
456 _Pragma("GCC diagnostic pop") \
457 }
458 #elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
459 #define ASMJIT_BEGIN_NAMESPACE \
460 namespace asmjit { \
461 __pragma(warning(push)) \
462 __pragma(warning(disable: 4127)) /* conditional expression is constant*/\
463 __pragma(warning(disable: 4201)) /* nameless struct/union */
464 #define ASMJIT_END_NAMESPACE \
465 __pragma(warning(pop)) \
466 }
467 #endif
468
469 #if !defined(ASMJIT_BEGIN_NAMESPACE) && !defined(ASMJIT_END_NAMESPACE)
470 #define ASMJIT_BEGIN_NAMESPACE namespace asmjit {
471 #define ASMJIT_END_NAMESPACE }
472 #endif
473
474 #define ASMJIT_BEGIN_SUB_NAMESPACE(NAMESPACE) \
475 ASMJIT_BEGIN_NAMESPACE \
476 namespace NAMESPACE {
477
478 #define ASMJIT_END_SUB_NAMESPACE \
479 } \
480 ASMJIT_END_NAMESPACE
481
482 // ============================================================================
483 // [asmjit::Build - Globals - Utilities]
484 // ============================================================================
485
486 #define ASMJIT_NONCOPYABLE(...) \
487 private: \
488 __VA_ARGS__(const __VA_ARGS__& other) = delete; \
489 __VA_ARGS__& operator=(const __VA_ARGS__& other) = delete; \
490 public:
491
492 #define ASMJIT_NONCONSTRUCTIBLE(...) \
493 private: \
494 __VA_ARGS__() = delete; \
495 __VA_ARGS__(const __VA_ARGS__& other) = delete; \
496 __VA_ARGS__& operator=(const __VA_ARGS__& other) = delete; \
497 public:
498
499 // ============================================================================
500 // [asmjit::Build - Globals - Build-Only]
501 // ============================================================================
502
503 // Internal macros that are only used when building AsmJit itself.
504 #ifdef ASMJIT_EXPORTS
505 #if !defined(ASMJIT_BUILD_DEBUG) && ASMJIT_CXX_GNU >= ASMJIT_CXX_MAKE_VER(4, 4, 0)
506 #define ASMJIT_FAVOR_SIZE __attribute__((__optimize__("Os")))
507 #define ASMJIT_FAVOR_SPEED __attribute__((__optimize__("O3")))
508 #elif ASMJIT_CXX_HAS_ATTRIBUTE(__minsize__, 0)
509 #define ASMJIT_FAVOR_SIZE __attribute__((__minsize__))
510 #define ASMJIT_FAVOR_SPEED
511 #else
512 #define ASMJIT_FAVOR_SIZE
513 #define ASMJIT_FAVOR_SPEED
514 #endif
515
516 // Only turn-off these warnings when building asmjit itself.
517 #ifdef _MSC_VER
518 #ifndef _CRT_SECURE_NO_DEPRECATE
519 #define _CRT_SECURE_NO_DEPRECATE
520 #endif
521 #ifndef _CRT_SECURE_NO_WARNINGS
522 #define _CRT_SECURE_NO_WARNINGS
523 #endif
524 #endif
525 #endif
526
527 // ============================================================================
528 // [asmjit::Build - Globals - Cleanup]
529 // ============================================================================
530
531 // Undefine everything that is not used by AsmJit outside of `build.h` and that
532 // is considered private.
533 #undef ASMJIT_CXX_CLANG
534 #undef ASMJIT_CXX_GNU
535 #undef ASMJIT_CXX_INTEL
536 #undef ASMJIT_CXX_MSC
537 #undef ASMJIT_CXX_MAKE_VER
538
539 // ============================================================================
540 // [asmjit::Build - Globals - Unit Testing Boilerplate]
541 // ============================================================================
542
543 // IDE: Make sure '#ifdef'ed unit tests are properly highlighted.
544 #if defined(__INTELLISENSE__) && !defined(ASMJIT_TEST)
545 #define ASMJIT_TEST
546 #endif
547
548 // IDE: Make sure '#ifdef'ed unit tests are not disabled by IDE.
549 #if defined(ASMJIT_TEST)
550 #include "../../../test/broken.h"
551 #endif
552
553 #endif // _ASMJIT_CORE_API_CONFIG_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/arch.h"
8 #include "../core/support.h"
9 #include "../core/type.h"
10
11 #ifdef ASMJIT_BUILD_X86
12 #include "../x86/x86operand.h"
13 #endif
14
15 #ifdef ASMJIT_BUILD_ARM
16 #include "../arm/armoperand.h"
17 #endif
18
19 ASMJIT_BEGIN_NAMESPACE
20
21 // ============================================================================
22 // [asmjit::ArchInfo]
23 // ============================================================================
24
// NOTE: Keep `const constexpr` otherwise MSC would not compile this code correctly.
//
// Each entry packs four bytes (arch id, arch sub-id, GP register size, GP
// register count) into one uint32_t and the table is indexed by the arch id
// itself - `ArchInfo::init` asserts that `_id == index` after loading an
// entry, so the row order must match the `ArchInfo::Id` constants.
static const constexpr uint32_t archInfoTable[] = {
  // <--------------------+---------------------+-------------------+-------+
  //                      | Type                | SubType           | GPInfo|
  // <--------------------+---------------------+-------------------+-------+
  Support::bytepack32_4x8(ArchInfo::kIdNone , ArchInfo::kSubIdNone, 0,  0),
  Support::bytepack32_4x8(ArchInfo::kIdX86  , ArchInfo::kSubIdNone, 4,  8),
  Support::bytepack32_4x8(ArchInfo::kIdX64  , ArchInfo::kSubIdNone, 8, 16),
  Support::bytepack32_4x8(ArchInfo::kIdA32  , ArchInfo::kSubIdNone, 4, 16),
  Support::bytepack32_4x8(ArchInfo::kIdA64  , ArchInfo::kSubIdNone, 8, 32)
};
36
37 ASMJIT_FAVOR_SIZE void ArchInfo::init(uint32_t id, uint32_t subId) noexcept {
38 uint32_t index = id < ASMJIT_ARRAY_SIZE(archInfoTable) ? id : uint32_t(0);
39
40 // Make sure the `archInfoTable` array is correctly indexed.
41 _signature = archInfoTable[index];
42 ASMJIT_ASSERT(_id == index);
43
44 // Even if the architecture is not known we setup its id and sub-id,
45 // however, such architecture is not really useful.
46 _id = uint8_t(id);
47 _subId = uint8_t(subId);
48 }
49
50 // ============================================================================
51 // [asmjit::ArchUtils]
52 // ============================================================================
53
// Maps an abstract `Type::Id` (or a `BaseReg` register type, which is
// accepted and converted first) to a concrete register signature for the
// given architecture. On success `typeIdInOut` holds the normalized type id
// and `regInfo` the register signature; on failure `regInfo._signature` is 0.
ASMJIT_FAVOR_SIZE Error ArchUtils::typeIdToRegInfo(uint32_t archId, uint32_t& typeIdInOut, RegInfo& regInfo) noexcept {
  uint32_t typeId = typeIdInOut;

  // Zero the signature so the output is in a well-defined state even when
  // `typeId` turns out to be invalid.
  regInfo._signature = 0;

  // TODO: Move to X86 backend.
#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId)) {
    // Passed RegType instead of TypeId? Translate it through the backend table.
    if (typeId <= BaseReg::kTypeMax)
      typeId = x86::opData.archRegs.regTypeToTypeId[typeId];

    if (ASMJIT_UNLIKELY(!Type::isValid(typeId)))
      return DebugUtils::errored(kErrorInvalidTypeId);

    // First normalize architecture dependent types (IntPtr/UIntPtr) to a
    // fixed-width integer matching the target's pointer size.
    if (Type::isAbstract(typeId)) {
      if (typeId == Type::kIdIntPtr)
        typeId = (archId == ArchInfo::kIdX86) ? Type::kIdI32 : Type::kIdI64;
      else
        typeId = (archId == ArchInfo::kIdX86) ? Type::kIdU32 : Type::kIdU64;
    }

    // Type size helps to construct all groups of registers. If the size is
    // zero then the TypeId is invalid.
    uint32_t size = Type::sizeOf(typeId);
    if (ASMJIT_UNLIKELY(!size))
      return DebugUtils::errored(kErrorInvalidTypeId);

    // 80-bit floats have no register mapping here.
    if (ASMJIT_UNLIKELY(typeId == Type::kIdF80))
      return DebugUtils::errored(kErrorInvalidUseOfF80);

    uint32_t regType = 0;

    switch (typeId) {
      case Type::kIdI8:
      case Type::kIdU8:
        regType = x86::Reg::kTypeGpbLo;
        break;

      case Type::kIdI16:
      case Type::kIdU16:
        regType = x86::Reg::kTypeGpw;
        break;

      case Type::kIdI32:
      case Type::kIdU32:
        regType = x86::Reg::kTypeGpd;
        break;

      case Type::kIdI64:
      case Type::kIdU64:
        // 64-bit GP registers are not available in 32-bit X86 mode.
        if (archId == ArchInfo::kIdX86)
          return DebugUtils::errored(kErrorInvalidUseOfGpq);

        regType = x86::Reg::kTypeGpq;
        break;

      // F32 and F64 are always promoted to use vector registers.
      case Type::kIdF32:
        typeId = Type::kIdF32x1;
        regType = x86::Reg::kTypeXmm;
        break;

      case Type::kIdF64:
        typeId = Type::kIdF64x1;
        regType = x86::Reg::kTypeXmm;
        break;

      // Mask registers {k}.
      case Type::kIdMask8:
      case Type::kIdMask16:
      case Type::kIdMask32:
      case Type::kIdMask64:
        regType = x86::Reg::kTypeKReg;
        break;

      // MMX registers.
      case Type::kIdMmx32:
      case Type::kIdMmx64:
        regType = x86::Reg::kTypeMm;
        break;

      // XMM|YMM|ZMM registers - chosen purely by the type's byte size.
      default:
        if (size <= 16)
          regType = x86::Reg::kTypeXmm;
        else if (size == 32)
          regType = x86::Reg::kTypeYmm;
        else
          regType = x86::Reg::kTypeZmm;
        break;
    }

    typeIdInOut = typeId;
    regInfo._signature = x86::opData.archRegs.regInfo[regType].signature();
    return kErrorOk;
  }
#endif

  return DebugUtils::errored(kErrorInvalidArch);
}
157
158 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ARCH_H
7 #define _ASMJIT_CORE_ARCH_H
8
9 #include "../core/globals.h"
10 #include "../core/operand.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \addtogroup asmjit_core
15 //! \{
16
17 // ============================================================================
18 // [asmjit::ArchInfo]
19 // ============================================================================
20
//! Information about a target architecture: its id, sub-id (instruction-set
//! level or execution mode) and basic general-purpose register properties.
//! The whole state fits in a single 32-bit `_signature`.
class ArchInfo {
public:
  union {
    struct {
      //! Architecture id, see `Id`.
      uint8_t _id;
      //! Architecture sub-id, see `SubType`.
      uint8_t _subId;
      //! Default size of a general purpose register.
      uint8_t _gpSize;
      //! Count of all general purpose registers.
      uint8_t _gpCount;
    };
    //! Architecture signature (32-bit int) - aliases the four bytes above.
    uint32_t _signature;
  };

  //! Architecture id.
  enum Id : uint32_t {
    kIdNone = 0,          //!< No/Unknown architecture.

    // X86 architectures.
    kIdX86 = 1,           //!< X86 architecture (32-bit).
    kIdX64 = 2,           //!< X64 architecture (64-bit) (AMD64).

    // ARM architectures.
    kIdA32 = 3,           //!< ARM 32-bit architecture (AArch32/ARM/THUMB).
    kIdA64 = 4,           //!< ARM 64-bit architecture (AArch64).

    //! Architecture detected at compile-time (architecture of the host).
    kIdHost = ASMJIT_ARCH_X86 == 32 ? kIdX86 :
              ASMJIT_ARCH_X86 == 64 ? kIdX64 :
              ASMJIT_ARCH_ARM == 32 ? kIdA32 :
              ASMJIT_ARCH_ARM == 64 ? kIdA64 : kIdNone
  };

  //! Architecture sub-type or execution mode.
  enum SubType : uint32_t {
    kSubIdNone = 0,       //!< Default mode (or no specific mode).

    // X86 sub-types.
    kSubIdX86_AVX = 1,    //!< Code generation uses AVX by default (VEC instructions).
    kSubIdX86_AVX2 = 2,   //!< Code generation uses AVX2 by default (VEC instructions).
    kSubIdX86_AVX512 = 3, //!< Code generation uses AVX-512F by default (+32 vector regs).
    kSubIdX86_AVX512VL = 4, //!< Code generation uses AVX-512F-VL by default (+VL extensions).

    // ARM sub-types.
    kSubIdA32_Thumb = 8,  //!< THUMB|THUMBv2 sub-type (only ARM in 32-bit mode).

    // Host sub-id is picked from the highest instruction-set level the
    // compiler itself was configured to target.
#if (ASMJIT_ARCH_X86) && defined(__AVX512VL__)
    kSubIdHost = kSubIdX86_AVX512VL
#elif (ASMJIT_ARCH_X86) && defined(__AVX512F__)
    kSubIdHost = kSubIdX86_AVX512
#elif (ASMJIT_ARCH_X86) && defined(__AVX2__)
    kSubIdHost = kSubIdX86_AVX2
#elif (ASMJIT_ARCH_X86) && defined(__AVX__)
    kSubIdHost = kSubIdX86_AVX
#elif (ASMJIT_ARCH_ARM == 32) && (defined(_M_ARMT) || defined(__thumb__) || defined(__thumb2__))
    kSubIdHost = kSubIdA32_Thumb
#else
    kSubIdHost = 0
#endif
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a zeroed (uninitialized) ArchInfo; `isInitialized()` is false.
  inline ArchInfo() noexcept : _signature(0) {}
  inline ArchInfo(const ArchInfo& other) noexcept : _signature(other._signature) {}
  inline explicit ArchInfo(uint32_t type, uint32_t subType = kSubIdNone) noexcept { init(type, subType); }
  //! Creates an ArchInfo with indeterminate members (no initialization at all).
  inline explicit ArchInfo(Globals::NoInit_) noexcept {}

  //! Returns ArchInfo of the host (compile-time detected) architecture.
  inline static ArchInfo host() noexcept { return ArchInfo(kIdHost, kSubIdHost); }

  inline bool isInitialized() const noexcept { return _id != kIdNone; }

  ASMJIT_API void init(uint32_t type, uint32_t subType = kSubIdNone) noexcept;
  inline void reset() noexcept { _signature = 0; }

  //! \}

  //! \name Overloaded Operators
  //! \{

  inline ArchInfo& operator=(const ArchInfo& other) noexcept = default;

  inline bool operator==(const ArchInfo& other) const noexcept { return _signature == other._signature; }
  inline bool operator!=(const ArchInfo& other) const noexcept { return _signature != other._signature; }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the architecture id, see `Id`.
  inline uint32_t archId() const noexcept { return _id; }

  //! Returns the architecture sub-id, see `SubType`.
  //!
  //! X86 & X64
  //! ---------
  //!
  //! Architecture subtype describe the highest instruction-set level that can
  //! be used.
  //!
  //! A32 & A64
  //! ---------
  //!
  //! Architecture mode means the instruction encoding to be used when generating
  //! machine code, thus mode can be used to force generation of THUMB and THUMBv2
  //! encoding or regular ARM encoding.
  inline uint32_t archSubId() const noexcept { return _subId; }

  //! Tests whether this architecture is 32-bit (GP register size is 4 bytes).
  inline bool is32Bit() const noexcept { return _gpSize == 4; }
  //! Tests whether this architecture is 64-bit (GP register size is 8 bytes).
  inline bool is64Bit() const noexcept { return _gpSize == 8; }

  //! Tests whether this architecture is X86, X64.
  inline bool isX86Family() const noexcept { return isX86Family(_id); }
  //! Tests whether this architecture is ARM32 or ARM64.
  inline bool isArmFamily() const noexcept { return isArmFamily(_id); }

  //! Returns the native size of a general-purpose register.
  inline uint32_t gpSize() const noexcept { return _gpSize; }
  //! Returns number of general-purpose registers.
  inline uint32_t gpCount() const noexcept { return _gpCount; }

  //! \}

  //! \name Static Functions
  //! \{

  //! Tests whether `archId` is kIdX86 or kIdX64.
  static inline bool isX86Family(uint32_t archId) noexcept { return archId >= kIdX86 && archId <= kIdX64; }
  //! Tests whether `archId` is kIdA32 or kIdA64.
  static inline bool isArmFamily(uint32_t archId) noexcept { return archId >= kIdA32 && archId <= kIdA64; }

  //! \}
};
159
160 // ============================================================================
161 // [asmjit::ArchRegs]
162 // ============================================================================
163
//! Information about all architecture registers.
//!
//! All three tables are indexed by `BaseReg::RegType` and are expected to be
//! filled by an architecture backend (see the x86 `opData.archRegs` use in
//! `ArchUtils::typeIdToRegInfo`).
struct ArchRegs {
  //! Register information and signatures indexed by `BaseReg::RegType`.
  RegInfo regInfo[BaseReg::kTypeMax + 1];
  //! Count (maximum) of registers per `BaseReg::RegType`.
  uint8_t regCount[BaseReg::kTypeMax + 1];
  //! Converts RegType to TypeId, see `Type::Id`.
  uint8_t regTypeToTypeId[BaseReg::kTypeMax + 1];
};
173
174 // ============================================================================
175 // [asmjit::ArchUtils]
176 // ============================================================================
177
//! Static architecture utilities.
struct ArchUtils {
  //! Maps a `Type::Id` (in/out, normalized on success) to a register
  //! signature for `archId`; returns `kErrorInvalidArch` for unsupported
  //! architectures and `kErrorInvalidTypeId` for invalid type ids.
  ASMJIT_API static Error typeIdToRegInfo(uint32_t archId, uint32_t& typeIdInOut, RegInfo& regInfo) noexcept;
};
181
182 //! \}
183
184 ASMJIT_END_NAMESPACE
185
186 #endif // _ASMJIT_CORE_ARCH_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/assembler.h"
8 #include "../core/codebufferwriter_p.h"
9 #include "../core/constpool.h"
10 #include "../core/logging.h"
11 #include "../core/support.h"
12
13 ASMJIT_BEGIN_NAMESPACE
14
15 // ============================================================================
16 // [asmjit::BaseAssembler - Construction / Destruction]
17 // ============================================================================
18
// Constructs a detached assembler - the section and all cached buffer
// pointers start as null and are assigned later when a section is selected.
BaseAssembler::BaseAssembler() noexcept
  : BaseEmitter(kTypeAssembler),
    _section(nullptr),
    _bufferData(nullptr),
    _bufferEnd(nullptr),
    _bufferPtr(nullptr),
    _op4(),
    _op5() {}
BaseAssembler::~BaseAssembler() noexcept {}
28
29 // ============================================================================
30 // [asmjit::BaseAssembler - Buffer Management]
31 // ============================================================================
32
33 Error BaseAssembler::setOffset(size_t offset) {
34 if (ASMJIT_UNLIKELY(!_code))
35 return DebugUtils::errored(kErrorNotInitialized);
36
37 size_t size = Support::max<size_t>(_section->bufferSize(), this->offset());
38 if (ASMJIT_UNLIKELY(offset > size))
39 return reportError(DebugUtils::errored(kErrorInvalidArgument));
40
41 _bufferPtr = _bufferData + offset;
42 return kErrorOk;
43 }
44
45 // ============================================================================
46 // [asmjit::BaseAssembler - Logging]
47 // ============================================================================
48
49 #ifndef ASMJIT_NO_LOGGING
50 static void BaseAssembler_logLabel(BaseAssembler* self, const Label& label) noexcept {
51 Logger* logger = self->_code->_logger;
52
53 StringTmp<512> sb;
54 size_t binSize = logger->hasFlag(FormatOptions::kFlagMachineCode) ? size_t(0) : std::numeric_limits<size_t>::max();
55
56 sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationLabel));
57 Logging::formatLabel(sb, logger->flags(), self, label.id());
58 sb.appendChar(':');
59 Logging::formatLine(sb, nullptr, binSize, 0, 0, self->_inlineComment);
60 logger->log(sb.data(), sb.size());
61 }
62 #endif
63
64 // ============================================================================
65 // [asmjit::BaseAssembler - Section Management]
66 // ============================================================================
67
68 static void BaseAssembler_initSection(BaseAssembler* self, Section* section) noexcept {
69 uint8_t* p = section->_buffer._data;
70
71 self->_section = section;
72 self->_bufferData = p;
73 self->_bufferPtr = p + section->_buffer._size;
74 self->_bufferEnd = p + section->_buffer._capacity;
75 }
76
77 Error BaseAssembler::section(Section* section) {
78 if (ASMJIT_UNLIKELY(!_code))
79 return reportError(DebugUtils::errored(kErrorNotInitialized));
80
81 if (!_code->isSectionValid(section->id()) || _code->_sections[section->id()] != section)
82 return reportError(DebugUtils::errored(kErrorInvalidSection));
83
84 #ifndef ASMJIT_NO_LOGGING
85 if (hasEmitterOption(kOptionLoggingEnabled))
86 _code->_logger->logf(".section %s {#%u}\n", section->name(), section->id());
87 #endif
88
89 BaseAssembler_initSection(this, section);
90 return kErrorOk;
91 }
92
93 // ============================================================================
94 // [asmjit::BaseAssembler - Label Management]
95 // ============================================================================
96
97 Label BaseAssembler::newLabel() {
98 uint32_t labelId = Globals::kInvalidId;
99 if (ASMJIT_LIKELY(_code)) {
100 LabelEntry* le;
101 Error err = _code->newLabelEntry(&le);
102 if (ASMJIT_UNLIKELY(err))
103 reportError(err);
104 labelId = le->id();
105 }
106 return Label(labelId);
107 }
108
109 Label BaseAssembler::newNamedLabel(const char* name, size_t nameSize, uint32_t type, uint32_t parentId) {
110 uint32_t labelId = Globals::kInvalidId;
111 if (ASMJIT_LIKELY(_code)) {
112 LabelEntry* le;
113 Error err = _code->newNamedLabelEntry(&le, name, nameSize, type, parentId);
114 if (ASMJIT_UNLIKELY(err))
115 reportError(err);
116 labelId = le->id();
117 }
118 return Label(labelId);
119 }
120
121 Error BaseAssembler::bind(const Label& label) {
122 if (ASMJIT_UNLIKELY(!_code))
123 return DebugUtils::errored(kErrorNotInitialized);
124
125 Error err = _code->bindLabel(label, _section->id(), offset());
126
127 #ifndef ASMJIT_NO_LOGGING
128 if (hasEmitterOption(kOptionLoggingEnabled))
129 BaseAssembler_logLabel(this, label);
130 #endif
131
132 resetInlineComment();
133 if (err)
134 return reportError(err);
135
136 return kErrorOk;
137 }
138
139 // ============================================================================
140 // [asmjit::BaseAssembler - Emit (Low-Level)]
141 // ============================================================================
142
143 Error BaseAssembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) {
144 _op4 = o4;
145 _op5 = o5;
146 _instOptions |= BaseInst::kOptionOp4Op5Used;
147 return _emit(instId, o0, o1, o2, o3);
148 }
149
150 Error BaseAssembler::_emitOpArray(uint32_t instId, const Operand_* operands, size_t count) {
151 const Operand_* o0 = &operands[0];
152 const Operand_* o1 = &operands[1];
153 const Operand_* o2 = &operands[2];
154 const Operand_* o3 = &operands[3];
155
156 switch (count) {
157 case 0: o0 = &Globals::none; ASMJIT_FALLTHROUGH;
158 case 1: o1 = &Globals::none; ASMJIT_FALLTHROUGH;
159 case 2: o2 = &Globals::none; ASMJIT_FALLTHROUGH;
160 case 3: o3 = &Globals::none; ASMJIT_FALLTHROUGH;
161 case 4:
162 return _emit(instId, *o0, *o1, *o2, *o3);
163
164 case 5:
165 _op4 = operands[4];
166 _op5.reset();
167 _instOptions |= BaseInst::kOptionOp4Op5Used;
168 return _emit(instId, *o0, *o1, *o2, *o3);
169
170 case 6:
171 _op4 = operands[4];
172 _op5 = operands[5];
173 _instOptions |= BaseInst::kOptionOp4Op5Used;
174 return _emit(instId, *o0, *o1, *o2, *o3);
175
176 default:
177 return DebugUtils::errored(kErrorInvalidArgument);
178 }
179 }
180
181 #ifndef ASMJIT_NO_LOGGING
// Formats and logs one successfully emitted instruction. Called by backends
// only when logging is enabled; `afterCursor` points past the encoded bytes.
void BaseAssembler::_emitLog(
  uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3,
  uint32_t relSize, uint32_t immSize, uint8_t* afterCursor) {

  Logger* logger = _code->logger();
  ASMJIT_ASSERT(logger != nullptr);
  ASMJIT_ASSERT(options & BaseEmitter::kOptionLoggingEnabled);

  StringTmp<256> sb;
  uint32_t flags = logger->flags();

  // Encoded size of the instruction that was just written to the buffer.
  uint8_t* beforeCursor = _bufferPtr;
  intptr_t emittedSize = (intptr_t)(afterCursor - beforeCursor);

  // Repack operands into a contiguous array for the formatter. Slots 4/5 are
  // only meaningful when the 6-operand `_emit()` overload set the flag.
  Operand_ operands[Globals::kMaxOpCount];
  operands[0].copyFrom(o0);
  operands[1].copyFrom(o1);
  operands[2].copyFrom(o2);
  operands[3].copyFrom(o3);

  if (options & BaseInst::kOptionOp4Op5Used) {
    operands[4].copyFrom(_op4);
    operands[5].copyFrom(_op5);
  }
  else {
    operands[4].reset();
    operands[5].reset();
  }

  sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationCode));
  Logging::formatInstruction(sb, flags, this, archId(), BaseInst(instId, options, _extraReg), operands, Globals::kMaxOpCount);

  // SIZE_MAX signals formatLine() to omit the machine-code column entirely.
  if ((flags & FormatOptions::kFlagMachineCode) != 0)
    Logging::formatLine(sb, _bufferPtr, size_t(emittedSize), relSize, immSize, inlineComment());
  else
    Logging::formatLine(sb, nullptr, std::numeric_limits<size_t>::max(), 0, 0, inlineComment());
  logger->log(sb);
}
220
// Builds a human-readable message for an instruction that failed to encode
// ("<error>: <formatted instruction> [; comment]") and reports it. Also
// clears per-instruction state so the next emit starts clean.
Error BaseAssembler::_emitFailed(
  Error err,
  uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) {

  StringTmp<256> sb;
  sb.appendString(DebugUtils::errorAsString(err));
  sb.appendString(": ");

  // Same operand repacking as in _emitLog() - slots 4/5 are valid only when
  // the 6-operand `_emit()` overload flagged them as used.
  Operand_ operands[Globals::kMaxOpCount];
  operands[0].copyFrom(o0);
  operands[1].copyFrom(o1);
  operands[2].copyFrom(o2);
  operands[3].copyFrom(o3);

  if (options & BaseInst::kOptionOp4Op5Used) {
    operands[4].copyFrom(_op4);
    operands[5].copyFrom(_op5);
  }
  else {
    operands[4].reset();
    operands[5].reset();
  }

  Logging::formatInstruction(sb, 0, this, archId(), BaseInst(instId, options, _extraReg), operands, Globals::kMaxOpCount);

  if (inlineComment()) {
    sb.appendString(" ; ");
    sb.appendString(inlineComment());
  }

  // Reset instruction-scoped state regardless of the error being reported.
  resetInstOptions();
  resetExtraReg();
  resetInlineComment();
  return reportError(err, sb.data());
}
256 #endif
257
258 // ============================================================================
259 // [asmjit::BaseAssembler - Embed]
260 // ============================================================================
261
// Maps log2(dataSize) to a data-directive mnemonic used only for logging:
// "db" (1 byte), "dw" (2), "dd" (4), "dq" (8). Indexed via Support::ctz().
struct DataSizeByPower {
  char str[4];
};

static const DataSizeByPower dataSizeByPowerTable[] = {
  { "db" },
  { "dw" },
  { "dd" },
  { "dq" }
};
272
273 Error BaseAssembler::embed(const void* data, uint32_t dataSize) {
274 if (ASMJIT_UNLIKELY(!_code))
275 return DebugUtils::errored(kErrorNotInitialized);
276
277 if (dataSize == 0)
278 return DebugUtils::errored(kErrorInvalidArgument);
279
280 CodeBufferWriter writer(this);
281 ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize));
282
283 writer.emitData(data, dataSize);
284
285 #ifndef ASMJIT_NO_LOGGING
286 if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled)))
287 _code->_logger->logBinary(data, dataSize);
288 #endif
289
290 writer.done(this);
291 return kErrorOk;
292 }
293
// Embeds a pointer-sized placeholder that relocation resolves to the
// absolute address of `label` (RelToAbs relocation).
Error BaseAssembler::embedLabel(const Label& label) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

  ASMJIT_ASSERT(_code != nullptr);
  RelocEntry* re;
  LabelEntry* le = _code->labelEntry(label);

  if (ASMJIT_UNLIKELY(!le))
    return reportError(DebugUtils::errored(kErrorInvalidLabel));

  // The embedded value is the size of a general-purpose register (<= 8).
  uint32_t dataSize = gpSize();
  ASMJIT_ASSERT(dataSize <= 8);

  CodeBufferWriter writer(this);
  ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize));

#ifndef ASMJIT_NO_LOGGING
  if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled))) {
    StringTmp<256> sb;
    // Logged as ".db/.dw/.dd/.dq <label>" depending on log2(dataSize).
    sb.appendFormat(".%s ", dataSizeByPowerTable[Support::ctz(dataSize)].str);
    Logging::formatLabel(sb, 0, this, label.id());
    sb.appendChar('\n');
    _code->_logger->log(sb);
  }
#endif

  // TODO: Does it make sense to calculate the address here if everything is known?
  /*
  if (_code->hasBaseAddress() && currentSection() == _code->textSection() && le->isBound()) {
    uint64_t addr = _code->baseAddress() + _code->textSection()->offset() + le->offset();
    writer.emitValueLE(addr, dataSize);
  }
  */

  Error err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, dataSize);
  if (ASMJIT_UNLIKELY(err))
    return reportError(err);

  re->_sourceSectionId = _section->id();
  re->_sourceOffset = offset();

  if (le->isBound()) {
    // Bound label - the relocation target is fully known now.
    re->_targetSectionId = le->section()->id();
    re->_payload = le->offset();
  }
  else {
    // Unbound label - chain a label-link that gets resolved when the label
    // is bound; the link remembers the relocation to patch.
    LabelLink* link = _code->newLabelLink(le, _section->id(), offset(), 0);
    if (ASMJIT_UNLIKELY(!link))
      return reportError(DebugUtils::errored(kErrorOutOfMemory));
    link->relocId = re->id();
  }

  // Emit dummy DWORD/QWORD depending on the data size.
  writer.emitZeros(dataSize);
  writer.done(this);

  return kErrorOk;
}
353
// Embeds the difference `label - base` as a little-endian value of
// `dataSize` bytes (0 means pointer-size). If either label is unbound or
// they live in different sections, an expression relocation is recorded and
// zeros are emitted as a placeholder.
Error BaseAssembler::embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

  LabelEntry* labelEntry = _code->labelEntry(label);
  LabelEntry* baseEntry = _code->labelEntry(base);

  if (ASMJIT_UNLIKELY(!labelEntry || !baseEntry))
    return reportError(DebugUtils::errored(kErrorInvalidLabel));

  // dataSize == 0 defaults to the native register size.
  if (dataSize == 0)
    dataSize = gpSize();

  if (ASMJIT_UNLIKELY(!Support::isPowerOf2(dataSize) || dataSize > 8))
    return reportError(DebugUtils::errored(kErrorInvalidOperandSize));

  CodeBufferWriter writer(this);
  ASMJIT_PROPAGATE(writer.ensureSpace(this, dataSize));

#ifndef ASMJIT_NO_LOGGING
  if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled))) {
    StringTmp<256> sb;
    sb.appendFormat(".%s (", dataSizeByPowerTable[Support::ctz(dataSize)].str);
    Logging::formatLabel(sb, 0, this, label.id());
    sb.appendString(" - ");
    Logging::formatLabel(sb, 0, this, base.id());
    sb.appendString(")\n");
    _code->_logger->log(sb);
  }
#endif

  // If both labels are bound within the same section it means the delta can be calculated now.
  if (labelEntry->isBound() && baseEntry->isBound() && labelEntry->section() == baseEntry->section()) {
    uint64_t delta = labelEntry->offset() - baseEntry->offset();
    writer.emitValueLE(delta, dataSize);
  }
  else {
    // Otherwise record a `label - base` expression to be evaluated during
    // relocation and emit zeros as a placeholder.
    RelocEntry* re;
    Error err = _code->newRelocEntry(&re, RelocEntry::kTypeExpression, dataSize);
    if (ASMJIT_UNLIKELY(err))
      return reportError(err);

    Expression* exp = _code->_zone.newT<Expression>();
    if (ASMJIT_UNLIKELY(!exp))
      return reportError(DebugUtils::errored(kErrorOutOfMemory));

    exp->reset();
    exp->opType = Expression::kOpSub;
    exp->setValueAsLabel(0, labelEntry);
    exp->setValueAsLabel(1, baseEntry);

    re->_sourceSectionId = _section->id();
    re->_sourceOffset = offset();
    // The expression is owned by the CodeHolder's zone; the reloc stores it
    // as an opaque payload pointer.
    re->_payload = (uint64_t)(uintptr_t)exp;

    writer.emitZeros(dataSize);
  }

  writer.done(this);
  return kErrorOk;
}
415
416 Error BaseAssembler::embedConstPool(const Label& label, const ConstPool& pool) {
417 if (ASMJIT_UNLIKELY(!_code))
418 return DebugUtils::errored(kErrorNotInitialized);
419
420 if (ASMJIT_UNLIKELY(!isLabelValid(label)))
421 return DebugUtils::errored(kErrorInvalidLabel);
422
423 ASMJIT_PROPAGATE(align(kAlignData, uint32_t(pool.alignment())));
424 ASMJIT_PROPAGATE(bind(label));
425
426 size_t size = pool.size();
427 CodeBufferWriter writer(this);
428 ASMJIT_PROPAGATE(writer.ensureSpace(this, size));
429
430 pool.fill(writer.cursor());
431
432 #ifndef ASMJIT_NO_LOGGING
433 if (ASMJIT_UNLIKELY(hasEmitterOption(kOptionLoggingEnabled)))
434 _code->_logger->logBinary(writer.cursor(), size);
435 #endif
436
437 writer.advance(size);
438 writer.done(this);
439
440 return kErrorOk;
441 }
442
443 // ============================================================================
444 // [asmjit::BaseAssembler - Comment]
445 // ============================================================================
446
// Writes a comment line into the logger. Comments never touch the code
// buffer; when logging is compiled out (or disabled) this is a no-op.
Error BaseAssembler::comment(const char* data, size_t size) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

#ifndef ASMJIT_NO_LOGGING
  if (hasEmitterOption(kOptionLoggingEnabled)) {
    Logger* logger = _code->logger();
    logger->log(data, size);
    logger->log("\n", 1);
    return kErrorOk;
  }
#else
  ASMJIT_UNUSED(data);
  ASMJIT_UNUSED(size);
#endif

  return kErrorOk;
}
465
466 // ============================================================================
467 // [asmjit::BaseAssembler - Events]
468 // ============================================================================
469
470 Error BaseAssembler::onAttach(CodeHolder* code) noexcept {
471 ASMJIT_PROPAGATE(Base::onAttach(code));
472
473 // Attach to the end of the .text section.
474 BaseAssembler_initSection(this, code->_sections[0]);
475
476 // And reset everything that is used temporarily.
477 _op4.reset();
478 _op5.reset();
479
480 return kErrorOk;
481 }
482
483 Error BaseAssembler::onDetach(CodeHolder* code) noexcept {
484 _section = nullptr;
485 _bufferData = nullptr;
486 _bufferEnd = nullptr;
487 _bufferPtr = nullptr;
488
489 _op4.reset();
490 _op5.reset();
491
492 return Base::onDetach(code);
493 }
494
495 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ASSEMBLER_H
7 #define _ASMJIT_CORE_ASSEMBLER_H
8
9 #include "../core/codeholder.h"
10 #include "../core/datatypes.h"
11 #include "../core/emitter.h"
12 #include "../core/operand.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \addtogroup asmjit_core
17 //! \{
18
19 // ============================================================================
20 // [asmjit::BaseAssembler]
21 // ============================================================================
22
23 //! Base encoder (assembler).
class ASMJIT_VIRTAPI BaseAssembler : public BaseEmitter {
public:
  ASMJIT_NONCOPYABLE(BaseAssembler)
  typedef BaseEmitter Base;

  //! Current section where the assembling happens.
  Section* _section;
  //! Start of the CodeBuffer of the current section.
  uint8_t* _bufferData;
  //! End (first invalid byte) of the current section.
  uint8_t* _bufferEnd;
  //! Pointer in the CodeBuffer of the current section.
  uint8_t* _bufferPtr;
  //! 5th operand data, used only temporarily (set by the 6-operand `_emit()`
  //! overload and consumed by the backend).
  Operand_ _op4;
  //! 6th operand data, used only temporarily.
  Operand_ _op5;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `BaseAssembler` instance.
  ASMJIT_API BaseAssembler() noexcept;
  //! Destroys the `BaseAssembler` instance.
  ASMJIT_API virtual ~BaseAssembler() noexcept;

  //! \}

  //! \name Code-Buffer Management
  //! \{

  //! Returns the capacity of the current CodeBuffer.
  inline size_t bufferCapacity() const noexcept { return (size_t)(_bufferEnd - _bufferData); }
  //! Returns the number of remaining bytes in the current CodeBuffer.
  inline size_t remainingSpace() const noexcept { return (size_t)(_bufferEnd - _bufferPtr); }

  //! Returns the current position in the CodeBuffer.
  inline size_t offset() const noexcept { return (size_t)(_bufferPtr - _bufferData); }
  //! Sets the current position in the CodeBuffer to `offset`.
  //!
  //! \note The `offset` cannot be outside of the buffer size (even if it's
  //! within buffer's capacity).
  ASMJIT_API Error setOffset(size_t offset);

  //! Returns the start of the CodeBuffer in the current section.
  inline uint8_t* bufferData() const noexcept { return _bufferData; }
  //! Returns the end (first invalid byte) in the current section.
  inline uint8_t* bufferEnd() const noexcept { return _bufferEnd; }
  //! Returns the current pointer in the CodeBuffer in the current section.
  inline uint8_t* bufferPtr() const noexcept { return _bufferPtr; }

  //! \}

  //! \name Section Management
  //! \{

  //! Returns the section the assembler is currently emitting into.
  inline Section* currentSection() const noexcept { return _section; }

  ASMJIT_API Error section(Section* section) override;

  //! \}

  //! \name Label Management
  //! \{

  ASMJIT_API Label newLabel() override;
  ASMJIT_API Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, uint32_t type = Label::kTypeGlobal, uint32_t parentId = Globals::kInvalidId) override;
  ASMJIT_API Error bind(const Label& label) override;

  //! \}

  //! \cond INTERNAL
  //! \name Emit
  //! \{

  using BaseEmitter::_emit;

  ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) override;
  ASMJIT_API Error _emitOpArray(uint32_t instId, const Operand_* operands, size_t count) override;

protected:
#ifndef ASMJIT_NO_LOGGING
  //! Formats and logs one successfully emitted instruction (logging build only).
  void _emitLog(
    uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3,
    uint32_t relSize, uint32_t immSize, uint8_t* afterCursor);

  //! Formats the failed instruction, reports `err`, and resets per-instruction state.
  Error _emitFailed(
    Error err,
    uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3);
#else
  //! No-logging variant - only reports `err` and resets per-instruction state.
  inline Error _emitFailed(
    uint32_t err,
    uint32_t instId, uint32_t options, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) {

    ASMJIT_UNUSED(instId);
    ASMJIT_UNUSED(options);
    ASMJIT_UNUSED(o0);
    ASMJIT_UNUSED(o1);
    ASMJIT_UNUSED(o2);
    ASMJIT_UNUSED(o3);

    resetInstOptions();
    resetInlineComment();
    return reportError(err);
  }
#endif
public:
  //! \}
  //! \endcond

  //! \name Embed
  //! \{

  ASMJIT_API Error embed(const void* data, uint32_t dataSize) override;
  ASMJIT_API Error embedLabel(const Label& label) override;
  ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) override;
  ASMJIT_API Error embedConstPool(const Label& label, const ConstPool& pool) override;

  //! \}

  //! \name Comment
  //! \{

  ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;

  //! \}

  //! \name Events
  //! \{

  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;

  //! \}
};
159
160 //! \}
161
162 ASMJIT_END_NAMESPACE
163
164 #endif // _ASMJIT_CORE_ASSEMBLER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_BUILDER
8
9 #include "../core/builder.h"
10 #include "../core/logging.h"
11 #include "../core/support.h"
12
13 ASMJIT_BEGIN_NAMESPACE
14
15 // ============================================================================
16 // [asmjit::PostponedErrorHandler (Internal)]
17 // ============================================================================
18
19 //! Postponed error handler that never throws. Used as a temporal error handler
20 //! to run passes. If error occurs, the caller is notified and will call the
21 //! real error handler, that can throw.
class PostponedErrorHandler : public ErrorHandler {
public:
  void handleError(Error err, const char* message, BaseEmitter* origin) override {
    ASMJIT_UNUSED(err);
    ASMJIT_UNUSED(origin);

    // Only the message is stored; the caller inspects it after the pass and
    // forwards it to the real (possibly throwing) error handler.
    _message.assignString(message);
  }

  // Last error message captured by handleError().
  StringTmp<128> _message;
};
33
34 // ============================================================================
35 // [asmjit::BaseBuilder - Construction / Destruction]
36 // ============================================================================
37
// Constructs an empty builder. Zone sizes subtract `Zone::kBlockOverhead`
// so the first allocation block lands on a nicely sized chunk.
BaseBuilder::BaseBuilder() noexcept
  : BaseEmitter(kTypeBuilder),
    _codeZone(32768 - Zone::kBlockOverhead),
    _dataZone(16384 - Zone::kBlockOverhead),
    _passZone(65536 - Zone::kBlockOverhead),
    _allocator(&_codeZone),
    _passes(),
    _labelNodes(),
    _cursor(nullptr),
    _firstNode(nullptr),
    _lastNode(nullptr),
    _nodeFlags(0) {}
BaseBuilder::~BaseBuilder() noexcept {}
51
52 // ============================================================================
53 // [asmjit::BaseBuilder - Node Management]
54 // ============================================================================
55
56 LabelNode* BaseBuilder::newLabelNode() noexcept {
57 LabelNode* node = newNodeT<LabelNode>();
58 if (!node || registerLabelNode(node) != kErrorOk)
59 return nullptr;
60 return node;
61 }
62
// Allocates an AlignNode (not inserted into the node list); returns null on
// allocation failure.
AlignNode* BaseBuilder::newAlignNode(uint32_t alignMode, uint32_t alignment) noexcept {
  return newNodeT<AlignNode>(alignMode, alignment);
}
66
67 EmbedDataNode* BaseBuilder::newEmbedDataNode(const void* data, uint32_t size) noexcept {
68 if (size > EmbedDataNode::kInlineBufferSize) {
69 void* cloned = _dataZone.alloc(size);
70 if (ASMJIT_UNLIKELY(!cloned))
71 return nullptr;
72
73 if (data)
74 memcpy(cloned, data, size);
75 data = cloned;
76 }
77
78 return newNodeT<EmbedDataNode>(const_cast<void*>(data), size);
79 }
80
81 ConstPoolNode* BaseBuilder::newConstPoolNode() noexcept {
82 ConstPoolNode* node = newNodeT<ConstPoolNode>();
83 if (!node || registerLabelNode(node) != kErrorOk)
84 return nullptr;
85 return node;
86 }
87
88 CommentNode* BaseBuilder::newCommentNode(const char* data, size_t size) noexcept {
89 if (data) {
90 if (size == SIZE_MAX)
91 size = strlen(data);
92
93 if (size > 0) {
94 data = static_cast<char*>(_dataZone.dup(data, size, true));
95 if (!data) return nullptr;
96 }
97 }
98
99 return newNodeT<CommentNode>(data);
100 }
101
102 InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0) noexcept {
103 uint32_t opCount = 1;
104 uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
105 ASMJIT_ASSERT(opCapacity >= 4);
106
107 InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
108 if (ASMJIT_UNLIKELY(!node))
109 return nullptr;
110
111 node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity);
112 node->setOp(0, o0);
113 for (uint32_t i = opCount; i < opCapacity; i++) node->resetOp(i);
114 return node;
115 }
116
117 InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1) noexcept {
118 uint32_t opCount = 2;
119 uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
120 ASMJIT_ASSERT(opCapacity >= 4);
121
122 InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
123 if (ASMJIT_UNLIKELY(!node))
124 return nullptr;
125
126 node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity);
127 node->setOp(0, o0);
128 node->setOp(1, o1);
129 for (uint32_t i = opCount; i < opCapacity; i++) node->resetOp(i);
130 return node;
131 }
132
133 InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept {
134 uint32_t opCount = 3;
135 uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
136 ASMJIT_ASSERT(opCapacity >= 4);
137
138 InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
139 if (ASMJIT_UNLIKELY(!node))
140 return nullptr;
141
142 node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity);
143 node->setOp(0, o0);
144 node->setOp(1, o1);
145 node->setOp(2, o2);
146 for (uint32_t i = opCount; i < opCapacity; i++) node->resetOp(i);
147 return node;
148 }
149
150 InstNode* BaseBuilder::newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept {
151 uint32_t opCount = 4;
152 uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
153 ASMJIT_ASSERT(opCapacity >= 4);
154
155 InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
156 if (ASMJIT_UNLIKELY(!node))
157 return nullptr;
158
159 node = new(node) InstNode(this, instId, instOptions, opCount, opCapacity);
160 node->setOp(0, o0);
161 node->setOp(1, o1);
162 node->setOp(2, o2);
163 node->setOp(3, o3);
164 for (uint32_t i = opCount; i < opCapacity; i++) node->resetOp(i);
165 return node;
166 }
167
168 InstNode* BaseBuilder::newInstNodeRaw(uint32_t instId, uint32_t instOptions, uint32_t opCount) noexcept {
169 uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
170 ASMJIT_ASSERT(opCapacity >= 4);
171
172 InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
173 if (ASMJIT_UNLIKELY(!node))
174 return nullptr;
175 return new(node) InstNode(this, instId, instOptions, opCount, opCapacity);
176 }
177
// Inserts `node` after the current cursor (or at the front of the list when
// there is no cursor), marks it active, and makes it the new cursor.
BaseNode* BaseBuilder::addNode(BaseNode* node) noexcept {
  ASMJIT_ASSERT(node);
  ASMJIT_ASSERT(!node->_prev);
  ASMJIT_ASSERT(!node->_next);
  ASMJIT_ASSERT(!node->isActive());

  if (!_cursor) {
    // No cursor: prepend to the list (or initialize an empty list).
    if (!_firstNode) {
      _firstNode = node;
      _lastNode = node;
    }
    else {
      node->_next = _firstNode;
      _firstNode->_prev = node;
      _firstNode = node;
    }
  }
  else {
    // Splice the node between the cursor and its successor.
    BaseNode* prev = _cursor;
    BaseNode* next = _cursor->next();

    node->_prev = prev;
    node->_next = next;

    prev->_next = node;
    if (next)
      next->_prev = node;
    else
      _lastNode = node;
  }

  node->addFlags(BaseNode::kFlagIsActive);
  // Inserting a section invalidates the cached section-link chain.
  if (node->isSection())
    _dirtySectionLinks = true;

  _cursor = node;
  return node;
}
216
217 BaseNode* BaseBuilder::addAfter(BaseNode* node, BaseNode* ref) noexcept {
218 ASMJIT_ASSERT(node);
219 ASMJIT_ASSERT(ref);
220
221 ASMJIT_ASSERT(!node->_prev);
222 ASMJIT_ASSERT(!node->_next);
223
224 BaseNode* prev = ref;
225 BaseNode* next = ref->next();
226
227 node->_prev = prev;
228 node->_next = next;
229
230 node->addFlags(BaseNode::kFlagIsActive);
231 if (node->isSection())
232 _dirtySectionLinks = true;
233
234 prev->_next = node;
235 if (next)
236 next->_prev = node;
237 else
238 _lastNode = node;
239
240 return node;
241 }
242
// Inserts `node` immediately before `ref` and marks it active. Does not
// move the cursor.
BaseNode* BaseBuilder::addBefore(BaseNode* node, BaseNode* ref) noexcept {
  ASMJIT_ASSERT(node != nullptr);
  ASMJIT_ASSERT(!node->_prev);
  ASMJIT_ASSERT(!node->_next);
  ASMJIT_ASSERT(!node->isActive());
  ASMJIT_ASSERT(ref != nullptr);
  ASMJIT_ASSERT(ref->isActive());

  BaseNode* prev = ref->prev();
  BaseNode* next = ref;

  node->_prev = prev;
  node->_next = next;

  node->addFlags(BaseNode::kFlagIsActive);
  // Inserting a section invalidates the cached section-link chain.
  if (node->isSection())
    _dirtySectionLinks = true;

  next->_prev = node;
  if (prev)
    prev->_next = node;
  else
    _firstNode = node;

  return node;
}
269
// Unlinks `node` from the list and clears its active flag. Inactive nodes
// are returned unchanged. If the cursor pointed at `node`, it is moved to
// the previous node.
BaseNode* BaseBuilder::removeNode(BaseNode* node) noexcept {
  if (!node->isActive())
    return node;

  BaseNode* prev = node->prev();
  BaseNode* next = node->next();

  if (_firstNode == node)
    _firstNode = next;
  else
    prev->_next = next;

  if (_lastNode == node)
    _lastNode = prev;
  else
    next->_prev = prev;

  node->_prev = nullptr;
  node->_next = nullptr;
  node->clearFlags(BaseNode::kFlagIsActive);
  // Removing a section invalidates the cached section-link chain.
  if (node->isSection())
    _dirtySectionLinks = true;

  if (_cursor == node)
    _cursor = prev;

  return node;
}
298
// Unlinks the inclusive range [first, last] from the node list. Assumes the
// range is a contiguous, active span of the list (only `first` is checked).
void BaseBuilder::removeNodes(BaseNode* first, BaseNode* last) noexcept {
  if (first == last) {
    removeNode(first);
    return;
  }

  if (!first->isActive())
    return;

  BaseNode* prev = first->prev();
  BaseNode* next = last->next();

  if (_firstNode == first)
    _firstNode = next;
  else
    prev->_next = next;

  if (_lastNode == last)
    _lastNode = prev;
  else
    next->_prev = prev;

  BaseNode* node = first;
  uint32_t didRemoveSection = false;

  for (;;) {
    next = node->next();
    ASMJIT_ASSERT(next != nullptr);

    // Detach and deactivate every node in the range.
    node->_prev = nullptr;
    node->_next = nullptr;
    node->clearFlags(BaseNode::kFlagIsActive);
    didRemoveSection |= uint32_t(node->isSection());

    // If the cursor was inside the range, park it just before the range.
    if (_cursor == node)
      _cursor = prev;

    if (node == last)
      break;
    node = next;
  }

  if (didRemoveSection)
    _dirtySectionLinks = true;
}
344
345 BaseNode* BaseBuilder::setCursor(BaseNode* node) noexcept {
346 BaseNode* old = _cursor;
347 _cursor = node;
348 return old;
349 }
350
351 // ============================================================================
352 // [asmjit::BaseBuilder - Section]
353 // ============================================================================
354
355 Error BaseBuilder::sectionNodeOf(SectionNode** pOut, uint32_t sectionId) noexcept {
356 if (ASMJIT_UNLIKELY(!_code))
357 return DebugUtils::errored(kErrorNotInitialized);
358
359 if (ASMJIT_UNLIKELY(!_code->isSectionValid(sectionId)))
360 return DebugUtils::errored(kErrorInvalidSection);
361
362 if (sectionId >= _sectionNodes.size())
363 ASMJIT_PROPAGATE(_sectionNodes.resize(&_allocator, sectionId + 1));
364
365 SectionNode* node = _sectionNodes[sectionId];
366 if (!node) {
367 node = newNodeT<SectionNode>(sectionId);
368 if (ASMJIT_UNLIKELY(!node))
369 return DebugUtils::errored(kErrorOutOfMemory);
370 _sectionNodes[sectionId] = node;
371 }
372
373 *pOut = node;
374 return kErrorOk;
375 }
376
// Switches the builder to `section` by moving the cursor to the end of that
// section's node range (appending the section node first if it was never
// inserted).
Error BaseBuilder::section(Section* section) {
  SectionNode* node;
  Error err = sectionNodeOf(&node, section->id());

  if (ASMJIT_UNLIKELY(err))
    return reportError(err);

  if (!node->isActive()) {
    // Insert the section at the end if it was not part of the code.
    addAfter(node, lastNode());
    _cursor = node;
  }
  else {
    // This is a bit tricky. We cache section links to make sure that
    // switching sections doesn't involve traversal in linked-list unless
    // the position of the section has changed.
    if (hasDirtySectionLinks())
      updateSectionLinks();

    // Position the cursor at the last node belonging to this section.
    if (node->_nextSection)
      _cursor = node->_nextSection->_prev;
    else
      _cursor = _lastNode;
  }

  return kErrorOk;
}
404
405 void BaseBuilder::updateSectionLinks() noexcept {
406 if (!_dirtySectionLinks)
407 return;
408
409 BaseNode* node_ = _firstNode;
410 SectionNode* currentSection = nullptr;
411
412 while (node_) {
413 if (node_->isSection()) {
414 if (currentSection)
415 currentSection->_nextSection = node_->as<SectionNode>();
416 currentSection = node_->as<SectionNode>();
417 }
418 node_ = node_->next();
419 }
420
421 if (currentSection)
422 currentSection->_nextSection = nullptr;
423
424 _dirtySectionLinks = false;
425 }
426
427 // ============================================================================
428 // [asmjit::BaseBuilder - Labels]
429 // ============================================================================
430
431 Error BaseBuilder::labelNodeOf(LabelNode** pOut, uint32_t labelId) noexcept {
432 if (ASMJIT_UNLIKELY(!_code))
433 return DebugUtils::errored(kErrorNotInitialized);
434
435 uint32_t index = labelId;
436 if (ASMJIT_UNLIKELY(index >= _code->labelCount()))
437 return DebugUtils::errored(kErrorInvalidLabel);
438
439 if (index >= _labelNodes.size())
440 ASMJIT_PROPAGATE(_labelNodes.resize(&_allocator, index + 1));
441
442 LabelNode* node = _labelNodes[index];
443 if (!node) {
444 node = newNodeT<LabelNode>(labelId);
445 if (ASMJIT_UNLIKELY(!node))
446 return DebugUtils::errored(kErrorOutOfMemory);
447 _labelNodes[index] = node;
448 }
449
450 *pOut = node;
451 return kErrorOk;
452 }
453
// Registers an externally created `LabelNode` with this builder: creates a new
// label entry in the attached `CodeHolder`, assigns its id to `node`, and maps
// the id to `node` in `_labelNodes`.
Error BaseBuilder::registerLabelNode(LabelNode* node) noexcept {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

  // Don't call `reportError()` from here, we are noexcept and we are called
  // by `newLabelNode()` and `newFuncNode()`, which are noexcept as well.
  LabelEntry* le;
  ASMJIT_PROPAGATE(_code->newLabelEntry(&le));
  uint32_t labelId = le->id();

  // We just added one label so it must be true.
  // NOTE(review): this asserts `size() <= labelId`, i.e. no node slot exists
  // yet for the freshly allocated id - presumably label ids are assigned
  // sequentially by `CodeHolder`; verify against `newLabelEntry()`.
  ASMJIT_ASSERT(_labelNodes.size() < labelId + 1);
  ASMJIT_PROPAGATE(_labelNodes.resize(&_allocator, labelId + 1));

  _labelNodes[labelId] = node;
  node->_id = labelId;

  return kErrorOk;
}
473
474 static Error BaseBuilder_newLabelInternal(BaseBuilder* self, uint32_t labelId) noexcept {
475 ASMJIT_ASSERT(self->_labelNodes.size() < labelId + 1);
476 LabelNode* node = self->newNodeT<LabelNode>(labelId);
477
478 if (ASMJIT_UNLIKELY(!node))
479 return DebugUtils::errored(kErrorOutOfMemory);
480
481 ASMJIT_PROPAGATE(self->_labelNodes.resize(&self->_allocator, labelId + 1));
482 self->_labelNodes[labelId] = node;
483 node->_id = labelId;
484 return kErrorOk;
485 }
486
487 Label BaseBuilder::newLabel() {
488 uint32_t labelId = Globals::kInvalidId;
489 if (_code) {
490 LabelEntry* le;
491 Error err = _code->newLabelEntry(&le);
492 if (ASMJIT_UNLIKELY(err)) {
493 reportError(err);
494 }
495 else {
496 err = BaseBuilder_newLabelInternal(this, le->id());
497 if (ASMJIT_UNLIKELY(err))
498 reportError(err);
499 else
500 labelId = le->id();
501 }
502 }
503 return Label(labelId);
504 }
505
506 Label BaseBuilder::newNamedLabel(const char* name, size_t nameSize, uint32_t type, uint32_t parentId) {
507 uint32_t labelId = Globals::kInvalidId;
508 if (_code) {
509 LabelEntry* le;
510 Error err = _code->newNamedLabelEntry(&le, name, nameSize, type, parentId);
511 if (ASMJIT_UNLIKELY(err)) {
512 reportError(err);
513 }
514 else {
515 err = BaseBuilder_newLabelInternal(this, le->id());
516 if (ASMJIT_UNLIKELY(err))
517 reportError(err);
518 else
519 labelId = le->id();
520 }
521 }
522 return Label(labelId);
523 }
524
525 Error BaseBuilder::bind(const Label& label) {
526 LabelNode* node;
527 Error err = labelNodeOf(&node, label);
528
529 if (ASMJIT_UNLIKELY(err))
530 return reportError(err);
531
532 addNode(node);
533 return kErrorOk;
534 }
535
536 // ============================================================================
537 // [asmjit::BaseBuilder - Passes]
538 // ============================================================================
539
540 ASMJIT_FAVOR_SIZE Pass* BaseBuilder::passByName(const char* name) const noexcept {
541 for (Pass* pass : _passes)
542 if (strcmp(pass->name(), name) == 0)
543 return pass;
544 return nullptr;
545 }
546
547 ASMJIT_FAVOR_SIZE Error BaseBuilder::addPass(Pass* pass) noexcept {
548 if (ASMJIT_UNLIKELY(!_code))
549 return DebugUtils::errored(kErrorNotInitialized);
550
551 if (ASMJIT_UNLIKELY(pass == nullptr)) {
552 // Since this is directly called by `addPassT()` we treat `null` argument
553 // as out-of-memory condition. Otherwise it would be API misuse.
554 return DebugUtils::errored(kErrorOutOfMemory);
555 }
556 else if (ASMJIT_UNLIKELY(pass->_cb)) {
557 // Kinda weird, but okay...
558 if (pass->_cb == this)
559 return kErrorOk;
560 return DebugUtils::errored(kErrorInvalidState);
561 }
562
563 ASMJIT_PROPAGATE(_passes.append(&_allocator, pass));
564 pass->_cb = this;
565 return kErrorOk;
566 }
567
568 ASMJIT_FAVOR_SIZE Error BaseBuilder::deletePass(Pass* pass) noexcept {
569 if (ASMJIT_UNLIKELY(!_code))
570 return DebugUtils::errored(kErrorNotInitialized);
571
572 if (ASMJIT_UNLIKELY(pass == nullptr))
573 return DebugUtils::errored(kErrorInvalidArgument);
574
575 if (pass->_cb != nullptr) {
576 if (pass->_cb != this)
577 return DebugUtils::errored(kErrorInvalidState);
578
579 uint32_t index = _passes.indexOf(pass);
580 ASMJIT_ASSERT(index != Globals::kNotFound);
581
582 pass->_cb = nullptr;
583 _passes.removeAt(index);
584 }
585
586 pass->~Pass();
587 return kErrorOk;
588 }
589
590 Error BaseBuilder::runPasses() {
591 if (ASMJIT_UNLIKELY(!_code))
592 return DebugUtils::errored(kErrorNotInitialized);
593
594 if (_passes.empty())
595 return kErrorOk;
596
597 Logger* logger = code()->logger();
598 ErrorHandler* prev = errorHandler();
599 PostponedErrorHandler postponed;
600
601 Error err = kErrorOk;
602 setErrorHandler(&postponed);
603
604 for (Pass* pass : _passes) {
605 _passZone.reset();
606 err = pass->run(&_passZone, logger);
607 if (err) break;
608 }
609 _passZone.reset();
610 setErrorHandler(prev);
611
612 if (ASMJIT_UNLIKELY(err))
613 return reportError(err, !postponed._message.empty() ? postponed._message.data() : nullptr);
614
615 return kErrorOk;
616 }
617
618 // ============================================================================
619 // [asmjit::BaseBuilder - Emit]
620 // ============================================================================
621
// Creates an `InstNode` for `instId` with up to 4 operands and appends it at
// the cursor. The node captures the current instruction options, extra
// register, and inline comment, all of which are reset afterwards (on both
// success and failure paths).
Error BaseBuilder::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) {
  // Determine the used operand count by scanning from the last operand; a
  // `none` operand terminates the list.
  uint32_t opCount = 4;

  if (o3.isNone()) {
    opCount = 3;
    if (o2.isNone()) {
      opCount = 2;
      if (o1.isNone()) {
        opCount = 1;
        if (o0.isNone())
          opCount = 0;
      }
    }
  }

  uint32_t options = instOptions() | globalInstOptions();
  // `kOptionReserved` is set internally to request the slow path (validation).
  if (options & BaseInst::kOptionReserved) {
    if (ASMJIT_UNLIKELY(!_code))
      return DebugUtils::errored(kErrorNotInitialized);

    // Strict validation.
#ifndef ASMJIT_NO_VALIDATION
    if (hasEmitterOption(kOptionStrictValidation)) {
      Operand_ opArray[4];
      opArray[0].copyFrom(o0);
      opArray[1].copyFrom(o1);
      opArray[2].copyFrom(o2);
      opArray[3].copyFrom(o3);

      Error err = InstAPI::validate(archId(), BaseInst(instId, options, _extraReg), opArray, opCount);
      if (ASMJIT_UNLIKELY(err)) {
        // Per-instruction state must be reset even when emitting fails.
        resetInstOptions();
        resetExtraReg();
        resetInlineComment();
        return reportError(err);
      }
    }
#endif

    // Clear options that should never be part of `InstNode`.
    options &= ~BaseInst::kOptionReserved;
  }

  uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
  ASMJIT_ASSERT(opCapacity >= 4);

  // The node and its operand array are allocated in one zone block.
  InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
  if (ASMJIT_UNLIKELY(!node)) {
    resetInstOptions();
    resetExtraReg();
    resetInlineComment();
    return reportError(DebugUtils::errored(kErrorOutOfMemory));
  }

  node = new(node) InstNode(this, instId, options, opCount, opCapacity);
  node->setExtraReg(extraReg());
  node->setOp(0, o0);
  node->setOp(1, o1);
  node->setOp(2, o2);
  node->setOp(3, o3);

  // Clear any remaining base-capacity slots beyond the four set above.
  // NOTE(review): assumes `opCapacity <= InstNode::kBaseOpCapacity` here
  // (opCount <= 4) so no allocated slot is left uninitialized - confirm
  // against `capacityOfOpCount()`.
  for (uint32_t i = 4; i < InstNode::kBaseOpCapacity; i++)
    node->resetOp(i);

  // The inline comment is duplicated into `_dataZone` so the node owns a
  // stable copy.
  const char* comment = inlineComment();
  if (comment)
    node->setInlineComment(static_cast<char*>(_dataZone.dup(comment, strlen(comment), true)));

  resetInstOptions();
  resetExtraReg();
  resetInlineComment();

  addNode(node);
  return kErrorOk;
}
697
// 5/6-operand variant of `_emit()`. Delegates to the 4-operand overload when
// `o4` and `o5` are both `none`; otherwise it follows the same flow: optional
// strict validation, node allocation, operand assignment, and reset of the
// per-instruction emitter state.
Error BaseBuilder::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) {
  uint32_t opCount = Globals::kMaxOpCount;
  if (o5.isNone()) {
    opCount = 5;
    if (o4.isNone())
      return _emit(instId, o0, o1, o2, o3);
  }

  uint32_t options = instOptions() | globalInstOptions();
  // `kOptionReserved` is set internally to request the slow path (validation).
  if (ASMJIT_UNLIKELY(options & BaseInst::kOptionReserved)) {
    if (ASMJIT_UNLIKELY(!_code))
      return DebugUtils::errored(kErrorNotInitialized);

    // Strict validation.
#ifndef ASMJIT_NO_VALIDATION
    if (hasEmitterOption(kOptionStrictValidation)) {
      Operand_ opArray[Globals::kMaxOpCount];
      opArray[0].copyFrom(o0);
      opArray[1].copyFrom(o1);
      opArray[2].copyFrom(o2);
      opArray[3].copyFrom(o3);
      opArray[4].copyFrom(o4);
      opArray[5].copyFrom(o5);

      Error err = InstAPI::validate(archId(), BaseInst(instId, options, _extraReg), opArray, opCount);
      if (ASMJIT_UNLIKELY(err)) {
        // Per-instruction state must be reset even when emitting fails.
        resetInstOptions();
        resetExtraReg();
        resetInlineComment();
        return reportError(err);
      }
    }
#endif

    // Clear options that should never be part of `InstNode`.
    options &= ~BaseInst::kOptionReserved;
  }

  uint32_t opCapacity = InstNode::capacityOfOpCount(opCount);
  ASMJIT_ASSERT(opCapacity >= opCount);

  // The node and its operand array are allocated in one zone block.
  InstNode* node = _allocator.allocT<InstNode>(InstNode::nodeSizeOfOpCapacity(opCapacity));
  if (ASMJIT_UNLIKELY(!node)) {
    resetInstOptions();
    resetExtraReg();
    resetInlineComment();
    return reportError(DebugUtils::errored(kErrorOutOfMemory));
  }

  node = new(node) InstNode(this, instId, options, opCount, opCapacity);
  node->setExtraReg(extraReg());
  node->setOp(0, o0);
  node->setOp(1, o1);
  node->setOp(2, o2);
  node->setOp(3, o3);
  node->setOp(4, o4);

  // `o5` is only stored when a sixth slot was actually allocated; when
  // `opCount == 5`, `o5` is `none` anyway.
  if (opCapacity > 5)
    node->setOp(5, o5);

  // The inline comment is duplicated into `_dataZone` so the node owns a
  // stable copy.
  const char* comment = inlineComment();
  if (comment)
    node->setInlineComment(static_cast<char*>(_dataZone.dup(comment, strlen(comment), true)));

  resetInstOptions();
  resetExtraReg();
  resetInlineComment();

  addNode(node);
  return kErrorOk;
}
769
770 // ============================================================================
771 // [asmjit::BaseBuilder - Align]
772 // ============================================================================
773
774 Error BaseBuilder::align(uint32_t alignMode, uint32_t alignment) {
775 if (ASMJIT_UNLIKELY(!_code))
776 return DebugUtils::errored(kErrorNotInitialized);
777
778 AlignNode* node = newAlignNode(alignMode, alignment);
779 if (ASMJIT_UNLIKELY(!node))
780 return reportError(DebugUtils::errored(kErrorOutOfMemory));
781
782 addNode(node);
783 return kErrorOk;
784 }
785
786 // ============================================================================
787 // [asmjit::BaseBuilder - Embed]
788 // ============================================================================
789
790 Error BaseBuilder::embed(const void* data, uint32_t dataSize) {
791 if (ASMJIT_UNLIKELY(!_code))
792 return DebugUtils::errored(kErrorNotInitialized);
793
794 EmbedDataNode* node = newEmbedDataNode(data, dataSize);
795 if (ASMJIT_UNLIKELY(!node))
796 return reportError(DebugUtils::errored(kErrorOutOfMemory));
797
798 addNode(node);
799 return kErrorOk;
800 }
801
802 Error BaseBuilder::embedLabel(const Label& label) {
803 if (ASMJIT_UNLIKELY(!_code))
804 return DebugUtils::errored(kErrorNotInitialized);
805
806 EmbedLabelNode* node = newNodeT<EmbedLabelNode>(label.id());
807 if (ASMJIT_UNLIKELY(!node))
808 return reportError(DebugUtils::errored(kErrorOutOfMemory));
809
810 addNode(node);
811 return kErrorOk;
812 }
813
814 Error BaseBuilder::embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) {
815 if (ASMJIT_UNLIKELY(!_code))
816 return DebugUtils::errored(kErrorNotInitialized);
817
818 EmbedLabelDeltaNode* node = newNodeT<EmbedLabelDeltaNode>(label.id(), base.id(), dataSize);
819 if (ASMJIT_UNLIKELY(!node))
820 return reportError(DebugUtils::errored(kErrorOutOfMemory));
821
822 addNode(node);
823 return kErrorOk;
824 }
825
826 Error BaseBuilder::embedConstPool(const Label& label, const ConstPool& pool) {
827 if (ASMJIT_UNLIKELY(!_code))
828 return DebugUtils::errored(kErrorNotInitialized);
829
830 if (!isLabelValid(label))
831 return reportError(DebugUtils::errored(kErrorInvalidLabel));
832
833 ASMJIT_PROPAGATE(align(kAlignData, uint32_t(pool.alignment())));
834 ASMJIT_PROPAGATE(bind(label));
835
836 EmbedDataNode* node = newEmbedDataNode(nullptr, uint32_t(pool.size()));
837 if (ASMJIT_UNLIKELY(!node))
838 return reportError(DebugUtils::errored(kErrorOutOfMemory));
839
840 pool.fill(node->data());
841 addNode(node);
842 return kErrorOk;
843 }
844
845 // ============================================================================
846 // [asmjit::BaseBuilder - Comment]
847 // ============================================================================
848
849 Error BaseBuilder::comment(const char* data, size_t size) {
850 if (ASMJIT_UNLIKELY(!_code))
851 return DebugUtils::errored(kErrorNotInitialized);
852
853 CommentNode* node = newCommentNode(data, size);
854 if (ASMJIT_UNLIKELY(!node))
855 return reportError(DebugUtils::errored(kErrorOutOfMemory));
856
857 addNode(node);
858 return kErrorOk;
859 }
860
861 // ============================================================================
862 // [asmjit::BaseBuilder - Serialize]
863 // ============================================================================
864
865 Error BaseBuilder::serialize(BaseEmitter* dst) {
866 Error err = kErrorOk;
867 BaseNode* node_ = _firstNode;
868
869 do {
870 dst->setInlineComment(node_->inlineComment());
871
872 if (node_->isInst()) {
873 InstNode* node = node_->as<InstNode>();
874 err = dst->emitInst(node->baseInst(), node->operands(), node->opCount());
875 }
876 else if (node_->isLabel()) {
877 if (node_->isConstPool()) {
878 ConstPoolNode* node = node_->as<ConstPoolNode>();
879 err = dst->embedConstPool(node->label(), node->constPool());
880 }
881 else {
882 LabelNode* node = node_->as<LabelNode>();
883 err = dst->bind(node->label());
884 }
885 }
886 else if (node_->isAlign()) {
887 AlignNode* node = node_->as<AlignNode>();
888 err = dst->align(node->alignMode(), node->alignment());
889 }
890 else if (node_->isEmbedData()) {
891 EmbedDataNode* node = node_->as<EmbedDataNode>();
892 err = dst->embed(node->data(), node->size());
893 }
894 else if (node_->isEmbedLabel()) {
895 EmbedLabelNode* node = node_->as<EmbedLabelNode>();
896 err = dst->embedLabel(node->label());
897 }
898 else if (node_->isEmbedLabelDelta()) {
899 EmbedLabelDeltaNode* node = node_->as<EmbedLabelDeltaNode>();
900 err = dst->embedLabelDelta(node->label(), node->baseLabel(), node->dataSize());
901 }
902 else if (node_->isSection()) {
903 SectionNode* node = node_->as<SectionNode>();
904 err = dst->section(_code->sectionById(node->id()));
905 }
906 else if (node_->isComment()) {
907 CommentNode* node = node_->as<CommentNode>();
908 err = dst->comment(node->inlineComment());
909 }
910
911 if (err) break;
912 node_ = node_->next();
913 } while (node_);
914
915 return err;
916 }
917
918 // ============================================================================
919 // [asmjit::BaseBuilder - Logging]
920 // ============================================================================
921
922 #ifndef ASMJIT_NO_LOGGING
923 Error BaseBuilder::dump(String& sb, uint32_t flags) const noexcept {
924 BaseNode* node = _firstNode;
925 while (node) {
926 ASMJIT_PROPAGATE(Logging::formatNode(sb, flags, this, node));
927 sb.appendChar('\n');
928 node = node->next();
929 }
930
931 return kErrorOk;
932 }
933 #endif
934
935 // ============================================================================
936 // [asmjit::BaseBuilder - Events]
937 // ============================================================================
938
939 Error BaseBuilder::onAttach(CodeHolder* code) noexcept {
940 ASMJIT_PROPAGATE(Base::onAttach(code));
941
942 SectionNode* initialSection;
943 Error err = sectionNodeOf(&initialSection, 0);
944
945 if (!err)
946 err = _passes.willGrow(&_allocator, 8);
947
948 if (ASMJIT_UNLIKELY(err)) {
949 onDetach(code);
950 return err;
951 }
952
953
954 _cursor = initialSection;
955 _firstNode = initialSection;
956 _lastNode = initialSection;
957 initialSection->setFlags(BaseNode::kFlagIsActive);
958
959 return kErrorOk;
960 }
961
962 Error BaseBuilder::onDetach(CodeHolder* code) noexcept {
963 _passes.reset();
964 _sectionNodes.reset();
965 _labelNodes.reset();
966
967 _allocator.reset(&_codeZone);
968 _codeZone.reset();
969 _dataZone.reset();
970 _passZone.reset();
971
972 _nodeFlags = 0;
973
974 _cursor = nullptr;
975 _firstNode = nullptr;
976 _lastNode = nullptr;
977
978 return Base::onDetach(code);
979 }
980
981 // ============================================================================
982 // [asmjit::Pass - Construction / Destruction]
983 // ============================================================================
984
// Constructs a pass with the given `name`. The pass is unowned (`_cb` is
// null) until it's added to a Builder/Compiler via `addPass()`.
Pass::Pass(const char* name) noexcept
  : _cb(nullptr),
    _name(name) {}
Pass::~Pass() noexcept {}
989
990 ASMJIT_END_NAMESPACE
991
992 #endif // !ASMJIT_NO_BUILDER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_BUILDER_H
7 #define _ASMJIT_CORE_BUILDER_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_BUILDER
11
12 #include "../core/assembler.h"
13 #include "../core/codeholder.h"
14 #include "../core/constpool.h"
15 #include "../core/inst.h"
16 #include "../core/operand.h"
17 #include "../core/string.h"
18 #include "../core/support.h"
19 #include "../core/zone.h"
20 #include "../core/zonevector.h"
21
22 ASMJIT_BEGIN_NAMESPACE
23
24 //! \addtogroup asmjit_builder
25 //! \{
26
27 // ============================================================================
28 // [Forward Declarations]
29 // ============================================================================
30
31 class BaseBuilder;
32 class Pass;
33
34 class BaseNode;
35 class InstNode;
36 class SectionNode;
37 class LabelNode;
38 class AlignNode;
39 class EmbedDataNode;
40 class EmbedLabelNode;
41 class ConstPoolNode;
42 class CommentNode;
43 class SentinelNode;
44 class LabelDeltaNode;
45
46 // ============================================================================
47 // [asmjit::BaseBuilder]
48 // ============================================================================
49
class ASMJIT_VIRTAPI BaseBuilder : public BaseEmitter {
public:
  ASMJIT_NONCOPYABLE(BaseBuilder)
  typedef BaseEmitter Base;

  //! Base zone used to allocate nodes and passes.
  Zone _codeZone;
  //! Data zone used to allocate data and names.
  Zone _dataZone;
  //! Pass zone, passed to `Pass::run()`.
  Zone _passZone;
  //! Allocator that uses `_codeZone`.
  ZoneAllocator _allocator;

  //! Array of `Pass` objects.
  ZoneVector<Pass*> _passes;
  //! Maps section indexes to `SectionNode` nodes.
  ZoneVector<SectionNode*> _sectionNodes;
  //! Maps label indexes to `LabelNode` nodes.
  ZoneVector<LabelNode*> _labelNodes;

  //! Current node (cursor) - new nodes are inserted after it.
  BaseNode* _cursor;
  //! First node of the node list.
  BaseNode* _firstNode;
  //! Last node of the node list.
  BaseNode* _lastNode;

  //! Flags assigned to each new node.
  uint32_t _nodeFlags;
  //! The sections links are dirty (used internally).
  bool _dirtySectionLinks;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `BaseBuilder` instance.
  ASMJIT_API BaseBuilder() noexcept;
  //! Destroys the `BaseBuilder` instance.
  ASMJIT_API virtual ~BaseBuilder() noexcept;

  //! \}

  //! \name Node Management
  //! \{

  //! Returns the first node.
  inline BaseNode* firstNode() const noexcept { return _firstNode; }
  //! Returns the last node.
  inline BaseNode* lastNode() const noexcept { return _lastNode; }

  //! Allocates and instantiates a new node of type `T` and returns its instance.
  //! If the allocation fails `nullptr` is returned.
  //!
  //! The template argument `T` must be a type that extends \ref BaseNode.
  //!
  //! \remarks The pointer returned (if non-null) is owned by the Builder or
  //! Compiler. When the Builder/Compiler is destroyed it destroys all nodes
  //! it created so no manual memory management is required.
  template<typename T>
  inline T* newNodeT() noexcept {
    return _allocator.newT<T>(this);
  }

  //! \overload
  template<typename T, typename... Args>
  inline T* newNodeT(Args&&... args) noexcept {
    return _allocator.newT<T>(this, std::forward<Args>(args)...);
  }

  //! Creates a new `LabelNode`.
  ASMJIT_API LabelNode* newLabelNode() noexcept;
  //! Creates a new `AlignNode`.
  ASMJIT_API AlignNode* newAlignNode(uint32_t alignMode, uint32_t alignment) noexcept;
  //! Creates a new `EmbedDataNode`.
  ASMJIT_API EmbedDataNode* newEmbedDataNode(const void* data, uint32_t size) noexcept;
  //! Creates a new `ConstPoolNode`.
  ASMJIT_API ConstPoolNode* newConstPoolNode() noexcept;
  //! Creates a new `CommentNode`.
  ASMJIT_API CommentNode* newCommentNode(const char* data, size_t size) noexcept;

  //! Creates a new `InstNode` with 1 to 4 operands.
  ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0) noexcept;
  //! \overload
  ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1) noexcept;
  //! \overload
  ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1, const Operand_& o2) noexcept;
  //! \overload
  ASMJIT_API InstNode* newInstNode(uint32_t instId, uint32_t instOptions, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) noexcept;
  //! Creates a new `InstNode` with `opCount` uninitialized operand slots.
  ASMJIT_API InstNode* newInstNodeRaw(uint32_t instId, uint32_t instOptions, uint32_t opCount) noexcept;

  //! Adds `node` after the current and sets the current node to the given `node`.
  ASMJIT_API BaseNode* addNode(BaseNode* node) noexcept;
  //! Inserts the given `node` after `ref`.
  ASMJIT_API BaseNode* addAfter(BaseNode* node, BaseNode* ref) noexcept;
  //! Inserts the given `node` before `ref`.
  ASMJIT_API BaseNode* addBefore(BaseNode* node, BaseNode* ref) noexcept;
  //! Removes the given `node`.
  ASMJIT_API BaseNode* removeNode(BaseNode* node) noexcept;
  //! Removes multiple nodes.
  ASMJIT_API void removeNodes(BaseNode* first, BaseNode* last) noexcept;

  //! Returns the cursor.
  //!
  //! When the Builder/Compiler is created it automatically creates a '.text'
  //! \ref SectionNode, which will be the initial one. When instructions are
  //! added they are always added after the cursor and the cursor is changed
  //! to be that newly added node. Use `setCursor()` to change where new nodes
  //! are inserted.
  inline BaseNode* cursor() const noexcept { return _cursor; }

  //! Sets the current node to `node` and return the previous one.
  ASMJIT_API BaseNode* setCursor(BaseNode* node) noexcept;

  //! Sets the current node without returning the previous node.
  //!
  //! Only use this function if you are concerned about performance and want
  //! this inlined (for example if you set the cursor in a loop, etc...).
  inline void _setCursor(BaseNode* node) noexcept { _cursor = node; }

  //! \}

  //! \name Section Management
  //! \{

  //! Returns a vector of SectionNode objects.
  //!
  //! \note If a section of some id is not associated with the Builder/Compiler
  //! it would be null, so always check for nulls if you iterate over the vector.
  inline const ZoneVector<SectionNode*>& sectionNodes() const noexcept { return _sectionNodes; }

  //! Tests whether the `SectionNode` of the given `sectionId` was registered.
  inline bool hasRegisteredSectionNode(uint32_t sectionId) const noexcept {
    return sectionId < _sectionNodes.size() && _sectionNodes[sectionId] != nullptr;
  }

  //! Returns or creates a `SectionNode` that matches the given `sectionId`.
  //!
  //! \remarks This function will either get the existing `SectionNode` or create
  //! it in case it wasn't created before. You can check whether a section has a
  //! registered `SectionNode` by using `BaseBuilder::hasRegisteredSectionNode()`.
  ASMJIT_API Error sectionNodeOf(SectionNode** pOut, uint32_t sectionId) noexcept;

  ASMJIT_API Error section(Section* section) override;

  //! Returns whether the section links of active section nodes are dirty. You can
  //! update these links by calling `updateSectionLinks()` in such case.
  inline bool hasDirtySectionLinks() const noexcept { return _dirtySectionLinks; }

  //! Updates links of all active section nodes.
  ASMJIT_API void updateSectionLinks() noexcept;

  //! \}

  //! \name Label Management
  //! \{

  //! Returns a vector of LabelNode nodes.
  //!
  //! \note If a label of some id is not associated with the Builder/Compiler
  //! it would be null, so always check for nulls if you iterate over the vector.
  inline const ZoneVector<LabelNode*>& labelNodes() const noexcept { return _labelNodes; }

  //! Tests whether the `LabelNode` of the given `labelId` was registered.
  inline bool hasRegisteredLabelNode(uint32_t labelId) const noexcept {
    return labelId < _labelNodes.size() && _labelNodes[labelId] != nullptr;
  }

  //! \overload
  inline bool hasRegisteredLabelNode(const Label& label) const noexcept {
    return hasRegisteredLabelNode(label.id());
  }

  //! Gets or creates a `LabelNode` that matches the given `labelId`.
  //!
  //! \remarks This function will either get the existing `LabelNode` or create
  //! it in case it wasn't created before. You can check whether a label has a
  //! registered `LabelNode` by using `BaseBuilder::hasRegisteredLabelNode()`.
  ASMJIT_API Error labelNodeOf(LabelNode** pOut, uint32_t labelId) noexcept;

  //! \overload
  inline Error labelNodeOf(LabelNode** pOut, const Label& label) noexcept {
    return labelNodeOf(pOut, label.id());
  }

  //! Registers this label node [Internal].
  //!
  //! This function is used internally to register a newly created `LabelNode`
  //! with this instance of Builder/Compiler. Use `labelNodeOf()` functions to
  //! get back `LabelNode` from a label or its identifier.
  ASMJIT_API Error registerLabelNode(LabelNode* node) noexcept;

  ASMJIT_API Label newLabel() override;
  ASMJIT_API Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, uint32_t type = Label::kTypeGlobal, uint32_t parentId = Globals::kInvalidId) override;
  ASMJIT_API Error bind(const Label& label) override;

  //! \}

  //! \name Passes
  //! \{

  //! Returns a vector of `Pass` instances that will be executed by `runPasses()`.
  inline const ZoneVector<Pass*>& passes() const noexcept { return _passes; }

  //! Allocates and instantiates a new pass of type `T` and returns its instance.
  //! If the allocation fails `nullptr` is returned.
  //!
  //! The template argument `T` must be a type that extends \ref Pass.
  //!
  //! \remarks The pointer returned (if non-null) is owned by the Builder or
  //! Compiler. When the Builder/Compiler is destroyed it destroys all passes
  //! it created so no manual memory management is required.
  template<typename T>
  inline T* newPassT() noexcept { return _codeZone.newT<T>(); }

  //! \overload
  template<typename T, typename... Args>
  inline T* newPassT(Args&&... args) noexcept { return _codeZone.newT<T>(std::forward<Args>(args)...); }

  //! Creates a new pass of type `T` and adds it via `addPass()`.
  template<typename T>
  inline Error addPassT() noexcept { return addPass(newPassT<T>()); }

  //! \overload
  template<typename T, typename... Args>
  inline Error addPassT(Args&&... args) noexcept { return addPass(newPassT<T, Args...>(std::forward<Args>(args)...)); }

  //! Returns `Pass` by name.
  ASMJIT_API Pass* passByName(const char* name) const noexcept;
  //! Adds `pass` to the list of passes.
  ASMJIT_API Error addPass(Pass* pass) noexcept;
  //! Removes `pass` from the list of passes and delete it.
  ASMJIT_API Error deletePass(Pass* pass) noexcept;

  //! Runs all passes in order.
  ASMJIT_API Error runPasses();

  //! \}

  //! \name Emit
  //! \{

  ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) override;
  ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) override;

  //! \}

  //! \name Align
  //! \{

  ASMJIT_API Error align(uint32_t alignMode, uint32_t alignment) override;

  //! \}

  //! \name Embed
  //! \{

  ASMJIT_API Error embed(const void* data, uint32_t dataSize) override;
  ASMJIT_API Error embedLabel(const Label& label) override;
  ASMJIT_API Error embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) override;
  ASMJIT_API Error embedConstPool(const Label& label, const ConstPool& pool) override;

  //! \}

  //! \name Comment
  //! \{

  ASMJIT_API Error comment(const char* data, size_t size = SIZE_MAX) override;

  //! \}

  //! \name Serialization
  //! \{

  //! Serializes everything to the given emitter `dst`.
  //!
  //! Although not explicitly required the emitter will most probably be of
  //! Assembler type. The reason is that there is no known use of serializing
  //! nodes held by Builder/Compiler into another Builder-like emitter.
  ASMJIT_API Error serialize(BaseEmitter* dst);

  //! \}

  //! \name Logging
  //! \{

#ifndef ASMJIT_NO_LOGGING
  ASMJIT_API Error dump(String& sb, uint32_t flags = 0) const noexcept;
#endif

  //! \}

  //! \name Events
  //! \{

  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;

  //! \}
};
344
345 // ============================================================================
346 // [asmjit::BaseNode]
347 // ============================================================================
348
//! Base node.
//!
//! Every node represents a building-block used by `BaseBuilder`. It can be
//! instruction, data, label, comment, directive, or any other high-level
//! representation that can be transformed to the building blocks mentioned.
//! Every class that inherits `BaseBuilder` can define its own nodes that it
//! can lower to basic nodes.
class BaseNode {
public:
  ASMJIT_NONCOPYABLE(BaseNode)

  //! Doubly-linked list links, also addressable as a two-element array.
  union {
    struct {
      //! Previous node.
      BaseNode* _prev;
      //! Next node.
      BaseNode* _next;
    };
    //! Links (previous and next nodes).
    BaseNode* _links[2];
  };

  //! Data shared between all types of nodes.
  struct AnyData {
    //! Node type, see \ref NodeType.
    uint8_t _nodeType;
    //! Node flags, see \ref Flags.
    uint8_t _nodeFlags;
    //! Not used by BaseNode.
    uint8_t _reserved0;
    //! Not used by BaseNode.
    uint8_t _reserved1;
  };

  //! Data used by `InstNode` (shares the first two bytes with `AnyData`).
  struct InstData {
    //! Node type, see \ref NodeType.
    uint8_t _nodeType;
    //! Node flags, see \ref Flags.
    uint8_t _nodeFlags;
    //! Instruction operands count (used).
    uint8_t _opCount;
    //! Instruction operands capacity (allocated).
    uint8_t _opCapacity;
  };

  //! Data used by `SentinelNode` (shares the first two bytes with `AnyData`).
  struct SentinelData {
    //! Node type, see \ref NodeType.
    uint8_t _nodeType;
    //! Node flags, see \ref Flags.
    uint8_t _nodeFlags;
    //! Sentinel type.
    uint8_t _sentinelType;
    //! Not used by BaseNode.
    uint8_t _reserved1;
  };

  //! Per-node-type data; all views overlap and agree on `_nodeType`/`_nodeFlags`.
  union {
    AnyData _any;
    InstData _inst;
    SentinelData _sentinel;
  };

  //! Node position in code (should be unique).
  uint32_t _position;

  //! Value reserved for AsmJit users never touched by AsmJit itself.
  union {
    uint64_t _userDataU64;
    void* _userDataPtr;
  };

  //! Data used exclusively by the current `Pass`.
  void* _passData;

  //! Inline comment/annotation or nullptr if not used.
  const char* _inlineComment;

  //! Type of `BaseNode`.
  enum NodeType : uint32_t {
    //! Invalid node (internal, don't use).
    kNodeNone = 0,

    // [BaseBuilder]

    //! Node is `InstNode` or `InstExNode`.
    kNodeInst = 1,
    //! Node is `SectionNode`.
    kNodeSection = 2,
    //! Node is `LabelNode`.
    kNodeLabel = 3,
    //! Node is `AlignNode`.
    kNodeAlign = 4,
    //! Node is `EmbedDataNode`.
    kNodeEmbedData = 5,
    //! Node is `EmbedLabelNode`.
    kNodeEmbedLabel = 6,
    //! Node is `EmbedLabelDeltaNode`.
    kNodeEmbedLabelDelta = 7,
    //! Node is `ConstPoolNode`.
    kNodeConstPool = 8,
    //! Node is `CommentNode`.
    kNodeComment = 9,
    //! Node is `SentinelNode`.
    kNodeSentinel = 10,

    // [BaseCompiler]

    //! Node is `FuncNode` (acts as LabelNode).
    kNodeFunc = 16,
    //! Node is `FuncRetNode` (acts as InstNode).
    kNodeFuncRet = 17,
    //! Node is `FuncCallNode` (acts as InstNode).
    kNodeFuncCall = 18,

    // [UserDefined]

    //! First id of a user-defined node.
    kNodeUser = 32
  };

  //! Node flags, specify what the node is and/or does.
  enum Flags : uint32_t {
    kFlagIsCode = 0x01u, //!< Node is code that can be executed (instruction, label, align, etc...).
    kFlagIsData = 0x02u, //!< Node is data that cannot be executed (data, const-pool, etc...).
    kFlagIsInformative = 0x04u, //!< Node is informative, can be removed and ignored.
    kFlagIsRemovable = 0x08u, //!< Node can be safely removed if unreachable.
    kFlagHasNoEffect = 0x10u, //!< Node does nothing when executed (label, align, explicit nop).
    kFlagActsAsInst = 0x20u, //!< Node is an instruction or acts as it.
    kFlagActsAsLabel = 0x40u, //!< Node is a label or acts as it.
    kFlagIsActive = 0x80u //!< Node is active (part of the code).
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `BaseNode` - always use `BaseBuilder` to allocate nodes.
  ASMJIT_INLINE BaseNode(BaseBuilder* cb, uint32_t type, uint32_t flags = 0) noexcept {
    _prev = nullptr;
    _next = nullptr;
    _any._nodeType = uint8_t(type);
    // Builder-wide flags (`cb->_nodeFlags`) are merged into every new node.
    _any._nodeFlags = uint8_t(flags | cb->_nodeFlags);
    _any._reserved0 = 0;
    _any._reserved1 = 0;
    _position = 0;
    _userDataU64 = 0;
    _passData = nullptr;
    _inlineComment = nullptr;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Casts this node to `T*`.
  template<typename T>
  inline T* as() noexcept { return static_cast<T*>(this); }
  //! Casts this node to `const T*`.
  template<typename T>
  inline const T* as() const noexcept { return static_cast<const T*>(this); }

  //! Returns previous node or `nullptr` if this node is either first or not
  //! part of Builder/Compiler node-list.
  inline BaseNode* prev() const noexcept { return _prev; }
  //! Returns next node or `nullptr` if this node is either last or not part
  //! of Builder/Compiler node-list.
  inline BaseNode* next() const noexcept { return _next; }

  //! Returns the type of the node, see `NodeType`.
  inline uint32_t type() const noexcept { return _any._nodeType; }

  //! Sets the type of the node, see `NodeType` (internal).
  //!
  //! \remarks You should never set a type of a node to anything else than the
  //! initial value. This function is only provided for users that use custom
  //! nodes and need to change the type either during construction or later.
  inline void setType(uint32_t type) noexcept { _any._nodeType = uint8_t(type); }

  //! Tests whether this node is either `InstNode` or extends it.
  inline bool isInst() const noexcept { return hasFlag(kFlagActsAsInst); }
  //! Tests whether this node is `SectionNode`.
  inline bool isSection() const noexcept { return type() == kNodeSection; }
  //! Tests whether this node is either `LabelNode` or extends it.
  inline bool isLabel() const noexcept { return hasFlag(kFlagActsAsLabel); }
  //! Tests whether this node is `AlignNode`.
  inline bool isAlign() const noexcept { return type() == kNodeAlign; }
  //! Tests whether this node is `EmbedDataNode`.
  inline bool isEmbedData() const noexcept { return type() == kNodeEmbedData; }
  //! Tests whether this node is `EmbedLabelNode`.
  inline bool isEmbedLabel() const noexcept { return type() == kNodeEmbedLabel; }
  //! Tests whether this node is `EmbedLabelDeltaNode`.
  inline bool isEmbedLabelDelta() const noexcept { return type() == kNodeEmbedLabelDelta; }
  //! Tests whether this node is `ConstPoolNode`.
  inline bool isConstPool() const noexcept { return type() == kNodeConstPool; }
  //! Tests whether this node is `CommentNode`.
  inline bool isComment() const noexcept { return type() == kNodeComment; }
  //! Tests whether this node is `SentinelNode`.
  inline bool isSentinel() const noexcept { return type() == kNodeSentinel; }

  //! Tests whether this node is `FuncNode`.
  inline bool isFunc() const noexcept { return type() == kNodeFunc; }
  //! Tests whether this node is `FuncRetNode`.
  inline bool isFuncRet() const noexcept { return type() == kNodeFuncRet; }
  //! Tests whether this node is `FuncCallNode`.
  inline bool isFuncCall() const noexcept { return type() == kNodeFuncCall; }

  //! Returns the node flags, see \ref Flags.
  inline uint32_t flags() const noexcept { return _any._nodeFlags; }
  //! Tests whether the node has the given `flag` set.
  inline bool hasFlag(uint32_t flag) const noexcept { return (uint32_t(_any._nodeFlags) & flag) != 0; }
  //! Replaces node flags with `flags`.
  inline void setFlags(uint32_t flags) noexcept { _any._nodeFlags = uint8_t(flags); }
  //! Adds the given `flags` to node flags.
  inline void addFlags(uint32_t flags) noexcept { _any._nodeFlags = uint8_t(_any._nodeFlags | flags); }
  //! Clears the given `flags` from node flags.
  //!
  //! \remarks `flags ^ 0xFF` inverts the low 8 bits (flags are stored in a `uint8_t`).
  inline void clearFlags(uint32_t flags) noexcept { _any._nodeFlags = uint8_t(_any._nodeFlags & (flags ^ 0xFF)); }

  //! Tests whether the node is code that can be executed.
  inline bool isCode() const noexcept { return hasFlag(kFlagIsCode); }
  //! Tests whether the node is data that cannot be executed.
  inline bool isData() const noexcept { return hasFlag(kFlagIsData); }
  //! Tests whether the node is informative only (is never encoded like comment, etc...).
  inline bool isInformative() const noexcept { return hasFlag(kFlagIsInformative); }
  //! Tests whether the node is removable if it's in an unreachable code block.
  inline bool isRemovable() const noexcept { return hasFlag(kFlagIsRemovable); }
  //! Tests whether the node has no effect when executed (label, .align, nop, ...).
  inline bool hasNoEffect() const noexcept { return hasFlag(kFlagHasNoEffect); }
  //! Tests whether the node is part of the code.
  inline bool isActive() const noexcept { return hasFlag(kFlagIsActive); }

  //! Tests whether the node has a position assigned.
  //!
  //! \remarks Returns `true` if node position is non-zero.
  inline bool hasPosition() const noexcept { return _position != 0; }
  //! Returns node position.
  inline uint32_t position() const noexcept { return _position; }
  //! Sets node position.
  //!
  //! Node position is a 32-bit unsigned integer that is used by Compiler to
  //! track where the node is relatively to the start of the function. It doesn't
  //! describe a byte position in a binary, instead it's just a pseudo position
  //! used by liveness analysis and other tools around Compiler.
  //!
  //! If you don't use Compiler then you may use `position()` and `setPosition()`
  //! freely for your own purposes if the 32-bit value limit is okay for you.
  inline void setPosition(uint32_t position) noexcept { _position = position; }

  //! Returns user data casted to `T*`.
  //!
  //! User data is dedicated to be used only by AsmJit users and not touched
  //! by the library. The data has a pointer size so you can either store a
  //! pointer or `intptr_t` value through `setUserDataAsIntPtr()`.
  template<typename T>
  inline T* userDataAsPtr() const noexcept { return static_cast<T*>(_userDataPtr); }
  //! Returns user data casted to `int64_t`.
  inline int64_t userDataAsInt64() const noexcept { return int64_t(_userDataU64); }
  //! Returns user data casted to `uint64_t`.
  inline uint64_t userDataAsUInt64() const noexcept { return _userDataU64; }

  //! Sets user data to `data`.
  template<typename T>
  inline void setUserDataAsPtr(T* data) noexcept { _userDataPtr = static_cast<void*>(data); }
  //! Sets user data to the given 64-bit signed `value`.
  inline void setUserDataAsInt64(int64_t value) noexcept { _userDataU64 = uint64_t(value); }
  //! Sets user data to the given 64-bit unsigned `value`.
  inline void setUserDataAsUInt64(uint64_t value) noexcept { _userDataU64 = value; }

  //! Resets user data to zero / nullptr.
  inline void resetUserData() noexcept { _userDataU64 = 0; }

  //! Tests whether the node has an associated pass data.
  inline bool hasPassData() const noexcept { return _passData != nullptr; }
  //! Returns the node pass data - data used during processing & transformations.
  template<typename T>
  inline T* passData() const noexcept { return (T*)_passData; }
  //! Sets the node pass data to `data`.
  template<typename T>
  inline void setPassData(T* data) noexcept { _passData = (void*)data; }
  //! Resets the node pass data to nullptr.
  inline void resetPassData() noexcept { _passData = nullptr; }

  //! Tests whether the node has an inline comment/annotation.
  inline bool hasInlineComment() const noexcept { return _inlineComment != nullptr; }
  //! Returns an inline comment/annotation string.
  inline const char* inlineComment() const noexcept { return _inlineComment; }
  //! Sets an inline comment/annotation string to `s`.
  //!
  //! \remarks The node stores only the pointer; `s` must outlive the node.
  inline void setInlineComment(const char* s) noexcept { _inlineComment = s; }
  //! Resets an inline comment/annotation string to nullptr.
  inline void resetInlineComment() noexcept { _inlineComment = nullptr; }

  //! \}
};
641
642 // ============================================================================
643 // [asmjit::InstNode]
644 // ============================================================================
645
//! Instruction node.
//!
//! Wraps an instruction with its options and operands.
class InstNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(InstNode)

  enum : uint32_t {
    //! Count of embedded operands per `InstNode` that are always allocated as
    //! a part of the instruction. Minimum embedded operands is 4, but in 32-bit
    //! mode pointers are smaller and we can embed 5. The rest (up to 6 operands)
    //! is always stored in `InstExNode`.
    kBaseOpCapacity = uint32_t((128 - sizeof(BaseNode) - sizeof(BaseInst)) / sizeof(Operand_))
  };

  //! Base instruction data.
  BaseInst _baseInst;
  //! First 4 or 5 operands (indexed from 0).
  Operand_ _opArray[kBaseOpCapacity];

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `InstNode` instance.
  ASMJIT_INLINE InstNode(BaseBuilder* cb, uint32_t instId, uint32_t options, uint32_t opCount, uint32_t opCapacity = kBaseOpCapacity) noexcept
    : BaseNode(cb, kNodeInst, kFlagIsCode | kFlagIsRemovable | kFlagActsAsInst),
      _baseInst(instId, options) {
    _inst._opCapacity = uint8_t(opCapacity);
    _inst._opCount = uint8_t(opCount);
  }

  //! Reset all built-in operands, including `extraReg`.
  inline void _resetOps() noexcept {
    _baseInst.resetExtraReg();
    for (uint32_t i = 0, count = opCapacity(); i < count; i++)
      _opArray[i].reset();
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the wrapped `BaseInst` (instruction id, options, extra register).
  inline BaseInst& baseInst() noexcept { return _baseInst; }
  //! \overload
  inline const BaseInst& baseInst() const noexcept { return _baseInst; }

  //! Returns the instruction id, see `BaseInst::Id`.
  inline uint32_t id() const noexcept { return _baseInst.id(); }
  //! Sets the instruction id to `id`, see `BaseInst::Id`.
  inline void setId(uint32_t id) noexcept { _baseInst.setId(id); }

  //! Returns instruction options.
  inline uint32_t instOptions() const noexcept { return _baseInst.options(); }
  //! Sets instruction options.
  inline void setInstOptions(uint32_t options) noexcept { _baseInst.setOptions(options); }
  //! Adds instruction options.
  inline void addInstOptions(uint32_t options) noexcept { _baseInst.addOptions(options); }
  //! Clears instruction options.
  inline void clearInstOptions(uint32_t options) noexcept { _baseInst.clearOptions(options); }

  //! Tests whether the node has an extra register operand.
  inline bool hasExtraReg() const noexcept { return _baseInst.hasExtraReg(); }
  //! Returns extra register operand.
  inline RegOnly& extraReg() noexcept { return _baseInst.extraReg(); }
  //! \overload
  inline const RegOnly& extraReg() const noexcept { return _baseInst.extraReg(); }
  //! Sets extra register operand to `reg`.
  inline void setExtraReg(const BaseReg& reg) noexcept { _baseInst.setExtraReg(reg); }
  //! Sets extra register operand to `reg`.
  inline void setExtraReg(const RegOnly& reg) noexcept { _baseInst.setExtraReg(reg); }
  //! Resets extra register operand.
  inline void resetExtraReg() noexcept { _baseInst.resetExtraReg(); }

  //! Returns operands count.
  inline uint32_t opCount() const noexcept { return _inst._opCount; }
  //! Returns operands capacity.
  inline uint32_t opCapacity() const noexcept { return _inst._opCapacity; }

  //! Sets operands count.
  inline void setOpCount(uint32_t opCount) noexcept { _inst._opCount = uint8_t(opCount); }

  //! Returns operands array.
  inline Operand* operands() noexcept { return (Operand*)_opArray; }
  //! Returns operands array (const).
  inline const Operand* operands() const noexcept { return (const Operand*)_opArray; }

  //! Returns the operand at the given `index` (must be less than `opCapacity()`).
  inline Operand& opType(uint32_t index) noexcept {
    ASMJIT_ASSERT(index < opCapacity());
    return _opArray[index].as<Operand>();
  }

  //! \overload
  inline const Operand& opType(uint32_t index) const noexcept {
    ASMJIT_ASSERT(index < opCapacity());
    return _opArray[index].as<Operand>();
  }

  //! Copies operand `op` into the operand slot at the given `index`.
  inline void setOp(uint32_t index, const Operand_& op) noexcept {
    ASMJIT_ASSERT(index < opCapacity());
    _opArray[index].copyFrom(op);
  }

  //! Resets the operand slot at the given `index` to none.
  inline void resetOp(uint32_t index) noexcept {
    ASMJIT_ASSERT(index < opCapacity());
    _opArray[index].reset();
  }

  //! \}

  //! \name Utilities
  //! \{

  //! Tests whether any of the used operands is of the given `opType` (see `Operand::OpType`).
  inline bool hasOpType(uint32_t opType) const noexcept {
    for (uint32_t i = 0, count = opCount(); i < count; i++)
      if (_opArray[i].opType() == opType)
        return true;
    return false;
  }

  //! Tests whether the instruction uses at least one register operand.
  inline bool hasRegOp() const noexcept { return hasOpType(Operand::kOpReg); }
  //! Tests whether the instruction uses at least one memory operand.
  inline bool hasMemOp() const noexcept { return hasOpType(Operand::kOpMem); }
  //! Tests whether the instruction uses at least one immediate operand.
  inline bool hasImmOp() const noexcept { return hasOpType(Operand::kOpImm); }
  //! Tests whether the instruction uses at least one label operand.
  inline bool hasLabelOp() const noexcept { return hasOpType(Operand::kOpLabel); }

  //! Returns the index of the first operand of the given `opType`, or
  //! `opCount()` if no such operand exists.
  inline uint32_t indexOfOpType(uint32_t opType) const noexcept {
    uint32_t i = 0;
    uint32_t count = opCount();

    while (i < count) {
      if (_opArray[i].opType() == opType)
        break;
      i++;
    }

    return i;
  }

  //! Returns the index of the first memory operand, or `opCount()` if none.
  inline uint32_t indexOfMemOp() const noexcept { return indexOfOpType(Operand::kOpMem); }
  //! Returns the index of the first immediate operand, or `opCount()` if none.
  inline uint32_t indexOfImmOp() const noexcept { return indexOfOpType(Operand::kOpImm); }
  //! Returns the index of the first label operand, or `opCount()` if none.
  inline uint32_t indexOfLabelOp() const noexcept { return indexOfOpType(Operand::kOpLabel); }

  //! \}

  //! \name Rewriting
  //! \{

  //! Returns the base of the rewritable id array (internal).
  //!
  //! NOTE(review): this relies on `_extraReg._id` being immediately followed in
  //! memory by the operand data of `_opArray` — verify against the layout of
  //! `BaseInst`/`Operand_` before changing any of these members.
  inline uint32_t* _getRewriteArray() noexcept { return &_baseInst._extraReg._id; }
  //! \overload
  inline const uint32_t* _getRewriteArray() const noexcept { return &_baseInst._extraReg._id; }

  //! Returns the rewrite index of the id addressed by `id` (internal).
  ASMJIT_INLINE uint32_t getRewriteIndex(const uint32_t* id) const noexcept {
    const uint32_t* array = _getRewriteArray();
    ASMJIT_ASSERT(array <= id);

    size_t index = (size_t)(id - array);
    ASMJIT_ASSERT(index < 32);

    return uint32_t(index);
  }

  //! Stores `id` at the given rewrite `index` (internal).
  ASMJIT_INLINE void rewriteIdAtIndex(uint32_t index, uint32_t id) noexcept {
    uint32_t* array = _getRewriteArray();
    array[index] = id;
  }

  //! \}

  //! \name Static Functions
  //! \{

  //! Returns the operand capacity to allocate for `opCount` operands -
  //! either the embedded `kBaseOpCapacity` or the full `Globals::kMaxOpCount`.
  static inline uint32_t capacityOfOpCount(uint32_t opCount) noexcept {
    return opCount <= kBaseOpCapacity ? kBaseOpCapacity : Globals::kMaxOpCount;
  }

  //! Returns the node size in bytes required to hold `opCapacity` operands.
  static inline size_t nodeSizeOfOpCapacity(uint32_t opCapacity) noexcept {
    size_t base = sizeof(InstNode) - kBaseOpCapacity * sizeof(Operand);
    return base + opCapacity * sizeof(Operand);
  }

  //! \}
};
825
826 // ============================================================================
827 // [asmjit::InstExNode]
828 // ============================================================================
829
//! Instruction node with maximum number of operands.
//!
//! This node is created automatically by Builder/Compiler in case that the
//! required number of operands exceeds the default capacity of `InstNode`.
class InstExNode : public InstNode {
public:
  ASMJIT_NONCOPYABLE(InstExNode)

  //! Continued `_opArray[]` to hold up to `kMaxOpCount` operands.
  Operand_ _opArrayEx[Globals::kMaxOpCount - kBaseOpCapacity];

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `InstExNode` instance.
  //!
  //! NOTE(review): `opCapacity` is forwarded into `InstNode`'s `opCount`
  //! parameter, so `_opCount` becomes `opCapacity` and `_opCapacity` stays at
  //! the default `kBaseOpCapacity` — this looks suspicious; verify against the
  //! Builder/Compiler allocation path before relying on this constructor.
  inline InstExNode(BaseBuilder* cb, uint32_t instId, uint32_t options, uint32_t opCapacity = Globals::kMaxOpCount) noexcept
    : InstNode(cb, instId, options, opCapacity) {}

  //! \}
};
850
851 // ============================================================================
852 // [asmjit::SectionNode]
853 // ============================================================================
854
//! Section node.
class SectionNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(SectionNode)

  //! Section id.
  uint32_t _id;

  //! Next section node that follows this section.
  //!
  //! This link is only valid when the section is active (is part of the code)
  //! and when `Builder::hasDirtySectionLinks()` returns `false`. If you intend
  //! to use this field you should always call `Builder::updateSectionLinks()`
  //! before you do so.
  SectionNode* _nextSection;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `SectionNode` instance.
  inline SectionNode(BaseBuilder* cb, uint32_t id = 0) noexcept
    : BaseNode(cb, kNodeSection, kFlagHasNoEffect),
      _id(id),
      _nextSection(nullptr) {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the section id.
  inline uint32_t id() const noexcept { return _id; }

  //! \}
};
890
891 // ============================================================================
892 // [asmjit::LabelNode]
893 // ============================================================================
894
//! Label node.
class LabelNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(LabelNode)

  //! Label id.
  uint32_t _id;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `LabelNode` instance.
  inline LabelNode(BaseBuilder* cb, uint32_t id = 0) noexcept
    : BaseNode(cb, kNodeLabel, kFlagHasNoEffect | kFlagActsAsLabel),
      _id(id) {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the id of the label.
  inline uint32_t id() const noexcept { return _id; }
  //! Returns the label as `Label` operand.
  inline Label label() const noexcept { return Label(_id); }

  //! \}
};
922
923 // ============================================================================
924 // [asmjit::AlignNode]
925 // ============================================================================
926
//! Align directive (BaseBuilder).
//!
//! Wraps `.align` directive.
class AlignNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(AlignNode)

  //! Align mode, see `AlignMode`.
  uint32_t _alignMode;
  //! Alignment (in bytes).
  uint32_t _alignment;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `AlignNode` instance.
  inline AlignNode(BaseBuilder* cb, uint32_t alignMode, uint32_t alignment) noexcept
    : BaseNode(cb, kNodeAlign, kFlagIsCode | kFlagHasNoEffect),
      _alignMode(alignMode),
      _alignment(alignment) {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns align mode.
  inline uint32_t alignMode() const noexcept { return _alignMode; }
  //! Sets align mode to `alignMode`.
  inline void setAlignMode(uint32_t alignMode) noexcept { _alignMode = alignMode; }

  //! Returns alignment in bytes.
  inline uint32_t alignment() const noexcept { return _alignment; }
  //! Sets alignment in bytes to `alignment`.
  inline void setAlignment(uint32_t alignment) noexcept { _alignment = alignment; }

  //! \}
};
965
966 // ============================================================================
967 // [asmjit::EmbedDataNode]
968 // ============================================================================
969
//! Embed data node.
//!
//! Wraps `.data` directive. The node contains data that will be placed at the
//! node's position in the assembler stream. The data is considered to be RAW;
//! no analysis nor byte-order conversion is performed on RAW data.
class EmbedDataNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(EmbedDataNode)

  enum : uint32_t {
    //! Inline buffer size, chosen so the whole node occupies 64 bytes.
    kInlineBufferSize = uint32_t(64 - sizeof(BaseNode) - 4)
  };

  //! Storage - small payloads are embedded inline, larger ones are referenced.
  union {
    struct {
      //! Embedded data buffer.
      uint8_t _buf[kInlineBufferSize];
      //! Size of the data.
      uint32_t _size;
    };
    struct {
      //! Pointer to external data.
      uint8_t* _externalPtr;
    };
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `EmbedDataNode` instance.
  //!
  //! Data that fits `kInlineBufferSize` is copied into the node (only when
  //! `data` is non-null, otherwise the inline buffer is left uninitialized);
  //! larger payloads are referenced via `_externalPtr` and are NOT copied.
  inline EmbedDataNode(BaseBuilder* cb, void* data, uint32_t size) noexcept
    : BaseNode(cb, kNodeEmbedData, kFlagIsData) {

    if (size <= kInlineBufferSize) {
      if (data)
        memcpy(_buf, data, size);
    }
    else {
      _externalPtr = static_cast<uint8_t*>(data);
    }
    _size = size;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns pointer to the data.
  inline uint8_t* data() const noexcept { return _size <= kInlineBufferSize ? const_cast<uint8_t*>(_buf) : _externalPtr; }
  //! Returns size of the data.
  inline uint32_t size() const noexcept { return _size; }

  //! \}
};
1025
1026 // ============================================================================
1027 // [asmjit::EmbedLabelNode]
1028 // ============================================================================
1029
//! Label data node - embeds the address of a label as data.
class EmbedLabelNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(EmbedLabelNode)

  //! Label id.
  uint32_t _id;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `EmbedLabelNode` instance.
  inline EmbedLabelNode(BaseBuilder* cb, uint32_t id = 0) noexcept
    : BaseNode(cb, kNodeEmbedLabel, kFlagIsData),
      _id(id) {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the id of the label.
  inline uint32_t id() const noexcept { return _id; }
  //! Sets the label id (use with caution, improper use can break a lot of things).
  inline void setId(uint32_t id) noexcept { _id = id; }

  //! Returns the label as `Label` operand.
  inline Label label() const noexcept { return Label(_id); }
  //! Sets the label id from `label` operand.
  inline void setLabel(const Label& label) noexcept { setId(label.id()); }

  //! \}
};
1062
1063 // ============================================================================
1064 // [asmjit::EmbedLabelDeltaNode]
1065 // ============================================================================
1066
//! Label-delta data node - embeds the distance between a label and a base label.
class EmbedLabelDeltaNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(EmbedLabelDeltaNode)

  //! Label id.
  uint32_t _id;
  //! Base label id the delta is computed against.
  uint32_t _baseId;
  //! Size of the embedded delta (in bytes).
  uint32_t _dataSize;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `EmbedLabelDeltaNode` instance.
  inline EmbedLabelDeltaNode(BaseBuilder* cb, uint32_t id = 0, uint32_t baseId = 0, uint32_t dataSize = 0) noexcept
    : BaseNode(cb, kNodeEmbedLabelDelta, kFlagIsData),
      _id(id),
      _baseId(baseId),
      _dataSize(dataSize) {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the id of the label.
  inline uint32_t id() const noexcept { return _id; }
  //! Sets the label id.
  inline void setId(uint32_t id) noexcept { _id = id; }
  //! Returns the label as `Label` operand.
  inline Label label() const noexcept { return Label(_id); }
  //! Sets the label id from `label` operand.
  inline void setLabel(const Label& label) noexcept { setId(label.id()); }

  //! Returns the id of the base label.
  inline uint32_t baseId() const noexcept { return _baseId; }
  //! Sets the base label id.
  inline void setBaseId(uint32_t baseId) noexcept { _baseId = baseId; }
  //! Returns the base label as `Label` operand.
  inline Label baseLabel() const noexcept { return Label(_baseId); }
  //! Sets the base label id from `label` operand.
  inline void setBaseLabel(const Label& baseLabel) noexcept { setBaseId(baseLabel.id()); }

  //! Returns the size of the embedded delta in bytes.
  inline uint32_t dataSize() const noexcept { return _dataSize; }
  //! Sets the size of the embedded delta in bytes.
  inline void setDataSize(uint32_t dataSize) noexcept { _dataSize = dataSize; }

  //! \}
};
1114
1115 // ============================================================================
1116 // [asmjit::ConstPoolNode]
1117 // ============================================================================
1118
//! A node that wraps `ConstPool`.
//!
//! Inherits `LabelNode` so the pool itself is addressable through its label.
class ConstPoolNode : public LabelNode {
public:
  ASMJIT_NONCOPYABLE(ConstPoolNode)

  //! The wrapped constant pool, allocated from the builder's code zone.
  ConstPool _constPool;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `ConstPoolNode` instance.
  inline ConstPoolNode(BaseBuilder* cb, uint32_t id = 0) noexcept
    : LabelNode(cb, id),
      _constPool(&cb->_codeZone) {

    // LabelNode constructed us as kNodeLabel/code - retag as a data node.
    setType(kNodeConstPool);
    addFlags(kFlagIsData);
    clearFlags(kFlagIsCode | kFlagHasNoEffect);
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the constant-pool is empty.
  inline bool empty() const noexcept { return _constPool.empty(); }
  //! Returns the size of the constant-pool in bytes.
  inline size_t size() const noexcept { return _constPool.size(); }
  //! Returns minimum alignment.
  inline size_t alignment() const noexcept { return _constPool.alignment(); }

  //! Returns the wrapped `ConstPool` instance.
  inline ConstPool& constPool() noexcept { return _constPool; }
  //! Returns the wrapped `ConstPool` instance (const).
  inline const ConstPool& constPool() const noexcept { return _constPool; }

  //! \}

  //! \name Utilities
  //! \{

  //! See `ConstPool::add()`.
  inline Error add(const void* data, size_t size, size_t& dstOffset) noexcept {
    return _constPool.add(data, size, dstOffset);
  }

  //! \}
};
1168
1169 // ============================================================================
1170 // [asmjit::CommentNode]
1171 // ============================================================================
1172
//! Comment node.
//!
//! Informative-only node that carries an inline comment; it emits nothing.
class CommentNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(CommentNode)

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `CommentNode` instance.
  //!
  //! \remarks Only the `comment` pointer is stored; it must outlive the node.
  inline CommentNode(BaseBuilder* cb, const char* comment) noexcept
    : BaseNode(cb, kNodeComment, kFlagIsInformative | kFlagHasNoEffect | kFlagIsRemovable) {
    _inlineComment = comment;
  }

  //! \}
};
1189
1190 // ============================================================================
1191 // [asmjit::SentinelNode]
1192 // ============================================================================
1193
//! Sentinel node.
//!
//! Sentinel is a marker that is completely ignored by the code builder. It's
//! used to remember a position in a code as it never gets removed by any pass.
class SentinelNode : public BaseNode {
public:
  ASMJIT_NONCOPYABLE(SentinelNode)

  //! Type of the sentinel (purely informative purpose).
  enum SentinelType : uint32_t {
    //! Sentinel type not specified.
    kSentinelUnknown = 0u,
    //! Sentinel marks the end of a function.
    kSentinelFuncEnd = 1u
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `SentinelNode` instance.
  inline SentinelNode(BaseBuilder* cb, uint32_t sentinelType = kSentinelUnknown) noexcept
    : BaseNode(cb, kNodeSentinel, kFlagIsInformative | kFlagHasNoEffect) {

    _sentinel._sentinelType = uint8_t(sentinelType);
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the type of the sentinel, see `SentinelType`.
  inline uint32_t sentinelType() const noexcept { return _sentinel._sentinelType; }
  //! Sets the type of the sentinel, see `SentinelType`.
  inline void setSentinelType(uint32_t type) noexcept { _sentinel._sentinelType = uint8_t(type); }

  //! \}
};
1228
1229 // ============================================================================
1230 // [asmjit::Pass]
1231 // ============================================================================
1232
//! Pass can be used to implement code transformations, analysis, and lowering.
class ASMJIT_VIRTAPI Pass {
public:
  ASMJIT_BASE_CLASS(Pass)
  ASMJIT_NONCOPYABLE(Pass)

  //! BaseBuilder this pass is assigned to.
  BaseBuilder* _cb;
  //! Name of the pass.
  const char* _name;

  //! \name Construction & Destruction
  //! \{

  //! Creates a pass with the given `name` (the pointer is stored, not copied).
  ASMJIT_API Pass(const char* name) noexcept;
  ASMJIT_API virtual ~Pass() noexcept;

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the `BaseBuilder` this pass is assigned to.
  inline const BaseBuilder* cb() const noexcept { return _cb; }
  //! Returns the name of the pass.
  inline const char* name() const noexcept { return _name; }

  //! \}

  //! \name Pass Interface
  //! \{

  //! Processes the code stored in Builder or Compiler.
  //!
  //! This is the only function that is called by the `BaseBuilder` to process
  //! the code. It passes `zone`, which will be reset after the `run()` finishes.
  virtual Error run(Zone* zone, Logger* logger) noexcept = 0;

  //! \}
};
1271
1272 //! \}
1273
1274 ASMJIT_END_NAMESPACE
1275
1276 #endif // !ASMJIT_NO_BUILDER
1277 #endif // _ASMJIT_CORE_BUILDER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/arch.h"
8 #include "../core/func.h"
9 #include "../core/type.h"
10
11 #ifdef ASMJIT_BUILD_X86
12 #include "../x86/x86callconv_p.h"
13 #endif
14
15 #ifdef ASMJIT_BUILD_ARM
16 #include "../arm/armcallconv_p.h"
17 #endif
18
19 ASMJIT_BEGIN_NAMESPACE
20
21 // ============================================================================
22 // [asmjit::CallConv - Init / Reset]
23 // ============================================================================
24
// Initializes this `CallConv` from the calling convention id `ccId` by
// dispatching to the compiled-in backend (X86/ARM) that owns the id.
// Returns `kErrorInvalidArgument` when `ccId` belongs to no such backend.
ASMJIT_FAVOR_SIZE Error CallConv::init(uint32_t ccId) noexcept {
  // Start from a clean state (fields zeroed, passed order filled with 0xFF),
  // see `CallConv::reset()`.
  reset();

#ifdef ASMJIT_BUILD_X86
  if (CallConv::isX86Family(ccId))
    return x86::CallConvInternal::init(*this, ccId);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (CallConv::isArmFamily(ccId))
    return arm::CallConvInternal::init(*this, ccId);
#endif

  return DebugUtils::errored(kErrorInvalidArgument);
}
40
41 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_CALLCONV_H
7 #define _ASMJIT_CORE_CALLCONV_H
8
9 #include "../core/arch.h"
10 #include "../core/operand.h"
11 #include "../core/support.h"
12
13 ASMJIT_BEGIN_NAMESPACE
14
15 //! \addtogroup asmjit_func
16 //! \{
17
18 // ============================================================================
19 // [asmjit::CallConv]
20 // ============================================================================
21
22 //! Function calling convention.
23 //!
24 //! Function calling convention is a scheme that defines how function parameters
25 //! are passed and how function returns its result. AsmJit defines a variety of
26 //! architecture and OS specific calling conventions and also provides a compile
27 //! time detection to make the code-generation easier.
28 struct CallConv {
29 //! Calling convention id, see `Id`.
30 uint8_t _id;
31 //! Architecture id (see `ArchInfo::Id`).
32 uint8_t _archId;
33 //! Register assignment strategy.
34 uint8_t _strategy;
35 //! Flags.
36 uint8_t _flags;
37
38 //! Red zone size (AMD64 == 128 bytes).
39 uint8_t _redZoneSize;
40 //! Spill zone size (WIN64 == 32 bytes).
41 uint8_t _spillZoneSize;
42 //! Natural stack alignment as defined by OS/ABI.
43 uint8_t _naturalStackAlignment;
44 uint8_t _reserved[1];
45
46 //! Mask of all passed registers, per group.
47 uint32_t _passedRegs[BaseReg::kGroupVirt];
48 //! Mask of all preserved registers, per group.
49 uint32_t _preservedRegs[BaseReg::kGroupVirt];
50
51 //! Internal limits of AsmJit's CallConv.
52 enum Limits : uint32_t {
53 kMaxRegArgsPerGroup = 16
54 };
55
56 //! Passed registers' order.
57 union RegOrder {
58 //! Passed registers, ordered.
59 uint8_t id[kMaxRegArgsPerGroup];
60 uint32_t packed[(kMaxRegArgsPerGroup + 3) / 4];
61 };
62
63 //! Passed registers' order, per group.
64 RegOrder _passedOrder[BaseReg::kGroupVirt];
65
66 //! Calling convention id.
67 enum Id : uint32_t {
68 //! None or invalid (can't be used).
69 kIdNone = 0,
70
71 // ------------------------------------------------------------------------
72 // [Universal]
73 // ------------------------------------------------------------------------
74
75 // TODO: To make this possible we need to know target ARCH and ABI.
76
77 /*
78
79 // Universal calling conventions are applicable to any target and are
80 // converted to target dependent conventions at runtime. The purpose of
81 // these conventions is to make using functions less target dependent.
82
83 kIdCDecl = 1,
84 kIdStdCall = 2,
85 kIdFastCall = 3,
86
87 //! AsmJit specific calling convention designed for calling functions
88 //! inside a multimedia code that don't use many registers internally,
89 //! but are long enough to be called and not inlined. These functions are
90 //! usually used to calculate trigonometric functions, logarithms, etc...
91 kIdLightCall2 = 10,
92 kIdLightCall3 = 11,
93 kIdLightCall4 = 12,
94 */
95
96 // ------------------------------------------------------------------------
97 // [X86]
98 // ------------------------------------------------------------------------
99
100 //! X86 `__cdecl` calling convention (used by C runtime and libraries).
101 kIdX86CDecl = 16,
102 //! X86 `__stdcall` calling convention (used mostly by WinAPI).
103 kIdX86StdCall = 17,
104 //! X86 `__thiscall` calling convention (MSVC/Intel).
105 kIdX86MsThisCall = 18,
106 //! X86 `__fastcall` convention (MSVC/Intel).
107 kIdX86MsFastCall = 19,
108 //! X86 `__fastcall` convention (GCC and Clang).
109 kIdX86GccFastCall = 20,
110 //! X86 `regparm(1)` convention (GCC and Clang).
111 kIdX86GccRegParm1 = 21,
112 //! X86 `regparm(2)` convention (GCC and Clang).
113 kIdX86GccRegParm2 = 22,
114 //! X86 `regparm(3)` convention (GCC and Clang).
115 kIdX86GccRegParm3 = 23,
116
117 kIdX86LightCall2 = 29,
118 kIdX86LightCall3 = 30,
119 kIdX86LightCall4 = 31,
120
121 //! X64 calling convention - WIN64-ABI.
122 kIdX86Win64 = 32,
123 //! X64 calling convention - SystemV / AMD64-ABI.
124 kIdX86SysV64 = 33,
125
126 kIdX64LightCall2 = 45,
127 kIdX64LightCall3 = 46,
128 kIdX64LightCall4 = 47,
129
130 // ------------------------------------------------------------------------
131 // [ARM]
132 // ------------------------------------------------------------------------
133
134 //! Legacy calling convention, floating point arguments are passed via GP registers.
135 kIdArm32SoftFP = 48,
136 //! Modern calling convention, uses VFP registers to pass floating point arguments.
137 kIdArm32HardFP = 49,
138
139 // ------------------------------------------------------------------------
140 // [Internal]
141 // ------------------------------------------------------------------------
142
143 //! \cond INTERNAL
144
145 _kIdX86Start = 16,
146 _kIdX86End = 31,
147
148 _kIdX64Start = 32,
149 _kIdX64End = 47,
150
151 _kIdArmStart = 48,
152 _kIdArmEnd = 49,
153
154 //! \endcond
155
156 // ------------------------------------------------------------------------
157 // [Host]
158 // ------------------------------------------------------------------------
159
160 #if defined(ASMJIT_DOCGEN)
161
162 //! Default calling convention based on the current C++ compiler's settings.
163 //!
164 //! \note This should be always the same as `kIdHostCDecl`, but some
165 //! compilers allow to override the default calling convention. Overriding
166 //! is not detected at the moment.
167 kIdHost = DETECTED_AT_COMPILE_TIME,
168
169 //! Default CDECL calling convention based on the current C++ compiler's settings.
170 kIdHostCDecl = DETECTED_AT_COMPILE_TIME,
171
172 //! Default STDCALL calling convention based on the current C++ compiler's settings.
173 //!
174 //! \note If not defined by the host then it's the same as `kIdHostCDecl`.
175 kIdHostStdCall = DETECTED_AT_COMPILE_TIME,
176
177 //! Compatibility for `__fastcall` calling convention.
178 //!
179 //! \note If not defined by the host then it's the same as `kIdHostCDecl`.
180 kIdHostFastCall = DETECTED_AT_COMPILE_TIME
181
182 #elif ASMJIT_ARCH_X86 == 32
183
184 kIdHost = kIdX86CDecl,
185 kIdHostCDecl = kIdX86CDecl,
186 kIdHostStdCall = kIdX86StdCall,
187
188 #if defined(_MSC_VER)
189 kIdHostFastCall = kIdX86MsFastCall,
190 #elif defined(__GNUC__)
191 kIdHostFastCall = kIdX86GccFastCall,
192 #else
193 kIdHostFastCall = kIdHost,
194 #endif
195
196 kIdHostLightCall2 = kIdX86LightCall2,
197 kIdHostLightCall3 = kIdX86LightCall3,
198 kIdHostLightCall4 = kIdX86LightCall4
199
200 #elif ASMJIT_ARCH_X86 == 64
201
202 #if defined(_WIN32)
203 kIdHost = kIdX86Win64,
204 #else
205 kIdHost = kIdX86SysV64,
206 #endif
207
208 kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
209 kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
210 kIdHostFastCall = kIdHost, // Doesn't exist, redirected to host.
211
212 kIdHostLightCall2 = kIdX64LightCall2,
213 kIdHostLightCall3 = kIdX64LightCall3,
214 kIdHostLightCall4 = kIdX64LightCall4
215
216 #elif ASMJIT_ARCH_ARM == 32
217
218 #if defined(__SOFTFP__)
219 kIdHost = kIdArm32SoftFP,
220 #else
221 kIdHost = kIdArm32HardFP,
222 #endif
223 // These don't exist on ARM.
224 kIdHostCDecl = kIdHost, // Doesn't exist, redirected to host.
225 kIdHostStdCall = kIdHost, // Doesn't exist, redirected to host.
226 kIdHostFastCall = kIdHost // Doesn't exist, redirected to host.
227
228 #else
229
230 kIdHost = kIdNone,
231 kIdHostCDecl = kIdHost,
232 kIdHostStdCall = kIdHost,
233 kIdHostFastCall = kIdHost
234
235 #endif
236 };
237
238 //! Strategy used to assign registers to function arguments.
239 //!
240 //! This is AsmJit specific. It basically describes how AsmJit should convert
241 //! the function arguments defined by `FuncSignature` into register IDs and
242 //! stack offsets. The default strategy `kStrategyDefault` assigns registers
243 //! and then stack whereas `kStrategyWin64` strategy does register shadowing
244 //! as defined by WIN64 calling convention - it applies to 64-bit calling
245 //! conventions only.
246 enum Strategy : uint32_t {
247 kStrategyDefault = 0, //!< Default register assignment strategy.
248 kStrategyWin64 = 1 //!< WIN64 specific register assignment strategy.
249 };
250
251 //! Calling convention flags.
252 enum Flags : uint32_t {
253 kFlagCalleePopsStack = 0x01, //!< Callee is responsible for cleaning up the stack.
254 kFlagPassFloatsByVec = 0x02, //!< Pass F32 and F64 arguments by VEC128 register.
255 kFlagVectorCall = 0x04, //!< This is a '__vectorcall' calling convention.
256 kFlagIndirectVecArgs = 0x08 //!< Pass vector arguments indirectly (as a pointer).
257 };
258
259 //! \name Construction & Destruction
260 //! \{
261
262 ASMJIT_API Error init(uint32_t ccId) noexcept;
263
264 inline void reset() noexcept {
265 memset(this, 0, sizeof(*this));
266 memset(_passedOrder, 0xFF, sizeof(_passedOrder));
267 }
268
269 //! \}
270
271 //! \name Accessors
272 //! \{
273
274 //! Returns the calling convention id, see `Id`.
275 inline uint32_t id() const noexcept { return _id; }
276 //! Sets the calling convention id, see `Id`.
277 inline void setId(uint32_t id) noexcept { _id = uint8_t(id); }
278
279 //! Returns the calling function architecture id.
280 inline uint32_t archId() const noexcept { return _archId; }
281 //! Sets the calling function architecture id.
282 inline void setArchType(uint32_t archId) noexcept { _archId = uint8_t(archId); }
283
284 //! Returns the strategy used to assign registers to arguments, see `Strategy`.
285 inline uint32_t strategy() const noexcept { return _strategy; }
286 //! Sets the strategy used to assign registers to arguments, see `Strategy`.
287 inline void setStrategy(uint32_t strategy) noexcept { _strategy = uint8_t(strategy); }
288
289 //! Tests whether the calling convention has the given `flag` set.
290 inline bool hasFlag(uint32_t flag) const noexcept { return (uint32_t(_flags) & flag) != 0; }
291 //! Returns the calling convention flags, see `Flags`.
292 inline uint32_t flags() const noexcept { return _flags; }
293 //! Adds the calling convention flags, see `Flags`.
294 inline void setFlags(uint32_t flag) noexcept { _flags = uint8_t(flag); };
295 //! Adds the calling convention flags, see `Flags`.
296 inline void addFlags(uint32_t flags) noexcept { _flags = uint8_t(_flags | flags); };
297
298 //! Tests whether this calling convention specifies 'RedZone'.
299 inline bool hasRedZone() const noexcept { return _redZoneSize != 0; }
300 //! Tests whether this calling convention specifies 'SpillZone'.
301 inline bool hasSpillZone() const noexcept { return _spillZoneSize != 0; }
302
303 //! Returns size of 'RedZone'.
304 inline uint32_t redZoneSize() const noexcept { return _redZoneSize; }
305 //! Returns size of 'SpillZone'.
306 inline uint32_t spillZoneSize() const noexcept { return _spillZoneSize; }
307
308 //! Sets size of 'RedZone'.
309 inline void setRedZoneSize(uint32_t size) noexcept { _redZoneSize = uint8_t(size); }
310 //! Sets size of 'SpillZone'.
311 inline void setSpillZoneSize(uint32_t size) noexcept { _spillZoneSize = uint8_t(size); }
312
313 //! Returns a natural stack alignment.
314 inline uint32_t naturalStackAlignment() const noexcept { return _naturalStackAlignment; }
315 //! Sets a natural stack alignment.
316 //!
317 //! This function can be used to override the default stack alignment in case
318 //! that you know that it's alignment is different. For example it allows to
319 //! implement custom calling conventions that guarantee higher stack alignment.
320 inline void setNaturalStackAlignment(uint32_t value) noexcept { _naturalStackAlignment = uint8_t(value); }
321
322 inline const uint8_t* passedOrder(uint32_t group) const noexcept {
323 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
324 return _passedOrder[group].id;
325 }
326
327 inline uint32_t passedRegs(uint32_t group) const noexcept {
328 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
329 return _passedRegs[group];
330 }
331
332 inline void _setPassedPacked(uint32_t group, uint32_t p0, uint32_t p1, uint32_t p2, uint32_t p3) noexcept {
333 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
334
335 _passedOrder[group].packed[0] = p0;
336 _passedOrder[group].packed[1] = p1;
337 _passedOrder[group].packed[2] = p2;
338 _passedOrder[group].packed[3] = p3;
339 }
340
341 inline void setPassedToNone(uint32_t group) noexcept {
342 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
343
344 _setPassedPacked(group, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu);
345 _passedRegs[group] = 0u;
346 }
347
348 inline void setPassedOrder(uint32_t group, uint32_t a0, uint32_t a1 = 0xFF, uint32_t a2 = 0xFF, uint32_t a3 = 0xFF, uint32_t a4 = 0xFF, uint32_t a5 = 0xFF, uint32_t a6 = 0xFF, uint32_t a7 = 0xFF) noexcept {
349 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
350
351 // NOTE: This should always be called with all arguments known at compile time,
352 // so even if it looks scary it should be translated into few instructions.
353 _setPassedPacked(group, Support::bytepack32_4x8(a0, a1, a2, a3),
354 Support::bytepack32_4x8(a4, a5, a6, a7),
355 0xFFFFFFFFu,
356 0xFFFFFFFFu);
357
358 _passedRegs[group] = (a0 != 0xFF ? 1u << a0 : 0u) |
359 (a1 != 0xFF ? 1u << a1 : 0u) |
360 (a2 != 0xFF ? 1u << a2 : 0u) |
361 (a3 != 0xFF ? 1u << a3 : 0u) |
362 (a4 != 0xFF ? 1u << a4 : 0u) |
363 (a5 != 0xFF ? 1u << a5 : 0u) |
364 (a6 != 0xFF ? 1u << a6 : 0u) |
365 (a7 != 0xFF ? 1u << a7 : 0u) ;
366 }
367
368 inline uint32_t preservedRegs(uint32_t group) const noexcept {
369 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
370 return _preservedRegs[group];
371 }
372
373 inline void setPreservedRegs(uint32_t group, uint32_t regs) noexcept {
374 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
375 _preservedRegs[group] = regs;
376 }
377
378 //! \}
379
380 //! \name Static Functions
381 //! \{
382
383 static inline bool isX86Family(uint32_t ccId) noexcept { return ccId >= _kIdX86Start && ccId <= _kIdX64End; }
384 static inline bool isArmFamily(uint32_t ccId) noexcept { return ccId >= _kIdArmStart && ccId <= _kIdArmEnd; }
385
386 //! \}
387 };
388
389 //! \}
390
391 ASMJIT_END_NAMESPACE
392
393 #endif // _ASMJIT_CORE_CALLCONV_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_CODEBUFFERWRITER_P_H
7 #define _ASMJIT_CORE_CODEBUFFERWRITER_P_H
8
9 #include "../core/assembler.h"
10 #include "../core/support.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \cond INTERNAL
15 //! \addtogroup asmjit_core
16 //! \{
17
18 // ============================================================================
19 // [asmjit::CodeBufferWriter]
20 // ============================================================================
21
22 //! Helper that is used to write into a `CodeBuffer` held by `BaseAssembler`.
23 class CodeBufferWriter {
24 public:
25 uint8_t* _cursor;
26
27 ASMJIT_INLINE explicit CodeBufferWriter(BaseAssembler* a) noexcept
28 : _cursor(a->_bufferPtr) {}
29
30 ASMJIT_INLINE Error ensureSpace(BaseAssembler* a, size_t n) noexcept {
31 size_t remainingSpace = (size_t)(a->_bufferEnd - _cursor);
32 if (ASMJIT_UNLIKELY(remainingSpace < n)) {
33 CodeBuffer& buffer = a->_section->_buffer;
34 Error err = a->_code->growBuffer(&buffer, n);
35 if (ASMJIT_UNLIKELY(err))
36 return a->reportError(err);
37 _cursor = a->_bufferPtr;
38 }
39 return kErrorOk;
40 }
41
42 ASMJIT_INLINE uint8_t* cursor() const noexcept { return _cursor; }
43 ASMJIT_INLINE void setCursor(uint8_t* cursor) noexcept { _cursor = cursor; }
44 ASMJIT_INLINE void advance(size_t n) noexcept { _cursor += n; }
45
46 ASMJIT_INLINE size_t offsetFrom(uint8_t* from) const noexcept {
47 ASMJIT_ASSERT(_cursor >= from);
48 return (size_t)(_cursor - from);
49 }
50
51 template<typename T>
52 ASMJIT_INLINE void emit8(T val) noexcept {
53 typedef typename std::make_unsigned<T>::type U;
54 _cursor[0] = uint8_t(U(val) & U(0xFF));
55 _cursor++;
56 }
57
58 template<typename T, typename Y>
59 ASMJIT_INLINE void emit8If(T val, Y cond) noexcept {
60 typedef typename std::make_unsigned<T>::type U;
61 ASMJIT_ASSERT(size_t(cond) <= 1u);
62
63 _cursor[0] = uint8_t(U(val) & U(0xFF));
64 _cursor += size_t(cond);
65 }
66
67 template<typename T>
68 ASMJIT_INLINE void emit16uLE(T val) noexcept {
69 typedef typename std::make_unsigned<T>::type U;
70 Support::writeU16uLE(_cursor, uint32_t(U(val) & 0xFFFFu));
71 _cursor += 2;
72 }
73
74 template<typename T>
75 ASMJIT_INLINE void emit16uBE(T val) noexcept {
76 typedef typename std::make_unsigned<T>::type U;
77 Support::writeU16uBE(_cursor, uint32_t(U(val) & 0xFFFFu));
78 _cursor += 2;
79 }
80
81 template<typename T>
82 ASMJIT_INLINE void emit32uLE(T val) noexcept {
83 typedef typename std::make_unsigned<T>::type U;
84 Support::writeU32uLE(_cursor, uint32_t(U(val) & 0xFFFFFFFFu));
85 _cursor += 4;
86 }
87
88 template<typename T>
89 ASMJIT_INLINE void emit32uBE(T val) noexcept {
90 typedef typename std::make_unsigned<T>::type U;
91 Support::writeU32uBE(_cursor, uint32_t(U(val) & 0xFFFFFFFFu));
92 _cursor += 4;
93 }
94
95 ASMJIT_INLINE void emitData(const void* data, size_t size) noexcept {
96 ASMJIT_ASSERT(size != 0);
97 memcpy(_cursor, data, size);
98 _cursor += size;
99 }
100
101 template<typename T>
102 ASMJIT_INLINE void emitValueLE(const T& value, size_t size) noexcept {
103 typedef typename std::make_unsigned<T>::type U;
104 ASMJIT_ASSERT(size <= sizeof(T));
105
106 U v = U(value);
107 for (uint32_t i = 0; i < size; i++) {
108 _cursor[i] = uint8_t(v & 0xFFu);
109 v >>= 8;
110 }
111 _cursor += size;
112 }
113
114 template<typename T>
115 ASMJIT_INLINE void emitValueBE(const T& value, size_t size) noexcept {
116 typedef typename std::make_unsigned<T>::type U;
117 ASMJIT_ASSERT(size <= sizeof(T));
118
119 U v = U(value);
120 for (uint32_t i = 0; i < size; i++) {
121 _cursor[i] = uint8_t(v >> (sizeof(T) - 8));
122 v <<= 8;
123 }
124 _cursor += size;
125 }
126
127 ASMJIT_INLINE void emitZeros(size_t size) noexcept {
128 ASMJIT_ASSERT(size != 0);
129 memset(_cursor, 0, size);
130 _cursor += size;
131 }
132
133 ASMJIT_INLINE void remove8(uint8_t* where) noexcept {
134 ASMJIT_ASSERT(where < _cursor);
135
136 uint8_t* p = where;
137 while (++p != _cursor)
138 p[-1] = p[0];
139 _cursor--;
140 }
141
142 template<typename T>
143 ASMJIT_INLINE void insert8(uint8_t* where, T val) noexcept {
144 uint8_t* p = _cursor;
145
146 while (p != where) {
147 p[0] = p[-1];
148 p--;
149 }
150
151 *p = uint8_t(val & 0xFF);
152 _cursor++;
153 }
154
155 ASMJIT_INLINE void done(BaseAssembler* a) noexcept {
156 CodeBuffer& buffer = a->_section->_buffer;
157 size_t newSize = (size_t)(_cursor - a->_bufferData);
158 ASMJIT_ASSERT(newSize <= buffer.capacity());
159
160 a->_bufferPtr = _cursor;
161 buffer._size = Support::max(buffer._size, newSize);
162 }
163 };
164
165 //! \}
166 //! \endcond
167
168 ASMJIT_END_NAMESPACE
169
170 #endif // _ASMJIT_CORE_CODEBUFFERWRITER_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/assembler.h"
8 #include "../core/logging.h"
9 #include "../core/support.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 // ============================================================================
14 // [Globals]
15 // ============================================================================
16
// Name given to the section holding the address table (see
// `CodeHolder::ensureAddressTableSection()` below).
static const char CodeHolder_addrTabName[] = ".addrtab";
18
//! Encodes an X86 ModR/M byte from its three fields: `m` (mod, bits 7..6),
//! `o` (reg/opcode, bits 5..3), and `rm` (r/m, bits 2..0).
static inline uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) noexcept {
  uint32_t encoded = rm;
  encoded |= o << 3;
  encoded |= m << 6;
  return encoded;
}
23
24 // ============================================================================
25 // [asmjit::LabelLinkIterator]
26 // ============================================================================
27
// Iterates over the singly-linked list of `LabelLink`s attached to a
// `LabelEntry`, keeping a pointer-to-pointer (`_pPrev`) to the previous
// link's `next` field so links can be unlinked in O(1) while iterating.
class LabelLinkIterator {
public:
  ASMJIT_INLINE LabelLinkIterator(LabelEntry* le) noexcept { reset(le); }

  ASMJIT_INLINE explicit operator bool() const noexcept { return isValid(); }
  // True while the iterator points at a link.
  ASMJIT_INLINE bool isValid() const noexcept { return _link != nullptr; }

  ASMJIT_INLINE LabelLink* link() const noexcept { return _link; }
  ASMJIT_INLINE LabelLink* operator->() const noexcept { return _link; }

  // Restarts iteration at the head of `le`'s link list.
  ASMJIT_INLINE void reset(LabelEntry* le) noexcept {
    _pPrev = &le->_links;
    _link = *_pPrev;
  }

  // Advances to the next link, keeping the current one in the list.
  ASMJIT_INLINE void next() noexcept {
    _pPrev = &_link->next;
    _link = *_pPrev;
  }

  // Unlinks the current link from the list, releases it back to the
  // allocator, decrements the unresolved-link counter, and advances.
  ASMJIT_INLINE void resolveAndNext(CodeHolder* code) noexcept {
    LabelLink* linkToDelete = _link;

    _link = _link->next;
    *_pPrev = _link;

    code->_unresolvedLinkCount--;
    code->_allocator.release(linkToDelete, sizeof(LabelLink));
  }

  // Address of the previous link's `next` pointer (or the list head).
  LabelLink** _pPrev;
  // Current link (null at end of list).
  LabelLink* _link;
};
61
62 // ============================================================================
63 // [asmjit::ErrorHandler]
64 // ============================================================================
65
// Out-of-line (non-inline) definitions of the virtual `ErrorHandler`
// constructor/destructor.
ErrorHandler::ErrorHandler() noexcept {}
ErrorHandler::~ErrorHandler() noexcept {}
68
69 // ============================================================================
70 // [asmjit::CodeHolder - Utilities]
71 // ============================================================================
72
// Returns `self` to its construction state. The order matters here:
//   1. Emitters are detached first (in reverse order of attachment).
//   2. Owned (non-external) section buffers are freed with `::free()`,
//      matching the `::malloc()`/`::realloc()` in `CodeHolder_reserveInternal`.
//   3. Zone-backed containers are reset before the zone allocator and the
//      zone itself, which reclaims their backing memory per `resetPolicy`.
static void CodeHolder_resetInternal(CodeHolder* self, uint32_t resetPolicy) noexcept {
  uint32_t i;
  const ZoneVector<BaseEmitter*>& emitters = self->emitters();

  i = emitters.size();
  while (i)
    self->detach(emitters[--i]);

  // Reset everything into its construction state.
  self->_codeInfo.reset();
  self->_emitterOptions = 0;
  self->_logger = nullptr;
  self->_errorHandler = nullptr;

  // Reset all sections.
  uint32_t numSections = self->_sections.size();
  for (i = 0; i < numSections; i++) {
    Section* section = self->_sections[i];
    if (section->_buffer.data() && !section->_buffer.isExternal())
      ::free(section->_buffer._data);
    section->_buffer._data = nullptr;
    section->_buffer._capacity = 0;
  }

  // Reset zone allocator and all containers using it.
  ZoneAllocator* allocator = self->allocator();

  self->_emitters.reset();
  self->_namedLabels.reset();
  self->_relocations.reset();
  self->_labelEntries.reset();
  self->_sections.reset();

  self->_unresolvedLinkCount = 0;
  self->_addressTableSection = nullptr;
  self->_addressTableEntries.reset();

  allocator->reset(&self->_zone);
  self->_zone.reset(resetPolicy);
}
113
114 static void CodeHolder_modifyEmitterOptions(CodeHolder* self, uint32_t clear, uint32_t add) noexcept {
115 uint32_t oldOpt = self->_emitterOptions;
116 uint32_t newOpt = (oldOpt & ~clear) | add;
117
118 if (oldOpt == newOpt)
119 return;
120
121 // Modify emitter options of `CodeHolder` itself.
122 self->_emitterOptions = newOpt;
123
124 // Modify emitter options of all attached emitters.
125 for (BaseEmitter* emitter : self->emitters()) {
126 emitter->_emitterOptions = (emitter->_emitterOptions & ~clear) | add;
127 emitter->onUpdateGlobalInstOptions();
128 }
129 }
130
131 // ============================================================================
132 // [asmjit::CodeHolder - Construction / Destruction]
133 // ============================================================================
134
// Constructs an empty, uninitialized `CodeHolder`. The zone's first block is
// sized to 16kB minus `Zone::kBlockOverhead` so the whole allocation stays
// within a 16kB request.
CodeHolder::CodeHolder() noexcept
  : _codeInfo(),
    _emitterOptions(0),
    _logger(nullptr),
    _errorHandler(nullptr),
    _zone(16384 - Zone::kBlockOverhead),
    _allocator(&_zone),
    _unresolvedLinkCount(0),
    _addressTableSection(nullptr) {}
144
// Destructor performs a hard reset - detaches all emitters and frees all
// owned buffers and zone memory.
CodeHolder::~CodeHolder() noexcept {
  CodeHolder_resetInternal(this, Globals::kResetHard);
}
148
149 // ============================================================================
150 // [asmjit::CodeHolder - Init / Reset]
151 // ============================================================================
152
// Stores an up-to-8-character section name (e.g. ".text") into the section's
// name union as two packed 32-bit words; unspecified characters default to 0.
inline void CodeHolder_setSectionDefaultName(
  Section* section,
  char c0 = 0, char c1 = 0, char c2 = 0, char c3 = 0,
  char c4 = 0, char c5 = 0, char c6 = 0, char c7 = 0) noexcept {

  section->_name.u32[0] = Support::bytepack32_4x8(uint8_t(c0), uint8_t(c1), uint8_t(c2), uint8_t(c3));
  section->_name.u32[1] = Support::bytepack32_4x8(uint8_t(c4), uint8_t(c5), uint8_t(c6), uint8_t(c7));
}
161
162 Error CodeHolder::init(const CodeInfo& info) noexcept {
163 // Cannot reinitialize if it's locked or there is one or more emitter attached.
164 if (isInitialized())
165 return DebugUtils::errored(kErrorAlreadyInitialized);
166
167 // If we are just initializing there should be no emitters attached.
168 ASMJIT_ASSERT(_emitters.empty());
169
170 // Create the default section and insert it to the `_sections` array.
171 Error err = _sections.willGrow(&_allocator);
172 if (err == kErrorOk) {
173 Section* section = _allocator.allocZeroedT<Section>();
174 if (ASMJIT_LIKELY(section)) {
175 section->_flags = Section::kFlagExec | Section::kFlagConst;
176 CodeHolder_setSectionDefaultName(section, '.', 't', 'e', 'x', 't');
177 _sections.appendUnsafe(section);
178 }
179 else {
180 err = DebugUtils::errored(kErrorOutOfMemory);
181 }
182 }
183
184 if (ASMJIT_UNLIKELY(err)) {
185 _zone.reset();
186 return err;
187 }
188 else {
189 _codeInfo = info;
190 return kErrorOk;
191 }
192 }
193
// Resets the holder according to `resetPolicy` (see `Globals`); detaches
// emitters and releases or reuses memory via `CodeHolder_resetInternal`.
void CodeHolder::reset(uint32_t resetPolicy) noexcept {
  CodeHolder_resetInternal(this, resetPolicy);
}
197
198 // ============================================================================
199 // [asmjit::CodeHolder - Attach / Detach]
200 // ============================================================================
201
202 Error CodeHolder::attach(BaseEmitter* emitter) noexcept {
203 // Catch a possible misuse of the API.
204 if (ASMJIT_UNLIKELY(!emitter))
205 return DebugUtils::errored(kErrorInvalidArgument);
206
207 // Invalid emitter, this should not be possible.
208 uint32_t type = emitter->emitterType();
209 if (ASMJIT_UNLIKELY(type == BaseEmitter::kTypeNone || type >= BaseEmitter::kTypeCount))
210 return DebugUtils::errored(kErrorInvalidState);
211
212 // This is suspicious, but don't fail if `emitter` is already attached
213 // to this code holder. This is not error, but it's not recommended.
214 if (emitter->_code != nullptr) {
215 if (emitter->_code == this)
216 return kErrorOk;
217 return DebugUtils::errored(kErrorInvalidState);
218 }
219
220 // Reserve the space now as we cannot fail after `onAttach()` succeeded.
221 ASMJIT_PROPAGATE(_emitters.willGrow(&_allocator, 1));
222 ASMJIT_PROPAGATE(emitter->onAttach(this));
223
224 // Connect CodeHolder <-> BaseEmitter.
225 ASMJIT_ASSERT(emitter->_code == this);
226 _emitters.appendUnsafe(emitter);
227
228 return kErrorOk;
229 }
230
231 Error CodeHolder::detach(BaseEmitter* emitter) noexcept {
232 if (ASMJIT_UNLIKELY(!emitter))
233 return DebugUtils::errored(kErrorInvalidArgument);
234
235 if (ASMJIT_UNLIKELY(emitter->_code != this))
236 return DebugUtils::errored(kErrorInvalidState);
237
238 // NOTE: We always detach if we were asked to, if error happens during
239 // `emitter->onDetach()` we just propagate it, but the BaseEmitter will
240 // be detached.
241 Error err = kErrorOk;
242 if (!emitter->isDestroyed())
243 err = emitter->onDetach(this);
244
245 // Disconnect CodeHolder <-> BaseEmitter.
246 uint32_t index = _emitters.indexOf(emitter);
247 ASMJIT_ASSERT(index != Globals::kNotFound);
248
249 _emitters.removeAt(index);
250 emitter->_code = nullptr;
251
252 return err;
253 }
254
255 // ============================================================================
256 // [asmjit::CodeHolder - Emitter Options]
257 // ============================================================================
258
// Mask applied to user-supplied options: `kOptionLoggingEnabled` is managed
// internally by `CodeHolder::setLogger()` and must not be toggled directly.
static constexpr uint32_t kEmitterOptionsFilter = ~uint32_t(BaseEmitter::kOptionLoggingEnabled);

// Adds `options` to this holder and all attached emitters (filtered).
void CodeHolder::addEmitterOptions(uint32_t options) noexcept {
  CodeHolder_modifyEmitterOptions(this, 0, options & kEmitterOptionsFilter);
}

// Clears `options` from this holder and all attached emitters (filtered).
void CodeHolder::clearEmitterOptions(uint32_t options) noexcept {
  CodeHolder_modifyEmitterOptions(this, options & kEmitterOptionsFilter, 0);
}
268
269 // ============================================================================
270 // [asmjit::CodeHolder - Logging & Error Handling]
271 // ============================================================================
272
// Sets (or clears, when `logger` is null) the logger and synchronizes the
// `kOptionLoggingEnabled` flag across this holder and all attached emitters.
// Compiled out to a no-op (except the unused-parameter silencer) when
// ASMJIT_NO_LOGGING is defined.
void CodeHolder::setLogger(Logger* logger) noexcept {
#ifndef ASMJIT_NO_LOGGING
  _logger = logger;
  uint32_t option = !logger ? uint32_t(0) : uint32_t(BaseEmitter::kOptionLoggingEnabled);
  CodeHolder_modifyEmitterOptions(this, BaseEmitter::kOptionLoggingEnabled, option);
#else
  ASMJIT_UNUSED(logger);
#endif
}
282
283 // ============================================================================
284 // [asmjit::CodeHolder - Code Buffer]
285 // ============================================================================
286
// (Re)allocates the buffer `cb` of `self` to exactly `n` bytes and then
// fixes up the cached buffer pointers of every attached assembler that
// writes into `cb`, preserving each assembler's current offset.
static Error CodeHolder_reserveInternal(CodeHolder* self, CodeBuffer* cb, size_t n) noexcept {
  uint8_t* oldData = cb->_data;
  uint8_t* newData;

  // Owned buffers grow via `realloc`; external or empty buffers get a fresh
  // `malloc` (an external buffer must never be reallocated by us).
  if (oldData && !cb->isExternal())
    newData = static_cast<uint8_t*>(::realloc(oldData, n));
  else
    newData = static_cast<uint8_t*>(::malloc(n));

  if (ASMJIT_UNLIKELY(!newData))
    return DebugUtils::errored(kErrorOutOfMemory);

  cb->_data = newData;
  cb->_capacity = n;

  // Update pointers used by assemblers, if attached.
  for (BaseEmitter* emitter : self->emitters()) {
    if (emitter->isAssembler()) {
      BaseAssembler* a = static_cast<BaseAssembler*>(emitter);
      if (&a->_section->_buffer == cb) {
        size_t offset = a->offset();

        a->_bufferData = newData;
        a->_bufferEnd  = newData + n;
        a->_bufferPtr  = newData + offset;
      }
    }
  }

  return kErrorOk;
}
318
// Grows `cb` so that at least `n` more bytes fit after its current size.
// Growth doubles the capacity below `kGrowThreshold`, then grows linearly;
// `kAllocOverhead` is subtracted from the request so the underlying
// allocation lands on a "nicer" size for the system allocator.
Error CodeHolder::growBuffer(CodeBuffer* cb, size_t n) noexcept {
  // The size of the section must be valid.
  size_t size = cb->size();
  if (ASMJIT_UNLIKELY(n > std::numeric_limits<uintptr_t>::max() - size))
    return DebugUtils::errored(kErrorOutOfMemory);

  // We can now check if growing the buffer is really necessary. It's unlikely
  // that this function is called while there is still room for `n` bytes.
  size_t capacity = cb->capacity();
  size_t required = cb->size() + n;
  if (ASMJIT_UNLIKELY(required <= capacity))
    return kErrorOk;

  // Fixed buffers (including external ones) can never grow.
  if (cb->isFixed())
    return DebugUtils::errored(kErrorTooLarge);

  // NOTE(review): 8096 (not 8192) looks deliberate - presumably chosen so the
  // request plus allocator overhead stays near 8kB; confirm before changing.
  size_t kInitialCapacity = 8096;
  if (capacity < kInitialCapacity)
    capacity = kInitialCapacity;
  else
    capacity += Globals::kAllocOverhead;

  do {
    size_t old = capacity;
    if (capacity < Globals::kGrowThreshold)
      capacity *= 2;
    else
      capacity += Globals::kGrowThreshold;

    // Overflow.
    if (ASMJIT_UNLIKELY(old > capacity))
      return DebugUtils::errored(kErrorOutOfMemory);
  } while (capacity - Globals::kAllocOverhead < required);

  return CodeHolder_reserveInternal(this, cb, capacity - Globals::kAllocOverhead);
}
355
356 Error CodeHolder::reserveBuffer(CodeBuffer* cb, size_t n) noexcept {
357 size_t capacity = cb->capacity();
358 if (n <= capacity) return kErrorOk;
359
360 if (cb->isFixed())
361 return DebugUtils::errored(kErrorTooLarge);
362
363 return CodeHolder_reserveInternal(this, cb, n);
364 }
365
366 // ============================================================================
367 // [asmjit::CodeHolder - Sections]
368 // ============================================================================
369
370 Error CodeHolder::newSection(Section** sectionOut, const char* name, size_t nameSize, uint32_t flags, uint32_t alignment) noexcept {
371 *sectionOut = nullptr;
372
373 if (nameSize == SIZE_MAX)
374 nameSize = strlen(name);
375
376 if (alignment == 0)
377 alignment = 1;
378
379 if (ASMJIT_UNLIKELY(!Support::isPowerOf2(alignment)))
380 return DebugUtils::errored(kErrorInvalidArgument);
381
382 if (ASMJIT_UNLIKELY(nameSize > Globals::kMaxSectionNameSize))
383 return DebugUtils::errored(kErrorInvalidSectionName);
384
385 uint32_t sectionId = _sections.size();
386 if (ASMJIT_UNLIKELY(sectionId == Globals::kInvalidId))
387 return DebugUtils::errored(kErrorTooManySections);
388
389 ASMJIT_PROPAGATE(_sections.willGrow(&_allocator));
390 Section* section = _allocator.allocZeroedT<Section>();
391
392 if (ASMJIT_UNLIKELY(!section))
393 return DebugUtils::errored(kErrorOutOfMemory);
394
395 section->_id = sectionId;
396 section->_flags = flags;
397 section->_alignment = alignment;
398 memcpy(section->_name.str, name, nameSize);
399 _sections.appendUnsafe(section);
400
401 *sectionOut = section;
402 return kErrorOk;
403 }
404
405 Section* CodeHolder::sectionByName(const char* name, size_t nameSize) const noexcept {
406 if (nameSize == SIZE_MAX)
407 nameSize = strlen(name);
408
409 // This could be also put in a hash-table similarly like we do with labels,
410 // however it's questionable as the number of sections should be pretty low
411 // in general. Create an issue if this becomes a problem.
412 if (ASMJIT_UNLIKELY(nameSize <= Globals::kMaxSectionNameSize)) {
413 for (Section* section : _sections)
414 if (memcmp(section->_name.str, name, nameSize) == 0 && section->_name.str[nameSize] == '\0')
415 return section;
416 }
417
418 return nullptr;
419 }
420
421 Section* CodeHolder::ensureAddressTableSection() noexcept {
422 if (_addressTableSection)
423 return _addressTableSection;
424
425 newSection(&_addressTableSection, CodeHolder_addrTabName, sizeof(CodeHolder_addrTabName) - 1, 0, _codeInfo.gpSize());
426 return _addressTableSection;
427 }
428
429 Error CodeHolder::addAddressToAddressTable(uint64_t address) noexcept {
430 AddressTableEntry* entry = _addressTableEntries.get(address);
431 if (entry)
432 return kErrorOk;
433
434 Section* section = ensureAddressTableSection();
435 if (ASMJIT_UNLIKELY(!section))
436 return DebugUtils::errored(kErrorOutOfMemory);
437
438 entry = _zone.newT<AddressTableEntry>(address);
439 if (ASMJIT_UNLIKELY(!entry))
440 return DebugUtils::errored(kErrorOutOfMemory);
441
442 _addressTableEntries.insert(entry);
443 section->_virtualSize += _codeInfo.gpSize();
444
445 return kErrorOk;
446 }
447
448 // ============================================================================
449 // [asmjit::CodeHolder - Labels / Symbols]
450 // ============================================================================
451
//! Only used to lookup a label from `_namedLabels`.
//!
//! A lightweight key object: it doesn't own the name, it just pairs the
//! string with its precomputed hash so the hash-map can probe and then
//! confirm matches byte-by-byte.
class LabelByName {
public:
  inline LabelByName(const char* key, size_t keySize, uint32_t hashCode) noexcept
    : _key(key),
      _keySize(uint32_t(keySize)),
      _hashCode(hashCode) {}

  //! Returns the precomputed hash of the key.
  inline uint32_t hashCode() const noexcept { return _hashCode; }

  //! Returns true if `entry` has exactly this name (size and bytes equal).
  inline bool matches(const LabelEntry* entry) const noexcept {
    return entry->nameSize() == _keySize && ::memcmp(entry->name(), _key, _keySize) == 0;
  }

  //! Key string (not owned, not necessarily null-terminated).
  const char* _key;
  //! Key length in bytes.
  uint32_t _keySize;
  //! Precomputed hash of `_key` (may include a mixed-in parent id).
  uint32_t _hashCode;
};
470
// Returns a hash of `name` and fixes `nameSize` if it's `SIZE_MAX`.
//
// When `nameSize` is SIZE_MAX the name is treated as null-terminated; its
// real length is computed and written back through the reference.
static uint32_t CodeHolder_hashNameAndGetSize(const char* name, size_t& nameSize) noexcept {
  uint32_t hashCode = 0;
  if (nameSize == SIZE_MAX) {
    size_t i = 0;
    for (;;) {
      uint8_t c = uint8_t(name[i]);
      if (!c) break;
      hashCode = Support::hashRound(hashCode, c);
      i++;
    }
    nameSize = i;
  }
  else {
    for (size_t i = 0; i < nameSize; i++) {
      uint8_t c = uint8_t(name[i]);
      // FIXME(review): this returns an `Error` code from a function whose
      // return value is consumed as a hash - callers (e.g.
      // `newNamedLabelEntry`) cannot distinguish the error from a valid
      // hash, so a name with an embedded NUL is not actually rejected.
      // Fixing it properly requires changing the signature (out-param hash).
      if (ASMJIT_UNLIKELY(!c)) return DebugUtils::errored(kErrorInvalidLabelName);
      hashCode = Support::hashRound(hashCode, c);
    }
  }
  return hashCode;
}
493
494 static bool CodeHolder_writeDisplacement(void* dst, int64_t displacement, uint32_t displacementSize) {
495 if (displacementSize == 4 && Support::isInt32(displacement)) {
496 Support::writeI32uLE(dst, int32_t(displacement));
497 return true;
498 }
499 else if (displacementSize == 1 && Support::isInt8(displacement)) {
500 Support::writeI8(dst, int8_t(displacement));
501 return true;
502 }
503
504 return false;
505 }
506
507 LabelLink* CodeHolder::newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel) noexcept {
508 LabelLink* link = _allocator.allocT<LabelLink>();
509 if (ASMJIT_UNLIKELY(!link)) return nullptr;
510
511 link->next = le->_links;
512 le->_links = link;
513
514 link->sectionId = sectionId;
515 link->relocId = Globals::kInvalidId;
516 link->offset = offset;
517 link->rel = rel;
518
519 _unresolvedLinkCount++;
520 return link;
521 }
522
523 Error CodeHolder::newLabelEntry(LabelEntry** entryOut) noexcept {
524 *entryOut = 0;
525
526 uint32_t labelId = _labelEntries.size();
527 if (ASMJIT_UNLIKELY(labelId == Globals::kInvalidId))
528 return DebugUtils::errored(kErrorTooManyLabels);
529
530 ASMJIT_PROPAGATE(_labelEntries.willGrow(&_allocator));
531 LabelEntry* le = _allocator.allocZeroedT<LabelEntry>();
532
533 if (ASMJIT_UNLIKELY(!le))
534 return DebugUtils::errored(kErrorOutOfMemory);
535
536 le->_setId(labelId);
537 le->_parentId = Globals::kInvalidId;
538 le->_offset = 0;
539 _labelEntries.appendUnsafe(le);
540
541 *entryOut = le;
542 return kErrorOk;
543 }
544
545 Error CodeHolder::newNamedLabelEntry(LabelEntry** entryOut, const char* name, size_t nameSize, uint32_t type, uint32_t parentId) noexcept {
546 *entryOut = 0;
547 uint32_t hashCode = CodeHolder_hashNameAndGetSize(name, nameSize);
548
549 if (ASMJIT_UNLIKELY(nameSize == 0))
550 return DebugUtils::errored(kErrorInvalidLabelName);
551
552 if (ASMJIT_UNLIKELY(nameSize > Globals::kMaxLabelNameSize))
553 return DebugUtils::errored(kErrorLabelNameTooLong);
554
555 switch (type) {
556 case Label::kTypeLocal:
557 if (ASMJIT_UNLIKELY(parentId >= _labelEntries.size()))
558 return DebugUtils::errored(kErrorInvalidParentLabel);
559
560 hashCode ^= parentId;
561 break;
562
563 case Label::kTypeGlobal:
564 if (ASMJIT_UNLIKELY(parentId != Globals::kInvalidId))
565 return DebugUtils::errored(kErrorNonLocalLabelCantHaveParent);
566
567 break;
568
569 default:
570 return DebugUtils::errored(kErrorInvalidArgument);
571 }
572
573 // Don't allow to insert duplicates. Local labels allow duplicates that have
574 // different id, this is already accomplished by having a different hashes
575 // between the same label names having different parent labels.
576 LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode));
577 if (ASMJIT_UNLIKELY(le))
578 return DebugUtils::errored(kErrorLabelAlreadyDefined);
579
580 Error err = kErrorOk;
581 uint32_t labelId = _labelEntries.size();
582
583 if (ASMJIT_UNLIKELY(labelId == Globals::kInvalidId))
584 return DebugUtils::errored(kErrorTooManyLabels);
585
586 ASMJIT_PROPAGATE(_labelEntries.willGrow(&_allocator));
587 le = _allocator.allocZeroedT<LabelEntry>();
588
589 if (ASMJIT_UNLIKELY(!le))
590 return DebugUtils::errored(kErrorOutOfMemory);
591
592 le->_hashCode = hashCode;
593 le->_setId(labelId);
594 le->_type = uint8_t(type);
595 le->_parentId = Globals::kInvalidId;
596 le->_offset = 0;
597 ASMJIT_PROPAGATE(le->_name.setData(&_zone, name, nameSize));
598
599 _labelEntries.appendUnsafe(le);
600 _namedLabels.insert(allocator(), le);
601
602 *entryOut = le;
603 return err;
604 }
605
606 uint32_t CodeHolder::labelIdByName(const char* name, size_t nameSize, uint32_t parentId) noexcept {
607 // TODO: Finalize - parent id is not used here?
608 ASMJIT_UNUSED(parentId);
609
610 uint32_t hashCode = CodeHolder_hashNameAndGetSize(name, nameSize);
611 if (ASMJIT_UNLIKELY(!nameSize)) return 0;
612
613 LabelEntry* le = _namedLabels.get(LabelByName(name, nameSize, hashCode));
614 return le ? le->id() : uint32_t(Globals::kInvalidId);
615 }
616
// Walks all bound labels and patches every link (use site) that still waits
// for a displacement, typically cross-section links left by `bindLabel()`.
//
// Returns the last patching error encountered (`kErrorInvalidDisplacement`),
// or `kErrorOk`; links that fail stay in the list and are retried on the
// next call.
ASMJIT_API Error CodeHolder::resolveUnresolvedLinks() noexcept {
  if (!hasUnresolvedLinks())
    return kErrorOk;

  Error err = kErrorOk;
  for (LabelEntry* le : labelEntries()) {
    // Unbound labels have no target offset yet - skip them.
    if (!le->isBound())
      continue;

    LabelLinkIterator link(le);
    if (link) {
      Support::FastUInt8 of = 0;
      Section* toSection = le->section();
      // Absolute target = section offset + label offset (overflow-checked).
      uint64_t toOffset = Support::addOverflow(toSection->offset(), le->offset(), &of);

      do {
        uint32_t linkSectionId = link->sectionId;
        // Links that carry a relocation id are handled by the relocator.
        if (link->relocId == Globals::kInvalidId) {
          Section* fromSection = sectionById(linkSectionId);
          size_t linkOffset = link->offset;

          CodeBuffer& buf = _sections[linkSectionId]->buffer();
          ASMJIT_ASSERT(linkOffset < buf.size());

          // Calculate the offset relative to the start of the virtual base.
          uint64_t fromOffset = Support::addOverflow<uint64_t>(fromSection->offset(), linkOffset, &of);
          int64_t displacement = int64_t(toOffset - fromOffset + uint64_t(int64_t(link->rel)));

          // `of` accumulates overflow from both additions above - only patch
          // when neither overflowed.
          if (!of) {
            ASMJIT_ASSERT(size_t(linkOffset) < buf.size());

            // Size of the value we are going to patch. Only BYTE/DWORD is allowed.
            uint32_t displacementSize = buf._data[linkOffset];
            ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= displacementSize);

            // Overwrite a real displacement in the CodeBuffer.
            if (CodeHolder_writeDisplacement(buf._data + linkOffset, displacement, displacementSize)) {
              link.resolveAndNext(this);
              continue;
            }
          }

          err = DebugUtils::errored(kErrorInvalidDisplacement);
          // Falls through to `link.next()`.
        }

        link.next();
      } while (link);
    }
  }

  return err;
}
670
671 ASMJIT_API Error CodeHolder::bindLabel(const Label& label, uint32_t toSectionId, uint64_t toOffset) noexcept {
672 LabelEntry* le = labelEntry(label);
673 if (ASMJIT_UNLIKELY(!le))
674 return DebugUtils::errored(kErrorInvalidLabel);
675
676 if (ASMJIT_UNLIKELY(toSectionId > _sections.size()))
677 return DebugUtils::errored(kErrorInvalidSection);
678
679 // Label can be bound only once.
680 if (ASMJIT_UNLIKELY(le->isBound()))
681 return DebugUtils::errored(kErrorLabelAlreadyBound);
682
683 // Bind the label.
684 Section* section = _sections[toSectionId];
685 le->_section = section;
686 le->_offset = toOffset;
687
688 Error err = kErrorOk;
689 CodeBuffer& buf = section->buffer();
690
691 // Fix all links to this label we have collected so far if they are within
692 // the same section. We ignore any inter-section links as these have to be
693 // fixed later.
694 LabelLinkIterator link(le);
695 while (link) {
696 uint32_t linkSectionId = link->sectionId;
697 size_t linkOffset = link->offset;
698
699 uint32_t relocId = link->relocId;
700 if (relocId != Globals::kInvalidId) {
701 // Adjust relocation data only.
702 RelocEntry* re = _relocations[relocId];
703 re->_payload += toOffset;
704 re->_targetSectionId = toSectionId;
705 }
706 else {
707 if (linkSectionId != toSectionId) {
708 link.next();
709 continue;
710 }
711
712 ASMJIT_ASSERT(linkOffset < buf.size());
713 int64_t displacement = int64_t(toOffset - uint64_t(linkOffset) + uint64_t(int64_t(link->rel)));
714
715 // Size of the value we are going to patch. Only BYTE/DWORD is allowed.
716 uint32_t displacementSize = buf._data[linkOffset];
717 ASMJIT_ASSERT(buf.size() - size_t(linkOffset) >= displacementSize);
718
719 // Overwrite a real displacement in the CodeBuffer.
720 if (!CodeHolder_writeDisplacement(buf._data + linkOffset, displacement, displacementSize)) {
721 err = DebugUtils::errored(kErrorInvalidDisplacement);
722 link.next();
723 continue;
724 }
725 }
726
727 link.resolveAndNext(this);
728 }
729
730 return err;
731 }
732
733 // ============================================================================
734 // [asmjit::BaseEmitter - Relocations]
735 // ============================================================================
736
737 Error CodeHolder::newRelocEntry(RelocEntry** dst, uint32_t relocType, uint32_t valueSize) noexcept {
738 ASMJIT_PROPAGATE(_relocations.willGrow(&_allocator));
739
740 uint32_t relocId = _relocations.size();
741 if (ASMJIT_UNLIKELY(relocId == Globals::kInvalidId))
742 return DebugUtils::errored(kErrorTooManyRelocations);
743
744 RelocEntry* re = _allocator.allocZeroedT<RelocEntry>();
745 if (ASMJIT_UNLIKELY(!re))
746 return DebugUtils::errored(kErrorOutOfMemory);
747
748 re->_id = relocId;
749 re->_relocType = uint8_t(relocType);
750 re->_valueSize = uint8_t(valueSize);
751 re->_sourceSectionId = Globals::kInvalidId;
752 re->_targetSectionId = Globals::kInvalidId;
753 _relocations.appendUnsafe(re);
754
755 *dst = re;
756 return kErrorOk;
757 }
758
759 // ============================================================================
760 // [asmjit::BaseEmitter - Expression Evaluation]
761 // ============================================================================
762
// Recursively evaluates the expression `exp` and stores the result to `out`.
//
// Each of the two operands may be a constant, a bound label (evaluated to
// its absolute offset), or a nested expression. Fails when a referenced
// label is unbound or when a value/operator type is unknown.
static Error CodeHolder_evaluateExpression(CodeHolder* self, Expression* exp, uint64_t* out) noexcept {
  uint64_t value[2];
  for (size_t i = 0; i < 2; i++) {
    uint64_t v;
    switch (exp->valueType[i]) {
      // A missing operand evaluates to zero.
      case Expression::kValueNone: {
        v = 0;
        break;
      }

      case Expression::kValueConstant: {
        v = exp->value[i].constant;
        break;
      }

      case Expression::kValueLabel: {
        LabelEntry* le = exp->value[i].label;
        if (!le->isBound())
          return DebugUtils::errored(kErrorExpressionLabelNotBound);
        // Absolute offset = section offset + label offset within it.
        v = le->section()->offset() + le->offset();
        break;
      }

      case Expression::kValueExpression: {
        Expression* nested = exp->value[i].expression;
        ASMJIT_PROPAGATE(CodeHolder_evaluateExpression(self, nested, &v));
        break;
      }

      default:
        return DebugUtils::errored(kErrorInvalidState);
    }

    value[i] = v;
  }

  uint64_t result;
  uint64_t& a = value[0];
  uint64_t& b = value[1];

  switch (exp->opType) {
    case Expression::kOpAdd:
      result = a + b;
      break;

    case Expression::kOpSub:
      result = a - b;
      break;

    case Expression::kOpMul:
      result = a * b;
      break;

    // Shifts by 64 or more would be undefined behavior in C++ - define
    // them to produce zero instead.
    case Expression::kOpSll:
      result = (b > 63) ? uint64_t(0) : uint64_t(a << b);
      break;

    case Expression::kOpSrl:
      result = (b > 63) ? uint64_t(0) : uint64_t(a >> b);
      break;

    // Arithmetic shift saturates the amount at 63 (keeps the sign fill).
    case Expression::kOpSra:
      result = Support::sar(a, Support::min<uint64_t>(b, 63));
      break;

    default:
      return DebugUtils::errored(kErrorInvalidState);
  }

  *out = result;
  return kErrorOk;
}
835
836 // ============================================================================
837 // [asmjit::BaseEmitter - Utilities]
838 // ============================================================================
839
// Assigns a flat, non-overlapping offset to every section.
//
// Two passes: the first only validates that all aligned offsets and sizes
// fit into 64 bits; the second actually writes the offsets, so a failure
// never leaves the sections half-updated.
Error CodeHolder::flatten() noexcept {
  uint64_t offset = 0;
  for (Section* section : _sections) {
    uint64_t realSize = section->realSize();
    if (realSize) {
      uint64_t alignedOffset = Support::alignUp(offset, section->alignment());
      // `alignUp()` wrapping past 2^64 shows up as a smaller result.
      if (ASMJIT_UNLIKELY(alignedOffset < offset))
        return DebugUtils::errored(kErrorTooLarge);

      Support::FastUInt8 of = 0;
      offset = Support::addOverflow(alignedOffset, realSize, &of);

      if (ASMJIT_UNLIKELY(of))
        return DebugUtils::errored(kErrorTooLarge);
    }
  }

  // Now we know that we can assign offsets of all sections properly.
  Section* prev = nullptr;
  offset = 0;
  for (Section* section : _sections) {
    uint64_t realSize = section->realSize();
    // Zero-sized sections inherit the running offset without alignment.
    if (realSize)
      offset = Support::alignUp(offset, section->alignment());
    section->_offset = offset;

    // Make sure the previous section extends a bit to cover the alignment.
    if (prev)
      prev->_virtualSize = offset - prev->_offset;

    prev = section;
    offset += realSize;
  }

  return kErrorOk;
}
876
877 size_t CodeHolder::codeSize() const noexcept {
878 Support::FastUInt8 of = 0;
879 uint64_t offset = 0;
880
881 for (Section* section : _sections) {
882 uint64_t realSize = section->realSize();
883
884 if (realSize) {
885 uint64_t alignedOffset = Support::alignUp(offset, section->alignment());
886 ASMJIT_ASSERT(alignedOffset >= offset);
887 offset = Support::addOverflow(alignedOffset, realSize, &of);
888 }
889 }
890
891 // TODO: Not nice, maybe changing `codeSize()` to return `uint64_t` instead?
892 if ((sizeof(uint64_t) > sizeof(size_t) && offset > SIZE_MAX) || of)
893 return SIZE_MAX;
894
895 return size_t(offset);
896 }
897
// Applies all recorded relocations assuming the code will be loaded at
// `baseAddress`.
//
// For `kTypeX64AddressEntry` relocations that don't fit a rel32, the
// CALL/JMP at the use site is rewritten to its indirect form (FF /2 or
// FF /4, RIP-relative) reading the absolute target from the '.addrtab'
// section. Patches section buffers in place; fails with
// `kErrorInvalidRelocEntry` / `kErrorRelocOffsetOutOfRange` on bad input.
Error CodeHolder::relocateToBase(uint64_t baseAddress) noexcept {
  // Base address must be provided.
  if (ASMJIT_UNLIKELY(baseAddress == Globals::kNoBaseAddress))
    return DebugUtils::errored(kErrorInvalidArgument);

  _codeInfo.setBaseAddress(baseAddress);
  uint32_t gpSize = _codeInfo.gpSize();

  Section* addressTableSection = _addressTableSection;
  uint32_t addressTableEntryCount = 0;
  uint8_t* addressTableEntryData = nullptr;

  // Materialize the address table buffer so slots can be written below.
  if (addressTableSection) {
    ASMJIT_PROPAGATE(
      reserveBuffer(&addressTableSection->_buffer, size_t(addressTableSection->virtualSize())));
    addressTableEntryData = addressTableSection->_buffer.data();
  }

  // Relocate all recorded locations.
  for (const RelocEntry* re : _relocations) {
    // Possibly deleted or optimized-out entry.
    if (re->relocType() == RelocEntry::kTypeNone)
      continue;

    Section* sourceSection = sectionById(re->sourceSectionId());
    Section* targetSection = nullptr;

    if (re->targetSectionId() != Globals::kInvalidId)
      targetSection = sectionById(re->targetSectionId());

    uint64_t value = re->payload();
    uint64_t sectionOffset = sourceSection->offset();
    uint64_t sourceOffset = re->sourceOffset();

    // Make sure that the `RelocEntry` doesn't go out of bounds.
    size_t regionSize = re->leadingSize() + re->valueSize() + re->trailingSize();
    if (ASMJIT_UNLIKELY(re->sourceOffset() >= sourceSection->bufferSize() ||
                        sourceSection->bufferSize() - size_t(re->sourceOffset()) < regionSize))
      return DebugUtils::errored(kErrorInvalidRelocEntry);

    uint8_t* buffer = sourceSection->data();
    // The patched value sits after the instruction's leading bytes.
    size_t valueOffset = size_t(re->sourceOffset()) + re->leadingSize();

    switch (re->relocType()) {
      case RelocEntry::kTypeExpression: {
        // Payload is a pointer to an `Expression` to evaluate now.
        Expression* expression = (Expression*)(uintptr_t(value));
        ASMJIT_PROPAGATE(CodeHolder_evaluateExpression(this, expression, &value));
        break;
      }

      case RelocEntry::kTypeAbsToAbs: {
        break;
      }

      case RelocEntry::kTypeRelToAbs: {
        // Value is currently a relative offset from the start of its section.
        // We have to convert it to an absolute offset (including base address).
        if (ASMJIT_UNLIKELY(!targetSection))
          return DebugUtils::errored(kErrorInvalidRelocEntry);

        //value += baseAddress + sectionOffset + sourceOffset + regionSize;
        value += baseAddress + targetSection->offset();
        break;
      }

      case RelocEntry::kTypeAbsToRel: {
        // Displacement is measured from the end of the patched region.
        value -= baseAddress + sectionOffset + sourceOffset + regionSize;
        if (gpSize > 4 && !Support::isInt32(int64_t(value)))
          return DebugUtils::errored(kErrorRelocOffsetOutOfRange);
        break;
      }

      case RelocEntry::kTypeX64AddressEntry: {
        if (re->valueSize() != 4 || re->leadingSize() < 2)
          return DebugUtils::errored(kErrorInvalidRelocEntry);

        // First try whether a relative 32-bit displacement would work.
        value -= baseAddress + sectionOffset + sourceOffset + regionSize;
        if (!Support::isInt32(int64_t(value))) {
          // Relative 32-bit displacement is not possible, use '.addrtab' section.
          AddressTableEntry* atEntry = _addressTableEntries.get(re->payload());
          if (ASMJIT_UNLIKELY(!atEntry))
            return DebugUtils::errored(kErrorInvalidRelocEntry);

          // Cannot be null as we have just matched the `AddressTableEntry`.
          ASMJIT_ASSERT(addressTableSection != nullptr);

          // Assign slots lazily, in first-use order.
          if (!atEntry->hasAssignedSlot())
            atEntry->_slot = addressTableEntryCount++;

          size_t atEntryIndex = size_t(atEntry->slot()) * gpSize;
          uint64_t addrSrc = sectionOffset + sourceOffset + regionSize;
          uint64_t addrDst = addressTableSection->offset() + uint64_t(atEntryIndex);

          // The new displacement points at the table slot instead.
          value = addrDst - addrSrc;
          if (!Support::isInt32(int64_t(value)))
            return DebugUtils::errored(kErrorRelocOffsetOutOfRange);

          // Bytes that replace [REX, OPCODE] bytes.
          uint32_t byte0 = 0xFF;
          uint32_t byte1 = buffer[valueOffset - 1];

          if (byte1 == 0xE8) {
            // Patch CALL/MOD byte to FF /2 (-> 0x15).
            byte1 = x86EncodeMod(0, 2, 5);
          }
          else if (byte1 == 0xE9) {
            // Patch JMP/MOD byte to FF /4 (-> 0x25).
            byte1 = x86EncodeMod(0, 4, 5);
          }
          else {
            return DebugUtils::errored(kErrorInvalidRelocEntry);
          }

          // Patch `jmp/call` instruction.
          buffer[valueOffset - 2] = uint8_t(byte0);
          buffer[valueOffset - 1] = uint8_t(byte1);

          // Store the absolute target into the table slot.
          Support::writeU64uLE(addressTableEntryData + atEntryIndex, re->payload());
        }
        break;
      }

      default:
        return DebugUtils::errored(kErrorInvalidRelocEntry);
    }

    // Write the computed value back with the entry's width.
    switch (re->valueSize()) {
      case 1:
        Support::writeU8(buffer + valueOffset, uint32_t(value & 0xFFu));
        break;

      case 2:
        Support::writeU16uLE(buffer + valueOffset, uint32_t(value & 0xFFFFu));
        break;

      case 4:
        Support::writeU32uLE(buffer + valueOffset, uint32_t(value & 0xFFFFFFFFu));
        break;

      case 8:
        Support::writeU64uLE(buffer + valueOffset, value);
        break;

      default:
        return DebugUtils::errored(kErrorInvalidRelocEntry);
    }
  }

  // Fixup the virtual size of the address table if it's the last section.
  if (_sections.last() == addressTableSection) {
    size_t addressTableSize = addressTableEntryCount * gpSize;
    addressTableSection->_buffer._size = addressTableSize;
    addressTableSection->_virtualSize = addressTableSize;
  }

  return kErrorOk;
}
1056
1057 Error CodeHolder::copySectionData(void* dst, size_t dstSize, uint32_t sectionId, uint32_t options) noexcept {
1058 if (ASMJIT_UNLIKELY(!isSectionValid(sectionId)))
1059 return DebugUtils::errored(kErrorInvalidSection);
1060
1061 Section* section = sectionById(sectionId);
1062 size_t bufferSize = section->bufferSize();
1063
1064 if (ASMJIT_UNLIKELY(dstSize < bufferSize))
1065 return DebugUtils::errored(kErrorInvalidArgument);
1066
1067 memcpy(dst, section->data(), bufferSize);
1068
1069 if (bufferSize < dstSize && (options & kCopyWithPadding)) {
1070 size_t paddingSize = dstSize - bufferSize;
1071 memset(static_cast<uint8_t*>(dst) + bufferSize, 0, paddingSize);
1072 }
1073
1074 return kErrorOk;
1075 }
1076
1077 Error CodeHolder::copyFlattenedData(void* dst, size_t dstSize, uint32_t options) noexcept {
1078 size_t end = 0;
1079 for (Section* section : _sections) {
1080 if (section->offset() > dstSize)
1081 return DebugUtils::errored(kErrorInvalidArgument);
1082
1083 size_t bufferSize = section->bufferSize();
1084 size_t offset = size_t(section->offset());
1085
1086 if (ASMJIT_UNLIKELY(dstSize - offset < bufferSize))
1087 return DebugUtils::errored(kErrorInvalidArgument);
1088
1089 uint8_t* dstTarget = static_cast<uint8_t*>(dst) + offset;
1090 size_t paddingSize = 0;
1091 memcpy(dstTarget, section->data(), bufferSize);
1092
1093 if ((options & kCopyWithPadding) && bufferSize < section->virtualSize()) {
1094 paddingSize = Support::min<size_t>(dstSize - offset, size_t(section->virtualSize())) - bufferSize;
1095 memset(dstTarget + bufferSize, 0, paddingSize);
1096 }
1097
1098 end = Support::max(end, offset + bufferSize + paddingSize);
1099 }
1100
1101 // TODO: `end` is not used atm, we need an option to also pad anything beyond
1102 // the code in case that the destination was much larger (for example page-size).
1103
1104 return kErrorOk;
1105 }
1106
1107 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_CODEHOLDER_H
7 #define _ASMJIT_CORE_CODEHOLDER_H
8
9 #include "../core/arch.h"
10 #include "../core/datatypes.h"
11 #include "../core/operand.h"
12 #include "../core/string.h"
13 #include "../core/support.h"
14 #include "../core/target.h"
15 #include "../core/zone.h"
16 #include "../core/zonehash.h"
17 #include "../core/zonestring.h"
18 #include "../core/zonetree.h"
19 #include "../core/zonevector.h"
20
21 ASMJIT_BEGIN_NAMESPACE
22
23 //! \addtogroup asmjit_core
24 //! \{
25
26 // ============================================================================
27 // [Forward Declarations]
28 // ============================================================================
29
30 class BaseEmitter;
31 class CodeHolder;
32 class LabelEntry;
33 class Logger;
34
35 // ============================================================================
36 // [asmjit::AlignMode]
37 // ============================================================================
38
//! Align mode.
//!
//! Selects what kind of padding an emitter inserts when aligning.
enum AlignMode : uint32_t {
  kAlignCode = 0, //!< Align executable code.
  kAlignData = 1, //!< Align non-executable data.
  kAlignZero = 2, //!< Align by a sequence of zeros.
  kAlignCount = 3 //!< Count of alignment modes.
};
46
47 // ============================================================================
48 // [asmjit::ErrorHandler]
49 // ============================================================================
50
//! Error handler can be used to override the default behavior of error handling
//! available to all classes that inherit `BaseEmitter`.
//!
//! Override `ErrorHandler::handleError()` to implement your own error handler.
class ASMJIT_VIRTAPI ErrorHandler {
public:
  ASMJIT_BASE_CLASS(ErrorHandler)

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! Creates a new `ErrorHandler` instance.
  ASMJIT_API ErrorHandler() noexcept;
  //! Destroys the `ErrorHandler` instance.
  ASMJIT_API virtual ~ErrorHandler() noexcept;

  // --------------------------------------------------------------------------
  // [Handle Error]
  // --------------------------------------------------------------------------

  //! Error handler (must be reimplemented - this is a pure virtual method).
  //!
  //! Error handler is called after an error happened and before it's propagated
  //! to the caller. There are multiple ways the error handler can be used:
  //!
  //! 1. User-based error handling without throwing an exception or using C's
  //! `longjmp()`. This is for users that don't use exceptions and want
  //! customized error handling.
  //!
  //! 2. Throwing an exception. AsmJit doesn't use exceptions and is completely
  //! exception-safe, but you can throw an exception from your error handler if
  //! this way is the preferred way of handling errors in your project.
  //!
  //! 3. Using plain old C's `setjmp()` and `longjmp()`. AsmJit always puts
  //! `BaseEmitter` to a consistent state before calling `handleError()`
  //! so `longjmp()` can be used without any issues to cancel the code
  //! generation if an error occurred. There is no difference between
  //! exceptions and `longjmp()` from AsmJit's perspective, however,
  //! never jump outside of `CodeHolder` and `BaseEmitter` scope as you
  //! would leak memory.
  virtual void handleError(Error err, const char* message, BaseEmitter* origin) = 0;
};
94
95 // ============================================================================
96 // [asmjit::CodeBuffer]
97 // ============================================================================
98
//! Code or data buffer.
//!
//! A plain growable byte buffer used by `Section`. External/fixed buffers
//! (see `Flags`) are user-provided and cannot be reallocated by AsmJit.
struct CodeBuffer {
  //! The content of the buffer (data).
  uint8_t* _data;
  //! Number of bytes of `data` used.
  size_t _size;
  //! Buffer capacity (in bytes).
  size_t _capacity;
  //! Buffer flags.
  uint32_t _flags;

  enum Flags : uint32_t {
    //! Buffer is external (not allocated by asmjit).
    kFlagIsExternal = 0x00000001u,
    //! Buffer is fixed (cannot be reallocated).
    kFlagIsFixed = 0x00000002u
  };

  //! \name Overloaded Operators
  //! \{

  //! Byte access; bounds are checked by `ASMJIT_ASSERT` (debug builds only).
  inline uint8_t& operator[](size_t index) noexcept {
    ASMJIT_ASSERT(index < _size);
    return _data[index];
  }

  //! \overload
  inline const uint8_t& operator[](size_t index) const noexcept {
    ASMJIT_ASSERT(index < _size);
    return _data[index];
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns all buffer flags, see `Flags`.
  inline uint32_t flags() const noexcept { return _flags; }
  //! Tests whether the buffer has the given `flag` set.
  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }

  //! Tests whether the buffer has allocated storage.
  inline bool isAllocated() const noexcept { return _data != nullptr; }
  //! Tests whether the buffer cannot be reallocated.
  inline bool isFixed() const noexcept { return hasFlag(kFlagIsFixed); }
  //! Tests whether the buffer storage was provided by the user.
  inline bool isExternal() const noexcept { return hasFlag(kFlagIsExternal); }

  inline uint8_t* data() noexcept { return _data; }
  inline const uint8_t* data() const noexcept { return _data; }

  inline bool empty() const noexcept { return !_size; }
  inline size_t size() const noexcept { return _size; }
  inline size_t capacity() const noexcept { return _capacity; }

  //! \}

  //! \name Iterators
  //! \{

  inline uint8_t* begin() noexcept { return _data; }
  inline const uint8_t* begin() const noexcept { return _data; }

  inline uint8_t* end() noexcept { return _data + _size; }
  inline const uint8_t* end() const noexcept { return _data + _size; }

  //! \}
};
162
163 // ============================================================================
164 // [asmjit::Section]
165 // ============================================================================
166
//! Section entry.
//!
//! A contiguous region of code or data managed by `CodeHolder`, identified
//! by a dense id and holding its own `CodeBuffer`.
class Section {
public:
  //! Section id.
  uint32_t _id;
  //! Section flags.
  uint32_t _flags;
  //! Section alignment requirements (0 if no requirements).
  uint32_t _alignment;
  //! Reserved for future use (padding).
  uint32_t _reserved;
  //! Offset of this section from base-address.
  uint64_t _offset;
  //! Virtual size of the section (zero initialized sections).
  uint64_t _virtualSize;
  //! Section name (max 35 characters, PE allows max 8).
  FixedString<Globals::kMaxSectionNameSize + 1> _name;
  //! Code or data buffer.
  CodeBuffer _buffer;

  //! Section flags.
  enum Flags : uint32_t {
    kFlagExec = 0x00000001u, //!< Executable (.text sections).
    kFlagConst = 0x00000002u, //!< Read-only (.text and .data sections).
    kFlagZero = 0x00000004u, //!< Zero initialized by the loader (BSS).
    kFlagInfo = 0x00000008u, //!< Info / comment flag.
    kFlagImplicit = 0x80000000u //!< Section created implicitly and can be deleted by `Target`.
  };

  //! \name Accessors
  //! \{

  //! Returns the section id.
  inline uint32_t id() const noexcept { return _id; }
  //! Returns the section name as a null-terminated string.
  inline const char* name() const noexcept { return _name.str; }

  inline uint8_t* data() noexcept { return _buffer.data(); }
  inline const uint8_t* data() const noexcept { return _buffer.data(); }

  inline uint32_t flags() const noexcept { return _flags; }
  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
  inline void addFlags(uint32_t flags) noexcept { _flags |= flags; }
  inline void clearFlags(uint32_t flags) noexcept { _flags &= ~flags; }

  inline uint32_t alignment() const noexcept { return _alignment; }
  inline void setAlignment(uint32_t alignment) noexcept { _alignment = alignment; }

  inline uint64_t offset() const noexcept { return _offset; }
  inline void setOffset(uint64_t offset) noexcept { _offset = offset; }

  //! Returns the virtual size of the section.
  //!
  //! Virtual size is initially zero and is never changed by AsmJit. It's normal
  //! if virtual size is smaller than size returned by `bufferSize()` as the buffer
  //! stores real data emitted by assemblers or appended by users.
  //!
  //! Use `realSize()` to get the real and final size of this section.
  inline uint64_t virtualSize() const noexcept { return _virtualSize; }
  //! Sets the virtual size of the section.
  inline void setVirtualSize(uint64_t virtualSize) noexcept { _virtualSize = virtualSize; }

  //! Returns the buffer size of the section.
  inline size_t bufferSize() const noexcept { return _buffer.size(); }
  //! Returns the real size of the section - the maximum of virtual and buffer sizes.
  inline uint64_t realSize() const noexcept { return Support::max<uint64_t>(virtualSize(), bufferSize()); }

  //! Returns the `CodeBuffer` used by this section.
  inline CodeBuffer& buffer() noexcept { return _buffer; }
  //! Returns the `CodeBuffer` used by this section (const).
  inline const CodeBuffer& buffer() const noexcept { return _buffer; }

  //! \}
};
239
240 // ============================================================================
241 // [asmjit::LabelLink]
242 // ============================================================================
243
//! Data structure used to link either unbound labels or cross-section links.
//!
//! Every use of an unbound label (or a label bound in a different section)
//! records one `LabelLink` so the referencing location can be patched once
//! the label's final position is known.
struct LabelLink {
  //! Next link (single-linked list).
  LabelLink* next;
  //! Section id where the label is bound.
  uint32_t sectionId;
  //! Relocation id or Globals::kInvalidId.
  uint32_t relocId;
  //! Label offset relative to the start of the section.
  size_t offset;
  //! Inlined rel8/rel32.
  intptr_t rel;
};
257
258 // ============================================================================
259 // [asmjit::Expression]
260 // ============================================================================
261
262 struct Expression {
263 enum OpType : uint8_t {
264 kOpAdd = 0,
265 kOpSub = 1,
266 kOpMul = 2,
267 kOpSll = 3,
268 kOpSrl = 4,
269 kOpSra = 5
270 };
271
272 enum ValueType : uint8_t {
273 kValueNone = 0,
274 kValueConstant = 1,
275 kValueLabel = 2,
276 kValueExpression = 3
277 };
278
279 union Value {
280 uint64_t constant;
281 Expression* expression;
282 LabelEntry* label;
283 };
284
285 uint8_t opType;
286 uint8_t valueType[2];
287 uint8_t reserved[5];
288 Value value[2];
289
290 inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
291
292 inline void setValueAsConstant(size_t index, uint64_t constant) noexcept {
293 valueType[index] = kValueConstant;
294 value[index].constant = constant;
295 }
296
297 inline void setValueAsLabel(size_t index, LabelEntry* label) noexcept {
298 valueType[index] = kValueLabel;
299 value[index].label = label;
300 }
301
302 inline void setValueAsExpression(size_t index, Expression* expression) noexcept {
303 valueType[index] = kValueLabel;
304 value[index].expression = expression;
305 }
306 };
307
308 // ============================================================================
309 // [asmjit::LabelEntry]
310 // ============================================================================
311
//! Label entry.
//!
//! Contains the following properties:
//! * Label id - This is the only thing that is set to the `Label` operand.
//! * Label name - Optional, used mostly to create executables and libraries.
//! * Label type - Type of the label, default `Label::kTypeAnonymous`.
//! * Label parent id - Derived from many assemblers that allow to define a
//! local label that falls under a global label. This allows to define
//! many labels of the same name that have different parent (global) label.
//! * Offset - offset of the label bound by `Assembler`.
//! * Links - single-linked list that contains locations of code that has
//! to be patched when the label gets bound. Every use of unbound label
//! adds one link to `_links` list.
//! * HVal - Hash value of label's name and optionally parentId.
//! * HashNext - Hash-table implementation detail.
class LabelEntry : public ZoneHashNode {
public:
  // Let's round the size of `LabelEntry` to 64 bytes (as `ZoneAllocator` has
  // granularity of 32 bytes anyway). This gives `_name` the remaining space,
  // which should be 16 bytes on 64-bit and 28 bytes on 32-bit architectures.
  static constexpr uint32_t kStaticNameSize =
    64 - (sizeof(ZoneHashNode) + 8 + sizeof(Section*) + sizeof(size_t) + sizeof(LabelLink*));

  //! Label type, see `Label::LabelType`.
  uint8_t _type;
  //! Must be zero.
  uint8_t _flags;
  //! Reserved.
  uint16_t _reserved16;
  //! Label parent id or zero.
  uint32_t _parentId;
  //! Label offset relative to the start of the `_section`.
  uint64_t _offset;
  //! Section where the label was bound.
  Section* _section;
  //! Label links.
  LabelLink* _links;
  //! Label name.
  ZoneString<kStaticNameSize> _name;

  //! \name Accessors
  //! \{

  // NOTE: Label id is stored in `_customData`, which is provided by ZoneHashNode
  // to fill a padding that a C++ compiler targeting 64-bit CPU will add to align
  // the structure to 64-bits.

  //! Returns label id.
  inline uint32_t id() const noexcept { return _customData; }
  //! Sets label id (internal, used only by `CodeHolder`).
  inline void _setId(uint32_t id) noexcept { _customData = id; }

  //! Returns label type, see `Label::LabelType`.
  inline uint32_t type() const noexcept { return _type; }
  //! Returns label flags, returns 0 at the moment.
  inline uint32_t flags() const noexcept { return _flags; }

  //! Tests whether the label has a parent label.
  inline bool hasParent() const noexcept { return _parentId != Globals::kInvalidId; }
  //! Returns label's parent id.
  inline uint32_t parentId() const noexcept { return _parentId; }

  //! Returns the section where the label was bound.
  //!
  //! If the label was not yet bound the return value is `nullptr`.
  inline Section* section() const noexcept { return _section; }

  //! Tests whether the label has name.
  inline bool hasName() const noexcept { return !_name.empty(); }

  //! Returns the label's name.
  //!
  //! \note Local labels will return their local name without their parent
  //! part, for example ".L1".
  inline const char* name() const noexcept { return _name.data(); }

  //! Returns size of label's name.
  //!
  //! \note Label name is always null terminated, so you can use `strlen()` to
  //! get it, however, it's also cached in `LabelEntry` itself, so if you want
  //! to know the size the fastest way is to call `LabelEntry::nameSize()`.
  inline uint32_t nameSize() const noexcept { return _name.size(); }

  //! Returns links associated with this label.
  inline LabelLink* links() const noexcept { return _links; }

  //! Tests whether the label is bound.
  inline bool isBound() const noexcept { return _section != nullptr; }
  //! Tests whether the label is bound to the given `section`.
  inline bool isBoundTo(Section* section) const noexcept { return _section == section; }

  //! Returns the label offset (only useful if the label is bound).
  inline uint64_t offset() const noexcept { return _offset; }

  //! Returns the hash-value of label's name and its parent label (if any).
  //!
  //! Label hash is calculated as `HASH(Name) ^ ParentId`. The hash function
  //! is implemented in `Support::hashString()` and `Support::hashRound()`.
  inline uint32_t hashCode() const noexcept { return _hashCode; }

  //! \}
};
414
415 // ============================================================================
416 // [asmjit::RelocEntry]
417 // ============================================================================
418
//! Relocation entry.
//!
//! We describe relocation data in the following way:
//!
//! ```
//! +- Start of the buffer +- End of the data
//! | |*PATCHED*| | or instruction
//! |xxxxxxxxxxxxxxxxxxxxxx|LeadSize|ValueSize|TrailSize|xxxxxxxxxxxxxxxxxxxx->
//! |
//! +- Source offset
//! ```
struct RelocEntry {
  //! Relocation id.
  uint32_t _id;
  //! Type of the relocation.
  uint8_t _relocType;
  //! Size of the relocation data/value (1, 2, 4 or 8 bytes).
  uint8_t _valueSize;
  //! Number of bytes after `_sourceOffset` to reach the value to be patched.
  uint8_t _leadingSize;
  //! Number of bytes after `_sourceOffset + _valueSize` to reach end of the
  //! instruction.
  uint8_t _trailingSize;
  //! Source section id.
  uint32_t _sourceSectionId;
  //! Target section id.
  uint32_t _targetSectionId;
  //! Source offset (relative to start of the section).
  uint64_t _sourceOffset;
  //! Payload (target offset, target address, expression, etc).
  uint64_t _payload;

  //! Relocation type.
  enum RelocType : uint32_t {
    //! None/deleted (no relocation).
    kTypeNone = 0,
    //! Expression evaluation, `_payload` is pointer to `Expression`.
    kTypeExpression = 1,
    //! Relocate absolute to absolute.
    kTypeAbsToAbs = 2,
    //! Relocate relative to absolute.
    kTypeRelToAbs = 3,
    //! Relocate absolute to relative.
    kTypeAbsToRel = 4,
    //! Relocate absolute to relative or use trampoline.
    kTypeX64AddressEntry = 5
  };

  //! \name Accessors
  //! \{

  //! Returns the relocation id.
  inline uint32_t id() const noexcept { return _id; }

  //! Returns the relocation type, see `RelocType`.
  inline uint32_t relocType() const noexcept { return _relocType; }
  //! Returns the size of the value to patch (1, 2, 4 or 8 bytes).
  inline uint32_t valueSize() const noexcept { return _valueSize; }

  //! Returns the number of bytes between `sourceOffset()` and the patched value.
  inline uint32_t leadingSize() const noexcept { return _leadingSize; }
  //! Returns the number of bytes between the patched value and end of the instruction.
  inline uint32_t trailingSize() const noexcept { return _trailingSize; }

  //! Returns the id of the section the relocation applies to.
  inline uint32_t sourceSectionId() const noexcept { return _sourceSectionId; }
  //! Returns the id of the section the relocation points to.
  inline uint32_t targetSectionId() const noexcept { return _targetSectionId; }

  //! Returns the source offset (relative to start of the source section).
  inline uint64_t sourceOffset() const noexcept { return _sourceOffset; }
  //! Returns the raw payload (target offset, target address, expression, etc).
  inline uint64_t payload() const noexcept { return _payload; }

  //! Returns the payload interpreted as `Expression*` (only valid for `kTypeExpression`).
  Expression* payloadAsExpression() const noexcept {
    return reinterpret_cast<Expression*>(uintptr_t(_payload));
  }

  //! \}
};
490
491 // ============================================================================
492 // [asmjit::AddressTableEntry]
493 // ============================================================================
494
//! Entry in the address table ('.addrtab' section).
//!
//! Maps an absolute 64-bit `address` to a `slot` in the address table. The
//! comparison operators allow the entry to be stored and queried by address
//! in a `ZoneTree`.
class AddressTableEntry : public ZoneTreeNodeT<AddressTableEntry> {
public:
  ASMJIT_NONCOPYABLE(AddressTableEntry)

  //! Absolute address this entry represents.
  uint64_t _address;
  //! Assigned slot, or 0xFFFFFFFFu if no slot was assigned yet.
  uint32_t _slot;

  //! \name Construction & Destruction
  //! \{

  //! Creates an entry for the given `address` with no slot assigned.
  inline explicit AddressTableEntry(uint64_t address) noexcept
    : _address(address),
      _slot(0xFFFFFFFFu) {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the absolute address.
  inline uint64_t address() const noexcept { return _address; }
  //! Returns the assigned slot (only valid if `hasAssignedSlot()` is true).
  inline uint32_t slot() const noexcept { return _slot; }

  //! Tests whether a slot was assigned to this entry.
  inline bool hasAssignedSlot() const noexcept { return _slot != 0xFFFFFFFFu; }

  inline bool operator<(const AddressTableEntry& other) const noexcept { return _address < other._address; }
  inline bool operator>(const AddressTableEntry& other) const noexcept { return _address > other._address; }

  inline bool operator<(uint64_t queryAddress) const noexcept { return _address < queryAddress; }
  inline bool operator>(uint64_t queryAddress) const noexcept { return _address > queryAddress; }

  //! \}
};
527
528 // ============================================================================
529 // [asmjit::CodeHolder]
530 // ============================================================================
531
//! Contains basic information about the target architecture plus its settings,
//! and holds code & data (including sections, labels, and relocation information).
//! CodeHolder can store both binary and intermediate representation of assembly,
//! which can be generated by `BaseAssembler` and/or `BaseBuilder`.
//!
//! \note `CodeHolder` has ability to attach an `ErrorHandler`, however, the
//! error handler is not triggered by `CodeHolder` itself, it's only used by
//! emitters attached to `CodeHolder`.
class CodeHolder {
public:
  ASMJIT_NONCOPYABLE(CodeHolder)

  //! Basic information about the code (architecture and other info).
  CodeInfo _codeInfo;
  //! Emitter options, propagated to all emitters when changed.
  uint32_t _emitterOptions;

  //! Attached `Logger`, used by all consumers.
  Logger* _logger;
  //! Attached `ErrorHandler`.
  ErrorHandler* _errorHandler;

  //! Code zone (used to allocate core structures).
  Zone _zone;
  //! Zone allocator, used to manage internal containers.
  ZoneAllocator _allocator;

  //! Attached code emitters.
  ZoneVector<BaseEmitter*> _emitters;
  //! Section entries.
  ZoneVector<Section*> _sections;
  //! Label entries.
  ZoneVector<LabelEntry*> _labelEntries;
  //! Relocation entries.
  ZoneVector<RelocEntry*> _relocations;
  //! Label name -> LabelEntry (only named labels).
  ZoneHash<LabelEntry> _namedLabels;

  //! Count of label links, which are not resolved.
  size_t _unresolvedLinkCount;
  //! Pointer to an address table section (or null if this section doesn't exist).
  Section* _addressTableSection;
  //! Address table entries.
  ZoneTree<AddressTableEntry> _addressTableEntries;

  //! \name Construction & Destruction
  //! \{

  //! Creates an uninitialized CodeHolder (you must init() it before it can be used).
  ASMJIT_API CodeHolder() noexcept;
  //! Destroys the CodeHolder.
  ASMJIT_API ~CodeHolder() noexcept;

  //! Tests whether the CodeHolder has been initialized by `init()`.
  inline bool isInitialized() const noexcept { return _codeInfo.isInitialized(); }

  //! Initializes CodeHolder to hold code described by `codeInfo`.
  ASMJIT_API Error init(const CodeInfo& info) noexcept;
  //! Detaches all code-generators attached and resets the `CodeHolder`.
  ASMJIT_API void reset(uint32_t resetPolicy = Globals::kResetSoft) noexcept;

  //! \}

  //! \name Attach & Detach
  //! \{

  //! Attaches an emitter to this `CodeHolder`.
  ASMJIT_API Error attach(BaseEmitter* emitter) noexcept;
  //! Detaches an emitter from this `CodeHolder`.
  ASMJIT_API Error detach(BaseEmitter* emitter) noexcept;

  //! \}

  //! \name Allocators
  //! \{

  //! Returns the zone allocator used to manage internal containers.
  inline ZoneAllocator* allocator() const noexcept { return const_cast<ZoneAllocator*>(&_allocator); }

  //! \}

  //! \name Code Emitter
  //! \{

  //! Returns all emitters currently attached to this `CodeHolder`.
  inline const ZoneVector<BaseEmitter*>& emitters() const noexcept { return _emitters; }

  //! Returns global emitter options, internally propagated to all attached emitters.
  inline uint32_t emitterOptions() const noexcept { return _emitterOptions; }

  //! Enables the given global emitter `options` and propagates the resulting
  //! options to all attached emitters.
  ASMJIT_API void addEmitterOptions(uint32_t options) noexcept;

  //! Disables the given global emitter `options` and propagates the resulting
  //! options to all attached emitters.
  ASMJIT_API void clearEmitterOptions(uint32_t options) noexcept;

  //! \}

  //! \name Code & Architecture
  //! \{

  //! Returns the target architecture information, see `ArchInfo`.
  inline const ArchInfo& archInfo() const noexcept { return _codeInfo.archInfo(); }
  //! Returns the target code information, see `CodeInfo`.
  inline const CodeInfo& codeInfo() const noexcept { return _codeInfo; }

  //! Returns the target architecture id.
  inline uint32_t archId() const noexcept { return archInfo().archId(); }
  //! Returns the target architecture sub-id.
  inline uint32_t archSubId() const noexcept { return archInfo().archSubId(); }

  //! Tests whether a static base-address is set.
  inline bool hasBaseAddress() const noexcept { return _codeInfo.hasBaseAddress(); }
  //! Returns a static base-address (uint64_t).
  inline uint64_t baseAddress() const noexcept { return _codeInfo.baseAddress(); }

  //! \}

  //! \name Logging & Error Handling
  //! \{

  //! Returns the attached logger.
  inline Logger* logger() const noexcept { return _logger; }
  //! Attaches a `logger` to CodeHolder and propagates it to all attached emitters.
  ASMJIT_API void setLogger(Logger* logger) noexcept;
  //! Resets the logger to none.
  inline void resetLogger() noexcept { setLogger(nullptr); }

  //! Tests whether the global error handler is attached.
  inline bool hasErrorHandler() const noexcept { return _errorHandler != nullptr; }
  //! Returns the global error handler.
  inline ErrorHandler* errorHandler() const noexcept { return _errorHandler; }
  //! Sets the global error handler.
  inline void setErrorHandler(ErrorHandler* handler) noexcept { _errorHandler = handler; }
  //! Resets the global error handler to none.
  inline void resetErrorHandler() noexcept { setErrorHandler(nullptr); }

  //! \}

  //! \name Code Buffer
  //! \{

  //! Grows the given `cb` so it can hold at least `n` more bytes.
  ASMJIT_API Error growBuffer(CodeBuffer* cb, size_t n) noexcept;
  //! Reserves the capacity of the given `cb` to at least `n` bytes.
  ASMJIT_API Error reserveBuffer(CodeBuffer* cb, size_t n) noexcept;

  //! \}

  //! \name Sections
  //! \{

  //! Returns an array of `Section*` records.
  inline const ZoneVector<Section*>& sections() const noexcept { return _sections; }
  //! Returns the number of sections.
  inline uint32_t sectionCount() const noexcept { return _sections.size(); }

  //! Tests whether the given `sectionId` is valid.
  inline bool isSectionValid(uint32_t sectionId) const noexcept { return sectionId < _sections.size(); }

  //! Creates a new section and returns its pointer in `sectionOut`.
  //!
  //! Returns `Error`, does not report a possible error to `ErrorHandler`.
  ASMJIT_API Error newSection(Section** sectionOut, const char* name, size_t nameSize = SIZE_MAX, uint32_t flags = 0, uint32_t alignment = 1) noexcept;

  //! Returns a section entry of the given index.
  inline Section* sectionById(uint32_t sectionId) const noexcept { return _sections[sectionId]; }

  //! Returns a section that matches the given `name`.
  //!
  //! If there is no such section `nullptr` is returned.
  ASMJIT_API Section* sectionByName(const char* name, size_t nameSize = SIZE_MAX) const noexcept;

  //! Returns '.text' section (section that commonly represents code).
  //!
  //! \note Text section is always the first section in `CodeHolder::sections()` array.
  inline Section* textSection() const noexcept { return _sections[0]; }

  //! Tests whether '.addrtab' section exists.
  inline bool hasAddressTable() const noexcept { return _addressTableSection != nullptr; }

  //! Returns '.addrtab' section.
  //!
  //! This section is used exclusively by AsmJit to store absolute 64-bit
  //! addresses that cannot be encoded in instructions like 'jmp' or 'call'.
  inline Section* addressTableSection() const noexcept { return _addressTableSection; }

  //! Ensures that '.addrtab' section exists (creates it if it doesn't) and
  //! returns it. Can return `nullptr` on out of memory condition.
  ASMJIT_API Section* ensureAddressTableSection() noexcept;

  //! Used to add an address to an address table.
  //!
  //! This implicitly calls `ensureAddressTableSection()` and then creates
  //! `AddressTableEntry` that is inserted to `_addressTableEntries`. If the
  //! address already exists this operation does nothing as the same addresses
  //! use the same slot.
  //!
  //! This function should be considered internal as it's used by assemblers to
  //! insert an absolute address into the address table. Inserting address into
  //! address table without creating a particular relocation entry makes no sense.
  ASMJIT_API Error addAddressToAddressTable(uint64_t address) noexcept;

  //! \}

  //! \name Labels & Symbols
  //! \{

  //! Returns array of `LabelEntry*` records.
  inline const ZoneVector<LabelEntry*>& labelEntries() const noexcept { return _labelEntries; }

  //! Returns number of labels created.
  inline uint32_t labelCount() const noexcept { return _labelEntries.size(); }

  //! Tests whether the label having `id` is valid (i.e. created by `newLabelEntry()`).
  inline bool isLabelValid(uint32_t labelId) const noexcept {
    return labelId < _labelEntries.size();
  }

  //! Tests whether the `label` is valid (i.e. created by `newLabelEntry()`).
  inline bool isLabelValid(const Label& label) const noexcept {
    return label.id() < _labelEntries.size();
  }

  //! \overload
  inline bool isLabelBound(uint32_t labelId) const noexcept {
    return isLabelValid(labelId) && _labelEntries[labelId]->isBound();
  }

  //! Tests whether the `label` is already bound.
  //!
  //! Returns `false` if the `label` is not valid.
  inline bool isLabelBound(const Label& label) const noexcept {
    return isLabelBound(label.id());
  }

  //! Returns LabelEntry of the given label `id`.
  inline LabelEntry* labelEntry(uint32_t labelId) const noexcept {
    return isLabelValid(labelId) ? _labelEntries[labelId] : static_cast<LabelEntry*>(nullptr);
  }

  //! Returns LabelEntry of the given `label`.
  inline LabelEntry* labelEntry(const Label& label) const noexcept {
    return labelEntry(label.id());
  }

  //! Returns offset of a `Label` by its `labelId`.
  //!
  //! The offset returned is relative to the start of the section. Zero offset
  //! is returned for unbound labels, which is their initial offset value.
  inline uint64_t labelOffset(uint32_t labelId) const noexcept {
    ASMJIT_ASSERT(isLabelValid(labelId));
    return _labelEntries[labelId]->offset();
  }

  //! \overload
  inline uint64_t labelOffset(const Label& label) const noexcept {
    return labelOffset(label.id());
  }

  //! Returns offset of a label by it's `labelId` relative to the base offset.
  //!
  //! \remarks The offset of the section where the label is bound must be valid
  //! in order to use this function, otherwise the value returned will not be
  //! reliable.
  inline uint64_t labelOffsetFromBase(uint32_t labelId) const noexcept {
    ASMJIT_ASSERT(isLabelValid(labelId));
    const LabelEntry* le = _labelEntries[labelId];
    return (le->isBound() ? le->section()->offset() : uint64_t(0)) + le->offset();
  }

  //! \overload
  inline uint64_t labelOffsetFromBase(const Label& label) const noexcept {
    return labelOffsetFromBase(label.id());
  }

  //! Creates a new anonymous label and returns its entry in `entryOut`.
  //!
  //! Returns `Error`, does not report error to `ErrorHandler`.
  ASMJIT_API Error newLabelEntry(LabelEntry** entryOut) noexcept;

  //! Creates a new named label of the given label-type `type`.
  //!
  //! Returns `Error`, does not report a possible error to `ErrorHandler`.
  ASMJIT_API Error newNamedLabelEntry(LabelEntry** entryOut, const char* name, size_t nameSize, uint32_t type, uint32_t parentId = Globals::kInvalidId) noexcept;

  //! Returns a label id by name.
  ASMJIT_API uint32_t labelIdByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept;

  //! Returns a `Label` that references a label of the given `name` (and optionally `parentId`).
  inline Label labelByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept {
    return Label(labelIdByName(name, nameSize, parentId));
  }

  //! Tests whether there are any unresolved label links.
  inline bool hasUnresolvedLinks() const noexcept { return _unresolvedLinkCount != 0; }
  //! Returns the number of label links, which are unresolved.
  inline size_t unresolvedLinkCount() const noexcept { return _unresolvedLinkCount; }

  //! Creates a new label-link used to store information about yet unbound labels.
  //!
  //! Returns `null` if the allocation failed.
  ASMJIT_API LabelLink* newLabelLink(LabelEntry* le, uint32_t sectionId, size_t offset, intptr_t rel) noexcept;

  //! Resolves cross-section links (`LabelLink`) associated with each label that
  //! was used as a destination in code of a different section. It's only useful
  //! to people that use multiple sections as it will do nothing if the code only
  //! contains a single section in which cross-section links are not possible.
  ASMJIT_API Error resolveUnresolvedLinks() noexcept;

  //! Binds a label to a given `sectionId` and `offset` (relative to start of the section).
  //!
  //! This function is generally used by `BaseAssembler::bind()` to do the heavy lifting.
  ASMJIT_API Error bindLabel(const Label& label, uint32_t sectionId, uint64_t offset) noexcept;

  //! \}

  //! \name Relocations
  //! \{

  //! Tests whether the code contains relocation entries.
  inline bool hasRelocEntries() const noexcept { return !_relocations.empty(); }
  //! Returns array of `RelocEntry*` records.
  inline const ZoneVector<RelocEntry*>& relocEntries() const noexcept { return _relocations; }

  //! Returns a RelocEntry of the given `id`.
  inline RelocEntry* relocEntry(uint32_t id) const noexcept { return _relocations[id]; }

  //! Creates a new relocation entry of type `relocType` and size `valueSize`.
  //!
  //! Additional fields can be set after the relocation entry was created.
  ASMJIT_API Error newRelocEntry(RelocEntry** dst, uint32_t relocType, uint32_t valueSize) noexcept;

  //! \}

  //! \name Utilities
  //! \{

  //! Flattens all sections by recalculating their offsets, starting at 0.
  //!
  //! \note This should never be called more than once.
  ASMJIT_API Error flatten() noexcept;

  //! Returns the computed size of code & data of all sections.
  //!
  //! \note All sections will be iterated over and the code size returned
  //! would represent the minimum code size of all combined sections after
  //! applying minimum alignment. Code size may decrease after calling
  //! `flatten()` and `relocateToBase()`.
  ASMJIT_API size_t codeSize() const noexcept;

  //! Relocates the code to the given `baseAddress`.
  //!
  //! \param baseAddress Absolute base address where the code will be relocated
  //! to. Please note that nothing is copied to such base address, it's just an
  //! absolute value used by the relocator to resolve all stored relocations.
  //!
  //! \note This should never be called more than once.
  ASMJIT_API Error relocateToBase(uint64_t baseAddress) noexcept;

  //! Options that can be used with \ref copySectionData().
  enum CopyOptions : uint32_t {
    //! If virtual size of the section is larger than the size of its buffer
    //! then all bytes between buffer size and virtual size will be zeroed.
    kCopyWithPadding = 0x1
  };

  //! Copies a single section into `dst`.
  ASMJIT_API Error copySectionData(void* dst, size_t dstSize, uint32_t sectionId, uint32_t options = 0) noexcept;

  //! Copies all sections into `dst`.
  //!
  //! This should only be used if the data was flattened and there are no gaps
  //! between the sections. The `dstSize` is always checked and the copy will
  //! never write anything outside the provided buffer.
  ASMJIT_API Error copyFlattenedData(void* dst, size_t dstSize, uint32_t options = 0) noexcept;

  //! \}
};
907
908 //! \}
909
910 ASMJIT_END_NAMESPACE
911
912 #endif // _ASMJIT_CORE_CODEHOLDER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_COMPILER
8
9 #include "../core/assembler.h"
10 #include "../core/compiler.h"
11 #include "../core/cpuinfo.h"
12 #include "../core/logging.h"
13 #include "../core/rapass_p.h"
14 #include "../core/support.h"
15 #include "../core/type.h"
16
17 ASMJIT_BEGIN_NAMESPACE
18
19 // ============================================================================
20 // [asmjit::GlobalConstPoolPass]
21 // ============================================================================
22
//! Compiler pass that flushes the global constant pool.
//!
//! The global constant pool node is kept detached while code is being built;
//! this pass appends it after the last node so pooled constants end up at
//! the very end of the emitted code.
class GlobalConstPoolPass : public Pass {
  ASMJIT_NONCOPYABLE(GlobalConstPoolPass)
  typedef Pass Base;

  GlobalConstPoolPass() noexcept : Pass("GlobalConstPoolPass") {}

  Error run(Zone* zone, Logger* logger) noexcept override {
    ASMJIT_UNUSED(zone);
    ASMJIT_UNUSED(logger);

    // Flush the global constant pool.
    BaseCompiler* compiler = static_cast<BaseCompiler*>(_cb);
    if (compiler->_globalConstPool) {
      compiler->addAfter(compiler->_globalConstPool, compiler->lastNode());
      compiler->_globalConstPool = nullptr;
    }
    return kErrorOk;
  }
};
42
43 // ============================================================================
44 // [asmjit::FuncCallNode - Arg / Ret]
45 // ============================================================================
46
47 bool FuncCallNode::_setArg(uint32_t i, const Operand_& op) noexcept {
48 if ((i & ~kFuncArgHi) >= _funcDetail.argCount())
49 return false;
50
51 _args[i] = op;
52 return true;
53 }
54
55 bool FuncCallNode::_setRet(uint32_t i, const Operand_& op) noexcept {
56 if (i >= 2)
57 return false;
58
59 _rets[i] = op;
60 return true;
61 }
62
63 // ============================================================================
64 // [asmjit::BaseCompiler - Construction / Destruction]
65 // ============================================================================
66
//! Creates a `BaseCompiler` with no open function and empty constant pools.
BaseCompiler::BaseCompiler() noexcept
  : BaseBuilder(),
    _func(nullptr),
    _vRegZone(4096 - Zone::kBlockOverhead),
    _vRegArray(),
    _localConstPool(nullptr),
    _globalConstPool(nullptr) {

  // Tag the emitter so consumers can distinguish a Compiler from a Builder.
  _type = kTypeCompiler;
}
//! Destroys the `BaseCompiler` instance.
BaseCompiler::~BaseCompiler() noexcept {}
78
79 // ============================================================================
80 // [asmjit::BaseCompiler - Function API]
81 // ============================================================================
82
//! Creates a new `FuncNode` (with its exit label and end sentinel) for the
//! function signature `sign`.
//!
//! Returns `nullptr` on failure; the precise error is reported through
//! `reportError()` before returning.
FuncNode* BaseCompiler::newFunc(const FuncSignature& sign) noexcept {
  Error err;

  FuncNode* func = newNodeT<FuncNode>();
  if (ASMJIT_UNLIKELY(!func)) {
    reportError(DebugUtils::errored(kErrorOutOfMemory));
    return nullptr;
  }

  err = registerLabelNode(func);
  if (ASMJIT_UNLIKELY(err)) {
    // TODO: Calls reportError, maybe rethink noexcept?
    reportError(err);
    return nullptr;
  }

  // Create helper nodes.
  func->_exitNode = newLabelNode();
  func->_end = newNodeT<SentinelNode>(SentinelNode::kSentinelFuncEnd);

  if (ASMJIT_UNLIKELY(!func->_exitNode || !func->_end)) {
    reportError(DebugUtils::errored(kErrorOutOfMemory));
    return nullptr;
  }

  // Initialize the function info.
  err = func->detail().init(sign);
  if (ASMJIT_UNLIKELY(err)) {
    reportError(err);
    return nullptr;
  }

  // If the Target guarantees greater stack alignment than required by the
  // calling convention then override it as we can prevent having to perform
  // dynamic stack alignment
  if (func->_funcDetail._callConv.naturalStackAlignment() < _codeInfo.stackAlignment())
    func->_funcDetail._callConv.setNaturalStackAlignment(_codeInfo.stackAlignment());

  // Initialize the function frame.
  err = func->_frame.init(func->_funcDetail);
  if (ASMJIT_UNLIKELY(err)) {
    reportError(err);
    return nullptr;
  }

  // Allocate space for function arguments (zeroed, assigned later by setArg()).
  func->_args = nullptr;
  if (func->argCount() != 0) {
    func->_args = _allocator.allocT<VirtReg*>(func->argCount() * sizeof(VirtReg*));
    if (ASMJIT_UNLIKELY(!func->_args)) {
      reportError(DebugUtils::errored(kErrorOutOfMemory));
      return nullptr;
    }

    memset(func->_args, 0, func->argCount() * sizeof(VirtReg*));
  }

  return func;
}
142
143 FuncNode* BaseCompiler::addFunc(FuncNode* func) {
144 ASMJIT_ASSERT(_func == nullptr);
145 _func = func;
146
147 addNode(func); // Function node.
148 BaseNode* prev = cursor(); // {CURSOR}.
149 addNode(func->exitNode()); // Function exit label.
150 addNode(func->endNode()); // Function end marker.
151
152 _setCursor(prev);
153 return func;
154 }
155
156 FuncNode* BaseCompiler::addFunc(const FuncSignature& sign) {
157 FuncNode* func = newFunc(sign);
158
159 if (!func) {
160 reportError(DebugUtils::errored(kErrorOutOfMemory));
161 return nullptr;
162 }
163
164 return addFunc(func);
165 }
166
167 Error BaseCompiler::endFunc() {
168 FuncNode* func = _func;
169 if (ASMJIT_UNLIKELY(!func))
170 return reportError(DebugUtils::errored(kErrorInvalidState));
171
172 // Add the local constant pool at the end of the function (if exists).
173 if (_localConstPool) {
174 setCursor(func->endNode()->prev());
175 addNode(_localConstPool);
176 _localConstPool = nullptr;
177 }
178
179 // Mark as finished.
180 _func = nullptr;
181
182 SentinelNode* end = func->endNode();
183 setCursor(end);
184 return kErrorOk;
185 }
186
187 Error BaseCompiler::setArg(uint32_t argIndex, const BaseReg& r) {
188 FuncNode* func = _func;
189
190 if (ASMJIT_UNLIKELY(!func))
191 return reportError(DebugUtils::errored(kErrorInvalidState));
192
193 if (ASMJIT_UNLIKELY(!isVirtRegValid(r)))
194 return reportError(DebugUtils::errored(kErrorInvalidVirtId));
195
196 VirtReg* vReg = virtRegByReg(r);
197 func->setArg(argIndex, vReg);
198
199 return kErrorOk;
200 }
201
202 FuncRetNode* BaseCompiler::newRet(const Operand_& o0, const Operand_& o1) noexcept {
203 FuncRetNode* node = newNodeT<FuncRetNode>();
204 if (!node) {
205 reportError(DebugUtils::errored(kErrorOutOfMemory));
206 return nullptr;
207 }
208
209 node->setOp(0, o0);
210 node->setOp(1, o1);
211 node->setOpCount(!o1.isNone() ? 2u : !o0.isNone() ? 1u : 0u);
212
213 return node;
214 }
215
216 FuncRetNode* BaseCompiler::addRet(const Operand_& o0, const Operand_& o1) noexcept {
217 FuncRetNode* node = newRet(o0, o1);
218 if (!node) return nullptr;
219 return addNode(node)->as<FuncRetNode>();
220 }
221
222 // ============================================================================
223 // [asmjit::BaseCompiler - Call]
224 // ============================================================================
225
226 FuncCallNode* BaseCompiler::newCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept {
227 FuncCallNode* node = newNodeT<FuncCallNode>(instId, 0u);
228 if (ASMJIT_UNLIKELY(!node)) {
229 reportError(DebugUtils::errored(kErrorOutOfMemory));
230 return nullptr;
231 }
232
233 node->setOpCount(1);
234 node->setOp(0, o0);
235 node->resetOp(1);
236 node->resetOp(2);
237 node->resetOp(3);
238
239 Error err = node->detail().init(sign);
240 if (ASMJIT_UNLIKELY(err)) {
241 reportError(err);
242 return nullptr;
243 }
244
245 // If there are no arguments skip the allocation.
246 uint32_t nArgs = sign.argCount();
247 if (!nArgs) return node;
248
249 node->_args = static_cast<Operand*>(_allocator.alloc(nArgs * sizeof(Operand)));
250 if (!node->_args) {
251 reportError(DebugUtils::errored(kErrorOutOfMemory));
252 return nullptr;
253 }
254
255 memset(node->_args, 0, nArgs * sizeof(Operand));
256 return node;
257 }
258
259 FuncCallNode* BaseCompiler::addCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept {
260 FuncCallNode* node = newCall(instId, o0, sign);
261 if (!node) return nullptr;
262 return addNode(node)->as<FuncCallNode>();
263 }
264
265 // ============================================================================
266 // [asmjit::BaseCompiler - Vars]
267 // ============================================================================
268
269 static void CodeCompiler_assignGenericName(BaseCompiler* self, VirtReg* vReg) {
270 uint32_t index = unsigned(Operand::virtIdToIndex(vReg->_id));
271
272 char buf[64];
273 int size = snprintf(buf, ASMJIT_ARRAY_SIZE(buf), "%%%u", unsigned(index));
274
275 ASMJIT_ASSERT(size > 0 && size < int(ASMJIT_ARRAY_SIZE(buf)));
276 vReg->_name.setData(&self->_dataZone, buf, unsigned(size));
277 }
278
// Creates a new `VirtReg` of the given `typeId` and register `signature`.
//
// Returns nullptr on failure (virtual-id space exhausted or allocation
// failure). No error is reported here - callers are responsible for that.
VirtReg* BaseCompiler::newVirtReg(uint32_t typeId, uint32_t signature, const char* name) noexcept {
  uint32_t index = _vRegArray.size();
  // The number of virtual registers is limited by the virtual-id encoding.
  if (ASMJIT_UNLIKELY(index >= uint32_t(Operand::kVirtIdCount)))
    return nullptr;

  // Reserve array capacity first so `appendUnsafe()` below cannot fail.
  if (_vRegArray.willGrow(&_allocator) != kErrorOk)
    return nullptr;

  VirtReg* vReg = _vRegZone.allocZeroedT<VirtReg>();
  if (ASMJIT_UNLIKELY(!vReg)) return nullptr;

  uint32_t size = Type::sizeOf(typeId);
  // Alignment follows the natural size, capped at 64 bytes. NOTE(review): for
  // typeId 0 (used by `_newStack()`) the size is presumably 0, which makes the
  // initial alignment 0 - `_newStack()` overwrites `_alignment` afterwards.
  uint32_t alignment = Support::min<uint32_t>(size, 64);

  // Construct the VirtReg in-place in the zeroed, zone-allocated storage.
  vReg = new(vReg) VirtReg(Operand::indexToVirtId(index), signature, size, alignment, typeId);

#ifndef ASMJIT_NO_LOGGING
  // Use the provided name, or synthesize "%<index>" when logging is enabled.
  if (name && name[0] != '\0')
    vReg->_name.setData(&_dataZone, name, SIZE_MAX);
  else
    CodeCompiler_assignGenericName(this, vReg);
#endif

  _vRegArray.appendUnsafe(vReg);
  return vReg;
}
305
306 Error BaseCompiler::_newReg(BaseReg& out, uint32_t typeId, const char* name) {
307 RegInfo regInfo;
308
309 Error err = ArchUtils::typeIdToRegInfo(archId(), typeId, regInfo);
310 if (ASMJIT_UNLIKELY(err)) return reportError(err);
311
312 VirtReg* vReg = newVirtReg(typeId, regInfo.signature(), name);
313 if (ASMJIT_UNLIKELY(!vReg)) {
314 out.reset();
315 return reportError(DebugUtils::errored(kErrorOutOfMemory));
316 }
317
318 out._initReg(regInfo.signature(), vReg->id());
319 return kErrorOk;
320 }
321
322 Error BaseCompiler::_newRegFmt(BaseReg& out, uint32_t typeId, const char* fmt, ...) {
323 va_list ap;
324 StringTmp<256> sb;
325
326 va_start(ap, fmt);
327 sb.appendVFormat(fmt, ap);
328 va_end(ap);
329
330 return _newReg(out, typeId, sb.data());
331 }
332
// Creates a new virtual register compatible with the existing register `ref`
// and initializes `out` to it. If `ref` is a valid virtual register its
// TypeId is reused (adjusted to `ref`'s size when they differ, see below),
// otherwise the physical register type of `ref` is used directly.
Error BaseCompiler::_newReg(BaseReg& out, const BaseReg& ref, const char* name) {
  RegInfo regInfo;
  uint32_t typeId;

  if (isVirtRegValid(ref)) {
    VirtReg* vRef = virtRegByReg(ref);
    typeId = vRef->typeId();

    // NOTE: It's possible to cast one register type to another if it's the
    // same register group. However, VirtReg always contains the TypeId that
    // was used to create the register. This means that in some cases we may
    // end up having different size of `ref` and `vRef`. In such case we
    // adjust the TypeId to match the `ref` register type instead of the
    // original register type, which should be the expected behavior.
    uint32_t typeSize = Type::sizeOf(typeId);
    uint32_t refSize = ref.size();

    if (typeSize != refSize) {
      if (Type::isInt(typeId)) {
        // GP register - change TypeId to match `ref`, but keep sign of `vRef`
        // (signedness is presumably encoded in bit 0 of the TypeId - verify).
        switch (refSize) {
          case 1: typeId = Type::kIdI8 | (typeId & 1); break;
          case 2: typeId = Type::kIdI16 | (typeId & 1); break;
          case 4: typeId = Type::kIdI32 | (typeId & 1); break;
          case 8: typeId = Type::kIdI64 | (typeId & 1); break;
          default: typeId = Type::kIdVoid; break;
        }
      }
      else if (Type::isMmx(typeId)) {
        // MMX register - always use 64-bit.
        typeId = Type::kIdMmx64;
      }
      else if (Type::isMask(typeId)) {
        // Mask register - change TypeId to match `ref` size.
        switch (refSize) {
          case 1: typeId = Type::kIdMask8; break;
          case 2: typeId = Type::kIdMask16; break;
          case 4: typeId = Type::kIdMask32; break;
          case 8: typeId = Type::kIdMask64; break;
          default: typeId = Type::kIdVoid; break;
        }
      }
      else {
        // VEC register - change TypeId to match `ref` size, keep vector metadata.
        uint32_t elementTypeId = Type::baseOf(typeId);

        switch (refSize) {
          case 16: typeId = Type::_kIdVec128Start + (elementTypeId - Type::kIdI8); break;
          case 32: typeId = Type::_kIdVec256Start + (elementTypeId - Type::kIdI8); break;
          case 64: typeId = Type::_kIdVec512Start + (elementTypeId - Type::kIdI8); break;
          default: typeId = Type::kIdVoid; break;
        }
      }

      // kIdVoid here means `ref`'s size couldn't be mapped to any TypeId.
      if (typeId == Type::kIdVoid)
        return reportError(DebugUtils::errored(kErrorInvalidState));
    }
  }
  else {
    // Not a virtual register - use the physical register's type directly.
    typeId = ref.type();
  }

  Error err = ArchUtils::typeIdToRegInfo(archId(), typeId, regInfo);
  if (ASMJIT_UNLIKELY(err)) return reportError(err);

  VirtReg* vReg = newVirtReg(typeId, regInfo.signature(), name);
  if (ASMJIT_UNLIKELY(!vReg)) {
    // Leave `out` in a well-defined (reset) state on failure.
    out.reset();
    return reportError(DebugUtils::errored(kErrorOutOfMemory));
  }

  out._initReg(regInfo.signature(), vReg->id());
  return kErrorOk;
}
407
408 Error BaseCompiler::_newRegFmt(BaseReg& out, const BaseReg& ref, const char* fmt, ...) {
409 va_list ap;
410 StringTmp<256> sb;
411
412 va_start(ap, fmt);
413 sb.appendVFormat(fmt, ap);
414 va_end(ap);
415
416 return _newReg(out, ref, sb.data());
417 }
418
419 Error BaseCompiler::_newStack(BaseMem& out, uint32_t size, uint32_t alignment, const char* name) {
420 if (size == 0)
421 return reportError(DebugUtils::errored(kErrorInvalidArgument));
422
423 if (alignment == 0)
424 alignment = 1;
425
426 if (!Support::isPowerOf2(alignment))
427 return reportError(DebugUtils::errored(kErrorInvalidArgument));
428
429 if (alignment > 64)
430 alignment = 64;
431
432 VirtReg* vReg = newVirtReg(0, 0, name);
433 if (ASMJIT_UNLIKELY(!vReg)) {
434 out.reset();
435 return reportError(DebugUtils::errored(kErrorOutOfMemory));
436 }
437
438 vReg->_virtSize = size;
439 vReg->_isStack = true;
440 vReg->_alignment = uint8_t(alignment);
441
442 // Set the memory operand to GPD/GPQ and its id to VirtReg.
443 out = BaseMem(BaseMem::Decomposed { _gpRegInfo.type(), vReg->id(), BaseReg::kTypeNone, 0, 0, 0, BaseMem::kSignatureMemRegHomeFlag });
444 return kErrorOk;
445 }
446
447 Error BaseCompiler::_newConst(BaseMem& out, uint32_t scope, const void* data, size_t size) {
448 ConstPoolNode** pPool;
449 if (scope == ConstPool::kScopeLocal)
450 pPool = &_localConstPool;
451 else if (scope == ConstPool::kScopeGlobal)
452 pPool = &_globalConstPool;
453 else
454 return reportError(DebugUtils::errored(kErrorInvalidArgument));
455
456 ConstPoolNode* pool = *pPool;
457 if (!pool) {
458 pool = newConstPoolNode();
459 if (ASMJIT_UNLIKELY(!pool))
460 return reportError(DebugUtils::errored(kErrorOutOfMemory));
461 *pPool = pool;
462 }
463
464 size_t off;
465 Error err = pool->add(data, size, off);
466
467 if (ASMJIT_UNLIKELY(err))
468 return reportError(err);
469
470 out = BaseMem(BaseMem::Decomposed {
471 Label::kLabelTag, // Base type.
472 pool->id(), // Base id.
473 0, // Index type.
474 0, // Index id.
475 int32_t(off), // Offset.
476 uint32_t(size), // Size.
477 0 // Flags.
478 });
479 return kErrorOk;
480 }
481
482 void BaseCompiler::rename(const BaseReg& reg, const char* fmt, ...) {
483 if (!reg.isVirtReg()) return;
484
485 VirtReg* vReg = virtRegById(reg.id());
486 if (!vReg) return;
487
488 if (fmt && fmt[0] != '\0') {
489 char buf[128];
490 va_list ap;
491
492 va_start(ap, fmt);
493 vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf), fmt, ap);
494 va_end(ap);
495
496 vReg->_name.setData(&_dataZone, buf, SIZE_MAX);
497 }
498 else {
499 CodeCompiler_assignGenericName(this, vReg);
500 }
501 }
502
503 // ============================================================================
504 // [asmjit::BaseCompiler - Events]
505 // ============================================================================
506
507 Error BaseCompiler::onAttach(CodeHolder* code) noexcept {
508 ASMJIT_PROPAGATE(Base::onAttach(code));
509
510 Error err = addPassT<GlobalConstPoolPass>();
511 if (ASMJIT_UNLIKELY(err)) {
512 onDetach(code);
513 return err;
514 }
515
516 return kErrorOk;
517 }
518
// Called when the compiler is detached from `code`; drops all compiler state
// (current function, constant pools, virtual registers) before delegating to
// the base implementation.
Error BaseCompiler::onDetach(CodeHolder* code) noexcept {
  _func = nullptr;
  _localConstPool = nullptr;
  _globalConstPool = nullptr;

  // Release all VirtReg pointers and the zone that backs their storage.
  _vRegArray.reset();
  _vRegZone.reset();

  return Base::onDetach(code);
}
529
530 // ============================================================================
531 // [asmjit::FuncPass - Construction / Destruction]
532 // ============================================================================
533
// Creates a `FuncPass` with the given `name`, forwarded to the base `Pass`.
FuncPass::FuncPass(const char* name) noexcept
  : Pass(name) {}
536
537 // ============================================================================
538 // [asmjit::FuncPass - Run]
539 // ============================================================================
540
// Calls `runOnFunction()` for every `FuncNode` in the code stream; any error
// returned by `runOnFunction()` aborts the scan and is propagated.
Error FuncPass::run(Zone* zone, Logger* logger) noexcept {
  BaseNode* node = cb()->firstNode();
  if (!node) return kErrorOk;

  do {
    if (node->type() == BaseNode::kNodeFunc) {
      FuncNode* func = node->as<FuncNode>();
      // Advance to the end sentinel before running the pass so the scan
      // below resumes after the function's body.
      node = func->endNode();
      ASMJIT_PROPAGATE(runOnFunction(zone, logger, func));
    }

    // Find a function by skipping all nodes that are not `kNodeFunc`.
    do {
      node = node->next();
    } while (node && node->type() != BaseNode::kNodeFunc);
  } while (node);

  return kErrorOk;
}
560
561 ASMJIT_END_NAMESPACE
562
563 #endif // !ASMJIT_NO_COMPILER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_COMPILER_H
7 #define _ASMJIT_CORE_COMPILER_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/assembler.h"
13 #include "../core/builder.h"
14 #include "../core/constpool.h"
15 #include "../core/func.h"
16 #include "../core/inst.h"
17 #include "../core/operand.h"
18 #include "../core/support.h"
19 #include "../core/zone.h"
20 #include "../core/zonevector.h"
21
22 ASMJIT_BEGIN_NAMESPACE
23
24 // ============================================================================
25 // [Forward Declarations]
26 // ============================================================================
27
28 struct RATiedReg;
29 class RAWorkReg;
30
31 class FuncNode;
32 class FuncRetNode;
33 class FuncCallNode;
34
35 //! \addtogroup asmjit_compiler
36 //! \{
37
38 // ============================================================================
39 // [asmjit::VirtReg]
40 // ============================================================================
41
//! Virtual register data (BaseCompiler).
class VirtReg {
public:
  ASMJIT_NONCOPYABLE(VirtReg)

  //! Virtual register id.
  uint32_t _id;
  //! Virtual register info (signature).
  RegInfo _info;
  //! Virtual register size (can be smaller than `regInfo._size`).
  uint32_t _virtSize;
  //! Virtual register alignment (for spilling).
  uint8_t _alignment;
  //! Type-id.
  uint8_t _typeId;
  //! Virtual register weight for alloc/spill decisions.
  uint8_t _weight;
  //! True if this is a fixed register, never reallocated.
  uint8_t _isFixed : 1;
  //! True if the virtual register is only used as a stack (never accessed as register).
  uint8_t _isStack : 1;
  //! Unused bits, must be zero.
  uint8_t _reserved : 6;

  //! Virtual register name (user provided or automatically generated).
  ZoneString<16> _name;

  // -------------------------------------------------------------------------
  // The following members are used exclusively by RAPass. They are initialized
  // when the VirtReg is created to NULL pointers and then changed during RAPass
  // execution. RAPass sets them back to NULL before it returns.
  // -------------------------------------------------------------------------

  //! Reference to `RAWorkReg`, used during register allocation.
  RAWorkReg* _workReg;

  //! \name Construction & Destruction
  //! \{

  // NOTE: `_info` is not in the initializer list; its signature is assigned
  // in the constructor body instead.
  inline VirtReg(uint32_t id, uint32_t signature, uint32_t virtSize, uint32_t alignment, uint32_t typeId) noexcept
    : _id(id),
      _virtSize(virtSize),
      _alignment(uint8_t(alignment)),
      _typeId(uint8_t(typeId)),
      _weight(1),
      _isFixed(false),
      _isStack(false),
      _reserved(0),
      _name(),
      _workReg(nullptr) { _info._signature = signature; }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the virtual register id.
  inline uint32_t id() const noexcept { return _id; }

  //! Returns the virtual register name.
  inline const char* name() const noexcept { return _name.data(); }
  //! Returns the size of the virtual register name.
  inline uint32_t nameSize() const noexcept { return _name.size(); }

  //! Returns a register information that wraps the register signature.
  inline const RegInfo& info() const noexcept { return _info; }
  //! Returns a virtual register type (maps to the physical register type as well).
  inline uint32_t type() const noexcept { return _info.type(); }
  //! Returns a virtual register group (maps to the physical register group as well).
  inline uint32_t group() const noexcept { return _info.group(); }

  //! Returns a real size of the register this virtual register maps to.
  //!
  //! For example if this is a 128-bit SIMD register used for a scalar single
  //! precision floating point value then its virtSize would be 4, however, the
  //! `regSize` would still say 16 (128-bits), because it's the smallest size
  //! of that register type.
  inline uint32_t regSize() const noexcept { return _info.size(); }

  //! Returns a register signature of this virtual register.
  inline uint32_t signature() const noexcept { return _info.signature(); }

  //! Returns the virtual register size.
  //!
  //! The virtual register size describes how many bytes the virtual register
  //! needs to store its content. It can be smaller than the physical register
  //! size, see `regSize()`.
  inline uint32_t virtSize() const noexcept { return _virtSize; }

  //! Returns the virtual register alignment.
  inline uint32_t alignment() const noexcept { return _alignment; }

  //! Returns the virtual register type id, see `Type::Id`.
  inline uint32_t typeId() const noexcept { return _typeId; }

  //! Returns the virtual register weight - the register allocator can use it
  //! as explicit hint for alloc/spill decisions.
  inline uint32_t weight() const noexcept { return _weight; }
  //! Sets the virtual register weight (0 to 255) - the register allocator can
  //! use it as explicit hint for alloc/spill decisions and initial bin-packing.
  inline void setWeight(uint32_t weight) noexcept { _weight = uint8_t(weight); }

  //! Returns whether the virtual register is always allocated to a fixed
  //! physical register (and never reallocated).
  //!
  //! \note This is only used for special purposes and it's mostly internal.
  inline bool isFixed() const noexcept { return bool(_isFixed); }

  //! Returns whether the virtual register is indeed a stack that only uses
  //! the virtual register id for making it accessible.
  //!
  //! \note It's an error if a stack is accessed as a register.
  inline bool isStack() const noexcept { return bool(_isStack); }

  //! Tests whether the virtual register has an associated `RAWorkReg`.
  inline bool hasWorkReg() const noexcept { return _workReg != nullptr; }
  //! Returns the `RAWorkReg` associated by RAPass (null outside RAPass).
  inline RAWorkReg* workReg() const noexcept { return _workReg; }
  //! Associates a `RAWorkReg` with this virtual register (RAPass only).
  inline void setWorkReg(RAWorkReg* workReg) noexcept { _workReg = workReg; }
  //! Clears the associated `RAWorkReg` (RAPass only).
  inline void resetWorkReg() noexcept { _workReg = nullptr; }

  //! \}
};
162
163 // ============================================================================
164 // [asmjit::BaseCompiler]
165 // ============================================================================
166
//! Code emitter that uses virtual registers and performs register allocation.
//!
//! Compiler is a high-level code-generation tool that provides register
//! allocation and automatic handling of function calling conventions. It was
//! primarily designed for merging multiple parts of code into a function
//! without worrying about registers and function calling conventions.
//!
//! BaseCompiler can be used, with a minimum effort, to handle 32-bit and 64-bit
//! code at the same time.
//!
//! BaseCompiler is based on BaseBuilder and contains all the features it
//! provides. It means that the code it stores can be modified (removed, added,
//! injected) and analyzed. When the code is finalized the compiler can emit
//! the code into an Assembler to translate the abstract representation into a
//! machine code.
class ASMJIT_VIRTAPI BaseCompiler : public BaseBuilder {
public:
  ASMJIT_NONCOPYABLE(BaseCompiler)
  typedef BaseBuilder Base;

  //! Current function.
  FuncNode* _func;
  //! Allocates `VirtReg` objects.
  Zone _vRegZone;
  //! Stores array of `VirtReg` pointers.
  ZoneVector<VirtReg*> _vRegArray;

  //! Local constant pool, flushed at the end of each function.
  ConstPoolNode* _localConstPool;
  //! Global constant pool, flushed by `finalize()`.
  ConstPoolNode* _globalConstPool;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `BaseCompiler` instance.
  ASMJIT_API BaseCompiler() noexcept;
  //! Destroys the `BaseCompiler` instance.
  ASMJIT_API virtual ~BaseCompiler() noexcept;

  //! \}

  //! \name Function API
  //! \{

  //! Returns the current function.
  inline FuncNode* func() const noexcept { return _func; }

  //! Creates a new `FuncNode`.
  ASMJIT_API FuncNode* newFunc(const FuncSignature& sign) noexcept;
  //! Adds a function `node` to the stream.
  ASMJIT_API FuncNode* addFunc(FuncNode* func);
  //! Adds a new function.
  ASMJIT_API FuncNode* addFunc(const FuncSignature& sign);
  //! Emits a sentinel that marks the end of the current function.
  ASMJIT_API Error endFunc();

  //! Sets a function argument at `argIndex` to `reg`.
  ASMJIT_API Error setArg(uint32_t argIndex, const BaseReg& reg);

  //! Creates a new `FuncRetNode`.
  ASMJIT_API FuncRetNode* newRet(const Operand_& o0, const Operand_& o1) noexcept;
  //! Adds a new `FuncRetNode`.
  ASMJIT_API FuncRetNode* addRet(const Operand_& o0, const Operand_& o1) noexcept;

  //! \}

  //! \name Function Calls
  //! \{

  //! Creates a new `FuncCallNode`.
  ASMJIT_API FuncCallNode* newCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept;
  //! Adds a new `FuncCallNode`.
  ASMJIT_API FuncCallNode* addCall(uint32_t instId, const Operand_& o0, const FuncSignature& sign) noexcept;

  //! \}

  //! \name Virtual Registers
  //! \{

  //! Creates a new virtual register representing the given `typeId` and `signature`.
  ASMJIT_API VirtReg* newVirtReg(uint32_t typeId, uint32_t signature, const char* name) noexcept;

  //! Creates a new virtual register of `typeId` and initializes `out` to it.
  ASMJIT_API Error _newReg(BaseReg& out, uint32_t typeId, const char* name = nullptr);
  //! Like `_newReg()`, but the name is built from a printf-like format string.
  ASMJIT_API Error _newRegFmt(BaseReg& out, uint32_t typeId, const char* fmt, ...);

  //! Creates a new virtual register compatible with `ref` and initializes `out` to it.
  ASMJIT_API Error _newReg(BaseReg& out, const BaseReg& ref, const char* name = nullptr);
  //! Like `_newReg()`, but the name is built from a printf-like format string.
  ASMJIT_API Error _newRegFmt(BaseReg& out, const BaseReg& ref, const char* fmt, ...);

  //! Tests whether the given `id` is a valid virtual register id.
  inline bool isVirtIdValid(uint32_t id) const noexcept {
    uint32_t index = Operand::virtIdToIndex(id);
    return index < _vRegArray.size();
  }
  //! Tests whether the given `reg` is a virtual register having a valid id.
  inline bool isVirtRegValid(const BaseReg& reg) const noexcept {
    return isVirtIdValid(reg.id());
  }

  //! Returns `VirtReg` associated with the given `id`.
  inline VirtReg* virtRegById(uint32_t id) const noexcept {
    ASMJIT_ASSERT(isVirtIdValid(id));
    return _vRegArray[Operand::virtIdToIndex(id)];
  }
  //! Returns `VirtReg` associated with the given `reg`.
  inline VirtReg* virtRegByReg(const BaseReg& reg) const noexcept { return virtRegById(reg.id()); }
  //! Returns `VirtReg` associated with the given `index`.
  inline VirtReg* virtRegByIndex(uint32_t index) const noexcept { return _vRegArray[index]; }

  //! Returns an array of all virtual registers managed by the Compiler.
  inline const ZoneVector<VirtReg*>& virtRegs() const noexcept { return _vRegArray; }

  //! \}

  //! \name Stack
  //! \{

  ASMJIT_API Error _newStack(BaseMem& out, uint32_t size, uint32_t alignment, const char* name = nullptr);

  //! \}

  //! \name Constants
  //! \{

  ASMJIT_API Error _newConst(BaseMem& out, uint32_t scope, const void* data, size_t size);

  //! \}

  //! \name Miscellaneous
  //! \{

  //! Rename the given virtual register `reg` to a formatted string `fmt`.
  //!
  //! \note Only new name will appear in the logger.
  ASMJIT_API void rename(const BaseReg& reg, const char* fmt, ...);

  //! \}

  // TODO: These should be removed
  inline void alloc(BaseReg& reg) { ASMJIT_UNUSED(reg); }
  inline void spill(BaseReg& reg) { ASMJIT_UNUSED(reg); }

  //! \name Events
  //! \{

  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;

  //! \}
};
315
316 // ============================================================================
317 // [asmjit::FuncNode]
318 // ============================================================================
319
//! Function entry (BaseCompiler).
class FuncNode : public LabelNode {
public:
  ASMJIT_NONCOPYABLE(FuncNode)

  //! Function detail.
  FuncDetail _funcDetail;
  //! Function frame.
  FuncFrame _frame;
  //! Function exit (label).
  LabelNode* _exitNode;
  //! Function end (sentinel).
  SentinelNode* _end;
  //! Arguments array as `VirtReg`.
  VirtReg** _args;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `FuncNode` instance.
  //!
  //! Always use `BaseCompiler::addFunc()` to create `FuncNode`.
  ASMJIT_INLINE FuncNode(BaseBuilder* cb) noexcept
    : LabelNode(cb),
      _funcDetail(),
      _frame(),
      _exitNode(nullptr),
      _end(nullptr),
      _args(nullptr) {
    setType(kNodeFunc);
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns function exit `LabelNode`.
  inline LabelNode* exitNode() const noexcept { return _exitNode; }
  //! Returns function exit label.
  inline Label exitLabel() const noexcept { return _exitNode->label(); }

  //! Returns "End of Func" sentinel.
  inline SentinelNode* endNode() const noexcept { return _end; }

  //! Returns function declaration.
  inline FuncDetail& detail() noexcept { return _funcDetail; }
  //! Returns function declaration.
  inline const FuncDetail& detail() const noexcept { return _funcDetail; }

  //! Returns function frame.
  inline FuncFrame& frame() noexcept { return _frame; }
  //! Returns function frame.
  inline const FuncFrame& frame() const noexcept { return _frame; }

  //! Returns arguments count.
  inline uint32_t argCount() const noexcept { return _funcDetail.argCount(); }
  //! Returns returns count.
  inline uint32_t retCount() const noexcept { return _funcDetail.retCount(); }

  //! Returns arguments list.
  inline VirtReg** args() const noexcept { return _args; }

  //! Returns argument at `i`.
  inline VirtReg* arg(uint32_t i) const noexcept {
    ASMJIT_ASSERT(i < argCount());
    return _args[i];
  }

  //! Sets argument at `i`.
  inline void setArg(uint32_t i, VirtReg* vReg) noexcept {
    ASMJIT_ASSERT(i < argCount());
    _args[i] = vReg;
  }

  //! Resets argument at `i`.
  inline void resetArg(uint32_t i) noexcept {
    ASMJIT_ASSERT(i < argCount());
    _args[i] = nullptr;
  }

  //! Returns the attributes of the function's frame.
  inline uint32_t attributes() const noexcept { return _frame.attributes(); }
  //! Adds `attrs` to the function frame's attributes.
  inline void addAttributes(uint32_t attrs) noexcept { _frame.addAttributes(attrs); }

  //! \}
};
406
407 // ============================================================================
408 // [asmjit::FuncRetNode]
409 // ============================================================================
410
//! Function return (BaseCompiler).
class FuncRetNode : public InstNode {
public:
  ASMJIT_NONCOPYABLE(FuncRetNode)

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `FuncRetNode` instance.
  inline FuncRetNode(BaseBuilder* cb) noexcept : InstNode(cb, BaseInst::kIdAbstract, 0, 0) {
    // Uses the abstract instruction id; the node type identifies the return.
    _any._nodeType = kNodeFuncRet;
  }

  //! \}
};
426
427 // ============================================================================
428 // [asmjit::FuncCallNode]
429 // ============================================================================
430
//! Function call (BaseCompiler).
class FuncCallNode : public InstNode {
public:
  ASMJIT_NONCOPYABLE(FuncCallNode)

  //! Function detail.
  FuncDetail _funcDetail;
  //! Return value operands (up to 2).
  Operand_ _rets[2];
  //! Arguments.
  Operand_* _args;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `FuncCallNode` instance. The argument array `_args` is
  //! allocated separately by `BaseCompiler::newCall()`.
  inline FuncCallNode(BaseBuilder* cb, uint32_t instId, uint32_t options) noexcept
    : InstNode(cb, instId, options, kBaseOpCapacity),
      _funcDetail(),
      _args(nullptr) {
    setType(kNodeFuncCall);
    _resetOps();
    _rets[0].reset();
    _rets[1].reset();
    addFlags(kFlagIsRemovable);
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Sets the function signature.
  inline Error setSignature(const FuncSignature& sign) noexcept {
    return _funcDetail.init(sign);
  }

  //! Returns the function detail.
  inline FuncDetail& detail() noexcept { return _funcDetail; }
  //! Returns the function detail.
  inline const FuncDetail& detail() const noexcept { return _funcDetail; }

  //! Returns the target operand.
  inline Operand& target() noexcept { return _opArray[0].as<Operand>(); }
  //! \overload
  inline const Operand& target() const noexcept { return _opArray[0].as<Operand>(); }

  //! Returns the number of function arguments.
  inline uint32_t argCount() const noexcept { return _funcDetail.argCount(); }
  //! Returns the number of function return values.
  inline uint32_t retCount() const noexcept { return _funcDetail.retCount(); }

  //! Returns the return value at `i`.
  inline Operand& ret(uint32_t i = 0) noexcept {
    ASMJIT_ASSERT(i < 2);
    return _rets[i].as<Operand>();
  }
  //! \overload
  inline const Operand& ret(uint32_t i = 0) const noexcept {
    ASMJIT_ASSERT(i < 2);
    return _rets[i].as<Operand>();
  }

  //! Returns the function argument at `i`.
  inline Operand& arg(uint32_t i) noexcept {
    ASMJIT_ASSERT(i < kFuncArgCountLoHi);
    return _args[i].as<Operand>();
  }
  //! \overload
  inline const Operand& arg(uint32_t i) const noexcept {
    ASMJIT_ASSERT(i < kFuncArgCountLoHi);
    return _args[i].as<Operand>();
  }

  //! Sets the function argument at `i` to `op`.
  ASMJIT_API bool _setArg(uint32_t i, const Operand_& op) noexcept;
  //! Sets the function return value at `i` to `op`.
  ASMJIT_API bool _setRet(uint32_t i, const Operand_& op) noexcept;

  //! Sets the function argument at `i` to `reg`.
  inline bool setArg(uint32_t i, const BaseReg& reg) noexcept { return _setArg(i, reg); }
  //! Sets the function argument at `i` to `imm`.
  inline bool setArg(uint32_t i, const Imm& imm) noexcept { return _setArg(i, imm); }

  //! Sets the function return value at `i` to `var`.
  inline bool setRet(uint32_t i, const BaseReg& reg) noexcept { return _setRet(i, reg); }

  //! \}
};
520
521 // ============================================================================
522 // [asmjit::FuncPass]
523 // ============================================================================
524
//! Pass that visits every `FuncNode` in the stream (BaseCompiler).
class ASMJIT_VIRTAPI FuncPass : public Pass {
public:
  ASMJIT_NONCOPYABLE(FuncPass)
  typedef Pass Base;

  //! \name Construction & Destruction
  //! \{

  //! Creates a `FuncPass` with the given `name`.
  ASMJIT_API FuncPass(const char* name) noexcept;

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the associated `BaseCompiler`.
  inline BaseCompiler* cc() const noexcept { return static_cast<BaseCompiler*>(_cb); }

  //! \}

  //! \name Run
  //! \{

  //! Calls `runOnFunction()` on each `FuncNode` node found.
  ASMJIT_API Error run(Zone* zone, Logger* logger) noexcept override;

  //! Called once per `FuncNode`.
  virtual Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept = 0;

  //! \}
};
556
557 //! \}
558
559 ASMJIT_END_NAMESPACE
560
561 #endif // !ASMJIT_NO_COMPILER
562 #endif // _ASMJIT_CORE_COMPILER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/constpool.h"
8 #include "../core/support.h"
9
10 ASMJIT_BEGIN_NAMESPACE
11
12 // ============================================================================
13 // [asmjit::ConstPool - Construction / Destruction]
14 // ============================================================================
15
// The constructor simply delegates to `reset()`, which initializes all trees,
// gap lists and sizes. The destructor is trivial - all nodes and gaps are
// allocated from the zone and released with it.
ConstPool::ConstPool(Zone* zone) noexcept { reset(zone); }
ConstPool::~ConstPool() noexcept {}
18
19 // ============================================================================
20 // [asmjit::ConstPool - Reset]
21 // ============================================================================
22
23 void ConstPool::reset(Zone* zone) noexcept {
24 _zone = zone;
25
26 size_t dataSize = 1;
27 for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_tree); i++) {
28 _tree[i].reset();
29 _tree[i].setDataSize(dataSize);
30 _gaps[i] = nullptr;
31 dataSize <<= 1;
32 }
33
34 _gapPool = nullptr;
35 _size = 0;
36 _alignment = 0;
37 }
38
39 // ============================================================================
40 // [asmjit::ConstPool - Ops]
41 // ============================================================================
42
43 static ASMJIT_INLINE ConstPool::Gap* ConstPool_allocGap(ConstPool* self) noexcept {
44 ConstPool::Gap* gap = self->_gapPool;
45 if (!gap)
46 return self->_zone->allocT<ConstPool::Gap>();
47
48 self->_gapPool = gap->_next;
49 return gap;
50 }
51
// Returns `gap` to the pool's internal free-list so `ConstPool_allocGap()`
// can recycle it without another zone allocation.
static ASMJIT_INLINE void ConstPool_freeGap(ConstPool* self, ConstPool::Gap* gap) noexcept {
  gap->_next = self->_gapPool;
  self->_gapPool = gap;
}
56
57 static void ConstPool_addGap(ConstPool* self, size_t offset, size_t size) noexcept {
58 ASMJIT_ASSERT(size > 0);
59
60 while (size > 0) {
61 size_t gapIndex;
62 size_t gapSize;
63
64 if (size >= 16 && Support::isAligned<size_t>(offset, 16)) {
65 gapIndex = ConstPool::kIndex16;
66 gapSize = 16;
67 }
68 else if (size >= 8 && Support::isAligned<size_t>(offset, 8)) {
69 gapIndex = ConstPool::kIndex8;
70 gapSize = 8;
71 }
72 else if (size >= 4 && Support::isAligned<size_t>(offset, 4)) {
73 gapIndex = ConstPool::kIndex4;
74 gapSize = 4;
75 }
76 else if (size >= 2 && Support::isAligned<size_t>(offset, 2)) {
77 gapIndex = ConstPool::kIndex2;
78 gapSize = 2;
79 }
80 else {
81 gapIndex = ConstPool::kIndex1;
82 gapSize = 1;
83 }
84
85 // We don't have to check for errors here, if this failed nothing really
86 // happened (just the gap won't be visible) and it will fail again at
87 // place where the same check would generate `kErrorOutOfMemory` error.
88 ConstPool::Gap* gap = ConstPool_allocGap(self);
89 if (!gap)
90 return;
91
92 gap->_next = self->_gaps[gapIndex];
93 self->_gaps[gapIndex] = gap;
94
95 gap->_offset = offset;
96 gap->_size = gapSize;
97
98 offset += gapSize;
99 size -= gapSize;
100 }
101 }
102
103 Error ConstPool::add(const void* data, size_t size, size_t& dstOffset) noexcept {
104 size_t treeIndex;
105
106 if (size == 32)
107 treeIndex = kIndex32;
108 else if (size == 16)
109 treeIndex = kIndex16;
110 else if (size == 8)
111 treeIndex = kIndex8;
112 else if (size == 4)
113 treeIndex = kIndex4;
114 else if (size == 2)
115 treeIndex = kIndex2;
116 else if (size == 1)
117 treeIndex = kIndex1;
118 else
119 return DebugUtils::errored(kErrorInvalidArgument);
120
121 ConstPool::Node* node = _tree[treeIndex].get(data);
122 if (node) {
123 dstOffset = node->_offset;
124 return kErrorOk;
125 }
126
127 // Before incrementing the current offset try if there is a gap that can
128 // be used for the requested data.
129 size_t offset = ~size_t(0);
130 size_t gapIndex = treeIndex;
131
132 while (gapIndex != kIndexCount - 1) {
133 ConstPool::Gap* gap = _gaps[treeIndex];
134
135 // Check if there is a gap.
136 if (gap) {
137 size_t gapOffset = gap->_offset;
138 size_t gapSize = gap->_size;
139
140 // Destroy the gap for now.
141 _gaps[treeIndex] = gap->_next;
142 ConstPool_freeGap(this, gap);
143
144 offset = gapOffset;
145 ASMJIT_ASSERT(Support::isAligned<size_t>(offset, size));
146
147 gapSize -= size;
148 if (gapSize > 0)
149 ConstPool_addGap(this, gapOffset, gapSize);
150 }
151
152 gapIndex++;
153 }
154
155 if (offset == ~size_t(0)) {
156 // Get how many bytes have to be skipped so the address is aligned accordingly
157 // to the 'size'.
158 size_t diff = Support::alignUpDiff<size_t>(_size, size);
159
160 if (diff != 0) {
161 ConstPool_addGap(this, _size, diff);
162 _size += diff;
163 }
164
165 offset = _size;
166 _size += size;
167 }
168
169 // Add the initial node to the right index.
170 node = ConstPool::Tree::_newNode(_zone, data, size, offset, false);
171 if (!node) return DebugUtils::errored(kErrorOutOfMemory);
172
173 _tree[treeIndex].insert(node);
174 _alignment = Support::max<size_t>(_alignment, size);
175
176 dstOffset = offset;
177
178 // Now create a bunch of shared constants that are based on the data pattern.
179 // We stop at size 4, it probably doesn't make sense to split constants down
180 // to 1 byte.
181 size_t pCount = 1;
182 while (size > 4) {
183 size >>= 1;
184 pCount <<= 1;
185
186 ASMJIT_ASSERT(treeIndex != 0);
187 treeIndex--;
188
189 const uint8_t* pData = static_cast<const uint8_t*>(data);
190 for (size_t i = 0; i < pCount; i++, pData += size) {
191 node = _tree[treeIndex].get(pData);
192 if (node) continue;
193
194 node = ConstPool::Tree::_newNode(_zone, pData, size, offset + (i * size), true);
195 _tree[treeIndex].insert(node);
196 }
197 }
198
199 return kErrorOk;
200 }
201
202 // ============================================================================
203 // [asmjit::ConstPool - Reset]
204 // ============================================================================
205
206 struct ConstPoolFill {
207 inline ConstPoolFill(uint8_t* dst, size_t dataSize) noexcept :
208 _dst(dst),
209 _dataSize(dataSize) {}
210
211 inline void operator()(const ConstPool::Node* node) noexcept {
212 if (!node->_shared)
213 memcpy(_dst + node->_offset, node->data(), _dataSize);
214 }
215
216 uint8_t* _dst;
217 size_t _dataSize;
218 };
219
220 void ConstPool::fill(void* dst) const noexcept {
221 // Clears possible gaps, asmjit should never emit garbage to the output.
222 memset(dst, 0, _size);
223
224 ConstPoolFill filler(static_cast<uint8_t*>(dst), 1);
225 for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_tree); i++) {
226 _tree[i].forEach(filler);
227 filler._dataSize <<= 1;
228 }
229 }
230
231 // ============================================================================
232 // [asmjit::ConstPool - Unit]
233 // ============================================================================
234
235 #if defined(ASMJIT_TEST)
// Exercises ConstPool end-to-end: deduplication and cached offsets, the
// splitting of 8-byte constants into shared 4-byte slices, gap creation on
// misaligned offsets and their later reuse, reset, and alignment growth.
UNIT(const_pool) {
  Zone zone(32384 - Zone::kBlockOverhead);
  ConstPool pool(&zone);

  uint32_t i;
  uint32_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 1000000;

  INFO("Adding %u constants to the pool.", kCount);
  {
    size_t prevOffset;
    size_t curOffset;
    uint64_t c = 0x0101010101010101u;

    EXPECT(pool.add(&c, 8, prevOffset) == kErrorOk);
    EXPECT(prevOffset == 0);

    // Distinct 8-byte constants must be laid out sequentially.
    for (i = 1; i < kCount; i++) {
      c++;
      EXPECT(pool.add(&c, 8, curOffset) == kErrorOk);
      EXPECT(prevOffset + 8 == curOffset);
      EXPECT(pool.size() == (i + 1) * 8);
      prevOffset = curOffset;
    }

    EXPECT(pool.alignment() == 8);
  }

  INFO("Retrieving %u constants from the pool.", kCount);
  {
    uint64_t c = 0x0101010101010101u;

    // Re-adding an existing constant must return its cached offset and
    // must not grow the pool.
    for (i = 0; i < kCount; i++) {
      size_t offset;
      EXPECT(pool.add(&c, 8, offset) == kErrorOk);
      EXPECT(offset == i * 8);
      c++;
    }
  }

  INFO("Checking if the constants were split into 4-byte patterns");
  {
    // Each 8-byte constant registered shared 4-byte slices; a matching
    // 4-byte pattern resolves to the parent constant's offset.
    uint32_t c = 0x01010101;
    for (i = 0; i < kCount; i++) {
      size_t offset;
      EXPECT(pool.add(&c, 4, offset) == kErrorOk);
      EXPECT(offset == i * 8);
      c++;
    }
  }

  INFO("Adding 2 byte constant to misalign the current offset");
  {
    uint16_t c = 0xFFFF;
    size_t offset;

    EXPECT(pool.add(&c, 2, offset) == kErrorOk);
    EXPECT(offset == kCount * 8);
    EXPECT(pool.alignment() == 8);
  }

  INFO("Adding 8 byte constant to check if pool gets aligned again");
  {
    uint64_t c = 0xFFFFFFFFFFFFFFFFu;
    size_t offset;

    // The 6 padding bytes inserted here become gaps for the next test.
    EXPECT(pool.add(&c, 8, offset) == kErrorOk);
    EXPECT(offset == kCount * 8 + 8);
  }

  INFO("Adding 2 byte constant to verify the gap is filled");
  {
    uint16_t c = 0xFFFE;
    size_t offset;

    // Must land inside the previous padding, not at the end of the pool.
    EXPECT(pool.add(&c, 2, offset) == kErrorOk);
    EXPECT(offset == kCount * 8 + 2);
    EXPECT(pool.alignment() == 8);
  }

  INFO("Checking reset functionality");
  {
    pool.reset(&zone);
    zone.reset();

    EXPECT(pool.size() == 0);
    EXPECT(pool.alignment() == 0);
  }

  INFO("Checking pool alignment when combined constants are added");
  {
    uint8_t bytes[32] = { 0 };
    size_t offset;

    pool.add(bytes, 1, offset);
    EXPECT(pool.size() == 1);
    EXPECT(pool.alignment() == 1);
    EXPECT(offset == 0);

    pool.add(bytes, 2, offset);
    EXPECT(pool.size() == 4);
    EXPECT(pool.alignment() == 2);
    EXPECT(offset == 2);

    pool.add(bytes, 4, offset);
    EXPECT(pool.size() == 8);
    EXPECT(pool.alignment() == 4);
    EXPECT(offset == 4);

    pool.add(bytes, 4, offset);
    EXPECT(pool.size() == 8);
    EXPECT(pool.alignment() == 4);
    EXPECT(offset == 4);

    pool.add(bytes, 32, offset);
    EXPECT(pool.size() == 64);
    EXPECT(pool.alignment() == 32);
    EXPECT(offset == 32);
  }
}
355 #endif
356
357 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_CONSTPOOL_H
7 #define _ASMJIT_CORE_CONSTPOOL_H
8
9 #include "../core/support.h"
10 #include "../core/zone.h"
11 #include "../core/zonetree.h"
12
13 ASMJIT_BEGIN_NAMESPACE
14
15 //! \addtogroup asmjit_core
16 //! \{
17
18 // ============================================================================
19 // [asmjit::ConstPool]
20 // ============================================================================
21
//! Constant pool.
//!
//! Stores unique constants of 1, 2, 4, 8, 16 or 32 bytes. Constants are
//! deduplicated and larger constants are additionally split into shared
//! sub-constants (see `add()`). The pool only computes the layout; the
//! final bytes are emitted through `fill()`.
class ConstPool {
public:
  ASMJIT_NONCOPYABLE(ConstPool)

  //! Constant pool scope.
  enum Scope : uint32_t {
    //! Local constant, always embedded right after the current function.
    kScopeLocal = 0,
    //! Global constant, embedded at the end of the currently compiled code.
    kScopeGlobal = 1
  };

  //! \cond INTERNAL

  //! Index of a given size in const-pool table.
  //!
  //! The index is `log2(size)`, so each tree / gap bucket serves exactly one
  //! power-of-two data size.
  enum Index : uint32_t {
    kIndex1 = 0,
    kIndex2 = 1,
    kIndex4 = 2,
    kIndex8 = 3,
    kIndex16 = 4,
    kIndex32 = 5,
    kIndexCount = 6
  };

  //! Zone-allocated const-pool gap created by two differently aligned constants.
  struct Gap {
    Gap* _next; //!< Pointer to the next gap
    size_t _offset; //!< Offset of the gap.
    size_t _size; //!< Remaining bytes of the gap (basically a gap size).
  };

  //! Zone-allocated const-pool node.
  class Node : public ZoneTreeNodeT<Node> {
  public:
    ASMJIT_NONCOPYABLE(Node)

    inline Node(size_t offset, bool shared) noexcept
      : ZoneTreeNodeT<Node>(),
        _shared(shared),
        _offset(uint32_t(offset)) {}

    //! Returns the constant's payload, stored in the same allocation right
    //! after the node itself (see `Tree::_newNode()`).
    inline void* data() const noexcept {
      return static_cast<void*>(const_cast<ConstPool::Node*>(this) + 1);
    }

    uint32_t _shared : 1; //!< If this constant is shared with another.
    uint32_t _offset; //!< Data offset from the beginning of the pool.
  };

  //! Data comparer used internally.
  class Compare {
  public:
    inline Compare(size_t dataSize) noexcept
      : _dataSize(dataSize) {}

    //! Compares the payloads of two nodes.
    inline int operator()(const Node& a, const Node& b) const noexcept {
      return ::memcmp(a.data(), b.data(), _dataSize);
    }

    //! Compares a node's payload against raw `data`.
    inline int operator()(const Node& a, const void* data) const noexcept {
      return ::memcmp(a.data(), data, _dataSize);
    }

    size_t _dataSize;
  };

  //! Zone-allocated const-pool tree.
  struct Tree {
    inline explicit Tree(size_t dataSize = 0) noexcept
      : _tree(),
        _size(0),
        _dataSize(dataSize) {}

    inline void reset() noexcept {
      _tree.reset();
      _size = 0;
    }

    inline bool empty() const noexcept { return _size == 0; }
    inline size_t size() const noexcept { return _size; }

    //! Sets the payload size served by this tree; only valid while empty.
    inline void setDataSize(size_t dataSize) noexcept {
      ASMJIT_ASSERT(empty());
      _dataSize = dataSize;
    }

    //! Returns the node whose payload equals `data`, or null if not present.
    inline Node* get(const void* data) noexcept {
      Compare cmp(_dataSize);
      return _tree.get(data, cmp);
    }

    inline void insert(Node* node) noexcept {
      Compare cmp(_dataSize);
      _tree.insert(node, cmp);
      _size++;
    }

    //! Calls `visitor(node)` for every node, in order. Iterative in-order
    //! traversal with an explicit stack bounded by `Globals::kMaxTreeHeight`.
    template<typename Visitor>
    inline void forEach(Visitor& visitor) const noexcept {
      Node* node = _tree.root();
      if (!node) return;

      Node* stack[Globals::kMaxTreeHeight];
      size_t top = 0;

      for (;;) {
        // Descend to the leftmost node, remembering ancestors on the stack.
        Node* left = node->left();
        if (left != nullptr) {
          ASMJIT_ASSERT(top != Globals::kMaxTreeHeight);
          stack[top++] = node;

          node = left;
          continue;
        }

        for (;;) {
          visitor(node);
          node = node->right();

          if (node != nullptr)
            break;

          if (top == 0)
            return;

          node = stack[--top];
        }
      }
    }

    //! Allocates a node and copies `size` bytes of `data` right after it -
    //! one zone allocation holds both the node header and its payload.
    static inline Node* _newNode(Zone* zone, const void* data, size_t size, size_t offset, bool shared) noexcept {
      Node* node = zone->allocT<Node>(sizeof(Node) + size);
      if (ASMJIT_UNLIKELY(!node)) return nullptr;

      node = new(node) Node(offset, shared);
      memcpy(node->data(), data, size);
      return node;
    }

    //! RB tree.
    ZoneTree<Node> _tree;
    //! Size of the tree (number of nodes).
    size_t _size;
    //! Size of the data.
    size_t _dataSize;
  };

  //! \endcond

  //! Zone allocator.
  Zone* _zone;
  //! Tree per size.
  Tree _tree[kIndexCount];
  //! Gaps per size.
  Gap* _gaps[kIndexCount];
  //! Gaps pool
  Gap* _gapPool;

  //! Size of the pool (in bytes).
  size_t _size;
  //! Required pool alignment.
  size_t _alignment;

  //! \name Construction & Destruction
  //! \{

  ASMJIT_API ConstPool(Zone* zone) noexcept;
  ASMJIT_API ~ConstPool() noexcept;

  ASMJIT_API void reset(Zone* zone) noexcept;

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the constant-pool is empty.
  inline bool empty() const noexcept { return _size == 0; }
  //! Returns the size of the constant-pool in bytes.
  inline size_t size() const noexcept { return _size; }
  //! Returns minimum alignment.
  inline size_t alignment() const noexcept { return _alignment; }

  //! \}

  //! \name Utilities
  //! \{

  //! Adds a constant to the constant pool.
  //!
  //! The constant must have known size, which is 1, 2, 4, 8, 16 or 32 bytes.
  //! The constant is added to the pool only if it doesn't exist, otherwise
  //! cached value is returned.
  //!
  //! AsmJit is able to subdivide added constants, so for example if you add
  //! 8-byte constant 0x1122334455667788 it will create the following slots:
  //!
  //!   8-byte: 0x1122334455667788
  //!   4-byte: 0x11223344, 0x55667788
  //!
  //! The reason is that when combining MMX/SSE/AVX code some patterns are used
  //! frequently. However, AsmJit is not able to reallocate a constant that has
  //! been already added. For example if you try to add 4-byte constant and then
  //! 8-byte constant having the same 4-byte pattern as the previous one, two
  //! independent slots will be generated by the pool.
  ASMJIT_API Error add(const void* data, size_t size, size_t& dstOffset) noexcept;

  //! Fills the destination with the content of this constant pool.
  //!
  //! `dst` must point to at least `size()` bytes; gaps between constants
  //! are zeroed.
  ASMJIT_API void fill(void* dst) const noexcept;
};
234
235 //! \}
236
237 ASMJIT_END_NAMESPACE
238
239 #endif // _ASMJIT_CORE_CONSTPOOL_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
#include "../core/api-build_p.h"
#include "../core/cpuinfo.h"

#include <atomic>

#if !defined(_WIN32)
#include <errno.h>
#include <sys/utsname.h>
#include <unistd.h>
#endif
14
15 ASMJIT_BEGIN_NAMESPACE
16
17 // ============================================================================
18 // [asmjit::CpuInfo - Detect - CPU NumThreads]
19 // ============================================================================
20
#if defined(_WIN32)
// Windows: number of logical processors as reported by the system.
static inline uint32_t detectHWThreadCount() noexcept {
  SYSTEM_INFO info;
  ::GetSystemInfo(&info);
  return info.dwNumberOfProcessors;
}
#elif defined(_SC_NPROCESSORS_ONLN)
// POSIX: processors currently online; falls back to 1 when `sysconf()`
// fails (negative result) or reports zero.
static inline uint32_t detectHWThreadCount() noexcept {
  long res = ::sysconf(_SC_NPROCESSORS_ONLN);
  return res <= 0 ? uint32_t(1) : uint32_t(res);
}
#else
// Unknown platform - assume a single hardware thread.
static inline uint32_t detectHWThreadCount() noexcept {
  return 1;
}
#endif
37
38 // ============================================================================
39 // [asmjit::CpuInfo - Detect - CPU Features]
40 // ============================================================================
41
// Forward declarations of the per-architecture CPU detectors. Each one is
// only declared (and its backend only compiled in) when both the backend is
// built and the host architecture matches.
#if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
namespace x86 { void detectCpu(CpuInfo& cpu) noexcept; }
#endif

#if defined(ASMJIT_BUILD_ARM) && ASMJIT_ARCH_ARM
namespace arm { void detectCpu(CpuInfo& cpu) noexcept; }
#endif
49
50 // ============================================================================
51 // [asmjit::CpuInfo - Detect - Static Initializer]
52 // ============================================================================
53
54 static uint32_t cpuInfoInitialized;
55 static CpuInfo cpuInfoGlobal(Globals::NoInit);
56
57 const CpuInfo& CpuInfo::host() noexcept {
58 // This should never cause a problem as the resulting information should
59 // always be the same.
60 if (!cpuInfoInitialized) {
61 CpuInfo cpuInfoLocal;
62
63 #if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
64 x86::detectCpu(cpuInfoLocal);
65 #endif
66
67 #if defined(ASMJIT_BUILD_ARM) && ASMJIT_ARCH_ARM
68 arm::detectCpu(cpuInfoLocal);
69 #endif
70
71 cpuInfoLocal._hwThreadCount = detectHWThreadCount();
72 cpuInfoGlobal = cpuInfoLocal;
73 cpuInfoInitialized = 1;
74 }
75
76 return cpuInfoGlobal;
77 }
78
79 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_CPUINFO_H
7 #define _ASMJIT_CORE_CPUINFO_H
8
9 #include "../core/arch.h"
10 #include "../core/features.h"
11 #include "../core/globals.h"
12 #include "../core/string.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \addtogroup asmjit_support
17 //! \{
18
19 // ============================================================================
20 // [asmjit::CpuInfo]
21 // ============================================================================
22
//! CPU information.
//!
//! Holds architecture information, CPU identification (family, model, brand,
//! stepping), vendor/brand strings and the detected feature set. Use
//! `CpuInfo::host()` to obtain information about the host CPU.
class CpuInfo {
public:
  //! CPU architecture information.
  ArchInfo _archInfo;
  //! CPU family ID.
  uint32_t _familyId;
  //! CPU model ID.
  uint32_t _modelId;
  //! CPU brand ID.
  uint32_t _brandId;
  //! CPU stepping.
  uint32_t _stepping;
  //! Processor type.
  uint32_t _processorType;
  //! Maximum number of addressable IDs for logical processors.
  uint32_t _maxLogicalProcessors;
  //! Cache line size (in bytes).
  uint32_t _cacheLineSize;
  //! Number of hardware threads.
  uint32_t _hwThreadCount;

  //! CPU vendor string.
  FixedString<16> _vendor;
  //! CPU brand string.
  FixedString<64> _brand;
  //! CPU features.
  BaseFeatures _features;

  //! \name Construction & Destruction
  //! \{

  //! Creates a zero-initialized CpuInfo (see `reset()`).
  inline CpuInfo() noexcept { reset(); }
  inline CpuInfo(const CpuInfo& other) noexcept = default;

  //! Creates an uninitialized CpuInfo - used for statics populated later
  //! (e.g. the lazily initialized global returned by `host()`).
  inline explicit CpuInfo(Globals::NoInit_) noexcept
    : _archInfo(Globals::NoInit),
      _features(Globals::NoInit) {};

  //! Returns the host CPU information.
  ASMJIT_API static const CpuInfo& host() noexcept;

  //! Initializes CpuInfo to the given architecture, see `ArchInfo`.
  inline void initArch(uint32_t archId, uint32_t archMode = 0) noexcept {
    _archInfo.init(archId, archMode);
  }

  //! Resets the whole structure to zero (byte-wise `memset`).
  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }

  //! \}

  //! \name Overloaded Operators
  //! \{

  inline CpuInfo& operator=(const CpuInfo& other) noexcept = default;

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the CPU architecture information.
  inline const ArchInfo& archInfo() const noexcept { return _archInfo; }
  //! Returns the CPU architecture id, see `ArchInfo::Id`.
  inline uint32_t archId() const noexcept { return _archInfo.archId(); }
  //! Returns the CPU architecture sub-id, see `ArchInfo::SubId`.
  inline uint32_t archSubId() const noexcept { return _archInfo.archSubId(); }

  //! Returns the CPU family ID.
  inline uint32_t familyId() const noexcept { return _familyId; }
  //! Returns the CPU model ID.
  inline uint32_t modelId() const noexcept { return _modelId; }
  //! Returns the CPU brand id.
  inline uint32_t brandId() const noexcept { return _brandId; }
  //! Returns the CPU stepping.
  inline uint32_t stepping() const noexcept { return _stepping; }
  //! Returns the processor type.
  inline uint32_t processorType() const noexcept { return _processorType; }
  //! Returns the number of maximum logical processors.
  inline uint32_t maxLogicalProcessors() const noexcept { return _maxLogicalProcessors; }

  //! Returns the size of a cache line flush.
  inline uint32_t cacheLineSize() const noexcept { return _cacheLineSize; }
  //! Returns number of hardware threads available.
  inline uint32_t hwThreadCount() const noexcept { return _hwThreadCount; }

  //! Returns the CPU vendor.
  inline const char* vendor() const noexcept { return _vendor.str; }
  //! Tests whether the CPU vendor is equal to `s`.
  inline bool isVendor(const char* s) const noexcept { return _vendor.eq(s); }

  //! Returns the CPU brand string.
  inline const char* brand() const noexcept { return _brand.str; }

  //! Returns all CPU features as `BaseFeatures`, cast to your arch-specific class
  //! if needed.
  template<typename T = BaseFeatures>
  inline const T& features() const noexcept { return _features.as<T>(); }

  //! Tests whether the CPU has the given `feature`.
  inline bool hasFeature(uint32_t featureId) const noexcept { return _features.has(featureId); }
  //! Adds the given CPU `feature` to the list of this CpuInfo features.
  inline CpuInfo& addFeature(uint32_t featureId) noexcept { _features.add(featureId); return *this; }

  //! \}
};
129
130 //! \}
131
132 ASMJIT_END_NAMESPACE
133
134 #endif // _ASMJIT_CORE_CPUINFO_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_DATATYPES_H
7 #define _ASMJIT_CORE_DATATYPES_H
8
9 #include "../core/globals.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_support
14 //! \{
15
16 // ============================================================================
17 // [asmjit::Data64]
18 // ============================================================================
19
//! 64-bit data useful for creating SIMD constants.
//!
//! All members alias the same 8 bytes; the `from*` factories build a value
//! and the `set*` members overwrite it in place.
union Data64 {
  //! Array of eight 8-bit signed integers.
  int8_t sb[8];
  //! Array of eight 8-bit unsigned integers.
  uint8_t ub[8];
  //! Array of four 16-bit signed integers.
  int16_t sw[4];
  //! Array of four 16-bit unsigned integers.
  uint16_t uw[4];
  //! Array of two 32-bit signed integers.
  int32_t sd[2];
  //! Array of two 32-bit unsigned integers.
  uint32_t ud[2];
  //! Array of one 64-bit signed integer.
  int64_t sq[1];
  //! Array of one 64-bit unsigned integer.
  uint64_t uq[1];

  //! Array of two SP-FP values.
  float sf[2];
  //! Array of one DP-FP value.
  double df[1];

  //! \name Construction & Destruction
  //! \{

  //! Sets all eight 8-bit signed integers.
  static inline Data64 fromI8(int8_t x0) noexcept {
    Data64 self;
    self.setI8(x0);
    return self;
  }

  //! Sets all eight 8-bit unsigned integers.
  static inline Data64 fromU8(uint8_t x0) noexcept {
    Data64 self;
    self.setU8(x0);
    return self;
  }

  //! Sets all eight 8-bit signed integers.
  static inline Data64 fromI8(
    int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7) noexcept {

    Data64 self;
    self.setI8(x0, x1, x2, x3, x4, x5, x6, x7);
    return self;
  }

  //! Sets all eight 8-bit unsigned integers.
  static inline Data64 fromU8(
    uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) noexcept {

    Data64 self;
    self.setU8(x0, x1, x2, x3, x4, x5, x6, x7);
    return self;
  }

  //! Sets all four 16-bit signed integers.
  static inline Data64 fromI16(int16_t x0) noexcept {
    Data64 self;
    self.setI16(x0);
    return self;
  }

  //! Sets all four 16-bit unsigned integers.
  static inline Data64 fromU16(uint16_t x0) noexcept {
    Data64 self;
    self.setU16(x0);
    return self;
  }

  //! Sets all four 16-bit signed integers.
  static inline Data64 fromI16(int16_t x0, int16_t x1, int16_t x2, int16_t x3) noexcept {
    Data64 self;
    self.setI16(x0, x1, x2, x3);
    return self;
  }

  //! Sets all four 16-bit unsigned integers.
  static inline Data64 fromU16(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3) noexcept {
    Data64 self;
    self.setU16(x0, x1, x2, x3);
    return self;
  }

  //! Sets all two 32-bit signed integers.
  static inline Data64 fromI32(int32_t x0) noexcept {
    Data64 self;
    self.setI32(x0);
    return self;
  }

  //! Sets all two 32-bit unsigned integers.
  static inline Data64 fromU32(uint32_t x0) noexcept {
    Data64 self;
    self.setU32(x0);
    return self;
  }

  //! Sets all two 32-bit signed integers.
  static inline Data64 fromI32(int32_t x0, int32_t x1) noexcept {
    Data64 self;
    self.setI32(x0, x1);
    return self;
  }

  //! Sets all two 32-bit unsigned integers.
  static inline Data64 fromU32(uint32_t x0, uint32_t x1) noexcept {
    Data64 self;
    self.setU32(x0, x1);
    return self;
  }

  //! Sets 64-bit signed integer.
  static inline Data64 fromI64(int64_t x0) noexcept {
    Data64 self;
    self.setI64(x0);
    return self;
  }

  //! Sets 64-bit unsigned integer.
  static inline Data64 fromU64(uint64_t x0) noexcept {
    Data64 self;
    self.setU64(x0);
    return self;
  }

  //! Sets all two SP-FP values.
  static inline Data64 fromF32(float x0) noexcept {
    Data64 self;
    self.setF32(x0);
    return self;
  }

  //! Sets all two SP-FP values.
  static inline Data64 fromF32(float x0, float x1) noexcept {
    Data64 self;
    self.setF32(x0, x1);
    return self;
  }

  //! Sets the DP-FP value.
  static inline Data64 fromF64(double x0) noexcept {
    Data64 self;
    self.setF64(x0);
    return self;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Sets all eight 8-bit signed integers.
  inline void setI8(int8_t x0) noexcept {
    setU8(uint8_t(x0));
  }

  //! Sets all eight 8-bit unsigned integers.
  inline void setU8(uint8_t x0) noexcept {
    // Broadcast the byte into all lanes via multiplication by a
    // repeated-0x01 pattern (64-bit when the target supports it natively).
    if (ASMJIT_ARCH_BITS >= 64) {
      uint64_t xq = uint64_t(x0) * 0x0101010101010101u;
      uq[0] = xq;
    }
    else {
      uint32_t xd = uint32_t(x0) * 0x01010101u;
      ud[0] = xd;
      ud[1] = xd;
    }
  }

  //! Sets all eight 8-bit signed integers.
  inline void setI8(
    int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7) noexcept {

    sb[0] = x0; sb[1] = x1; sb[2] = x2; sb[3] = x3;
    sb[4] = x4; sb[5] = x5; sb[6] = x6; sb[7] = x7;
  }

  //! Sets all eight 8-bit unsigned integers.
  inline void setU8(
    uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) noexcept {

    ub[0] = x0; ub[1] = x1; ub[2] = x2; ub[3] = x3;
    ub[4] = x4; ub[5] = x5; ub[6] = x6; ub[7] = x7;
  }

  //! Sets all four 16-bit signed integers.
  inline void setI16(int16_t x0) noexcept {
    setU16(uint16_t(x0));
  }

  //! Sets all four 16-bit unsigned integers.
  inline void setU16(uint16_t x0) noexcept {
    // Same broadcast-by-multiplication trick as `setU8()`, 16-bit lanes.
    if (ASMJIT_ARCH_BITS >= 64) {
      uint64_t xq = uint64_t(x0) * 0x0001000100010001u;
      uq[0] = xq;
    }
    else {
      uint32_t xd = uint32_t(x0) * 0x00010001u;
      ud[0] = xd;
      ud[1] = xd;
    }
  }

  //! Sets all four 16-bit signed integers.
  inline void setI16(int16_t x0, int16_t x1, int16_t x2, int16_t x3) noexcept {
    sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3;
  }

  //! Sets all four 16-bit unsigned integers.
  inline void setU16(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3) noexcept {
    uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3;
  }

  //! Sets all two 32-bit signed integers.
  inline void setI32(int32_t x0) noexcept {
    sd[0] = x0; sd[1] = x0;
  }

  //! Sets all two 32-bit unsigned integers.
  inline void setU32(uint32_t x0) noexcept {
    ud[0] = x0; ud[1] = x0;
  }

  //! Sets all two 32-bit signed integers.
  inline void setI32(int32_t x0, int32_t x1) noexcept {
    sd[0] = x0; sd[1] = x1;
  }

  //! Sets all two 32-bit unsigned integers.
  inline void setU32(uint32_t x0, uint32_t x1) noexcept {
    ud[0] = x0; ud[1] = x1;
  }

  //! Sets 64-bit signed integer.
  inline void setI64(int64_t x0) noexcept {
    sq[0] = x0;
  }

  //! Sets 64-bit unsigned integer.
  inline void setU64(uint64_t x0) noexcept {
    uq[0] = x0;
  }

  //! Sets all two SP-FP values.
  inline void setF32(float x0) noexcept {
    sf[0] = x0; sf[1] = x0;
  }

  //! Sets all two SP-FP values.
  inline void setF32(float x0, float x1) noexcept {
    sf[0] = x0; sf[1] = x1;
  }

  //! Sets the DP-FP value.
  inline void setF64(double x0) noexcept {
    df[0] = x0;
  }
};
282
283 // ============================================================================
284 // [asmjit::Data128]
285 // ============================================================================
286
//! 128-bit data useful for creating SIMD constants.
union Data128 {
  //! Array of sixteen 8-bit signed integers.
  int8_t sb[16];
  //! Array of sixteen 8-bit unsigned integers.
  uint8_t ub[16];
  //! Array of eight 16-bit signed integers.
  int16_t sw[8];
  //! Array of eight 16-bit unsigned integers.
  uint16_t uw[8];
  //! Array of four 32-bit signed integers.
  int32_t sd[4];
  //! Array of four 32-bit unsigned integers.
  uint32_t ud[4];
  //! Array of two 64-bit signed integers.
  int64_t sq[2];
  //! Array of two 64-bit unsigned integers.
  uint64_t uq[2];

  //! Array of four 32-bit single precision floating points.
  float sf[4];
  //! Array of two 64-bit double precision floating points.
  double df[2];

  //! \name Construction & Destruction
  //! \{

  //! Creates `Data128` with all sixteen 8-bit signed integers set to `x0`.
  static inline Data128 fromI8(int8_t x0) noexcept {
    Data128 self;
    self.setI8(x0);
    return self;
  }

  //! Creates `Data128` with all sixteen 8-bit unsigned integers set to `x0`.
  static inline Data128 fromU8(uint8_t x0) noexcept {
    Data128 self;
    self.setU8(x0);
    return self;
  }

  //! Creates `Data128` initialized by the given sixteen 8-bit signed integers.
  static inline Data128 fromI8(
    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
    int8_t x12, int8_t x13, int8_t x14, int8_t x15) noexcept {

    Data128 self;
    self.setI8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
    return self;
  }

  //! Creates `Data128` initialized by the given sixteen 8-bit unsigned integers.
  static inline Data128 fromU8(
    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) noexcept {

    Data128 self;
    self.setU8(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
    return self;
  }

  //! Creates `Data128` with all eight 16-bit signed integers set to `x0`.
  static inline Data128 fromI16(int16_t x0) noexcept {
    Data128 self;
    self.setI16(x0);
    return self;
  }

  //! Creates `Data128` with all eight 16-bit unsigned integers set to `x0`.
  static inline Data128 fromU16(uint16_t x0) noexcept {
    Data128 self;
    self.setU16(x0);
    return self;
  }

  //! Creates `Data128` initialized by the given eight 16-bit signed integers.
  static inline Data128 fromI16(
    int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7) noexcept {

    Data128 self;
    self.setI16(x0, x1, x2, x3, x4, x5, x6, x7);
    return self;
  }

  //! Creates `Data128` initialized by the given eight 16-bit unsigned integers.
  static inline Data128 fromU16(
    uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) noexcept {

    Data128 self;
    self.setU16(x0, x1, x2, x3, x4, x5, x6, x7);
    return self;
  }

  //! Creates `Data128` with all four 32-bit signed integers set to `x0`.
  static inline Data128 fromI32(int32_t x0) noexcept {
    Data128 self;
    self.setI32(x0);
    return self;
  }

  //! Creates `Data128` with all four 32-bit unsigned integers set to `x0`.
  static inline Data128 fromU32(uint32_t x0) noexcept {
    Data128 self;
    self.setU32(x0);
    return self;
  }

  //! Creates `Data128` initialized by the given four 32-bit signed integers.
  static inline Data128 fromI32(int32_t x0, int32_t x1, int32_t x2, int32_t x3) noexcept {
    Data128 self;
    self.setI32(x0, x1, x2, x3);
    return self;
  }

  //! Creates `Data128` initialized by the given four 32-bit unsigned integers.
  static inline Data128 fromU32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) noexcept {
    Data128 self;
    self.setU32(x0, x1, x2, x3);
    return self;
  }

  //! Creates `Data128` with both 64-bit signed integers set to `x0`.
  static inline Data128 fromI64(int64_t x0) noexcept {
    Data128 self;
    self.setI64(x0);
    return self;
  }

  //! Creates `Data128` with both 64-bit unsigned integers set to `x0`.
  static inline Data128 fromU64(uint64_t x0) noexcept {
    Data128 self;
    self.setU64(x0);
    return self;
  }

  //! Creates `Data128` initialized by the given two 64-bit signed integers.
  static inline Data128 fromI64(int64_t x0, int64_t x1) noexcept {
    Data128 self;
    self.setI64(x0, x1);
    return self;
  }

  //! Creates `Data128` initialized by the given two 64-bit unsigned integers.
  static inline Data128 fromU64(uint64_t x0, uint64_t x1) noexcept {
    Data128 self;
    self.setU64(x0, x1);
    return self;
  }

  //! Creates `Data128` with all four SP-FP floats set to `x0`.
  static inline Data128 fromF32(float x0) noexcept {
    Data128 self;
    self.setF32(x0);
    return self;
  }

  //! Creates `Data128` initialized by the given four SP-FP floats.
  static inline Data128 fromF32(float x0, float x1, float x2, float x3) noexcept {
    Data128 self;
    self.setF32(x0, x1, x2, x3);
    return self;
  }

  //! Creates `Data128` with both DP-FP floats set to `x0`.
  static inline Data128 fromF64(double x0) noexcept {
    Data128 self;
    self.setF64(x0);
    return self;
  }

  //! Creates `Data128` initialized by the given two DP-FP floats.
  static inline Data128 fromF64(double x0, double x1) noexcept {
    Data128 self;
    self.setF64(x0, x1);
    return self;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Sets all sixteen 8-bit signed integers.
  inline void setI8(int8_t x0) noexcept {
    setU8(uint8_t(x0));
  }

  //! Sets all sixteen 8-bit unsigned integers.
  inline void setU8(uint8_t x0) noexcept {
    // `ASMJIT_ARCH_BITS` is a compile-time constant - only one branch
    // survives, favoring 64-bit stores on 64-bit targets.
    if (ASMJIT_ARCH_BITS >= 64) {
      // Multiplying by 0x0101010101010101 broadcasts the byte into all
      // eight bytes of a 64-bit word.
      uint64_t xq = uint64_t(x0) * 0x0101010101010101u;
      uq[0] = xq;
      uq[1] = xq;
    }
    else {
      // Same broadcast trick for a 32-bit word.
      uint32_t xd = uint32_t(x0) * 0x01010101u;
      ud[0] = xd;
      ud[1] = xd;
      ud[2] = xd;
      ud[3] = xd;
    }
  }

  //! Sets all sixteen 8-bit signed integers.
  inline void setI8(
    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
    int8_t x12, int8_t x13, int8_t x14, int8_t x15) noexcept {

    sb[0 ] = x0 ; sb[1 ] = x1 ; sb[2 ] = x2 ; sb[3 ] = x3 ;
    sb[4 ] = x4 ; sb[5 ] = x5 ; sb[6 ] = x6 ; sb[7 ] = x7 ;
    sb[8 ] = x8 ; sb[9 ] = x9 ; sb[10] = x10; sb[11] = x11;
    sb[12] = x12; sb[13] = x13; sb[14] = x14; sb[15] = x15;
  }

  //! Sets all sixteen 8-bit unsigned integers.
  inline void setU8(
    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) noexcept {

    ub[0 ] = x0 ; ub[1 ] = x1 ; ub[2 ] = x2 ; ub[3 ] = x3 ;
    ub[4 ] = x4 ; ub[5 ] = x5 ; ub[6 ] = x6 ; ub[7 ] = x7 ;
    ub[8 ] = x8 ; ub[9 ] = x9 ; ub[10] = x10; ub[11] = x11;
    ub[12] = x12; ub[13] = x13; ub[14] = x14; ub[15] = x15;
  }

  //! Sets all eight 16-bit signed integers.
  inline void setI16(int16_t x0) noexcept {
    setU16(uint16_t(x0));
  }

  //! Sets all eight 16-bit unsigned integers.
  inline void setU16(uint16_t x0) noexcept {
    // Compile-time branch - prefer 64-bit stores on 64-bit targets.
    if (ASMJIT_ARCH_BITS >= 64) {
      // 0x0001000100010001 broadcasts the 16-bit value into all four
      // 16-bit lanes of a 64-bit word.
      uint64_t xq = uint64_t(x0) * 0x0001000100010001u;
      uq[0] = xq;
      uq[1] = xq;
    }
    else {
      uint32_t xd = uint32_t(x0) * 0x00010001u;
      ud[0] = xd;
      ud[1] = xd;
      ud[2] = xd;
      ud[3] = xd;
    }
  }

  //! Sets all eight 16-bit signed integers.
  inline void setI16(
    int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7) noexcept {

    sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3;
    sw[4] = x4; sw[5] = x5; sw[6] = x6; sw[7] = x7;
  }

  //! Sets all eight 16-bit unsigned integers.
  inline void setU16(
    uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) noexcept {

    uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3;
    uw[4] = x4; uw[5] = x5; uw[6] = x6; uw[7] = x7;
  }

  //! Sets all four 32-bit signed integers.
  inline void setI32(int32_t x0) noexcept {
    setU32(uint32_t(x0));
  }

  //! Sets all four 32-bit unsigned integers.
  inline void setU32(uint32_t x0) noexcept {
    // Compile-time branch - prefer 64-bit stores on 64-bit targets.
    if (ASMJIT_ARCH_BITS >= 64) {
      // Duplicates the 32-bit value into both halves of a 64-bit word.
      uint64_t t = (uint64_t(x0) << 32) + x0;
      uq[0] = t;
      uq[1] = t;
    }
    else {
      ud[0] = x0;
      ud[1] = x0;
      ud[2] = x0;
      ud[3] = x0;
    }
  }

  //! Sets all four 32-bit signed integers.
  inline void setI32(int32_t x0, int32_t x1, int32_t x2, int32_t x3) noexcept {
    sd[0] = x0; sd[1] = x1; sd[2] = x2; sd[3] = x3;
  }

  //! Sets all four 32-bit unsigned integers.
  inline void setU32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) noexcept {
    ud[0] = x0; ud[1] = x1; ud[2] = x2; ud[3] = x3;
  }

  //! Sets both 64-bit signed integers.
  inline void setI64(int64_t x0) noexcept {
    sq[0] = x0; sq[1] = x0;
  }

  //! Sets both 64-bit unsigned integers.
  inline void setU64(uint64_t x0) noexcept {
    uq[0] = x0; uq[1] = x0;
  }

  //! Sets both 64-bit signed integers.
  inline void setI64(int64_t x0, int64_t x1) noexcept {
    sq[0] = x0; sq[1] = x1;
  }

  //! Sets both 64-bit unsigned integers.
  inline void setU64(uint64_t x0, uint64_t x1) noexcept {
    uq[0] = x0; uq[1] = x1;
  }

  //! Sets all four SP-FP floats.
  inline void setF32(float x0) noexcept {
    sf[0] = x0; sf[1] = x0; sf[2] = x0; sf[3] = x0;
  }

  //! Sets all four SP-FP floats.
  inline void setF32(float x0, float x1, float x2, float x3) noexcept {
    sf[0] = x0; sf[1] = x1; sf[2] = x2; sf[3] = x3;
  }

  //! Sets both DP-FP floats.
  inline void setF64(double x0) noexcept {
    df[0] = x0; df[1] = x0;
  }

  //! Sets both DP-FP floats.
  inline void setF64(double x0, double x1) noexcept {
    df[0] = x0; df[1] = x1;
  }

  //! \}
};
627
628 // ============================================================================
629 // [asmjit::Data256]
630 // ============================================================================
631
//! 256-bit data useful for creating SIMD constants.
union Data256 {
  //! Array of thirty two 8-bit signed integers.
  int8_t sb[32];
  //! Array of thirty two 8-bit unsigned integers.
  uint8_t ub[32];
  //! Array of sixteen 16-bit signed integers.
  int16_t sw[16];
  //! Array of sixteen 16-bit unsigned integers.
  uint16_t uw[16];
  //! Array of eight 32-bit signed integers.
  int32_t sd[8];
  //! Array of eight 32-bit unsigned integers.
  uint32_t ud[8];
  //! Array of four 64-bit signed integers.
  int64_t sq[4];
  //! Array of four 64-bit unsigned integers.
  uint64_t uq[4];

  //! Array of eight 32-bit single precision floating points.
  float sf[8];
  //! Array of four 64-bit double precision floating points.
  double df[4];

  //! \name Construction & Destruction
  //! \{

  //! Creates `Data256` with all thirty two 8-bit signed integers set to `x0`.
  static inline Data256 fromI8(int8_t x0) noexcept {
    Data256 self;
    self.setI8(x0);
    return self;
  }

  //! Creates `Data256` with all thirty two 8-bit unsigned integers set to `x0`.
  static inline Data256 fromU8(uint8_t x0) noexcept {
    Data256 self;
    self.setU8(x0);
    return self;
  }

  //! Creates `Data256` initialized by the given thirty two 8-bit signed integers.
  static inline Data256 fromI8(
    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
    int8_t x12, int8_t x13, int8_t x14, int8_t x15,
    int8_t x16, int8_t x17, int8_t x18, int8_t x19,
    int8_t x20, int8_t x21, int8_t x22, int8_t x23,
    int8_t x24, int8_t x25, int8_t x26, int8_t x27,
    int8_t x28, int8_t x29, int8_t x30, int8_t x31) noexcept {

    Data256 self;
    self.setI8(
      x0, x1 , x2 , x3 , x4 , x5 , x6 , x7 , x8 , x9 , x10, x11, x12, x13, x14, x15,
      x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31);
    return self;
  }

  //! Creates `Data256` initialized by the given thirty two 8-bit unsigned integers.
  static inline Data256 fromU8(
    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15,
    uint8_t x16, uint8_t x17, uint8_t x18, uint8_t x19,
    uint8_t x20, uint8_t x21, uint8_t x22, uint8_t x23,
    uint8_t x24, uint8_t x25, uint8_t x26, uint8_t x27,
    uint8_t x28, uint8_t x29, uint8_t x30, uint8_t x31) noexcept {

    Data256 self;
    self.setU8(
      x0, x1 , x2 , x3 , x4 , x5 , x6 , x7 , x8 , x9 , x10, x11, x12, x13, x14, x15,
      x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31);
    return self;
  }

  //! Creates `Data256` with all sixteen 16-bit signed integers set to `x0`.
  static inline Data256 fromI16(int16_t x0) noexcept {
    Data256 self;
    self.setI16(x0);
    return self;
  }

  //! Creates `Data256` with all sixteen 16-bit unsigned integers set to `x0`.
  static inline Data256 fromU16(uint16_t x0) noexcept {
    Data256 self;
    self.setU16(x0);
    return self;
  }

  //! Creates `Data256` initialized by the given sixteen 16-bit signed integers.
  static inline Data256 fromI16(
    int16_t x0, int16_t x1, int16_t x2 , int16_t x3 , int16_t x4 , int16_t x5 , int16_t x6 , int16_t x7 ,
    int16_t x8, int16_t x9, int16_t x10, int16_t x11, int16_t x12, int16_t x13, int16_t x14, int16_t x15) noexcept {

    Data256 self;
    self.setI16(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
    return self;
  }

  //! Creates `Data256` initialized by the given sixteen 16-bit unsigned integers.
  static inline Data256 fromU16(
    uint16_t x0, uint16_t x1, uint16_t x2 , uint16_t x3 , uint16_t x4 , uint16_t x5 , uint16_t x6 , uint16_t x7 ,
    uint16_t x8, uint16_t x9, uint16_t x10, uint16_t x11, uint16_t x12, uint16_t x13, uint16_t x14, uint16_t x15) noexcept {

    Data256 self;
    self.setU16(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
    return self;
  }

  //! Creates `Data256` with all eight 32-bit signed integers set to `x0`.
  static inline Data256 fromI32(int32_t x0) noexcept {
    Data256 self;
    self.setI32(x0);
    return self;
  }

  //! Creates `Data256` with all eight 32-bit unsigned integers set to `x0`.
  static inline Data256 fromU32(uint32_t x0) noexcept {
    Data256 self;
    self.setU32(x0);
    return self;
  }

  //! Creates `Data256` initialized by the given eight 32-bit signed integers.
  static inline Data256 fromI32(
    int32_t x0, int32_t x1, int32_t x2, int32_t x3,
    int32_t x4, int32_t x5, int32_t x6, int32_t x7) noexcept {

    Data256 self;
    self.setI32(x0, x1, x2, x3, x4, x5, x6, x7);
    return self;
  }

  //! Creates `Data256` initialized by the given eight 32-bit unsigned integers.
  static inline Data256 fromU32(
    uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
    uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7) noexcept {

    Data256 self;
    self.setU32(x0, x1, x2, x3, x4, x5, x6, x7);
    return self;
  }

  //! Creates `Data256` with all four 64-bit signed integers set to `x0`.
  static inline Data256 fromI64(int64_t x0) noexcept {
    Data256 self;
    self.setI64(x0);
    return self;
  }

  //! Creates `Data256` with all four 64-bit unsigned integers set to `x0`.
  static inline Data256 fromU64(uint64_t x0) noexcept {
    Data256 self;
    self.setU64(x0);
    return self;
  }

  //! Creates `Data256` initialized by the given four 64-bit signed integers.
  static inline Data256 fromI64(int64_t x0, int64_t x1, int64_t x2, int64_t x3) noexcept {
    Data256 self;
    self.setI64(x0, x1, x2, x3);
    return self;
  }

  //! Creates `Data256` initialized by the given four 64-bit unsigned integers.
  static inline Data256 fromU64(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3) noexcept {
    Data256 self;
    self.setU64(x0, x1, x2, x3);
    return self;
  }

  //! Creates `Data256` with all eight SP-FP floats set to `x0`.
  static inline Data256 fromF32(float x0) noexcept {
    Data256 self;
    self.setF32(x0);
    return self;
  }

  //! Creates `Data256` initialized by the given eight SP-FP floats.
  static inline Data256 fromF32(
    float x0, float x1, float x2, float x3,
    float x4, float x5, float x6, float x7) noexcept {

    Data256 self;
    self.setF32(x0, x1, x2, x3, x4, x5, x6, x7);
    return self;
  }

  //! Creates `Data256` with all four DP-FP floats set to `x0`.
  static inline Data256 fromF64(double x0) noexcept {
    Data256 self;
    self.setF64(x0);
    return self;
  }

  //! Creates `Data256` initialized by the given four DP-FP floats.
  static inline Data256 fromF64(double x0, double x1, double x2, double x3) noexcept {
    Data256 self;
    self.setF64(x0, x1, x2, x3);
    return self;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Sets all thirty two 8-bit signed integers.
  inline void setI8(int8_t x0) noexcept {
    setU8(uint8_t(x0));
  }

  //! Sets all thirty two 8-bit unsigned integers.
  inline void setU8(uint8_t x0) noexcept {
    // `ASMJIT_ARCH_BITS` is a compile-time constant - only one branch
    // survives, favoring 64-bit stores on 64-bit targets.
    if (ASMJIT_ARCH_BITS >= 64) {
      // Multiplying by 0x0101010101010101 broadcasts the byte into all
      // eight bytes of a 64-bit word.
      uint64_t xq = uint64_t(x0) * 0x0101010101010101u;
      uq[0] = xq;
      uq[1] = xq;
      uq[2] = xq;
      uq[3] = xq;
    }
    else {
      // Same broadcast trick for a 32-bit word.
      uint32_t xd = uint32_t(x0) * 0x01010101u;
      ud[0] = xd;
      ud[1] = xd;
      ud[2] = xd;
      ud[3] = xd;
      ud[4] = xd;
      ud[5] = xd;
      ud[6] = xd;
      ud[7] = xd;
    }
  }

  //! Sets all thirty two 8-bit signed integers.
  inline void setI8(
    int8_t x0 , int8_t x1 , int8_t x2 , int8_t x3 ,
    int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 ,
    int8_t x8 , int8_t x9 , int8_t x10, int8_t x11,
    int8_t x12, int8_t x13, int8_t x14, int8_t x15,
    int8_t x16, int8_t x17, int8_t x18, int8_t x19,
    int8_t x20, int8_t x21, int8_t x22, int8_t x23,
    int8_t x24, int8_t x25, int8_t x26, int8_t x27,
    int8_t x28, int8_t x29, int8_t x30, int8_t x31) noexcept {

    sb[0 ] = x0 ; sb[1 ] = x1 ; sb[2 ] = x2 ; sb[3 ] = x3 ;
    sb[4 ] = x4 ; sb[5 ] = x5 ; sb[6 ] = x6 ; sb[7 ] = x7 ;
    sb[8 ] = x8 ; sb[9 ] = x9 ; sb[10] = x10; sb[11] = x11;
    sb[12] = x12; sb[13] = x13; sb[14] = x14; sb[15] = x15;
    sb[16] = x16; sb[17] = x17; sb[18] = x18; sb[19] = x19;
    sb[20] = x20; sb[21] = x21; sb[22] = x22; sb[23] = x23;
    sb[24] = x24; sb[25] = x25; sb[26] = x26; sb[27] = x27;
    sb[28] = x28; sb[29] = x29; sb[30] = x30; sb[31] = x31;
  }

  //! Sets all thirty two 8-bit unsigned integers.
  inline void setU8(
    uint8_t x0 , uint8_t x1 , uint8_t x2 , uint8_t x3 ,
    uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 ,
    uint8_t x8 , uint8_t x9 , uint8_t x10, uint8_t x11,
    uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15,
    uint8_t x16, uint8_t x17, uint8_t x18, uint8_t x19,
    uint8_t x20, uint8_t x21, uint8_t x22, uint8_t x23,
    uint8_t x24, uint8_t x25, uint8_t x26, uint8_t x27,
    uint8_t x28, uint8_t x29, uint8_t x30, uint8_t x31) noexcept {

    ub[0 ] = x0 ; ub[1 ] = x1 ; ub[2 ] = x2 ; ub[3 ] = x3 ;
    ub[4 ] = x4 ; ub[5 ] = x5 ; ub[6 ] = x6 ; ub[7 ] = x7 ;
    ub[8 ] = x8 ; ub[9 ] = x9 ; ub[10] = x10; ub[11] = x11;
    ub[12] = x12; ub[13] = x13; ub[14] = x14; ub[15] = x15;
    ub[16] = x16; ub[17] = x17; ub[18] = x18; ub[19] = x19;
    ub[20] = x20; ub[21] = x21; ub[22] = x22; ub[23] = x23;
    ub[24] = x24; ub[25] = x25; ub[26] = x26; ub[27] = x27;
    ub[28] = x28; ub[29] = x29; ub[30] = x30; ub[31] = x31;
  }

  //! Sets all sixteen 16-bit signed integers.
  inline void setI16(int16_t x0) noexcept {
    setU16(uint16_t(x0));
  }

  //! Sets all sixteen 16-bit unsigned integers.
  inline void setU16(uint16_t x0) noexcept {
    // Compile-time branch - prefer 64-bit stores on 64-bit targets.
    if (ASMJIT_ARCH_BITS >= 64) {
      // 0x0001000100010001 broadcasts the 16-bit value into all four
      // 16-bit lanes of a 64-bit word.
      uint64_t xq = uint64_t(x0) * 0x0001000100010001u;
      uq[0] = xq;
      uq[1] = xq;
      uq[2] = xq;
      uq[3] = xq;
    }
    else {
      uint32_t xd = uint32_t(x0) * 0x00010001u;
      ud[0] = xd;
      ud[1] = xd;
      ud[2] = xd;
      ud[3] = xd;
      ud[4] = xd;
      ud[5] = xd;
      ud[6] = xd;
      ud[7] = xd;
    }
  }

  //! Sets all sixteen 16-bit signed integers.
  inline void setI16(
    int16_t x0, int16_t x1, int16_t x2 , int16_t x3 , int16_t x4 , int16_t x5 , int16_t x6 , int16_t x7,
    int16_t x8, int16_t x9, int16_t x10, int16_t x11, int16_t x12, int16_t x13, int16_t x14, int16_t x15) noexcept {

    sw[0 ] = x0 ; sw[1 ] = x1 ; sw[2 ] = x2 ; sw[3 ] = x3 ;
    sw[4 ] = x4 ; sw[5 ] = x5 ; sw[6 ] = x6 ; sw[7 ] = x7 ;
    sw[8 ] = x8 ; sw[9 ] = x9 ; sw[10] = x10; sw[11] = x11;
    sw[12] = x12; sw[13] = x13; sw[14] = x14; sw[15] = x15;
  }

  //! Sets all sixteen 16-bit unsigned integers.
  inline void setU16(
    uint16_t x0, uint16_t x1, uint16_t x2 , uint16_t x3 , uint16_t x4 , uint16_t x5 , uint16_t x6 , uint16_t x7,
    uint16_t x8, uint16_t x9, uint16_t x10, uint16_t x11, uint16_t x12, uint16_t x13, uint16_t x14, uint16_t x15) noexcept {

    uw[0 ] = x0 ; uw[1 ] = x1 ; uw[2 ] = x2 ; uw[3 ] = x3 ;
    uw[4 ] = x4 ; uw[5 ] = x5 ; uw[6 ] = x6 ; uw[7 ] = x7 ;
    uw[8 ] = x8 ; uw[9 ] = x9 ; uw[10] = x10; uw[11] = x11;
    uw[12] = x12; uw[13] = x13; uw[14] = x14; uw[15] = x15;
  }

  //! Sets all eight 32-bit signed integers.
  inline void setI32(int32_t x0) noexcept {
    setU32(uint32_t(x0));
  }

  //! Sets all eight 32-bit unsigned integers.
  inline void setU32(uint32_t x0) noexcept {
    // Compile-time branch - prefer 64-bit stores on 64-bit targets.
    if (ASMJIT_ARCH_BITS >= 64) {
      // Duplicates the 32-bit value into both halves of a 64-bit word.
      uint64_t xq = (uint64_t(x0) << 32) + x0;
      uq[0] = xq;
      uq[1] = xq;
      uq[2] = xq;
      uq[3] = xq;
    }
    else {
      ud[0] = x0;
      ud[1] = x0;
      ud[2] = x0;
      ud[3] = x0;
      ud[4] = x0;
      ud[5] = x0;
      ud[6] = x0;
      ud[7] = x0;
    }
  }

  //! Sets all eight 32-bit signed integers.
  inline void setI32(
    int32_t x0, int32_t x1, int32_t x2, int32_t x3,
    int32_t x4, int32_t x5, int32_t x6, int32_t x7) noexcept {

    sd[0] = x0; sd[1] = x1; sd[2] = x2; sd[3] = x3;
    sd[4] = x4; sd[5] = x5; sd[6] = x6; sd[7] = x7;
  }

  //! Sets all eight 32-bit unsigned integers.
  inline void setU32(
    uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
    uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7) noexcept {

    ud[0] = x0; ud[1] = x1; ud[2] = x2; ud[3] = x3;
    ud[4] = x4; ud[5] = x5; ud[6] = x6; ud[7] = x7;
  }

  //! Sets all four 64-bit signed integers.
  inline void setI64(int64_t x0) noexcept {
    sq[0] = x0; sq[1] = x0; sq[2] = x0; sq[3] = x0;
  }

  //! Sets all four 64-bit unsigned integers.
  inline void setU64(uint64_t x0) noexcept {
    uq[0] = x0; uq[1] = x0; uq[2] = x0; uq[3] = x0;
  }

  //! Sets all four 64-bit signed integers.
  inline void setI64(int64_t x0, int64_t x1, int64_t x2, int64_t x3) noexcept {
    sq[0] = x0; sq[1] = x1; sq[2] = x2; sq[3] = x3;
  }

  //! Sets all four 64-bit unsigned integers.
  inline void setU64(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3) noexcept {
    uq[0] = x0; uq[1] = x1; uq[2] = x2; uq[3] = x3;
  }

  //! Sets all eight SP-FP floats.
  inline void setF32(float x0) noexcept {
    sf[0] = x0; sf[1] = x0; sf[2] = x0; sf[3] = x0;
    sf[4] = x0; sf[5] = x0; sf[6] = x0; sf[7] = x0;
  }

  //! Sets all eight SP-FP floats.
  inline void setF32(
    float x0, float x1, float x2, float x3,
    float x4, float x5, float x6, float x7) noexcept {

    sf[0] = x0; sf[1] = x1; sf[2] = x2; sf[3] = x3;
    sf[4] = x4; sf[5] = x5; sf[6] = x6; sf[7] = x7;
  }

  //! Sets all four DP-FP floats.
  inline void setF64(double x0) noexcept {
    df[0] = x0; df[1] = x0; df[2] = x0; df[3] = x0;
  }

  //! Sets all four DP-FP floats.
  inline void setF64(double x0, double x1, double x2, double x3) noexcept {
    df[0] = x0; df[1] = x1; df[2] = x2; df[3] = x3;
  }

  //! \}
};
1050
1051 //! \}
1052
1053 ASMJIT_END_NAMESPACE
1054
1055 #endif // _ASMJIT_CORE_DATATYPES_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/logging.h"
8 #include "../core/support.h"
9
10 #ifdef ASMJIT_BUILD_X86
11 #include "../x86/x86internal_p.h"
12 #include "../x86/x86instdb_p.h"
13 #endif // ASMJIT_BUILD_X86
14
15 #ifdef ASMJIT_BUILD_ARM
16 #include "../arm/arminternal_p.h"
17 #include "../arm/arminstdb.h"
18 #endif // ASMJIT_BUILD_ARM
19
20 ASMJIT_BEGIN_NAMESPACE
21
22 // ============================================================================
23 // [asmjit::BaseEmitter - Construction / Destruction]
24 // ============================================================================
25
// Creates a `BaseEmitter` of the given `type` (see `EmitterType`).
//
// The emitter starts in a detached state - `_code` is null until a
// `CodeHolder` attaches it via `onAttach()`.
BaseEmitter::BaseEmitter(uint32_t type) noexcept
  : _type(uint8_t(type)),
    _reserved(0),
    _flags(0),
    _emitterOptions(0),
    _code(nullptr),
    _errorHandler(nullptr),
    _codeInfo(),
    _gpRegInfo(),
    _privateData(0),
    _instOptions(0),
    // Reserved bit starts set; recalculated by `onUpdateGlobalInstOptions()`.
    _globalInstOptions(BaseInst::kOptionReserved),
    _extraReg(),
    _inlineComment(nullptr) {}
40
41 BaseEmitter::~BaseEmitter() noexcept {
42 if (_code) {
43 _addFlags(kFlagDestroyed);
44 _code->detach(this);
45 }
46 }
47
48 // ============================================================================
49 // [asmjit::BaseEmitter - Code-Generation]
50 // ============================================================================
51
// Dispatches a variable-length operand array to the fixed-arity `_emit()`
// entry points. Counts 0..4 pad the unused slots with `Globals::none` and
// use the 4-operand overload; counts 5..6 use the 6-operand overload.
// Any other count yields `kErrorInvalidArgument`.
Error BaseEmitter::_emitOpArray(uint32_t instId, const Operand_* operands, size_t count) {
  const Operand_* op = operands;
  const Operand& none_ = Globals::none;

  switch (count) {
    case 0: return _emit(instId, none_, none_, none_, none_);
    case 1: return _emit(instId, op[0], none_, none_, none_);
    case 2: return _emit(instId, op[0], op[1], none_, none_);
    case 3: return _emit(instId, op[0], op[1], op[2], none_);
    case 4: return _emit(instId, op[0], op[1], op[2], op[3]);
    case 5: return _emit(instId, op[0], op[1], op[2], op[3], op[4], none_);
    case 6: return _emit(instId, op[0], op[1], op[2], op[3], op[4], op[5]);
    default: return DebugUtils::errored(kErrorInvalidArgument);
  }
}
67
68 // ============================================================================
// [asmjit::BaseEmitter - Label Management]
70 // ============================================================================
71
72 Label BaseEmitter::labelByName(const char* name, size_t nameSize, uint32_t parentId) noexcept {
73 return Label(_code ? _code->labelIdByName(name, nameSize, parentId) : uint32_t(Globals::kInvalidId));
74 }
75
76 // ============================================================================
77 // [asmjit::BaseEmitter - Finalize]
78 // ============================================================================
79
// Finalizes the emitter and returns `kErrorOk` unconditionally.
Error BaseEmitter::finalize() {
  // Does nothing by default, overridden by `BaseBuilder` and `BaseCompiler`.
  return kErrorOk;
}
84
85 // ============================================================================
86 // [asmjit::BaseEmitter - Error Handling]
87 // ============================================================================
88
89 Error BaseEmitter::reportError(Error err, const char* message) {
90 ErrorHandler* handler = errorHandler();
91 if (!handler) {
92 if (code())
93 handler = code()->errorHandler();
94 }
95
96 if (handler) {
97 if (!message)
98 message = DebugUtils::errorAsString(err);
99 handler->handleError(err, message, this);
100 }
101
102 return err;
103 }
104
105 // ============================================================================
106 // [asmjit::BaseEmitter - Label Management]
107 // ============================================================================
108
109 bool BaseEmitter::isLabelValid(uint32_t labelId) const noexcept {
110 return _code && labelId < _code->labelCount();
111 }
112
113 // ============================================================================
114 // [asmjit::BaseEmitter - Emit (High-Level)]
115 // ============================================================================
116
// Emits a function prolog described by `frame`.
//
// Requires an attached CodeHolder (`kErrorNotInitialized` otherwise) and
// dispatches on the target architecture family; architectures not
// compiled in yield `kErrorInvalidArch`.
ASMJIT_FAVOR_SIZE Error BaseEmitter::emitProlog(const FuncFrame& frame) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

#ifdef ASMJIT_BUILD_X86
  if (archInfo().isX86Family())
    return x86::X86Internal::emitProlog(as<x86::Emitter>(), frame);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (archInfo().isArmFamily())
    return arm::ArmInternal::emitProlog(as<arm::Emitter>(), frame);
#endif

  return DebugUtils::errored(kErrorInvalidArch);
}
133
// Emits a function epilog described by `frame`.
//
// Same dispatch scheme as `emitProlog()` - requires an attached
// CodeHolder and a compiled-in architecture backend.
ASMJIT_FAVOR_SIZE Error BaseEmitter::emitEpilog(const FuncFrame& frame) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

#ifdef ASMJIT_BUILD_X86
  if (archInfo().isX86Family())
    return x86::X86Internal::emitEpilog(as<x86::Emitter>(), frame);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (archInfo().isArmFamily())
    return arm::ArmInternal::emitEpilog(as<arm::Emitter>(), frame);
#endif

  return DebugUtils::errored(kErrorInvalidArch);
}
150
// Emits code that moves function arguments as described by `args` within
// the function frame `frame`.
//
// Same dispatch scheme as `emitProlog()` - requires an attached
// CodeHolder and a compiled-in architecture backend.
ASMJIT_FAVOR_SIZE Error BaseEmitter::emitArgsAssignment(const FuncFrame& frame, const FuncArgsAssignment& args) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

#ifdef ASMJIT_BUILD_X86
  if (archInfo().isX86Family())
    return x86::X86Internal::emitArgsAssignment(as<x86::Emitter>(), frame, args);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (archInfo().isArmFamily())
    return arm::ArmInternal::emitArgsAssignment(as<arm::Emitter>(), frame, args);
#endif

  return DebugUtils::errored(kErrorInvalidArch);
}
167
168 // ============================================================================
169 // [asmjit::BaseEmitter - Comment]
170 // ============================================================================
171
// Emits a printf-style formatted comment.
//
// Returns `kErrorNotInitialized` when detached. When compiled with
// `ASMJIT_NO_LOGGING` the format arguments are ignored and `kErrorOk` is
// returned without emitting anything.
Error BaseEmitter::commentf(const char* fmt, ...) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

#ifndef ASMJIT_NO_LOGGING
  StringTmp<1024> sb;

  // Format `fmt` with the variadic arguments into the temporary string.
  va_list ap;
  va_start(ap, fmt);
  Error err = sb.appendVFormat(fmt, ap);
  va_end(ap);

  if (ASMJIT_UNLIKELY(err))
    return err;

  return comment(sb.data(), sb.size());
#else
  ASMJIT_UNUSED(fmt);
  return kErrorOk;
#endif
}
193
// Emits a vprintf-style formatted comment (va_list variant of
// `commentf()`). The caller owns `ap` and is responsible for `va_end`.
Error BaseEmitter::commentv(const char* fmt, va_list ap) {
  if (ASMJIT_UNLIKELY(!_code))
    return DebugUtils::errored(kErrorNotInitialized);

#ifndef ASMJIT_NO_LOGGING
  StringTmp<1024> sb;

  Error err = sb.appendVFormat(fmt, ap);
  if (ASMJIT_UNLIKELY(err))
    return err;

  return comment(sb.data(), sb.size());
#else
  ASMJIT_UNUSED(fmt);
  ASMJIT_UNUSED(ap);
  return kErrorOk;
#endif
}
212
213 // ============================================================================
214 // [asmjit::BaseEmitter - Events]
215 // ============================================================================
216
// Called when the emitter is attached to `code`.
//
// Mirrors the CodeHolder's code info and emitter options into this
// emitter and recalculates the global instruction options.
Error BaseEmitter::onAttach(CodeHolder* code) noexcept {
  _code = code;
  _codeInfo = code->codeInfo();
  _emitterOptions = code->emitterOptions();

  onUpdateGlobalInstOptions();
  return kErrorOk;
}
225
// Called when the emitter is detached from a CodeHolder.
//
// Resets all attachment-related and per-instruction state to detached
// defaults. `_type` is intentionally left untouched - detaching does not
// change what kind of emitter this is.
Error BaseEmitter::onDetach(CodeHolder* code) noexcept {
  ASMJIT_UNUSED(code);

  _flags = 0;
  _emitterOptions = 0;
  _errorHandler = nullptr;

  _codeInfo.reset();
  _gpRegInfo.reset();
  _privateData = 0;

  // State that only affects the next emitted instruction.
  _instOptions = 0;
  _globalInstOptions = BaseInst::kOptionReserved;
  _extraReg.reset();
  _inlineComment = nullptr;

  return kErrorOk;
}
244
// Recomputes `_globalInstOptions` from the current `_emitterOptions`:
// `BaseInst::kOptionReserved` is kept set while logging or strict
// validation is enabled, cleared otherwise.
// NOTE(review): presumably emitters test this bit to route emission
// through a slower, instrumented path - confirm against emit paths.
void BaseEmitter::onUpdateGlobalInstOptions() noexcept {
  // Emitter options that require the reserved bit to stay set.
  constexpr uint32_t kCriticalEmitterOptions =
    kOptionLoggingEnabled |
    kOptionStrictValidation ;

  _globalInstOptions &= ~BaseInst::kOptionReserved;
  if ((_emitterOptions & kCriticalEmitterOptions) != 0)
    _globalInstOptions |= BaseInst::kOptionReserved;
}
254
255 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_EMITTER_H
7 #define _ASMJIT_CORE_EMITTER_H
8
9 #include "../core/arch.h"
10 #include "../core/inst.h"
11 #include "../core/operand.h"
12 #include "../core/codeholder.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \addtogroup asmjit_core
17 //! \{
18
19 // ============================================================================
20 // [Forward Declarations]
21 // ============================================================================
22
23 class ConstPool;
24 class FuncFrame;
25 class FuncArgsAssignment;
26
27 // ============================================================================
28 // [asmjit::BaseEmitter]
29 // ============================================================================
30
31 //! Provides a base foundation to emit code - specialized by `Assembler` and
32 //! `BaseBuilder`.
class ASMJIT_VIRTAPI BaseEmitter {
public:
  ASMJIT_BASE_CLASS(BaseEmitter)

  //! See `EmitterType`.
  uint8_t _type;
  //! Reserved for future use.
  uint8_t _reserved;
  //! See \ref BaseEmitter::Flags.
  uint16_t _flags;
  //! Emitter options, always in sync with CodeHolder.
  uint32_t _emitterOptions;

  //! CodeHolder the BaseEmitter is attached to.
  CodeHolder* _code;
  //! Attached `ErrorHandler`.
  ErrorHandler* _errorHandler;

  //! Basic information about the code (matches CodeHolder::_codeInfo).
  CodeInfo _codeInfo;
  //! Native GP register signature and signature related information.
  RegInfo _gpRegInfo;
  //! Internal private data used freely by any emitter.
  uint32_t _privateData;

  //! Next instruction options (affects the next instruction).
  uint32_t _instOptions;
  //! Global Instruction options (combined with `_instOptions` by `emit...()`).
  uint32_t _globalInstOptions;
  //! Extra register (op-mask {k} on AVX-512) (affects the next instruction).
  RegOnly _extraReg;
  //! Inline comment of the next instruction (affects the next instruction).
  const char* _inlineComment;

  //! Emitter type.
  enum EmitterType : uint32_t {
    //! Unknown or uninitialized.
    kTypeNone = 0,
    //! Emitter inherits from `BaseAssembler`.
    kTypeAssembler = 1,
    //! Emitter inherits from `BaseBuilder`.
    kTypeBuilder = 2,
    //! Emitter inherits from `BaseCompiler`.
    kTypeCompiler = 3,
    //! Count of emitter types.
    kTypeCount = 4
  };

  //! Emitter flags.
  enum Flags : uint32_t {
    //! The emitter was finalized.
    kFlagFinalized = 0x4000u,
    //! The emitter was destroyed.
    kFlagDestroyed = 0x8000u
  };

  //! Emitter options.
  enum Options : uint32_t {
    //! Logging is enabled, `BaseEmitter::logger()` must return a valid logger.
    //! This option is set automatically by the emitter if the logger is present.
    //! User code should never alter this value.
    //!
    //! Default `false`.
    kOptionLoggingEnabled = 0x00000001u,

    //! Strictly validate each instruction before it's emitted.
    //!
    //! Default `false`.
    kOptionStrictValidation = 0x00000002u,

    //! Emit instructions that are optimized for size, if possible.
    //!
    //! Default `false`.
    //!
    //! X86 Specific
    //! ------------
    //!
    //! When this option is set, the assembler will try to fix instructions
    //! if possible into operation equivalent instructions that take less bytes
    //! by taking advantage of implicit zero extension. For example instruction
    //! like `mov r64, imm` and `and r64, imm` can be translated to `mov r32, imm`
    //! and `and r32, imm` when the immediate constant is less than `2^31`.
    kOptionOptimizedForSize = 0x00000004u,

    //! Emit optimized code-alignment sequences.
    //!
    //! Default `false`.
    //!
    //! X86 Specific
    //! ------------
    //!
    //! Default align sequence used by X86 architecture is one-byte (0x90)
    //! opcode that is often shown by disassemblers as NOP. However there are
    //! more optimized align sequences for 2-11 bytes that may execute faster
    //! on certain CPUs. If this feature is enabled AsmJit will generate
    //! specialized sequences for alignment between 2 to 11 bytes.
    kOptionOptimizedAlign = 0x00000008u,

    //! Emit jump-prediction hints.
    //!
    //! Default `false`.
    //!
    //! X86 Specific
    //! ------------
    //!
    //! Jump prediction is usually based on the direction of the jump. If the
    //! jump is backward it is usually predicted as taken; and if the jump is
    //! forward it is usually predicted as not-taken. The reason is that loops
    //! generally use backward jumps and conditions usually use forward jumps.
    //! However this behavior can be overridden by using instruction prefixes.
    //! If this option is enabled these hints will be emitted.
    //!
    //! This feature is disabled by default, because the only processor that
    //! used to take into consideration prediction hints was P4. Newer processors
    //! implement heuristics for branch prediction and ignore static hints. This
    //! means that this feature can be used for annotation purposes.
    kOptionPredictedJumps = 0x00000010u
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `BaseEmitter` of the given `type`, see `EmitterType`.
  ASMJIT_API explicit BaseEmitter(uint32_t type) noexcept;
  ASMJIT_API virtual ~BaseEmitter() noexcept;

  //! \}

  //! \name Cast
  //! \{

  //! Casts this emitter to `T*` (unchecked).
  template<typename T>
  inline T* as() noexcept { return reinterpret_cast<T*>(this); }

  //! Casts this emitter to `const T*` (unchecked).
  template<typename T>
  inline const T* as() const noexcept { return reinterpret_cast<const T*>(this); }

  //! \}

  //! \name Emitter Type & Flags
  //! \{

  //! Returns the type of this emitter, see `EmitterType`.
  inline uint32_t emitterType() const noexcept { return _type; }
  //! Returns emitter flags, see `Flags`.
  inline uint32_t emitterFlags() const noexcept { return _flags; }

  //! Tests whether the emitter inherits from `BaseAssembler`.
  inline bool isAssembler() const noexcept { return _type == kTypeAssembler; }
  //! Tests whether the emitter inherits from `BaseBuilder`.
  //!
  //! \note Both Builder and Compiler emitters would return `true`.
  inline bool isBuilder() const noexcept { return _type >= kTypeBuilder; }
  //! Tests whether the emitter inherits from `BaseCompiler`.
  inline bool isCompiler() const noexcept { return _type == kTypeCompiler; }

  //! Tests whether the emitter has the given `flag` enabled.
  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
  //! Tests whether the emitter is finalized.
  inline bool isFinalized() const noexcept { return hasFlag(kFlagFinalized); }
  //! Tests whether the emitter is destroyed (only used during destruction).
  inline bool isDestroyed() const noexcept { return hasFlag(kFlagDestroyed); }

  //! Adds the given `flags` to emitter flags (internal).
  inline void _addFlags(uint32_t flags) noexcept { _flags = uint16_t(_flags | flags); }
  //! Clears the given `flags` from emitter flags (internal).
  inline void _clearFlags(uint32_t flags) noexcept { _flags = uint16_t(_flags & ~flags); }

  //! \}

  //! \name Target Information
  //! \{

  //! Returns the CodeHolder this emitter is attached to.
  inline CodeHolder* code() const noexcept { return _code; }
  //! Returns an information about the code, see `CodeInfo`.
  inline const CodeInfo& codeInfo() const noexcept { return _codeInfo; }
  //! Returns an information about the architecture, see `ArchInfo`.
  inline const ArchInfo& archInfo() const noexcept { return _codeInfo.archInfo(); }

  //! Tests whether the target architecture is 32-bit.
  inline bool is32Bit() const noexcept { return archInfo().is32Bit(); }
  //! Tests whether the target architecture is 64-bit.
  inline bool is64Bit() const noexcept { return archInfo().is64Bit(); }

  //! Returns the target architecture type.
  inline uint32_t archId() const noexcept { return archInfo().archId(); }
  //! Returns the target architecture sub-type.
  inline uint32_t archSubId() const noexcept { return archInfo().archSubId(); }
  //! Returns the target architecture's GP register size (4 or 8 bytes).
  inline uint32_t gpSize() const noexcept { return archInfo().gpSize(); }
  //! Returns the number of target GP registers.
  inline uint32_t gpCount() const noexcept { return archInfo().gpCount(); }

  //! \}

  //! \name Initialization & Finalization
  //! \{

  //! Tests whether the BaseEmitter is initialized (i.e. attached to the `CodeHolder`).
  inline bool isInitialized() const noexcept { return _code != nullptr; }

  ASMJIT_API virtual Error finalize();

  //! \}

  //! \name Emitter Options
  //! \{

  //! Tests whether the `option` is present in emitter options.
  inline bool hasEmitterOption(uint32_t option) const noexcept { return (_emitterOptions & option) != 0; }
  //! Returns the emitter options.
  inline uint32_t emitterOptions() const noexcept { return _emitterOptions; }

  // TODO: Deprecate and remove, CodeHolder::addEmitterOptions() is the way.
  inline void addEmitterOptions(uint32_t options) noexcept {
    _emitterOptions |= options;
    onUpdateGlobalInstOptions();
  }

  //! Clears the given emitter `options` and refreshes global instruction options.
  inline void clearEmitterOptions(uint32_t options) noexcept {
    _emitterOptions &= ~options;
    onUpdateGlobalInstOptions();
  }

  //! Returns the global instruction options.
  //!
  //! Default instruction options are merged with instruction options before the
  //! instruction is encoded. These options have some bits reserved that are used
  //! for error handling, logging, and strict validation. Other options are globals that
  //! affect each instruction, for example if VEX3 is set globally, it will affect all
  //! instructions, even those that don't have such option set.
  inline uint32_t globalInstOptions() const noexcept { return _globalInstOptions; }

  //! \}

  //! \name Error Handling
  //! \{

  //! Tests whether the local error handler is attached.
  inline bool hasErrorHandler() const noexcept { return _errorHandler != nullptr; }
  //! Returns the local error handler.
  inline ErrorHandler* errorHandler() const noexcept { return _errorHandler; }
  //! Sets the local error handler.
  inline void setErrorHandler(ErrorHandler* handler) noexcept { _errorHandler = handler; }
  //! Resets the local error handler (does nothing if not attached).
  inline void resetErrorHandler() noexcept { setErrorHandler(nullptr); }

  //! Handles the given error in the following way:
  //! 1. Gets either Emitter's (preferred) or CodeHolder's ErrorHandler.
  //! 2. If exists, calls `ErrorHandler::handleError(error, message, this)`.
  //! 3. Returns the given `err` if ErrorHandler hasn't thrown.
  ASMJIT_API Error reportError(Error err, const char* message = nullptr);

  //! \}

  //! \name Instruction Options
  //! \{

  //! Returns options of the next instruction.
  inline uint32_t instOptions() const noexcept { return _instOptions; }
  //! Sets options of the next instruction.
  inline void setInstOptions(uint32_t options) noexcept { _instOptions = options; }
  //! Adds options of the next instruction.
  inline void addInstOptions(uint32_t options) noexcept { _instOptions |= options; }
  //! Resets options of the next instruction.
  inline void resetInstOptions() noexcept { _instOptions = 0; }

  //! Tests whether the extra register operand is valid.
  inline bool hasExtraReg() const noexcept { return _extraReg.isReg(); }
  //! Returns an extra operand that will be used by the next instruction (architecture specific).
  inline const RegOnly& extraReg() const noexcept { return _extraReg; }
  //! Sets an extra operand that will be used by the next instruction (architecture specific).
  inline void setExtraReg(const BaseReg& reg) noexcept { _extraReg.init(reg); }
  //! Sets an extra operand that will be used by the next instruction (architecture specific).
  inline void setExtraReg(const RegOnly& reg) noexcept { _extraReg.init(reg); }
  //! Resets an extra operand that will be used by the next instruction (architecture specific).
  inline void resetExtraReg() noexcept { _extraReg.reset(); }

  //! Returns comment/annotation of the next instruction.
  inline const char* inlineComment() const noexcept { return _inlineComment; }
  //! Sets comment/annotation of the next instruction.
  //!
  //! \note This string is set back to null by `_emit()`, but until that it has
  //! to remain valid as the Emitter is not required to make a copy of it (and
  //! it would be slow to do that for each instruction).
  inline void setInlineComment(const char* s) noexcept { _inlineComment = s; }
  //! Resets the comment/annotation to nullptr.
  inline void resetInlineComment() noexcept { _inlineComment = nullptr; }

  //! \}

  //! \name Sections
  //! \{

  virtual Error section(Section* section) = 0;

  //! \}

  //! \name Labels
  //! \{

  //! Creates a new label.
  virtual Label newLabel() = 0;
  //! Creates a new named label.
  virtual Label newNamedLabel(const char* name, size_t nameSize = SIZE_MAX, uint32_t type = Label::kTypeGlobal, uint32_t parentId = Globals::kInvalidId) = 0;

  //! Returns `Label` by `name`.
  //!
  //! Returns invalid Label in case that the name is invalid or label was not found.
  //!
  //! \note This function doesn't trigger ErrorHandler in case the name is invalid
  //! or no such label exists. You must always check the validity of the `Label` returned.
  ASMJIT_API Label labelByName(const char* name, size_t nameSize = SIZE_MAX, uint32_t parentId = Globals::kInvalidId) noexcept;

  //! Binds the `label` to the current position of the current section.
  //!
  //! \note Attempt to bind the same label multiple times will return an error.
  virtual Error bind(const Label& label) = 0;

  //! Tests whether the label `id` is valid (i.e. registered).
  ASMJIT_API bool isLabelValid(uint32_t labelId) const noexcept;
  //! Tests whether the `label` is valid (i.e. registered).
  inline bool isLabelValid(const Label& label) const noexcept { return isLabelValid(label.id()); }

  //! \}

  //! \name Emit
  //! \{

  // NOTE: These `emit()` helpers are designed to address a code-bloat generated
  // by C++ compilers to call a function having many arguments. Each parameter to
  // `_emit()` requires some code to pass it, which means that if we default to 4
  // operand parameters in `_emit()` and instId the C++ compiler would have to
  // generate a virtual function call having 5 parameters, which is quite a lot.
  // Since by default asm instructions have 2 to 3 operands it's better to
  // introduce helpers that pass those and fill out the remaining operands.

#define OP const Operand_&
#define NONE Globals::none

  //! Emits an instruction.
  ASMJIT_NOINLINE Error emit(uint32_t instId) { return _emit(instId, NONE, NONE, NONE, NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0) { return _emit(instId, o0, NONE, NONE, NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1) { return _emit(instId, o0, o1, NONE, NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2) { return _emit(instId, o0, o1, o2, NONE); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3) { return _emit(instId, o0, o1, o2, o3); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4) { return _emit(instId, o0, o1, o2, o3, o4, NONE); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, OP o5) { return _emit(instId, o0, o1, o2, o3, o4, o5); }

  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, int o0) { return _emit(instId, Imm(o0), NONE, NONE, NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, int o1) { return _emit(instId, o0, Imm(o1), NONE, NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, int o2) { return _emit(instId, o0, o1, Imm(o2), NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, int o3) { return _emit(instId, o0, o1, o2, Imm(o3)); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, int o4) { return _emit(instId, o0, o1, o2, o3, Imm(o4), NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, int o5) { return _emit(instId, o0, o1, o2, o3, o4, Imm(o5)); }

  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, int64_t o0) { return _emit(instId, Imm(o0), NONE, NONE, NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, int64_t o1) { return _emit(instId, o0, Imm(o1), NONE, NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, int64_t o2) { return _emit(instId, o0, o1, Imm(o2), NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, int64_t o3) { return _emit(instId, o0, o1, o2, Imm(o3)); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, int64_t o4) { return _emit(instId, o0, o1, o2, o3, Imm(o4), NONE); }
  //! \overload
  ASMJIT_NOINLINE Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, int64_t o5) { return _emit(instId, o0, o1, o2, o3, o4, Imm(o5)); }

  //! \overload
  inline Error emit(uint32_t instId, unsigned int o0) { return emit(instId, int64_t(o0)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, unsigned int o1) { return emit(instId, o0, int64_t(o1)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, unsigned int o2) { return emit(instId, o0, o1, int64_t(o2)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, unsigned int o3) { return emit(instId, o0, o1, o2, int64_t(o3)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, unsigned int o4) { return emit(instId, o0, o1, o2, o3, int64_t(o4)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, unsigned int o5) { return emit(instId, o0, o1, o2, o3, o4, int64_t(o5)); }

  //! \overload
  inline Error emit(uint32_t instId, uint64_t o0) { return emit(instId, int64_t(o0)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, uint64_t o1) { return emit(instId, o0, int64_t(o1)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, uint64_t o2) { return emit(instId, o0, o1, int64_t(o2)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, uint64_t o3) { return emit(instId, o0, o1, o2, int64_t(o3)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, uint64_t o4) { return emit(instId, o0, o1, o2, o3, int64_t(o4)); }
  //! \overload
  inline Error emit(uint32_t instId, OP o0, OP o1, OP o2, OP o3, OP o4, uint64_t o5) { return emit(instId, o0, o1, o2, o3, o4, int64_t(o5)); }

#undef NONE
#undef OP

  //! Emits an instruction with operands provided as an array.
  inline Error emitOpArray(uint32_t instId, const Operand_* operands, size_t count) { return _emitOpArray(instId, operands, count); }

  //! Emits an instruction described by `inst` (id, options, and extra register).
  inline Error emitInst(const BaseInst& inst, const Operand_* operands, size_t count) {
    setInstOptions(inst.options());
    setExtraReg(inst.extraReg());
    return _emitOpArray(inst.id(), operands, count);
  }

  //! \cond INTERNAL
  //! Emits instruction having max 4 operands.
  virtual Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) = 0;
  //! Emits instruction having max 6 operands.
  virtual Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3, const Operand_& o4, const Operand_& o5) = 0;
  //! Emits instruction having operands stored in array.
  virtual Error _emitOpArray(uint32_t instId, const Operand_* operands, size_t count);
  //! \endcond

  //! \}

  //! \name Emit Utilities
  //! \{

  ASMJIT_API Error emitProlog(const FuncFrame& layout);
  ASMJIT_API Error emitEpilog(const FuncFrame& layout);
  ASMJIT_API Error emitArgsAssignment(const FuncFrame& layout, const FuncArgsAssignment& args);

  //! \}

  //! \name Align
  //! \{

  //! Aligns the current CodeBuffer to the `alignment` specified.
  //!
  //! The sequence that is used to fill the gap between the aligned location
  //! and the current location depends on the align `mode`, see `AlignMode`.
  virtual Error align(uint32_t alignMode, uint32_t alignment) = 0;

  //! \}

  //! \name Embed
  //! \{

  //! Embeds raw data into the CodeBuffer.
  virtual Error embed(const void* data, uint32_t dataSize) = 0;

  //! Embeds an absolute label address as data (4 or 8 bytes).
  virtual Error embedLabel(const Label& label) = 0;

  //! Embeds a delta (distance) between the `label` and `base` calculating it
  //! as `label - base`. This function was designed to make it easier to embed
  //! lookup tables where each index is a relative distance of two labels.
  virtual Error embedLabelDelta(const Label& label, const Label& base, uint32_t dataSize) = 0;

  //! Embeds a constant pool at the current offset by performing the following:
  //! 1. Aligns by using kAlignData to the minimum `pool` alignment.
  //! 2. Binds the ConstPool label so it's bound to an aligned location.
  //! 3. Emits ConstPool content.
  virtual Error embedConstPool(const Label& label, const ConstPool& pool) = 0;

  //! \}

  //! \name Comment
  //! \{

  //! Emits a comment stored in `data` with an optional `size` parameter.
  virtual Error comment(const char* data, size_t size = SIZE_MAX) = 0;

  //! Emits a formatted comment specified by `fmt` and variable number of arguments.
  ASMJIT_API Error commentf(const char* fmt, ...);
  //! Emits a formatted comment specified by `fmt` and `ap`.
  ASMJIT_API Error commentv(const char* fmt, va_list ap);

  //! \}

  //! \name Events
  //! \{

  //! Called after the emitter was attached to `CodeHolder`.
  virtual Error onAttach(CodeHolder* code) noexcept = 0;
  //! Called after the emitter was detached from `CodeHolder`.
  virtual Error onDetach(CodeHolder* code) noexcept = 0;

  //! Called to update `_globalInstOptions` based on `_emitterOptions`.
  //!
  //! This function should only touch one bit `BaseInst::kOptionReserved`, which
  //! is used to handle errors and special-cases in a way that minimizes branching.
  ASMJIT_API void onUpdateGlobalInstOptions() noexcept;

  //! \}
};
531
532 //! \}
533
534 ASMJIT_END_NAMESPACE
535
536 #endif // _ASMJIT_CORE_EMITTER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_FEATURES_H
7 #define _ASMJIT_CORE_FEATURES_H
8
9 #include "../core/globals.h"
10 #include "../core/support.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \addtogroup asmjit_core
15 //! \{
16
17 // ============================================================================
18 // [asmjit::BaseFeatures]
19 // ============================================================================
20
21 class BaseFeatures {
22 public:
23 typedef Support::BitWord BitWord;
24
25 enum : uint32_t {
26 kMaxFeatures = 128,
27 kNumBitWords = kMaxFeatures / Support::kBitWordSizeInBits
28 };
29
30 BitWord _bits[kNumBitWords];
31
32 //! \name Construction & Destruction
33 //! \{
34
35 inline BaseFeatures() noexcept { reset(); }
36 inline BaseFeatures(const BaseFeatures& other) noexcept = default;
37 inline explicit BaseFeatures(Globals::NoInit_) noexcept {}
38
39 inline void reset() noexcept {
40 for (size_t i = 0; i < kNumBitWords; i++)
41 _bits[i] = 0;
42 }
43
44 //! \}
45
46 //! \name Overloaded Operators
47 //! \{
48
49 inline BaseFeatures& operator=(const BaseFeatures& other) noexcept = default;
50
51 inline bool operator==(const BaseFeatures& other) noexcept { return eq(other); }
52 inline bool operator!=(const BaseFeatures& other) noexcept { return !eq(other); }
53
54 //! \}
55
56 //! \name Cast
57 //! \{
58
59 template<typename T>
60 inline T& as() noexcept { return static_cast<T&>(*this); }
61
62 template<typename T>
63 inline const T& as() const noexcept { return static_cast<const T&>(*this); }
64
65 //! \}
66
67 //! \name Accessors
68 //! \{
69
70 //! Returns all features as `BitWord` array.
71 inline BitWord* bits() noexcept { return _bits; }
72 //! Returns all features as `BitWord` array (const).
73 inline const BitWord* bits() const noexcept { return _bits; }
74
75 //! Tests whether the feature `featureId` is present.
76 inline bool has(uint32_t featureId) const noexcept {
77 ASMJIT_ASSERT(featureId < kMaxFeatures);
78
79 uint32_t idx = featureId / Support::kBitWordSizeInBits;
80 uint32_t bit = featureId % Support::kBitWordSizeInBits;
81
82 return bool((_bits[idx] >> bit) & 0x1);
83 }
84
85 //! Tests whether all features as defined by `other` are present.
86 inline bool hasAll(const BaseFeatures& other) const noexcept {
87 for (uint32_t i = 0; i < kNumBitWords; i++)
88 if ((_bits[i] & other._bits[i]) != other._bits[i])
89 return false;
90 return true;
91 }
92
93 //! \}
94
95 //! \name Utilities
96 //! \{
97
98 //! Adds the given CPU `featureId` to the list of features.
99 inline void add(uint32_t featureId) noexcept {
100 ASMJIT_ASSERT(featureId < kMaxFeatures);
101
102 uint32_t idx = featureId / Support::kBitWordSizeInBits;
103 uint32_t bit = featureId % Support::kBitWordSizeInBits;
104
105 _bits[idx] |= BitWord(1) << bit;
106 }
107
108 template<typename... Args>
109 inline void add(uint32_t featureId, Args... otherIds) noexcept {
110 add(featureId);
111 add(otherIds...);
112 }
113
114 //! Removes the given CPU `featureId` from the list of features.
115 inline void remove(uint32_t featureId) noexcept {
116 ASMJIT_ASSERT(featureId < kMaxFeatures);
117
118 uint32_t idx = featureId / Support::kBitWordSizeInBits;
119 uint32_t bit = featureId % Support::kBitWordSizeInBits;
120
121 _bits[idx] &= ~(BitWord(1) << bit);
122 }
123
124 template<typename... Args>
125 inline void remove(uint32_t featureId, Args... otherIds) noexcept {
126 remove(featureId);
127 remove(otherIds...);
128 }
129
130 inline bool eq(const BaseFeatures& other) const noexcept {
131 for (size_t i = 0; i < kNumBitWords; i++)
132 if (_bits[i] != other._bits[i])
133 return false;
134 return true;
135 }
136
137 //! \}
138 };
139
140 //! \}
141
142 ASMJIT_END_NAMESPACE
143
144 #endif // _ASMJIT_CORE_FEATURES_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/arch.h"
8 #include "../core/func.h"
9 #include "../core/type.h"
10
11 #ifdef ASMJIT_BUILD_X86
12 #include "../x86/x86internal_p.h"
13 #include "../x86/x86operand.h"
14 #endif
15
16 #ifdef ASMJIT_BUILD_ARM
17 #include "../arm/arminternal_p.h"
18 #include "../arm/armoperand.h"
19 #endif
20
21 ASMJIT_BEGIN_NAMESPACE
22
23 // ============================================================================
24 // [asmjit::FuncDetail - Init / Reset]
25 // ============================================================================
26
27 ASMJIT_FAVOR_SIZE Error FuncDetail::init(const FuncSignature& sign) {
28 uint32_t ccId = sign.callConv();
29 CallConv& cc = _callConv;
30
31 uint32_t argCount = sign.argCount();
32 if (ASMJIT_UNLIKELY(argCount > Globals::kMaxFuncArgs))
33 return DebugUtils::errored(kErrorInvalidArgument);
34
35 ASMJIT_PROPAGATE(cc.init(ccId));
36
37 uint32_t gpSize = (cc.archId() == ArchInfo::kIdX86) ? 4 : 8;
38 uint32_t deabstractDelta = Type::deabstractDeltaOfSize(gpSize);
39
40 const uint8_t* args = sign.args();
41 for (uint32_t i = 0; i < argCount; i++) {
42 FuncValue& arg = _args[i];
43 arg.initTypeId(Type::deabstract(args[i], deabstractDelta));
44 }
45 _argCount = uint8_t(argCount);
46 _vaIndex = uint8_t(sign.vaIndex());
47
48 uint32_t ret = sign.ret();
49 if (ret != Type::kIdVoid) {
50 _rets[0].initTypeId(Type::deabstract(ret, deabstractDelta));
51 _retCount = 1;
52 }
53
54 #ifdef ASMJIT_BUILD_X86
55 if (CallConv::isX86Family(ccId))
56 return x86::X86Internal::initFuncDetail(*this, sign, gpSize);
57 #endif
58
59 #ifdef ASMJIT_BUILD_ARM
60 if (CallConv::isArmFamily(ccId))
61 return arm::ArmInternal::initFuncDetail(*this, sign, gpSize);
62 #endif
63
64 // We should never bubble here as if `cc.init()` succeeded then there has to
65 // be an implementation for the current architecture. However, stay safe.
66 return DebugUtils::errored(kErrorInvalidArgument);
67 }
68
69 // ============================================================================
70 // [asmjit::FuncFrame - Init / Reset / Finalize]
71 // ============================================================================
72
73 ASMJIT_FAVOR_SIZE Error FuncFrame::init(const FuncDetail& func) noexcept {
74 uint32_t ccId = func.callConv().id();
75
76 #ifdef ASMJIT_BUILD_X86
77 if (CallConv::isX86Family(ccId))
78 return x86::X86Internal::initFuncFrame(*this, func);
79 #endif
80
81 #ifdef ASMJIT_BUILD_ARM
82 if (CallConv::isArmFamily(ccId))
83 return arm::ArmInternal::initFuncFrame(*this, func);
84 #endif
85
86 return DebugUtils::errored(kErrorInvalidArgument);
87 }
88
89 ASMJIT_FAVOR_SIZE Error FuncFrame::finalize() noexcept {
90 #ifdef ASMJIT_BUILD_X86
91 if (ArchInfo::isX86Family(archId()))
92 return x86::X86Internal::finalizeFuncFrame(*this);
93 #endif
94
95 #ifdef ASMJIT_BUILD_ARM
96 if (ArchInfo::isArmFamily(archId()))
97 return arm::ArmInternal::finalizeFuncFrame(*this);
98 #endif
99
100 return DebugUtils::errored(kErrorInvalidArgument);
101 }
102
103 // ============================================================================
104 // [asmjit::FuncArgsAssignment]
105 // ============================================================================
106
107 ASMJIT_FAVOR_SIZE Error FuncArgsAssignment::updateFuncFrame(FuncFrame& frame) const noexcept {
108 const FuncDetail* func = funcDetail();
109 if (!func) return DebugUtils::errored(kErrorInvalidState);
110
111 uint32_t ccId = func->callConv().id();
112
113 #ifdef ASMJIT_BUILD_X86
114 if (CallConv::isX86Family(ccId))
115 return x86::X86Internal::argsToFuncFrame(*this, frame);
116 #endif
117
118 #ifdef ASMJIT_BUILD_ARM
119 if (CallConv::isArmFamily(ccId))
120 return arm::ArmInternal::argsToFuncFrame(*this, frame);
121 #endif
122
123 return DebugUtils::errored(kErrorInvalidArch);
124 }
125
126 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_FUNC_H
7 #define _ASMJIT_CORE_FUNC_H
8
9 #include "../core/arch.h"
10 #include "../core/callconv.h"
11 #include "../core/operand.h"
12 #include "../core/type.h"
13 #include "../core/support.h"
14
15 ASMJIT_BEGIN_NAMESPACE
16
17 //! \addtogroup asmjit_func
18 //! \{
19
20 // ============================================================================
21 // [asmjit::FuncArgIndex]
22 // ============================================================================
23
24 //! Function argument index (lo/hi).
enum FuncArgIndex : uint32_t {
  //! Maximum number of function arguments supported by AsmJit.
  kFuncArgCount = Globals::kMaxFuncArgs,
  //! Extended maximum number of arguments (used internally to address both
  //! LO and HI parts of each argument).
  kFuncArgCountLoHi = kFuncArgCount * 2,

  //! Index to the LO part of function argument (default).
  //!
  //! This value is typically omitted and added only if there is a HI argument
  //! accessed.
  kFuncArgLo = 0,

  //! Index to the HI part of function argument.
  //!
  //! HI part of function argument depends on target architecture. On x86 it's
  //! typically used to transfer 64-bit integers (they form a pair of 32-bit
  //! integers).
  kFuncArgHi = kFuncArgCount
};
44
45 // ============================================================================
46 // [asmjit::FuncSignature]
47 // ============================================================================
48
//! Function signature.
//!
//! Contains information about function return type, count of arguments and
//! their TypeIds. Function signature is a low level structure which doesn't
//! contain platform specific or calling convention specific information.
struct FuncSignature {
  //! Calling convention id.
  uint8_t _callConv;
  //! Count of arguments.
  uint8_t _argCount;
  //! Index of a first VA or `kNoVarArgs`.
  uint8_t _vaIndex;
  //! Return value TypeId.
  uint8_t _ret;
  //! Function arguments TypeIds.
  //!
  //! \note Not owned - points to an array that must outlive this signature.
  const uint8_t* _args;

  enum : uint8_t {
    //! Doesn't have variable number of arguments (`...`).
    kNoVarArgs = 0xFF
  };

  //! \name Initialization & Reset
  //! \{

  //! Initializes the function signature.
  //!
  //! \param ccId Calling convention id (must fit into 8 bits).
  //! \param vaIndex Index of the first variable argument or `kNoVarArgs`.
  //! \param ret Return value TypeId.
  //! \param args Pointer to an array of argument TypeIds (not copied).
  //! \param argCount Number of entries in `args` (must fit into 8 bits).
  inline void init(uint32_t ccId, uint32_t vaIndex, uint32_t ret, const uint8_t* args, uint32_t argCount) noexcept {
    ASMJIT_ASSERT(ccId <= 0xFF);
    ASMJIT_ASSERT(argCount <= 0xFF);

    _callConv = uint8_t(ccId);
    _argCount = uint8_t(argCount);
    _vaIndex = uint8_t(vaIndex);
    _ret = uint8_t(ret);
    _args = args;
  }

  //! Resets the signature to a zeroed (uninitialized) state.
  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the calling convention.
  inline uint32_t callConv() const noexcept { return _callConv; }
  //! Sets the calling convention to `ccId`.
  inline void setCallConv(uint32_t ccId) noexcept { _callConv = uint8_t(ccId); }

  //! Tests whether the function has variable number of arguments (...).
  inline bool hasVarArgs() const noexcept { return _vaIndex != kNoVarArgs; }
  //! Returns the variable arguments (...) index, `kNoVarArgs` if none.
  inline uint32_t vaIndex() const noexcept { return _vaIndex; }
  //! Sets the variable arguments (...) index to `index`.
  inline void setVaIndex(uint32_t index) noexcept { _vaIndex = uint8_t(index); }
  //! Resets the variable arguments index (making it a non-va function).
  inline void resetVaIndex() noexcept { _vaIndex = kNoVarArgs; }

  //! Returns the number of function arguments.
  inline uint32_t argCount() const noexcept { return _argCount; }

  //! Tests whether the function has a return value (TypeId is not void).
  inline bool hasRet() const noexcept { return _ret != Type::kIdVoid; }
  //! Returns the return value type.
  inline uint32_t ret() const noexcept { return _ret; }

  //! Returns the type of the argument at index `i`.
  inline uint32_t arg(uint32_t i) const noexcept {
    ASMJIT_ASSERT(i < _argCount);
    return _args[i];
  }
  //! Returns the array of function arguments' types.
  inline const uint8_t* args() const noexcept { return _args; }

  //! \}
};
124
125 // ============================================================================
126 // [asmjit::FuncSignatureT]
127 // ============================================================================
128
//! Function signature constructed at compile-time from template arguments,
//! where the first template argument is the return type and the remaining
//! ones are argument types.
//!
//! The TypeIds are computed by `Type::IdOfT` and stored in a function-local
//! static array - `ret_args[0]` holds the return TypeId and the rest holds
//! the argument TypeIds, which is why `init()` receives `ret_args + 1` and
//! a count decreased by one.
template<typename... RET_ARGS>
class FuncSignatureT : public FuncSignature {
public:
  //! Creates the signature with the given calling convention `ccId` and
  //! optional variable arguments index `vaIndex`.
  inline FuncSignatureT(uint32_t ccId = CallConv::kIdHost, uint32_t vaIndex = kNoVarArgs) noexcept {
    static const uint8_t ret_args[] = { (uint8_t(Type::IdOfT<RET_ARGS>::kTypeId))... };
    init(ccId, vaIndex, ret_args[0], ret_args + 1, uint32_t(ASMJIT_ARRAY_SIZE(ret_args) - 1));
  }
};
137
138 // ============================================================================
139 // [asmjit::FuncSignatureBuilder]
140 // ============================================================================
141
//! Function signature builder.
//!
//! Unlike `FuncSignatureT` the return type and arguments are specified at
//! runtime; argument TypeIds are stored in an embedded fixed-size array.
class FuncSignatureBuilder : public FuncSignature {
public:
  //! Storage for argument TypeIds (`_args` points here after construction).
  uint8_t _builderArgList[kFuncArgCount];

  //! \name Initialization & Reset
  //! \{

  //! Creates an empty (void, no arguments) signature with the given calling
  //! convention `ccId` and optional variable arguments index `vaIndex`.
  inline FuncSignatureBuilder(uint32_t ccId = CallConv::kIdHost, uint32_t vaIndex = kNoVarArgs) noexcept {
    init(ccId, vaIndex, Type::kIdVoid, _builderArgList, 0);
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Sets the return type to `retType`.
  inline void setRet(uint32_t retType) noexcept { _ret = uint8_t(retType); }
  //! Sets the return type based on `T`.
  template<typename T>
  inline void setRetT() noexcept { setRet(Type::IdOfT<T>::kTypeId); }

  //! Sets the argument at index `index` to `argType`.
  inline void setArg(uint32_t index, uint32_t argType) noexcept {
    ASMJIT_ASSERT(index < _argCount);
    _builderArgList[index] = uint8_t(argType);
  }
  //! Sets the argument at index `index` to the type based on `T`.
  template<typename T>
  inline void setArgT(uint32_t index) noexcept { setArg(index, Type::IdOfT<T>::kTypeId); }

  //! Appends an argument of `type` to the function prototype.
  inline void addArg(uint32_t type) noexcept {
    ASMJIT_ASSERT(_argCount < kFuncArgCount);
    _builderArgList[_argCount++] = uint8_t(type);
  }
  //! Appends an argument of type based on `T` to the function prototype.
  template<typename T>
  inline void addArgT() noexcept { addArg(Type::IdOfT<T>::kTypeId); }

  //! \}
};
185
186 // ============================================================================
187 // [asmjit::FuncValue]
188 // ============================================================================
189
//! Argument or return value as defined by `FuncSignature`, but with register
//! or stack address (and other metadata) assigned to it.
//!
//! The whole state is packed into a single 32-bit word, see `Parts` for the
//! exact bit layout. Note that the stack offset bits overlap the register
//! type/id bits - a value is assigned either a register or a stack slot,
//! never both at the same time.
struct FuncValue {
  //! Packed value (TypeId, flags, and register type/id or stack offset).
  uint32_t _data;

  enum Parts : uint32_t {
    kTypeIdShift = 0, //!< TypeId shift.
    kTypeIdMask = 0x000000FFu, //!< TypeId mask.

    kFlagIsReg = 0x00000100u, //!< Passed by register.
    kFlagIsStack = 0x00000200u, //!< Passed by stack.
    kFlagIsIndirect = 0x00000400u, //!< Passed indirectly by reference (internally a pointer).
    kFlagIsDone = 0x00000800u, //!< Used internally by arguments allocator.

    kStackOffsetShift = 12, //!< Stack offset shift.
    kStackOffsetMask = 0xFFFFF000u, //!< Stack offset mask (must occupy MSB bits).

    kRegIdShift = 16, //!< RegId shift.
    kRegIdMask = 0x00FF0000u, //!< RegId mask.

    kRegTypeShift = 24, //!< RegType shift.
    kRegTypeMask = 0xFF000000u //!< RegType mask.
  };

  //! \name Initialization & Reset
  //! \{

  // These initialize the whole `FuncValue` to either register or stack. Useful
  // when you know all of these properties and wanna just set it up.

  //! Initializes the `typeId` of this `FuncValue` (clears everything else).
  inline void initTypeId(uint32_t typeId) noexcept {
    _data = typeId << kTypeIdShift;
  }

  //! Initializes this value to a register of `regType` and `regId` with the
  //! given `typeId` and optional additional `flags`.
  inline void initReg(uint32_t regType, uint32_t regId, uint32_t typeId, uint32_t flags = 0) noexcept {
    _data = (regType << kRegTypeShift) | (regId << kRegIdShift) | (typeId << kTypeIdShift) | kFlagIsReg | flags;
  }

  //! Initializes this value to a stack slot at `offset` with the given `typeId`.
  inline void initStack(int32_t offset, uint32_t typeId) noexcept {
    _data = (uint32_t(offset) << kStackOffsetShift) | (typeId << kTypeIdShift) | kFlagIsStack;
  }

  //! Resets the value to its unassigned state.
  inline void reset() noexcept { _data = 0; }

  //! \}

  //! \name Assign
  //! \{

  // These initialize only part of `FuncValue`, useful when building `FuncValue`
  // incrementally. The caller should first init the type-id by calling `initTypeId`
  // and then continue building either register or stack.

  //! Assigns a register of `regType` and `regId` (register bits must be unassigned).
  inline void assignRegData(uint32_t regType, uint32_t regId) noexcept {
    ASMJIT_ASSERT((_data & (kRegTypeMask | kRegIdMask)) == 0);
    _data |= (regType << kRegTypeShift) | (regId << kRegIdShift) | kFlagIsReg;
  }

  //! Assigns a stack `offset` (stack offset bits must be unassigned).
  inline void assignStackOffset(int32_t offset) noexcept {
    ASMJIT_ASSERT((_data & kStackOffsetMask) == 0);
    _data |= (uint32_t(offset) << kStackOffsetShift) | kFlagIsStack;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Replaces the bits selected by `mask` with `value` (must be pre-shifted).
  inline void _replaceValue(uint32_t mask, uint32_t value) noexcept { _data = (_data & ~mask) | value; }

  //! Tests whether the `FuncValue` has a flag `flag` set.
  inline bool hasFlag(uint32_t flag) const noexcept { return (_data & flag) != 0; }
  //! Adds `flags` to `FuncValue`.
  inline void addFlags(uint32_t flags) noexcept { _data |= flags; }
  //! Clears `flags` of `FuncValue`.
  inline void clearFlags(uint32_t flags) noexcept { _data &= ~flags; }

  //! Tests whether the value is initialized (i.e. contains a valid data).
  inline bool isInitialized() const noexcept { return _data != 0; }
  //! Tests whether the argument is passed by register.
  inline bool isReg() const noexcept { return hasFlag(kFlagIsReg); }
  //! Tests whether the argument is passed by stack.
  inline bool isStack() const noexcept { return hasFlag(kFlagIsStack); }
  //! Tests whether the argument is assigned (passed by either register or stack).
  inline bool isAssigned() const noexcept { return hasFlag(kFlagIsReg | kFlagIsStack); }
  //! Tests whether the argument is passed through a pointer (used by WIN64 to pass XMM|YMM|ZMM).
  inline bool isIndirect() const noexcept { return hasFlag(kFlagIsIndirect); }

  //! Tests whether the argument was already processed (used internally).
  inline bool isDone() const noexcept { return hasFlag(kFlagIsDone); }

  //! Returns a register type of the register used to pass function argument or return value.
  inline uint32_t regType() const noexcept { return (_data & kRegTypeMask) >> kRegTypeShift; }
  //! Sets a register type of the register used to pass function argument or return value.
  inline void setRegType(uint32_t regType) noexcept { _replaceValue(kRegTypeMask, regType << kRegTypeShift); }

  //! Returns a physical id of the register used to pass function argument or return value.
  inline uint32_t regId() const noexcept { return (_data & kRegIdMask) >> kRegIdShift; }
  //! Sets a physical id of the register used to pass function argument or return value.
  inline void setRegId(uint32_t regId) noexcept { _replaceValue(kRegIdMask, regId << kRegIdShift); }

  //! Returns a stack offset of this argument.
  //!
  //! The offset is stored in the topmost 20 bits (see `kStackOffsetMask`,
  //! which must occupy MSBs); the cast to `int32_t` before the right shift
  //! relies on arithmetic shift to sign-extend negative offsets.
  inline int32_t stackOffset() const noexcept { return int32_t(_data & kStackOffsetMask) >> kStackOffsetShift; }
  //! Sets a stack offset of this argument.
  inline void setStackOffset(int32_t offset) noexcept { _replaceValue(kStackOffsetMask, uint32_t(offset) << kStackOffsetShift); }

  //! Tests whether the argument or return value has associated `Type::Id`.
  inline bool hasTypeId() const noexcept { return (_data & kTypeIdMask) != 0; }
  //! Returns a TypeId of this argument or return value.
  inline uint32_t typeId() const noexcept { return (_data & kTypeIdMask) >> kTypeIdShift; }
  //! Sets a TypeId of this argument or return value.
  inline void setTypeId(uint32_t typeId) noexcept { _replaceValue(kTypeIdMask, typeId << kTypeIdShift); }

  //! \}
};
307
308 // ============================================================================
309 // [asmjit::FuncDetail]
310 // ============================================================================
311
//! Function detail - CallConv and expanded FuncSignature.
//!
//! Function detail is architecture and OS dependent representation of a function.
//! It contains calling convention and expanded function signature so all
//! arguments have assigned either register type & id or stack address.
class FuncDetail {
public:
  //! Calling convention.
  CallConv _callConv;
  //! Number of function arguments.
  uint8_t _argCount;
  //! Number of function return values.
  uint8_t _retCount;
  //! Variable arguments index or `kNoVarArgs`.
  uint8_t _vaIndex;
  //! Reserved for future use.
  uint8_t _reserved;
  //! Registers that contains arguments.
  uint32_t _usedRegs[BaseReg::kGroupVirt];
  //! Size of arguments passed by stack.
  uint32_t _argStackSize;
  //! Function return values.
  FuncValue _rets[2];
  //! Function arguments (LO and HI parts, see `FuncArgIndex`).
  FuncValue _args[kFuncArgCountLoHi];

  enum : uint8_t {
    //! Doesn't have variable number of arguments (`...`).
    kNoVarArgs = 0xFF
  };

  //! \name Construction & Destruction
  //! \{

  inline FuncDetail() noexcept { reset(); }
  inline FuncDetail(const FuncDetail& other) noexcept = default;

  //! Initializes this `FuncDetail` to the given signature.
  ASMJIT_API Error init(const FuncSignature& sign);
  //! Resets this `FuncDetail` to a zeroed (uninitialized) state.
  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the function's calling convention, see `CallConv`.
  inline const CallConv& callConv() const noexcept { return _callConv; }

  //! Returns the associated calling convention flags, see `CallConv::Flags`.
  inline uint32_t flags() const noexcept { return _callConv.flags(); }
  //! Checks whether a CallConv flag `ccFlag` is set, see `CallConv::Flags`.
  inline bool hasFlag(uint32_t ccFlag) const noexcept { return _callConv.hasFlag(ccFlag); }

  //! Returns count of function return values.
  inline uint32_t retCount() const noexcept { return _retCount; }
  //! Returns the number of function arguments.
  inline uint32_t argCount() const noexcept { return _argCount; }

  //! Tests whether the function has a return value.
  inline bool hasRet() const noexcept { return _retCount != 0; }
  //! Returns function return value associated with the given `index`.
  inline FuncValue& ret(uint32_t index = 0) noexcept {
    ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_rets));
    return _rets[index];
  }
  //! Returns function return value associated with the given `index` (const).
  inline const FuncValue& ret(uint32_t index = 0) const noexcept {
    ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_rets));
    return _rets[index];
  }

  //! Returns function arguments array.
  inline FuncValue* args() noexcept { return _args; }
  //! Returns function arguments array (const).
  inline const FuncValue* args() const noexcept { return _args; }

  //! Tests whether the argument at the given `index` is assigned.
  inline bool hasArg(uint32_t index) const noexcept {
    ASMJIT_ASSERT(index < kFuncArgCountLoHi);
    return _args[index].isInitialized();
  }

  //! Returns function argument at the given `index`.
  inline FuncValue& arg(uint32_t index) noexcept {
    ASMJIT_ASSERT(index < kFuncArgCountLoHi);
    return _args[index];
  }

  //! Returns function argument at the given index `index` (const).
  inline const FuncValue& arg(uint32_t index) const noexcept {
    ASMJIT_ASSERT(index < kFuncArgCountLoHi);
    return _args[index];
  }

  //! Resets the function argument at the given `index`.
  inline void resetArg(uint32_t index) noexcept {
    ASMJIT_ASSERT(index < kFuncArgCountLoHi);
    _args[index].reset();
  }

  //! Tests whether the function has variable number of arguments (...).
  inline bool hasVarArgs() const noexcept { return _vaIndex != kNoVarArgs; }
  //! Returns the variable arguments (...) index, `kNoVarArgs` if none.
  inline uint32_t vaIndex() const noexcept { return _vaIndex; }

  //! Tests whether the function passes one or more argument by stack.
  inline bool hasStackArgs() const noexcept { return _argStackSize != 0; }
  //! Returns stack size needed for function arguments passed on the stack.
  inline uint32_t argStackSize() const noexcept { return _argStackSize; }

  //! Returns red zone size of the calling convention.
  inline uint32_t redZoneSize() const noexcept { return _callConv.redZoneSize(); }
  //! Returns spill zone size of the calling convention.
  inline uint32_t spillZoneSize() const noexcept { return _callConv.spillZoneSize(); }
  //! Returns natural stack alignment of the calling convention.
  inline uint32_t naturalStackAlignment() const noexcept { return _callConv.naturalStackAlignment(); }

  //! Returns mask of registers of the given `group` used to pass arguments.
  inline uint32_t passedRegs(uint32_t group) const noexcept { return _callConv.passedRegs(group); }
  //! Returns mask of registers of the given `group` the callee must preserve.
  inline uint32_t preservedRegs(uint32_t group) const noexcept { return _callConv.preservedRegs(group); }

  //! Returns mask of registers of the given `group` that contain arguments.
  inline uint32_t usedRegs(uint32_t group) const noexcept {
    ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
    return _usedRegs[group];
  }

  //! Adds `regs` (as a mask) to used registers of the given `group`.
  inline void addUsedRegs(uint32_t group, uint32_t regs) noexcept {
    ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
    _usedRegs[group] |= regs;
  }

  //! \}
};
438
439 // ============================================================================
440 // [asmjit::FuncFrame]
441 // ============================================================================
442
443 //! Function frame.
444 //!
445 //! Function frame is used directly by prolog and epilog insertion (PEI) utils.
446 //! It provides information necessary to insert a proper and ABI comforming
447 //! prolog and epilog. Function frame calculation is based on `CallConv` and
448 //! other function attributes.
449 //!
450 //! Function Frame Structure
451 //! ------------------------
452 //!
453 //! Various properties can contribute to the size and structure of the function
454 //! frame. The function frame in most cases won't use all of the properties
455 //! illustrated (for example Spill Zone and Red Zone are never used together).
456 //!
457 //! +-----------------------------+
458 //! | Arguments Passed by Stack |
459 //! +-----------------------------+
460 //! | Spill Zone |
461 //! +-----------------------------+ <- Stack offset (args) starts from here.
462 //! | Return Address if Pushed |
463 //! +-----------------------------+ <- Stack pointer (SP) upon entry.
464 //! | Save/Restore Stack. |
465 //! +-----------------------------+-----------------------------+
466 //! | Local Stack | |
467 //! +-----------------------------+ Final Stack |
468 //! | Call Stack | |
469 //! +-----------------------------+-----------------------------+ <- SP after prolog.
470 //! | Red Zone |
471 //! +-----------------------------+
472 class FuncFrame {
473 public:
474 enum Tag : uint32_t {
475 kTagInvalidOffset = 0xFFFFFFFFu //!< Tag used to inform that some offset is invalid.
476 };
477
478 //! Attributes are designed in a way that all are initially false, and user
479 //! or FuncFrame finalizer adds them when necessary.
480 enum Attributes : uint32_t {
481 kAttrHasVarArgs = 0x00000001u, //!< Function has variable number of arguments.
482 kAttrHasPreservedFP = 0x00000010u, //!< Preserve frame pointer (don't omit FP).
483 kAttrHasFuncCalls = 0x00000020u, //!< Function calls other functions (is not leaf).
484
485 kAttrX86AvxEnabled = 0x00010000u, //!< Use AVX instead of SSE for all operations (X86).
486 kAttrX86AvxCleanup = 0x00020000u, //!< Emit VZEROUPPER instruction in epilog (X86).
487 kAttrX86MmxCleanup = 0x00040000u, //!< Emit EMMS instruction in epilog (X86).
488
489 kAttrAlignedVecSR = 0x40000000u, //!< Function has aligned save/restore of vector registers.
490 kAttrIsFinalized = 0x80000000u //!< FuncFrame is finalized and can be used by PEI.
491 };
492
493 //! Function attributes.
494 uint32_t _attributes;
495
496 //! Architecture ID.
497 uint8_t _archId;
498 //! SP register ID (to access call stack and local stack).
499 uint8_t _spRegId;
500 //! SA register ID (to access stack arguments).
501 uint8_t _saRegId;
502
503 //! Red zone size (copied from CallConv).
504 uint8_t _redZoneSize;
505 //! Spill zone size (copied from CallConv).
506 uint8_t _spillZoneSize;
507 //! Natural stack alignment (copied from CallConv).
508 uint8_t _naturalStackAlignment;
509 //! Minimum stack alignment to turn on dynamic alignment.
510 uint8_t _minDynamicAlignment;
511
512 //! Call stack alignment.
513 uint8_t _callStackAlignment;
514 //! Local stack alignment.
515 uint8_t _localStackAlignment;
516 //! Final stack alignment.
517 uint8_t _finalStackAlignment;
518
519 //! Adjustment of the stack before returning (X86-STDCALL).
520 uint16_t _calleeStackCleanup;
521
522 //! Call stack size.
523 uint32_t _callStackSize;
524 //! Local stack size.
525 uint32_t _localStackSize;
526 //! Final stack size (sum of call stack and local stack).
527 uint32_t _finalStackSize;
528
529 //! Local stack offset (non-zero only if call stack is used).
530 uint32_t _localStackOffset;
531 //! Offset relative to SP that contains previous SP (before alignment).
532 uint32_t _daOffset;
533 //! Offset of the first stack argument relative to SP.
534 uint32_t _saOffsetFromSP;
535 //! Offset of the first stack argument relative to SA (_saRegId or FP).
536 uint32_t _saOffsetFromSA;
537
538 //! Local stack adjustment in prolog/epilog.
539 uint32_t _stackAdjustment;
540
541 //! Registers that are dirty.
542 uint32_t _dirtyRegs[BaseReg::kGroupVirt];
543 //! Registers that must be preserved (copied from CallConv).
544 uint32_t _preservedRegs[BaseReg::kGroupVirt];
545
546 //! Final stack size required to save GP regs.
547 uint16_t _gpSaveSize;
548 //! Final Stack size required to save other than GP regs.
549 uint16_t _nonGpSaveSize;
550 //! Final offset where saved GP regs are stored.
551 uint32_t _gpSaveOffset;
552 //! Final offset where saved other than GP regs are stored.
553 uint32_t _nonGpSaveOffset;
554
555 //! \name Construction & Destruction
556 //! \{
557
558 inline FuncFrame() noexcept { reset(); }
559 inline FuncFrame(const FuncFrame& other) noexcept = default;
560
561 ASMJIT_API Error init(const FuncDetail& func) noexcept;
562
563 inline void reset() noexcept {
564 memset(this, 0, sizeof(FuncFrame));
565 _spRegId = BaseReg::kIdBad;
566 _saRegId = BaseReg::kIdBad;
567 _daOffset = kTagInvalidOffset;
568 }
569
570 //! \}
571
572 //! \name Accessors
573 //! \{
574
575 //! Returns the target architecture of the function frame.
576 inline uint32_t archId() const noexcept { return _archId; }
577
578 //! Returns function frame attributes, see `Attributes`.
579 inline uint32_t attributes() const noexcept { return _attributes; }
580 //! Checks whether the FuncFame contains an attribute `attr`.
581 inline bool hasAttribute(uint32_t attr) const noexcept { return (_attributes & attr) != 0; }
582 //! Adds attributes `attrs` to the FuncFrame.
583 inline void addAttributes(uint32_t attrs) noexcept { _attributes |= attrs; }
584 //! Clears attributes `attrs` from the FrameFrame.
585 inline void clearAttributes(uint32_t attrs) noexcept { _attributes &= ~attrs; }
586
587 //! Tests whether the function has variable number of arguments.
588 inline bool hasVarArgs() const noexcept { return hasAttribute(kAttrHasVarArgs); }
589 //! Sets the variable arguments flag.
590 inline void setVarArgs() noexcept { addAttributes(kAttrHasVarArgs); }
591 //! Resets variable arguments flag.
592 inline void resetVarArgs() noexcept { clearAttributes(kAttrHasVarArgs); }
593
594 //! Tests whether the function preserves frame pointer (EBP|ESP on X86).
595 inline bool hasPreservedFP() const noexcept { return hasAttribute(kAttrHasPreservedFP); }
596 //! Enables preserved frame pointer.
597 inline void setPreservedFP() noexcept { addAttributes(kAttrHasPreservedFP); }
598 //! Disables preserved frame pointer.
599 inline void resetPreservedFP() noexcept { clearAttributes(kAttrHasPreservedFP); }
600
601 //! Tests whether the function calls other functions.
602 inline bool hasFuncCalls() const noexcept { return hasAttribute(kAttrHasFuncCalls); }
603 //! Sets `kFlagHasCalls` to true.
604 inline void setFuncCalls() noexcept { addAttributes(kAttrHasFuncCalls); }
605 //! Sets `kFlagHasCalls` to false.
606 inline void resetFuncCalls() noexcept { clearAttributes(kAttrHasFuncCalls); }
607
608 //! Tests whether the function contains AVX cleanup - 'vzeroupper' instruction in epilog.
609 inline bool hasAvxCleanup() const noexcept { return hasAttribute(kAttrX86AvxCleanup); }
610 //! Enables AVX cleanup.
611 inline void setAvxCleanup() noexcept { addAttributes(kAttrX86AvxCleanup); }
612 //! Disables AVX cleanup.
613 inline void resetAvxCleanup() noexcept { clearAttributes(kAttrX86AvxCleanup); }
614
615 //! Tests whether the function contains AVX cleanup - 'vzeroupper' instruction in epilog.
616 inline bool isAvxEnabled() const noexcept { return hasAttribute(kAttrX86AvxEnabled); }
617 //! Enables AVX cleanup.
618 inline void setAvxEnabled() noexcept { addAttributes(kAttrX86AvxEnabled); }
619 //! Disables AVX cleanup.
620 inline void resetAvxEnabled() noexcept { clearAttributes(kAttrX86AvxEnabled); }
621
622 //! Tests whether the function contains MMX cleanup - 'emms' instruction in epilog.
623 inline bool hasMmxCleanup() const noexcept { return hasAttribute(kAttrX86MmxCleanup); }
624 //! Enables MMX cleanup.
625 inline void setMmxCleanup() noexcept { addAttributes(kAttrX86MmxCleanup); }
626 //! Disables MMX cleanup.
627 inline void resetMmxCleanup() noexcept { clearAttributes(kAttrX86MmxCleanup); }
628
629 //! Tests whether the function uses call stack.
630 inline bool hasCallStack() const noexcept { return _callStackSize != 0; }
631 //! Tests whether the function uses local stack.
632 inline bool hasLocalStack() const noexcept { return _localStackSize != 0; }
633 //! Tests whether vector registers can be saved and restored by using aligned reads and writes.
634 inline bool hasAlignedVecSR() const noexcept { return hasAttribute(kAttrAlignedVecSR); }
635 //! Tests whether the function has to align stack dynamically.
636 inline bool hasDynamicAlignment() const noexcept { return _finalStackAlignment >= _minDynamicAlignment; }
637
638 //! Tests whether the calling convention specifies 'RedZone'.
639 inline bool hasRedZone() const noexcept { return _redZoneSize != 0; }
640 //! Tests whether the calling convention specifies 'SpillZone'.
641 inline bool hasSpillZone() const noexcept { return _spillZoneSize != 0; }
642
643 //! Returns the size of 'RedZone'.
644 inline uint32_t redZoneSize() const noexcept { return _redZoneSize; }
645 //! Returns the size of 'SpillZone'.
646 inline uint32_t spillZoneSize() const noexcept { return _spillZoneSize; }
647 //! Returns natural stack alignment (guaranteed stack alignment upon entry).
648 inline uint32_t naturalStackAlignment() const noexcept { return _naturalStackAlignment; }
649 //! Returns natural stack alignment (guaranteed stack alignment upon entry).
650 inline uint32_t minDynamicAlignment() const noexcept { return _minDynamicAlignment; }
651
652 //! Tests whether the callee must adjust SP before returning (X86-STDCALL only)
653 inline bool hasCalleeStackCleanup() const noexcept { return _calleeStackCleanup != 0; }
654 //! Returns home many bytes of the stack the the callee must adjust before returning (X86-STDCALL only)
655 inline uint32_t calleeStackCleanup() const noexcept { return _calleeStackCleanup; }
656
657 //! Returns call stack alignment.
658 inline uint32_t callStackAlignment() const noexcept { return _callStackAlignment; }
659 //! Returns local stack alignment.
660 inline uint32_t localStackAlignment() const noexcept { return _localStackAlignment; }
661 //! Returns final stack alignment (the maximum value of call, local, and natural stack alignments).
662 inline uint32_t finalStackAlignment() const noexcept { return _finalStackAlignment; }
663
664 //! Sets call stack alignment.
665 //!
666 //! \note This also updates the final stack alignment.
667 inline void setCallStackAlignment(uint32_t alignment) noexcept {
668 _callStackAlignment = uint8_t(alignment);
669 _finalStackAlignment = Support::max(_naturalStackAlignment, _callStackAlignment, _localStackAlignment);
670 }
671
672 //! Sets local stack alignment.
673 //!
674 //! \note This also updates the final stack alignment.
675 inline void setLocalStackAlignment(uint32_t value) noexcept {
676 _localStackAlignment = uint8_t(value);
677 _finalStackAlignment = Support::max(_naturalStackAlignment, _callStackAlignment, _localStackAlignment);
678 }
679
680 //! Combines call stack alignment with `alignment`, updating it to the greater value.
681 //!
682 //! \note This also updates the final stack alignment.
683 inline void updateCallStackAlignment(uint32_t alignment) noexcept {
684 _callStackAlignment = uint8_t(Support::max<uint32_t>(_callStackAlignment, alignment));
685 _finalStackAlignment = Support::max(_finalStackAlignment, _callStackAlignment);
686 }
687
688 //! Combines local stack alignment with `alignment`, updating it to the greater value.
689 //!
690 //! \note This also updates the final stack alignment.
691 inline void updateLocalStackAlignment(uint32_t alignment) noexcept {
692 _localStackAlignment = uint8_t(Support::max<uint32_t>(_localStackAlignment, alignment));
693 _finalStackAlignment = Support::max(_finalStackAlignment, _localStackAlignment);
694 }
695
696 //! Returns call stack size.
697 inline uint32_t callStackSize() const noexcept { return _callStackSize; }
698 //! Returns local stack size.
699 inline uint32_t localStackSize() const noexcept { return _localStackSize; }
700
701 //! Sets call stack size.
702 inline void setCallStackSize(uint32_t size) noexcept { _callStackSize = size; }
703 //! Sets local stack size.
704 inline void setLocalStackSize(uint32_t size) noexcept { _localStackSize = size; }
705
706 //! Combines call stack size with `size`, updating it to the greater value.
707 inline void updateCallStackSize(uint32_t size) noexcept { _callStackSize = Support::max(_callStackSize, size); }
708 //! Combines local stack size with `size`, updating it to the greater value.
709 inline void updateLocalStackSize(uint32_t size) noexcept { _localStackSize = Support::max(_localStackSize, size); }
710
711 //! Returns final stack size (only valid after the FuncFrame is finalized).
712 inline uint32_t finalStackSize() const noexcept { return _finalStackSize; }
713
714 //! Returns an offset to access the local stack (non-zero only if call stack is used).
715 inline uint32_t localStackOffset() const noexcept { return _localStackOffset; }
716
717 //! Tests whether the function prolog/epilog requires a memory slot for storing unaligned SP.
718 inline bool hasDAOffset() const noexcept { return _daOffset != kTagInvalidOffset; }
719 //! Returns a memory offset used to store DA (dynamic alignment) slot (relative to SP).
720 inline uint32_t daOffset() const noexcept { return _daOffset; }
721
722 inline uint32_t saOffset(uint32_t regId) const noexcept {
723 return regId == _spRegId ? saOffsetFromSP()
724 : saOffsetFromSA();
725 }
726
727 inline uint32_t saOffsetFromSP() const noexcept { return _saOffsetFromSP; }
728 inline uint32_t saOffsetFromSA() const noexcept { return _saOffsetFromSA; }
729
730 //! Returns mask of registers of the given register `group` that are modified
731 //! by the function. The engine would then calculate which registers must be
732 //! saved & restored by the function by using the data provided by the calling
733 //! convention.
734 inline uint32_t dirtyRegs(uint32_t group) const noexcept {
735 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
736 return _dirtyRegs[group];
737 }
738
739 //! Sets which registers (as a mask) are modified by the function.
740 //!
741 //! \remarks Please note that this will completely overwrite the existing
742 //! register mask, use `addDirtyRegs()` to modify the existing register
743 //! mask.
744 inline void setDirtyRegs(uint32_t group, uint32_t regs) noexcept {
745 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
746 _dirtyRegs[group] = regs;
747 }
748
749 //! Adds which registers (as a mask) are modified by the function.
750 inline void addDirtyRegs(uint32_t group, uint32_t regs) noexcept {
751 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
752 _dirtyRegs[group] |= regs;
753 }
754
755 //! \overload
756 inline void addDirtyRegs(const BaseReg& reg) noexcept {
757 ASMJIT_ASSERT(reg.id() < Globals::kMaxPhysRegs);
758 addDirtyRegs(reg.group(), Support::bitMask(reg.id()));
759 }
760
761 //! \overload
762 template<typename... Args>
763 ASMJIT_INLINE void addDirtyRegs(const BaseReg& reg, Args&&... args) noexcept {
764 addDirtyRegs(reg);
765 addDirtyRegs(std::forward<Args>(args)...);
766 }
767
768 inline void setAllDirty() noexcept {
769 _dirtyRegs[0] = 0xFFFFFFFFu;
770 _dirtyRegs[1] = 0xFFFFFFFFu;
771 _dirtyRegs[2] = 0xFFFFFFFFu;
772 _dirtyRegs[3] = 0xFFFFFFFFu;
773 }
774
775 inline void setAllDirty(uint32_t group) noexcept {
776 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
777 _dirtyRegs[group] = 0xFFFFFFFFu;
778 }
779
  //! Returns a calculated mask of registers of the given `group` that will be
  //! saved and restored in the function's prolog and epilog, respectively. The
  //! register mask is calculated from both `dirtyRegs` (provided by user) and
  //! `preservedMask` (provided by the calling convention).
  inline uint32_t savedRegs(uint32_t group) const noexcept {
    ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
    return _dirtyRegs[group] & _preservedRegs[group];
  }

  //! Returns the mask of preserved registers of the given register `group`.
  //!
  //! Preserved registers are those that must survive the function call
  //! unmodified. The function can only modify preserved registers if they
  //! are saved and restored in function's prolog and epilog, respectively.
  inline uint32_t preservedRegs(uint32_t group) const noexcept {
    ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
    return _preservedRegs[group];
  }
798
799 inline bool hasSARegId() const noexcept { return _saRegId != BaseReg::kIdBad; }
800 inline uint32_t saRegId() const noexcept { return _saRegId; }
801 inline void setSARegId(uint32_t regId) { _saRegId = uint8_t(regId); }
802 inline void resetSARegId() { setSARegId(BaseReg::kIdBad); }
803
  //! Returns stack size required to save GP registers.
  inline uint32_t gpSaveSize() const noexcept { return _gpSaveSize; }
  //! Returns stack size required to save other than GP registers (MM, XMM|YMM|ZMM, K, VFP, etc...).
  inline uint32_t nonGpSaveSize() const noexcept { return _nonGpSaveSize; }

  //! Returns an offset to the stack where general purpose registers are saved.
  inline uint32_t gpSaveOffset() const noexcept { return _gpSaveOffset; }
  //! Returns an offset to the stack where other than GP registers are saved.
  inline uint32_t nonGpSaveOffset() const noexcept { return _nonGpSaveOffset; }

  //! Tests whether the function contains stack adjustment.
  inline bool hasStackAdjustment() const noexcept { return _stackAdjustment != 0; }
  //! Returns function's stack adjustment used in function's prolog and epilog.
  //!
  //! If the returned value is zero it means that the stack is not adjusted.
  //! This can mean both that the stack is not used and/or the stack is only
  //! adjusted by instructions that push/pop registers into/from stack.
  inline uint32_t stackAdjustment() const noexcept { return _stackAdjustment; }

  //! \}

  //! \name Finalization
  //! \{

  //! Finalizes the frame (implemented in core/func.cpp). Presumably computes
  //! the save sizes/offsets and stack adjustment exposed by the accessors
  //! above from the attributes and dirty registers - confirm in the
  //! implementation.
  ASMJIT_API Error finalize() noexcept;
829
830 //! \}
831 };
832
833 // ============================================================================
834 // [asmjit::FuncArgsAssignment]
835 // ============================================================================
836
837 //! A helper class that can be used to assign a physical register for each
838 //! function argument. Use with `BaseEmitter::emitArgsAssignment()`.
839 class FuncArgsAssignment {
840 public:
841 //! Function detail.
842 const FuncDetail* _funcDetail;
843 //! Register that can be used to access arguments passed by stack.
844 uint8_t _saRegId;
845 //! Reserved for future use.
846 uint8_t _reserved[3];
847 //! Mapping of each function argument.
848 FuncValue _args[kFuncArgCountLoHi];
849
850 //! \name Construction & Destruction
851 //! \{
852
853 inline explicit FuncArgsAssignment(const FuncDetail* fd = nullptr) noexcept { reset(fd); }
854
855 inline FuncArgsAssignment(const FuncArgsAssignment& other) noexcept {
856 memcpy(this, &other, sizeof(*this));
857 }
858
859 inline void reset(const FuncDetail* fd = nullptr) noexcept {
860 _funcDetail = fd;
861 _saRegId = uint8_t(BaseReg::kIdBad);
862 memset(_reserved, 0, sizeof(_reserved));
863 memset(_args, 0, sizeof(_args));
864 }
865
866 //! \}
867
868 //! \name Accessors
869 //! \{
870
871 inline const FuncDetail* funcDetail() const noexcept { return _funcDetail; }
872 inline void setFuncDetail(const FuncDetail* fd) noexcept { _funcDetail = fd; }
873
874 inline bool hasSARegId() const noexcept { return _saRegId != BaseReg::kIdBad; }
875 inline uint32_t saRegId() const noexcept { return _saRegId; }
876 inline void setSARegId(uint32_t regId) { _saRegId = uint8_t(regId); }
877 inline void resetSARegId() { _saRegId = uint8_t(BaseReg::kIdBad); }
878
879 inline FuncValue& arg(uint32_t index) noexcept {
880 ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_args));
881 return _args[index];
882 }
883 inline const FuncValue& arg(uint32_t index) const noexcept {
884 ASMJIT_ASSERT(index < ASMJIT_ARRAY_SIZE(_args));
885 return _args[index];
886 }
887
888 inline bool isAssigned(uint32_t argIndex) const noexcept {
889 ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args));
890 return _args[argIndex].isAssigned();
891 }
892
893 inline void assignReg(uint32_t argIndex, const BaseReg& reg, uint32_t typeId = Type::kIdVoid) noexcept {
894 ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args));
895 ASMJIT_ASSERT(reg.isPhysReg());
896 _args[argIndex].initReg(reg.type(), reg.id(), typeId);
897 }
898
899 inline void assignReg(uint32_t argIndex, uint32_t regType, uint32_t regId, uint32_t typeId = Type::kIdVoid) noexcept {
900 ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args));
901 _args[argIndex].initReg(regType, regId, typeId);
902 }
903
904 inline void assignStack(uint32_t argIndex, int32_t offset, uint32_t typeId = Type::kIdVoid) {
905 ASMJIT_ASSERT(argIndex < ASMJIT_ARRAY_SIZE(_args));
906 _args[argIndex].initStack(offset, typeId);
907 }
908
909 // NOTE: All `assignAll()` methods are shortcuts to assign all arguments at
910 // once, however, since registers are passed all at once these initializers
911 // don't provide any way to pass TypeId and/or to keep any argument between
912 // the arguments passed unassigned.
913 inline void _assignAllInternal(uint32_t argIndex, const BaseReg& reg) noexcept {
914 assignReg(argIndex, reg);
915 }
916
917 template<typename... Args>
918 inline void _assignAllInternal(uint32_t argIndex, const BaseReg& reg, Args&&... args) noexcept {
919 assignReg(argIndex, reg);
920 _assignAllInternal(argIndex + 1, std::forward<Args>(args)...);
921 }
922
923 template<typename... Args>
924 inline void assignAll(Args&&... args) noexcept {
925 _assignAllInternal(0, std::forward<Args>(args)...);
926 }
927
928 //! \}
929
930 //! \name Utilities
931 //! \{
932
933 //! Update `FuncFrame` based on function's arguments assignment.
934 //!
935 //! \note You MUST call this in orher to use `BaseEmitter::emitArgsAssignment()`,
936 //! otherwise the FuncFrame would not contain the information necessary to
937 //! assign all arguments into the registers and/or stack specified.
938 ASMJIT_API Error updateFuncFrame(FuncFrame& frame) const noexcept;
939
940 //! \}
941 };
942
943 //! \}
944
945 ASMJIT_END_NAMESPACE
946
947 #endif // _ASMJIT_CORE_FUNC_H
948
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/globals.h"
8 #include "../core/support.h"
9
10 ASMJIT_BEGIN_NAMESPACE
11
12 // ============================================================================
13 // [asmjit::DebugUtils]
14 // ============================================================================
15
ASMJIT_FAVOR_SIZE const char* DebugUtils::errorAsString(Error err) noexcept {
#ifndef ASMJIT_NO_TEXT
  // Messages are packed as consecutive NUL-terminated strings and the N-th
  // one is located by `Support::findPackedString()`. The order and count MUST
  // stay in lockstep with the `ErrorCode` enumeration (core/globals.h).
  static const char errorMessages[] =
    "Ok\0"
    "Out of memory\0"
    "Invalid argument\0"
    "Invalid state\0"
    "Invalid architecture\0"
    "Not initialized\0"
    "Already initialized\0"
    "Feature not enabled\0"
    "Too many handles or file descriptors\0"
    "Too large (code or memory request)\0"
    "No code generated\0"
    "Invalid directive\0"
    "Invalid label\0"
    "Too many labels\0"
    "Label already bound\0"
    "Label already defined\0"
    "Label name too long\0"
    "Invalid label name\0"
    "Invalid parent label\0"
    "Non-local label can't have parent\0"
    "Invalid section\0"
    "Too many sections\0"
    "Invalid section name\0"
    "Too many relocations\0"
    "Invalid relocation entry\0"
    "Relocation offset out of range\0"
    "Invalid assignment\0"
    "Invalid instruction\0"
    "Invalid register type\0"
    "Invalid register group\0"
    "Invalid register physical id\0"
    "Invalid register virtual id\0"
    "Invalid prefix combination\0"
    "Invalid lock prefix\0"
    "Invalid xacquire prefix\0"
    "Invalid xrelease prefix\0"
    "Invalid rep prefix\0"
    "Invalid rex prefix\0"
    // NOTE(review): trailing space before the terminator below looks
    // unintentional - verify before normalizing (message text is runtime
    // output, so it is kept byte-identical here).
    "Invalid {...} register \0"
    "Invalid use of {k}\0"
    "Invalid use of {k}{z}\0"
    "Invalid broadcast {1tox}\0"
    "Invalid {er} or {sae} option\0"
    "Invalid address\0"
    "Invalid address index\0"
    "Invalid address scale\0"
    "Invalid use of 64-bit address or offset\0"
    "Invalid use of 64-bit address or offset that requires 32-bit zero-extension\0"
    "Invalid displacement\0"
    "Invalid segment\0"
    "Invalid immediate value\0"
    "Invalid operand size\0"
    "Ambiguous operand size\0"
    "Operand size mismatch\0"
    "Invalid option\0"
    "Option already defined\0"
    "Invalid type-info\0"
    "Invalid use of a low 8-bit GPB register\0"
    "Invalid use of a 64-bit GPQ register in 32-bit mode\0"
    "Invalid use of an 80-bit float\0"
    "Not consecutive registers\0"
    "No more physical registers\0"
    "Overlapped registers\0"
    "Overlapping register and arguments base-address register\0"
    "Unbound label cannot be evaluated by expression\0"
    "Arithmetic overflow during expression evaluation\0"
    "Unknown error\0";
  // Out-of-range codes are clamped to `kErrorCount`, which maps to the
  // trailing "Unknown error" entry.
  return Support::findPackedString(errorMessages, Support::min<Error>(err, kErrorCount));
#else
  ASMJIT_UNUSED(err);
  static const char noMessage[] = "";
  return noMessage;
#endif
}
93
ASMJIT_FAVOR_SIZE void DebugUtils::debugOutput(const char* str) noexcept {
#if defined(_WIN32)
  // On Windows route the message to the attached debugger.
  ::OutputDebugStringA(str);
#else
  // Elsewhere write the message to stderr.
  ::fputs(str, stderr);
#endif
}
101
102 ASMJIT_FAVOR_SIZE void DebugUtils::assertionFailed(const char* file, int line, const char* msg) noexcept {
103 char str[1024];
104
105 snprintf(str, 1024,
106 "[asmjit] Assertion failed at %s (line %d):\n"
107 "[asmjit] %s\n", file, line, msg);
108
109 debugOutput(str);
110 ::abort();
111 }
112
113 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_GLOBALS_H
7 #define _ASMJIT_CORE_GLOBALS_H
8
9 #include "../core/api-config.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 // ============================================================================
14 // [asmjit::Support]
15 // ============================================================================
16
17 //! \cond INTERNAL
18 //! \addtogroup Support
19 //! \{
namespace Support {
  //! Cast designed to cast between function and void* pointers.
  //!
  //! A C-style cast is used deliberately - converting between function
  //! pointers and object pointers is not expressible with `static_cast`.
  template<typename Dst, typename Src>
  static inline Dst ptr_cast_impl(Src p) noexcept { return (Dst)p; }
} // {Support}
25
#if defined(ASMJIT_NO_STDCXX)
namespace Support {
  //! Minimal allocation helpers used when building without the C++ standard
  //! library (`ASMJIT_NO_STDCXX`).
  ASMJIT_INLINE void* operatorNew(size_t n) noexcept { return malloc(n); }
  ASMJIT_INLINE void operatorDelete(void* p) noexcept { if (p) free(p); }
} // {Support}

//! Injects non-throwing `operator new` / `operator delete` (including the
//! placement forms) into a class so instances can be created without the
//! C++ standard library; expands to nothing otherwise.
#define ASMJIT_BASE_CLASS(TYPE) \
  ASMJIT_INLINE void* operator new(size_t n) noexcept { \
    return Support::operatorNew(n); \
  } \
  \
  ASMJIT_INLINE void operator delete(void* p) noexcept { \
    Support::operatorDelete(p); \
  } \
  \
  ASMJIT_INLINE void* operator new(size_t, void* p) noexcept { return p; } \
  ASMJIT_INLINE void operator delete(void*, void*) noexcept {}
#else
#define ASMJIT_BASE_CLASS(TYPE)
#endif
46
47 //! \}
48 //! \endcond
49
50 // ============================================================================
51 // [asmjit::Globals]
52 // ============================================================================
53
54 //! \addtogroup asmjit_core
55 //! \{
56
//! Contains typedefs, constants, and variables used globally by AsmJit.
namespace Globals {

// ============================================================================
// [asmjit::Globals::<global>]
// ============================================================================

//! Host memory allocator overhead.
constexpr uint32_t kAllocOverhead = uint32_t(sizeof(intptr_t) * 4);

//! Host memory allocator alignment.
constexpr uint32_t kAllocAlignment = 8;

//! Aggressive growing strategy threshold.
constexpr uint32_t kGrowThreshold = 1024 * 1024 * 16;

//! Maximum height of RB-Tree is:
//!
//! `2 * log2(n + 1)`.
//!
//! Size of RB node is at least two pointers (without data),
//! so a theoretical architecture limit would be:
//!
//! `2 * log2(addressableMemorySize / sizeof(Node) + 1)`
//!
//! Which yields 30 on 32-bit arch and 61 on 64-bit arch.
//! The final value was adjusted by +1 for safety reasons.
constexpr uint32_t kMaxTreeHeight = (ASMJIT_ARCH_BITS == 32 ? 30 : 61) + 1;

//! Maximum number of operands per a single instruction.
constexpr uint32_t kMaxOpCount = 6;

//! Maximum number of function arguments.
// TODO: Use this one.
constexpr uint32_t kMaxFuncArgs = 16;

//! Maximum number of physical registers AsmJit can use per register group.
constexpr uint32_t kMaxPhysRegs = 32;

//! Maximum alignment.
constexpr uint32_t kMaxAlignment = 64;

//! Maximum label or symbol size in bytes.
constexpr uint32_t kMaxLabelNameSize = 2048;

//! Maximum section name size.
constexpr uint32_t kMaxSectionNameSize = 35;

//! Maximum size of comment.
constexpr uint32_t kMaxCommentSize = 1024;

//! Invalid identifier.
constexpr uint32_t kInvalidId = 0xFFFFFFFFu;

//! Returned by `indexOf()` and similar when working with containers that use 32-bit index/size.
constexpr uint32_t kNotFound = 0xFFFFFFFFu;

//! Invalid base address.
constexpr uint64_t kNoBaseAddress = ~uint64_t(0);

// ============================================================================
// [asmjit::Globals::ResetPolicy]
// ============================================================================

//! Reset policy used by most `reset()` functions.
enum ResetPolicy : uint32_t {
  //! Soft reset, doesn't deallocate memory (default).
  kResetSoft = 0,
  //! Hard reset, releases all memory used, if any.
  kResetHard = 1
};

// ============================================================================
// [asmjit::Globals::Link]
// ============================================================================

//! Direction/position aliases used by linked-list and tree-like containers.
enum Link : uint32_t {
  kLinkLeft = 0,
  kLinkRight = 1,

  kLinkPrev = 0,
  kLinkNext = 1,

  kLinkFirst = 0,
  kLinkLast = 1,

  kLinkCount = 2
};

//! Tag type used to select initializing constructors.
struct Init_ {};
//! Tag type used to select non-initializing constructors.
struct NoInit_ {};

static const constexpr Init_ Init {};
static const constexpr NoInit_ NoInit {};

} // {Globals}
152
153 // ============================================================================
154 // [asmjit::Error]
155 // ============================================================================
156
//! AsmJit error type (uint32_t).
typedef uint32_t Error;

//! AsmJit error codes.
//!
//! \note The order of codes must stay in lockstep with the packed message
//! table in `DebugUtils::errorAsString()` (core/globals.cpp).
enum ErrorCode : uint32_t {
  //! No error (success).
  kErrorOk = 0,

  //! Out of memory.
  kErrorOutOfMemory,

  //! Invalid argument.
  kErrorInvalidArgument,

  //! Invalid state.
  //!
  //! If this error is returned it means that either you are doing something
  //! wrong or AsmJit caught itself by doing something wrong. This error should
  //! never be ignored.
  kErrorInvalidState,

  //! Invalid or incompatible architecture.
  kErrorInvalidArch,

  //! The object is not initialized.
  kErrorNotInitialized,
  //! The object is already initialized.
  kErrorAlreadyInitialized,

  //! Built-in feature was disabled at compile time and it's not available.
  kErrorFeatureNotEnabled,

  //! Too many handles (Windows) or file descriptors (Unix/Posix).
  kErrorTooManyHandles,
  //! Code generated is larger than allowed.
  kErrorTooLarge,

  //! No code generated.
  //!
  //! Returned by runtime if the `CodeHolder` contains no code.
  kErrorNoCodeGenerated,

  //! Invalid directive.
  kErrorInvalidDirective,
  //! Attempt to use uninitialized label.
  kErrorInvalidLabel,
  //! Label index overflow - a single `Assembler` instance can hold almost
  //! 2^32 (4 billion) labels. If there is an attempt to create more labels
  //! then this error is returned.
  kErrorTooManyLabels,
  //! Label is already bound.
  kErrorLabelAlreadyBound,
  //! Label is already defined (named labels).
  kErrorLabelAlreadyDefined,
  //! Label name is too long.
  kErrorLabelNameTooLong,
  //! Label must always be local if it's anonymous (without a name).
  kErrorInvalidLabelName,
  //! Parent id passed to `CodeHolder::newNamedLabelId()` was invalid.
  kErrorInvalidParentLabel,
  //! Parent id specified for a non-local (global) label.
  kErrorNonLocalLabelCantHaveParent,

  //! Invalid section.
  kErrorInvalidSection,
  //! Too many sections (section index overflow).
  kErrorTooManySections,
  //! Invalid section name (most probably too long).
  kErrorInvalidSectionName,

  //! Relocation index overflow (too many relocations).
  kErrorTooManyRelocations,
  //! Invalid relocation entry.
  kErrorInvalidRelocEntry,
  //! Reloc entry contains address that is out of range (unencodable).
  kErrorRelocOffsetOutOfRange,

  //! Invalid assignment to a register, function argument, or function return value.
  kErrorInvalidAssignment,
  //! Invalid instruction.
  kErrorInvalidInstruction,
  //! Invalid register type.
  kErrorInvalidRegType,
  //! Invalid register group.
  kErrorInvalidRegGroup,
  //! Invalid register's physical id.
  kErrorInvalidPhysId,
  //! Invalid register's virtual id.
  kErrorInvalidVirtId,
  //! Invalid prefix combination.
  kErrorInvalidPrefixCombination,
  //! Invalid LOCK prefix.
  kErrorInvalidLockPrefix,
  //! Invalid XACQUIRE prefix.
  kErrorInvalidXAcquirePrefix,
  //! Invalid XRELEASE prefix.
  kErrorInvalidXReleasePrefix,
  //! Invalid REP prefix.
  kErrorInvalidRepPrefix,
  //! Invalid REX prefix.
  kErrorInvalidRexPrefix,
  //! Invalid {...} register.
  kErrorInvalidExtraReg,
  //! Invalid {k} use (not supported by the instruction).
  kErrorInvalidKMaskUse,
  //! Invalid {k}{z} use (not supported by the instruction).
  kErrorInvalidKZeroUse,
  //! Invalid broadcast - Currently only related to invalid use of AVX-512 {1tox}.
  kErrorInvalidBroadcast,
  //! Invalid 'embedded-rounding' {er} or 'suppress-all-exceptions' {sae} (AVX-512).
  kErrorInvalidEROrSAE,
  //! Invalid address used (not encodable).
  kErrorInvalidAddress,
  //! Invalid index register used in memory address (not encodable).
  kErrorInvalidAddressIndex,
  //! Invalid address scale (not encodable).
  kErrorInvalidAddressScale,
  //! Invalid use of 64-bit address.
  kErrorInvalidAddress64Bit,
  //! Invalid use of 64-bit address that require 32-bit zero-extension (X64).
  kErrorInvalidAddress64BitZeroExtension,
  //! Invalid displacement (not encodable).
  kErrorInvalidDisplacement,
  //! Invalid segment (X86).
  kErrorInvalidSegment,

  //! Invalid immediate (out of bounds on X86 and invalid pattern on ARM).
  kErrorInvalidImmediate,

  //! Invalid operand size.
  kErrorInvalidOperandSize,
  //! Ambiguous operand size (memory has zero size while it's required to determine the operation type).
  kErrorAmbiguousOperandSize,
  //! Mismatching operand size (size of multiple operands doesn't match the operation size).
  kErrorOperandSizeMismatch,

  //! Invalid option.
  kErrorInvalidOption,
  //! Option already defined.
  kErrorOptionAlreadyDefined,

  //! Invalid TypeId.
  kErrorInvalidTypeId,
  //! Invalid use of a 8-bit GPB-HIGH register.
  kErrorInvalidUseOfGpbHi,
  //! Invalid use of a 64-bit GPQ register in 32-bit mode.
  kErrorInvalidUseOfGpq,
  //! Invalid use of an 80-bit float (Type::kIdF80).
  kErrorInvalidUseOfF80,
  //! Some registers in the instruction must be consecutive (some ARM and AVX512 neural-net instructions).
  kErrorNotConsecutiveRegs,

  //! AsmJit requires a physical register, but no one is available.
  kErrorNoMorePhysRegs,
  //! A variable has been assigned more than once to a function argument (BaseCompiler).
  kErrorOverlappedRegs,
  //! Invalid register to hold stack arguments offset.
  kErrorOverlappingStackRegWithRegArg,

  //! Unbound label cannot be evaluated by expression.
  kErrorExpressionLabelNotBound,
  //! Arithmetic overflow during expression evaluation.
  kErrorExpressionOverflow,

  //! Count of AsmJit error codes.
  kErrorCount
};
324
325 // ============================================================================
326 // [asmjit::ByteOrder]
327 // ============================================================================
328
//! Byte order.
namespace ByteOrder {
  enum : uint32_t {
    //! Little endian.
    kLE = 0,
    //! Big endian.
    kBE = 1,
    //! Native byte order of the target architecture.
    kNative = ASMJIT_ARCH_LE ? kLE : kBE,
    //! The opposite of the target's native byte order.
    kSwapped = ASMJIT_ARCH_LE ? kBE : kLE
  };
}
338
339 // ============================================================================
340 // [asmjit::ptr_as_func / func_as_ptr]
341 // ============================================================================
342
//! Casts a `void*` pointer `func` to a function pointer of type `Func`.
template<typename Func>
static inline Func ptr_as_func(void* func) noexcept { return Support::ptr_cast_impl<Func, void*>(func); }
//! Casts a function pointer `func` to a plain `void*` pointer.
template<typename Func>
static inline void* func_as_ptr(Func func) noexcept { return Support::ptr_cast_impl<void*, Func>(func); }
347
348 // ============================================================================
349 // [asmjit::DebugUtils]
350 // ============================================================================
351
//! Debugging utilities.
namespace DebugUtils {

//! Returns the error `err` passed.
//!
//! Provided for debugging purposes. Putting a breakpoint inside `errored` can
//! help with tracing the origin of any error reported / returned by AsmJit.
static constexpr Error errored(Error err) noexcept { return err; }

//! Returns a printable version of `asmjit::Error` code.
ASMJIT_API const char* errorAsString(Error err) noexcept;

//! Called to output debugging message(s).
ASMJIT_API void debugOutput(const char* str) noexcept;

//! Called on assertion failure.
//!
//! \param file Source file name where it happened.
//! \param line Line in the source file.
//! \param msg Message to display.
//!
//! If you have problems with assertions put a breakpoint at assertionFailed()
//! function (asmjit/core/globals.cpp) and check the call stack to locate the
//! failing code.
ASMJIT_API void ASMJIT_NORETURN assertionFailed(const char* file, int line, const char* msg) noexcept;

//! `ASMJIT_ASSERT(EXP)` aborts via `assertionFailed()` when `EXP` is false;
//! it compiles to nothing in non-debug builds.
#if defined(ASMJIT_BUILD_DEBUG)
#define ASMJIT_ASSERT(EXP) \
  do { \
    if (ASMJIT_LIKELY(EXP)) \
      break; \
    ::asmjit::DebugUtils::assertionFailed(__FILE__, __LINE__, #EXP); \
  } while (0)
#else
#define ASMJIT_ASSERT(EXP) ((void)0)
#endif

//! Used by AsmJit to propagate a possible `Error` produced by `...` to the caller.
#define ASMJIT_PROPAGATE(...) \
  do { \
    ::asmjit::Error _err = __VA_ARGS__; \
    if (ASMJIT_UNLIKELY(_err)) \
      return _err; \
  } while (0)

} // {DebugUtils}
398
399 //! \}
400
401 ASMJIT_END_NAMESPACE
402
403 #endif // _ASMJIT_CORE_GLOBALS_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifdef ASMJIT_BUILD_X86
8
9 #include "../core/arch.h"
10 #include "../core/inst.h"
11
12 #ifdef ASMJIT_BUILD_X86
13 #include "../x86/x86instapi_p.h"
14 #endif
15
16 #ifdef ASMJIT_BUILD_ARM
17 #include "../arm/arminstapi_p.h"
18 #endif
19
20 ASMJIT_BEGIN_NAMESPACE
21
22 // ============================================================================
23 // [asmjit::InstAPI - Text]
24 // ============================================================================
25
26 #ifndef ASMJIT_NO_TEXT
Error InstAPI::instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept {
  // Dispatches by architecture family to the backend-specific implementation.
  //
  // NOTE(review): this whole translation unit is wrapped in
  // `#ifdef ASMJIT_BUILD_X86` (see top of file), so the ARM branch below is
  // only compiled when the X86 backend is enabled as well - verify the guard.
#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId))
    return x86::InstInternal::instIdToString(archId, instId, output);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (ArchInfo::isArmFamily(archId))
    return arm::InstInternal::instIdToString(archId, instId, output);
#endif

  return DebugUtils::errored(kErrorInvalidArch);
}
40
// Translates an instruction name `s` (length `len`) to an instruction id.
// Returns 0 when `archId` matches no enabled backend (backends presumably
// also return 0 for unknown names - confirm in x86/arm InstInternal).
uint32_t InstAPI::stringToInstId(uint32_t archId, const char* s, size_t len) noexcept {
#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId))
    return x86::InstInternal::stringToInstId(archId, s, len);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (ArchInfo::isArmFamily(archId))
    return arm::InstInternal::stringToInstId(archId, s, len);
#endif

  return 0;
}
54 #endif // !ASMJIT_NO_TEXT
55
56 // ============================================================================
57 // [asmjit::InstAPI - Validate]
58 // ============================================================================
59
60 #ifndef ASMJIT_NO_VALIDATION
// Validates `inst` with `operands` against the rules of the given
// architecture; dispatches to the backend matching `archId` and returns
// kErrorInvalidArch when no enabled backend matches.
Error InstAPI::validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept {
#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId))
    return x86::InstInternal::validate(archId, inst, operands, opCount);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (ArchInfo::isArmFamily(archId))
    return arm::InstInternal::validate(archId, inst, operands, opCount);
#endif

  return DebugUtils::errored(kErrorInvalidArch);
}
74 #endif // !ASMJIT_NO_VALIDATION
75
76 // ============================================================================
77 // [asmjit::InstAPI - QueryRWInfo]
78 // ============================================================================
79
80 #ifndef ASMJIT_NO_INTROSPECTION
81 Error InstAPI::queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept {
82 if (ASMJIT_UNLIKELY(opCount > 6))
83 return DebugUtils::errored(kErrorInvalidArgument);
84
85 #ifdef ASMJIT_BUILD_X86
86 if (ArchInfo::isX86Family(archId))
87 return x86::InstInternal::queryRWInfo(archId, inst, operands, opCount, out);
88 #endif
89
90 #ifdef ASMJIT_BUILD_ARM
91 if (ArchInfo::isArmFamily(archId))
92 return arm::InstInternal::queryRWInfo(archId, inst, operands, opCount, out);
93 #endif
94
95 return DebugUtils::errored(kErrorInvalidArch);
96 }
97 #endif // !ASMJIT_NO_INTROSPECTION
98
99 // ============================================================================
100 // [asmjit::InstAPI - QueryFeatures]
101 // ============================================================================
102
103 #ifndef ASMJIT_NO_INTROSPECTION
// Queries the CPU features required to execute `inst` with `operands` into
// `out`; dispatches to the backend matching `archId` and returns
// kErrorInvalidArch when no enabled backend matches.
Error InstAPI::queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept {
#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId))
    return x86::InstInternal::queryFeatures(archId, inst, operands, opCount, out);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (ArchInfo::isArmFamily(archId))
    return arm::InstInternal::queryFeatures(archId, inst, operands, opCount, out);
#endif

  return DebugUtils::errored(kErrorInvalidArch);
}
117 #endif // !ASMJIT_NO_INTROSPECTION
118
119 ASMJIT_END_NAMESPACE
120
121 #endif // ASMJIT_BUILD_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_INST_H
7 #define _ASMJIT_CORE_INST_H
8
9 #include "../core/cpuinfo.h"
10 #include "../core/operand.h"
11 #include "../core/string.h"
12 #include "../core/support.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \addtogroup asmjit_core
17 //! \{
18
19 // ============================================================================
20 // [asmjit::InstInfo]
21 // ============================================================================
22
23 // TODO: Finalize instruction info and make more x86::InstDB methods/structs private.
24
25 /*
26
27 struct InstInfo {
28 //! Architecture agnostic attributes.
29 enum Attributes : uint32_t {
30
31
32 };
33
34 //! Instruction attributes.
35 uint32_t _attributes;
36
37 inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
38
39 inline uint32_t attributes() const noexcept { return _attributes; }
40 inline bool hasAttribute(uint32_t attr) const noexcept { return (_attributes & attr) != 0; }
41 };
42
43 //! Gets attributes of the given instruction.
44 ASMJIT_API Error queryCommonInfo(uint32_t archId, uint32_t instId, InstInfo& out) noexcept;
45
46 */
47
48 // ============================================================================
49 // [asmjit::InstRWInfo / OpRWInfo]
50 // ============================================================================
51
52 //! Read/Write information related to a single operand, used by `InstRWInfo`.
53 struct OpRWInfo {
54 //! Read/Write flags, see `OpRWInfo::Flags`.
55 uint32_t _opFlags;
56 //! Physical register index, if required.
57 uint8_t _physId;
58 //! Size of a possible memory operand that can replace a register operand.
59 uint8_t _rmSize;
60 //! Reserved for future use.
61 uint8_t _reserved[2];
62 //! Read bit-mask where each bit represents one byte read from Reg/Mem.
63 uint64_t _readByteMask;
64 //! Write bit-mask where each bit represents one byte written to Reg/Mem.
65 uint64_t _writeByteMask;
66 //! Zero/Sign extend bit-mask where each bit represents one byte written to Reg/Mem.
67 uint64_t _extendByteMask;
68
69 //! Flags describe how the operand is accessed and some additional information.
70 enum Flags : uint32_t {
71 //! Operand is read.
72 //!
73 //! \note This flag must be `0x00000001`.
74 kRead = 0x00000001u,
75
76 //! Operand is written.
77 //!
78 //! \note This flag must be `0x00000002`.
79 kWrite = 0x00000002u,
80
81 //! Operand is both read and written.
82 //!
83 //! \note This combination of flags must be `0x00000003`.
84 kRW = 0x00000003u,
85
86 //! Register operand can be replaced by a memory operand.
87 kRegMem = 0x00000004u,
88
89 //! The `extendByteMask()` represents a zero extension.
90 kZExt = 0x00000010u,
91
92 //! Register operand must use `physId()`.
93 kRegPhysId = 0x00000100u,
94 //! Base register of a memory operand must use `physId()`.
95 kMemPhysId = 0x00000200u,
96
97 //! This memory operand is only used to encode registers and doesn't access memory.
98 //!
99 //! X86 Specific
100 //! ------------
101 //!
102 //! Instructions that use such feature include BNDLDX, BNDSTX, and LEA.
103 kMemFake = 0x000000400u,
104
105 //! Base register of the memory operand will be read.
106 kMemBaseRead = 0x00001000u,
107 //! Base register of the memory operand will be written.
108 kMemBaseWrite = 0x00002000u,
109 //! Base register of the memory operand will be read & written.
110 kMemBaseRW = 0x00003000u,
111
112 //! Index register of the memory operand will be read.
113 kMemIndexRead = 0x00004000u,
114 //! Index register of the memory operand will be written.
115 kMemIndexWrite = 0x00008000u,
116 //! Index register of the memory operand will be read & written.
117 kMemIndexRW = 0x0000C000u,
118
119 //! Base register of the memory operand will be modified before the operation.
120 kMemBasePreModify = 0x00010000u,
121 //! Base register of the memory operand will be modified after the operation.
122 kMemBasePostModify = 0x00020000u
123 };
124
  // Compile-time guards for the exact bit values of the core R/W flags; other
  // code may combine them numerically, so renumbering the enum must not pass.
  static_assert(kRead == 0x1, "OpRWInfo::kRead flag must be 0x1");
  static_assert(kWrite == 0x2, "OpRWInfo::kWrite flag must be 0x2");
  static_assert(kRegMem == 0x4, "OpRWInfo::kRegMem flag must be 0x4");

  //! \name Reset
  //! \{

  //! Zeroes the whole structure.
  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
  //! Re-initializes the structure from `opFlags`, `regSize`, and an optional `physId`.
  //!
  //! The read/write byte masks cover the low `regSize` bytes (via `lsbMask`)
  //! depending on whether `kRead` / `kWrite` is present in `opFlags`; `_rmSize`
  //! is recorded only when the operand is a reg/mem combination (`kRegMem`).
  inline void reset(uint32_t opFlags, uint32_t regSize, uint32_t physId = BaseReg::kIdBad) noexcept {
    _opFlags = opFlags;
    _physId = uint8_t(physId);
    _rmSize = uint8_t((opFlags & kRegMem) ? regSize : uint32_t(0));
    _resetReserved();

    uint64_t mask = Support::lsbMask<uint64_t>(regSize);
    _readByteMask = opFlags & kRead ? mask : uint64_t(0);
    _writeByteMask = opFlags & kWrite ? mask : uint64_t(0);
    _extendByteMask = 0;
  }

  //! Zeroes the reserved padding (internal).
  inline void _resetReserved() noexcept {
    memset(_reserved, 0, sizeof(_reserved));
  }

  //! \}
150
  //! \name Operand Flags
  //! \{

  //! Returns all operand flags.
  inline uint32_t opFlags() const noexcept { return _opFlags; }
  //! Tests whether any bit of `flag` is present in operand flags.
  inline bool hasOpFlag(uint32_t flag) const noexcept { return (_opFlags & flag) != 0; }

  //! Adds the given `flags` to operand flags.
  inline void addOpFlags(uint32_t flags) noexcept { _opFlags |= flags; }
  //! Clears the given `flags` from operand flags.
  inline void clearOpFlags(uint32_t flags) noexcept { _opFlags &= ~flags; }

  //! Tests whether the operand is read (it may be written as well).
  inline bool isRead() const noexcept { return hasOpFlag(kRead); }
  //! Tests whether the operand is written (it may be read as well).
  inline bool isWrite() const noexcept { return hasOpFlag(kWrite); }
  //! Tests whether the operand is both read and written.
  inline bool isReadWrite() const noexcept { return (_opFlags & kRW) == kRW; }
  //! Tests whether the operand is read, but not written.
  inline bool isReadOnly() const noexcept { return (_opFlags & kRW) == kRead; }
  //! Tests whether the operand is written, but not read.
  inline bool isWriteOnly() const noexcept { return (_opFlags & kRW) == kWrite; }
  //! Tests whether the operand can be a register or memory (`kRegMem`).
  inline bool isRm() const noexcept { return hasOpFlag(kRegMem); }
  //! Tests whether the write is zero-extended (`kZExt`).
  inline bool isZExt() const noexcept { return hasOpFlag(kZExt); }

  //! \}
169
  //! \name Physical Register ID
  //! \{

  //! Returns the physical register id this operand is fixed to.
  inline uint32_t physId() const noexcept { return _physId; }
  //! Tests whether a physical register id is assigned (not `BaseReg::kIdBad`).
  inline bool hasPhysId() const noexcept { return _physId != BaseReg::kIdBad; }
  //! Sets the physical register id (truncated to 8 bits).
  inline void setPhysId(uint32_t physId) noexcept { _physId = uint8_t(physId); }

  //! \}

  //! \name Reg/Mem
  //! \{

  //! Returns the size of a memory operand that can substitute the register (0 if none).
  inline uint32_t rmSize() const noexcept { return _rmSize; }
  //! Sets the reg/mem substitution size (truncated to 8 bits).
  inline void setRmSize(uint32_t rmSize) noexcept { _rmSize = uint8_t(rmSize); }

  //! \}

  //! \name Read & Write Masks
  //! \{

  //! Returns the mask of bytes the operation reads.
  inline uint64_t readByteMask() const noexcept { return _readByteMask; }
  //! Returns the mask of bytes the operation writes.
  inline uint64_t writeByteMask() const noexcept { return _writeByteMask; }
  //! Returns the mask of bytes that are zero/sign extended by the operation.
  inline uint64_t extendByteMask() const noexcept { return _extendByteMask; }

  inline void setReadByteMask(uint64_t mask) noexcept { _readByteMask = mask; }
  inline void setWriteByteMask(uint64_t mask) noexcept { _writeByteMask = mask; }
  inline void setExtendByteMask(uint64_t mask) noexcept { _extendByteMask = mask; }

  //! \}
199 };
200
201 //! Read/Write information of an instruction.
//! Read/Write information of an instruction.
struct InstRWInfo {
  //! Instruction flags.
  uint32_t _instFlags;
  //! Mask of flags read.
  uint32_t _readFlags;
  //! Mask of flags written.
  uint32_t _writeFlags;
  //! Count of operands.
  uint8_t _opCount;
  //! CPU feature required for replacing register operand with memory operand.
  uint8_t _rmFeature;
  //! Reserved for future use.
  uint8_t _reserved[19];
  //! Read/Write info of extra register (rep{} or kz{}).
  OpRWInfo _extraReg;
  //! Read/Write info of instruction operands.
  OpRWInfo _operands[Globals::kMaxOpCount];

  //! Zeroes the whole structure.
  inline void reset() noexcept { memset(this, 0, sizeof(*this)); }

  //! Returns all instruction flags.
  inline uint32_t instFlags() const noexcept { return _instFlags; }
  //! Tests whether any bit of `flag` is present in instruction flags.
  inline bool hasInstFlag(uint32_t flag) const noexcept { return (_instFlags & flag) != 0; }

  //! Returns the number of operands described by `_operands`.
  inline uint32_t opCount() const noexcept { return _opCount; }

  //! Returns the mask of CPU flags read by the instruction.
  inline uint32_t readFlags() const noexcept { return _readFlags; }
  //! Returns the mask of CPU flags written by the instruction.
  inline uint32_t writeFlags() const noexcept { return _writeFlags; }

  //! Returns the CPU feature required to replace a register operand with memory
  //! operand. If the returned feature is zero (none) then this instruction
  //! either doesn't provide memory operand combination or there is no extra
  //! CPU feature required.
  //!
  //! X86 Specific
  //! ------------
  //!
  //! Some AVX+ instructions may require extra features for replacing registers
  //! with memory operands, for example VPSLLDQ instruction only supports
  //! 'reg/reg/imm' combination on AVX/AVX2 capable CPUs and requires AVX-512 for
  //! 'reg/mem/imm' combination.
  inline uint32_t rmFeature() const noexcept { return _rmFeature; }

  //! Returns Read/Write info of the extra register.
  inline const OpRWInfo& extraReg() const noexcept { return _extraReg; }
  //! Returns the operand Read/Write info array (use `opCount()` for the valid length).
  inline const OpRWInfo* operands() const noexcept { return _operands; }

  //! Returns Read/Write info of the operand at `index` (must be < kMaxOpCount).
  inline const OpRWInfo& operand(size_t index) const noexcept {
    ASMJIT_ASSERT(index < Globals::kMaxOpCount);
    return _operands[index];
  }
};
252
253 // ============================================================================
254 // [asmjit::BaseInst]
255 // ============================================================================
256
257 //! Instruction id, options, and extraReg in a single structure. This structure
258 //! exists mainly to simplify analysis and validation API that requires `BaseInst`
259 //! and `Operand[]` array.
//! Instruction id, options, and extraReg in a single structure. This structure
//! exists mainly to simplify analysis and validation API that requires `BaseInst`
//! and `Operand[]` array.
class BaseInst {
public:
  //! Instruction id.
  uint32_t _id;
  //! Instruction options.
  uint32_t _options;
  //! Extra register used by instruction (either REP register or AVX-512 selector).
  RegOnly _extraReg;

  enum Id : uint32_t {
    //! Invalid or uninitialized instruction id.
    kIdNone = 0x00000000u,
    //! Abstract instruction (BaseBuilder and BaseCompiler).
    kIdAbstract = 0x80000000u
  };

  enum Options : uint32_t {
    //! Used internally by emitters for handling errors and rare cases.
    kOptionReserved = 0x00000001u,

    //! Used only by Assembler to mark that `_op4` and `_op5` are used (internal).
    kOptionOp4Op5Used = 0x00000002u,

    //! Prevents following a jump during compilation (BaseCompiler).
    kOptionUnfollow = 0x00000010u,

    //! Overwrite the destination operand(s) (BaseCompiler).
    //!
    //! Hint that is important for register liveness analysis. It tells the
    //! compiler that the destination operand will be overwritten now or by
    //! adjacent instructions. BaseCompiler knows when a register is completely
    //! overwritten by a single instruction, for example you don't have to
    //! mark "movaps" or "pxor x, x", however, if a pair of instructions is
    //! used and the first of them doesn't completely overwrite the content
    //! of the destination, BaseCompiler fails to mark that register as dead.
    //!
    //! X86 Specific
    //! ------------
    //!
    //! - All instructions that always overwrite at least the size of the
    //!   register the virtual-register uses, for example "mov", "movq",
    //!   "movaps" don't need the overwrite option to be used - conversion,
    //!   shuffle, and other miscellaneous instructions included.
    //!
    //! - All instructions that clear the destination register if all operands
    //!   are the same, for example "xor x, x", "pcmpeqb x, x", etc...
    //!
    //! - Consecutive instructions that partially overwrite the variable until
    //!   there is no old content require `BaseCompiler::overwrite()` to be used.
    //!   Some examples (not always the best use cases though):
    //!
    //!   - `movlps xmm0, ?` followed by `movhps xmm0, ?` and vice versa
    //!   - `movlpd xmm0, ?` followed by `movhpd xmm0, ?` and vice versa
    //!   - `mov al, ?` followed by `and ax, 0xFF`
    //!   - `mov al, ?` followed by `mov ah, al`
    //!   - `pinsrq xmm0, ?, 0` followed by `pinsrq xmm0, ?, 1`
    //!
    //! - If allocated variable is used temporarily for scalar operations. For
    //!   example if you allocate a full vector like `x86::Compiler::newXmm()`
    //!   and then use that vector for scalar operations you should use
    //!   `overwrite()` directive:
    //!
    //!   - `sqrtss x, y` - only LO element of `x` is changed, if you don't
    //!     use HI elements, use `compiler.overwrite().sqrtss(x, y)`.
    kOptionOverwrite = 0x00000020u,

    //! Emit short-form of the instruction.
    kOptionShortForm = 0x00000040u,
    //! Emit long-form of the instruction.
    kOptionLongForm = 0x00000080u,

    //! Conditional jump is likely to be taken.
    kOptionTaken = 0x00000100u,
    //! Conditional jump is unlikely to be taken.
    kOptionNotTaken = 0x00000200u
  };

  //! Control type.
  enum ControlType : uint32_t {
    //! No control type (doesn't jump).
    kControlNone = 0u,
    //! Unconditional jump.
    kControlJump = 1u,
    //! Conditional jump (branch).
    kControlBranch = 2u,
    //! Function call.
    kControlCall = 3u,
    //! Function return.
    kControlReturn = 4u
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `BaseInst` instance with `id` and `options`, no extra register.
  inline explicit BaseInst(uint32_t id = 0, uint32_t options = 0) noexcept
    : _id(id),
      _options(options),
      _extraReg() {}

  //! Creates a new `BaseInst` instance with an extra register given as `RegOnly`.
  inline BaseInst(uint32_t id, uint32_t options, const RegOnly& extraReg) noexcept
    : _id(id),
      _options(options),
      _extraReg(extraReg) {}

  //! Creates a new `BaseInst` instance with an extra register given as `BaseReg`.
  inline BaseInst(uint32_t id, uint32_t options, const BaseReg& extraReg) noexcept
    : _id(id),
      _options(options),
      _extraReg { extraReg.signature(), extraReg.id() } {}

  //! \}

  //! \name Instruction ID
  //! \{

  //! Returns the instruction id.
  inline uint32_t id() const noexcept { return _id; }
  //! Sets the instruction id to `id`.
  inline void setId(uint32_t id) noexcept { _id = id; }
  //! Resets the instruction id to zero (`kIdNone`).
  inline void resetId() noexcept { _id = 0; }

  //! \}

  //! \name Instruction Options
  //! \{

  inline uint32_t options() const noexcept { return _options; }
  inline void setOptions(uint32_t options) noexcept { _options = options; }
  inline void addOptions(uint32_t options) noexcept { _options |= options; }
  inline void clearOptions(uint32_t options) noexcept { _options &= ~options; }
  inline void resetOptions() noexcept { _options = 0; }

  //! \}

  //! \name Extra Register
  //! \{

  //! Tests whether the instruction has an associated extra register.
  inline bool hasExtraReg() const noexcept { return _extraReg.isReg(); }
  inline RegOnly& extraReg() noexcept { return _extraReg; }
  inline const RegOnly& extraReg() const noexcept { return _extraReg; }
  inline void setExtraReg(const BaseReg& reg) noexcept { _extraReg.init(reg); }
  inline void setExtraReg(const RegOnly& reg) noexcept { _extraReg.init(reg); }
  inline void resetExtraReg() noexcept { _extraReg.reset(); }

  //! \}
};
403
404 // ============================================================================
405 // [asmjit::InstAPI]
406 // ============================================================================
407
//! Instruction API.
namespace InstAPI {

#ifndef ASMJIT_NO_TEXT
//! Appends the name of the instruction specified by `instId` into the
//! `output` string.
//!
//! \note Only the raw instruction name is appended; this overload takes no
//! instruction options, so no prefix/suffix text is emitted.
ASMJIT_API Error instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept;

//! Parses an instruction name in the given string `s`. Length is specified
//! by `len` argument, which can be `SIZE_MAX` if `s` is known to be null
//! terminated.
//!
//! Returns the parsed instruction id.
ASMJIT_API uint32_t stringToInstId(uint32_t archId, const char* s, size_t len) noexcept;
#endif // !ASMJIT_NO_TEXT

#ifndef ASMJIT_NO_VALIDATION
//! Validates the given instruction (id, options, and operands).
ASMJIT_API Error validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept;
#endif // !ASMJIT_NO_VALIDATION

#ifndef ASMJIT_NO_INTROSPECTION
//! Gets Read/Write information of the given instruction into `out`.
ASMJIT_API Error queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept;

//! Gets CPU features required by the given instruction into `out`.
ASMJIT_API Error queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept;
#endif // !ASMJIT_NO_INTROSPECTION

} // {InstAPI}
442
443 //! \}
444
445 ASMJIT_END_NAMESPACE
446
447 #endif // _ASMJIT_CORE_INST_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_JIT
8
9 #include "../core/arch.h"
10 #include "../core/jitallocator.h"
11 #include "../core/osutils.h"
12 #include "../core/support.h"
13 #include "../core/virtmem.h"
14 #include "../core/zone.h"
15 #include "../core/zonelist.h"
16 #include "../core/zonetree.h"
17
18 ASMJIT_BEGIN_NAMESPACE
19
20 // ============================================================================
21 // [asmjit::JitAllocator - Constants]
22 // ============================================================================
23
//! Compile-time constants that drive `JitAllocator` pool/block sizing.
enum JitAllocatorConstants : uint32_t {
  //! Number of pools to use when `JitAllocator::kOptionUseMultiplePools` is set.
  //!
  //! Each pool increases granularity twice to make memory management more
  //! efficient. Ideal number of pools appears to be 3 to 4 as it distributes
  //! small and large functions properly.
  kJitAllocatorMultiPoolCount = 3,

  //! Minimum granularity (and the default granularity for pool #0).
  kJitAllocatorBaseGranularity = 64,

  //! Maximum block size (16MB); block sizes stop doubling once they reach this.
  kJitAllocatorMaxBlockSize = 1024 * 1024 * 16
};
38
39 static inline uint32_t JitAllocator_defaultFillPattern() noexcept {
40 // X86 and X86_64 - 4x 'int3' instruction.
41 if (ASMJIT_ARCH_X86)
42 return 0xCCCCCCCCu;
43
44 // Unknown...
45 return 0u;
46 }
47
48 // ============================================================================
49 // [asmjit::JitAllocator - BitFlipIterator]
50 // ============================================================================
51
//! BitWord[] iterator used by `JitAllocator` that can flip the search pattern
//! during iteration.
//!
//! The iterator scans for set bits in `data` XOR'ed with `_xorMask`; flipping
//! the mask (see `nextAndFlip()`) switches the scan between 1-bits and 0-bits,
//! which lets a single pass find alternating used/unused region boundaries.
template<typename T>
class BitFlipIterator {
public:
  //! Constructs and initializes the iterator (see `init()`).
  ASMJIT_INLINE BitFlipIterator(const T* data, size_t numBitWords, size_t start = 0, T xorMask = 0) noexcept {
    init(data, numBitWords, start, xorMask);
  }

  //! Initializes the iterator to scan `numBitWords` words of `data` starting
  //! at bit index `start`, matching bits that are set after XOR with `xorMask`.
  ASMJIT_INLINE void init(const T* data, size_t numBitWords, size_t start = 0, T xorMask = 0) noexcept {
    const T* ptr = data + (start / Support::bitSizeOf<T>());
    size_t idx = Support::alignDown(start, Support::bitSizeOf<T>());
    size_t end = numBitWords * Support::bitSizeOf<T>();

    T bitWord = T(0);
    if (idx < end) {
      // Mask off bits below `start` in the first word, then skip empty words.
      bitWord = (*ptr++ ^ xorMask) & (Support::allOnes<T>() << (start % Support::bitSizeOf<T>()));
      while (!bitWord && (idx += Support::bitSizeOf<T>()) < end)
        bitWord = *ptr++ ^ xorMask;
    }

    _ptr = ptr;
    _idx = idx;
    _end = end;
    _current = bitWord;
    _xorMask = xorMask;
  }

  //! Tests whether there is a next matching bit.
  ASMJIT_INLINE bool hasNext() const noexcept {
    return _current != T(0);
  }

  //! Returns the index of the next matching bit and advances the iterator.
  ASMJIT_INLINE size_t next() noexcept {
    T bitWord = _current;
    ASMJIT_ASSERT(bitWord != T(0));

    uint32_t bit = Support::ctz(bitWord);
    bitWord ^= T(1u) << bit;

    size_t n = _idx + bit;
    while (!bitWord && (_idx += Support::bitSizeOf<T>()) < _end)
      bitWord = *_ptr++ ^ _xorMask;

    _current = bitWord;
    return n;
  }

  //! Like `next()`, but also flips the search pattern, so subsequent bits
  //! matched are the complement of those matched before the call.
  ASMJIT_INLINE size_t nextAndFlip() noexcept {
    T bitWord = _current;
    ASMJIT_ASSERT(bitWord != T(0));

    uint32_t bit = Support::ctz(bitWord);
    // Clearing all bits below `bit` and inverting the mask flips the pattern
    // from this position onwards.
    bitWord ^= Support::allOnes<T>() << bit;
    _xorMask ^= Support::allOnes<T>();

    size_t n = _idx + bit;
    while (!bitWord && (_idx += Support::bitSizeOf<T>()) < _end)
      bitWord = *_ptr++ ^ _xorMask;

    _current = bitWord;
    return n;
  }

  //! Returns the index of the next matching bit without advancing.
  ASMJIT_INLINE size_t peekNext() const noexcept {
    ASMJIT_ASSERT(_current != T(0));
    return _idx + Support::ctz(_current);
  }

  //! Pointer to the next word to fetch.
  const T* _ptr;
  //! Bit index of the first bit of `_current`.
  size_t _idx;
  //! Total number of bits (end index, exclusive).
  size_t _end;
  //! Current (already XOR'ed and partially consumed) word.
  T _current;
  //! Mask XOR'ed with every fetched word.
  T _xorMask;
};
126
127 // ============================================================================
128 // [asmjit::JitAllocator - Pool]
129 // ============================================================================
130
131 class JitAllocatorBlock;
132
133 class JitAllocatorPool {
134 public:
135 ASMJIT_NONCOPYABLE(JitAllocatorPool)
136
137 inline JitAllocatorPool(uint32_t granularity) noexcept
138 : blocks(),
139 cursor(nullptr),
140 blockCount(0),
141 granularity(uint16_t(granularity)),
142 granularityLog2(uint8_t(Support::ctz(granularity))),
143 emptyBlockCount(0),
144 totalAreaSize(0),
145 totalAreaUsed(0),
146 totalOverheadBytes(0) {}
147
148 inline void reset() noexcept {
149 blocks.reset();
150 cursor = nullptr;
151 blockCount = 0;
152 totalAreaSize = 0;
153 totalAreaUsed = 0;
154 totalOverheadBytes = 0;
155 }
156
157 inline size_t byteSizeFromAreaSize(uint32_t areaSize) const noexcept { return size_t(areaSize) * granularity; }
158 inline uint32_t areaSizeFromByteSize(size_t size) const noexcept { return uint32_t((size + granularity - 1) >> granularityLog2); }
159
160 inline size_t bitWordCountFromAreaSize(uint32_t areaSize) const noexcept {
161 using namespace Support;
162 return alignUp<size_t>(areaSize, kBitWordSizeInBits) / kBitWordSizeInBits;
163 }
164
165 //! Double linked list of blocks.
166 ZoneList<JitAllocatorBlock> blocks;
167 //! Where to start looking first.
168 JitAllocatorBlock* cursor;
169
170 //! Count of blocks.
171 uint32_t blockCount;
172 //! Allocation granularity.
173 uint16_t granularity;
174 //! Log2(granularity).
175 uint8_t granularityLog2;
176 //! Count of empty blocks (either 0 or 1 as we won't keep more blocks empty).
177 uint8_t emptyBlockCount;
178
179 //! Number of bits reserved across all blocks.
180 size_t totalAreaSize;
181 //! Number of bits used across all blocks.
182 size_t totalAreaUsed;
183 //! Overhead of all blocks (in bytes).
184 size_t totalOverheadBytes;
185 };
186
187 // ============================================================================
188 // [asmjit::JitAllocator - Block]
189 // ============================================================================
190
//! A single virtual-memory block managed by `JitAllocator`, tracked both in
//! an RB-tree (lookup by address) and in its pool's linked list.
class JitAllocatorBlock : public ZoneTreeNodeT<JitAllocatorBlock>,
                          public ZoneListNode<JitAllocatorBlock> {
public:
  ASMJIT_NONCOPYABLE(JitAllocatorBlock)

  enum Flags : uint32_t {
    //! Block is empty.
    kFlagEmpty = 0x00000001u,
    //! Block is dirty (largestUnusedArea, searchStart, searchEnd).
    kFlagDirty = 0x00000002u,
    //! Block is dual-mapped.
    kFlagDualMapped = 0x00000004u
  };

  //! Initializes the block; the whole area starts unused so the search range
  //! and the largest unused area both cover `areaSize`.
  inline JitAllocatorBlock(
    JitAllocatorPool* pool,
    VirtMem::DualMapping mapping,
    size_t blockSize,
    uint32_t blockFlags,
    Support::BitWord* usedBitVector,
    Support::BitWord* stopBitVector,
    uint32_t areaSize) noexcept
    : ZoneTreeNodeT(),
      pool(pool),
      mapping(mapping),
      blockSize(blockSize),
      flags(blockFlags),
      areaSize(areaSize),
      areaUsed(0),
      largestUnusedArea(areaSize),
      searchStart(0),
      searchEnd(areaSize),
      usedBitVector(usedBitVector),
      stopBitVector(stopBitVector) {}

  //! Returns the Read+Execute pointer of the mapping.
  inline uint8_t* roPtr() const noexcept { return static_cast<uint8_t*>(mapping.ro); }
  //! Returns the Read+Write pointer of the mapping (equals `roPtr()` unless dual-mapped).
  inline uint8_t* rwPtr() const noexcept { return static_cast<uint8_t*>(mapping.rw); }

  //! Tests whether any bit of `f` is set in block flags.
  inline bool hasFlag(uint32_t f) const noexcept { return (flags & f) != 0; }
  inline void addFlags(uint32_t f) noexcept { flags |= f; }
  inline void clearFlags(uint32_t f) noexcept { flags &= ~f; }

  //! Returns the number of unused area units in the block.
  inline uint32_t areaAvailable() const noexcept { return areaSize - areaUsed; }

  //! Increases used-area accounting of both the block and its pool.
  inline void increaseUsedArea(uint32_t value) noexcept {
    areaUsed += value;
    pool->totalAreaUsed += value;
  }

  //! Decreases used-area accounting of both the block and its pool.
  inline void decreaseUsedArea(uint32_t value) noexcept {
    areaUsed -= value;
    pool->totalAreaUsed -= value;
  }

  // RBTree default CMP uses '<' and '>' operators.
  inline bool operator<(const JitAllocatorBlock& other) const noexcept { return roPtr() < other.roPtr(); }
  inline bool operator>(const JitAllocatorBlock& other) const noexcept { return roPtr() > other.roPtr(); }

  // Special implementation for querying blocks by `key`, which must be in `[BlockPtr, BlockPtr + BlockSize)` range.
  inline bool operator<(const uint8_t* key) const noexcept { return roPtr() + blockSize <= key; }
  inline bool operator>(const uint8_t* key) const noexcept { return roPtr() > key; }

  //! Link to the pool that owns this block.
  JitAllocatorPool* pool;
  //! Virtual memory mapping - either single mapping (both pointers equal) or
  //! dual mapping, where one pointer is Read+Execute and the second Read+Write.
  VirtMem::DualMapping mapping;
  //! Virtual memory size (block size) [bytes].
  size_t blockSize;

  //! Block flags.
  uint32_t flags;
  //! Size of the whole block area (bit-vector size).
  uint32_t areaSize;
  //! Used area (number of bits in bit-vector used).
  uint32_t areaUsed;
  //! The largest unused continuous area in the bit-vector (or `areaSize` to initiate rescan).
  uint32_t largestUnusedArea;
  //! Start of a search range (for unused bits).
  uint32_t searchStart;
  //! End of a search range (for unused bits).
  uint32_t searchEnd;

  //! Used bit-vector (0 = unused, 1 = used).
  Support::BitWord* usedBitVector;
  //! Stop bit-vector (0 = don't care, 1 = stop).
  Support::BitWord* stopBitVector;
};
279
280 // ============================================================================
281 // [asmjit::JitAllocator - PrivateImpl]
282 // ============================================================================
283
//! Private (fully initialized) implementation of `JitAllocator`; allocated
//! together with its pool array in one malloc'ed chunk (see JitAllocatorImpl_new).
class JitAllocatorPrivateImpl : public JitAllocator::Impl {
public:
  inline JitAllocatorPrivateImpl(JitAllocatorPool* pools, size_t poolCount) noexcept
    : JitAllocator::Impl {},
      pools(pools),
      poolCount(poolCount) {}
  inline ~JitAllocatorPrivateImpl() noexcept {}

  //! Lock for thread safety.
  mutable Lock lock;
  //! System page size (also a minimum block size).
  uint32_t pageSize;

  //! Blocks from all pools in RBTree.
  ZoneTree<JitAllocatorBlock> tree;
  //! Allocator pools.
  JitAllocatorPool* pools;
  //! Number of allocator pools.
  size_t poolCount;
};
304
// Sentinel implementation used when construction fails (checked by identity).
static const JitAllocator::Impl JitAllocatorImpl_none {};
// Defaults used when the caller passes no `CreateParams`.
static const JitAllocator::CreateParams JitAllocatorParams_none {};
307
308 // ============================================================================
309 // [asmjit::JitAllocator - Utilities]
310 // ============================================================================
311
312 static inline JitAllocatorPrivateImpl* JitAllocatorImpl_new(const JitAllocator::CreateParams* params) noexcept {
313 VirtMem::Info vmInfo = VirtMem::info();
314
315 if (!params)
316 params = &JitAllocatorParams_none;
317
318 uint32_t options = params->options;
319 uint32_t blockSize = params->blockSize;
320 uint32_t granularity = params->granularity;
321 uint32_t fillPattern = params->fillPattern;
322
323 // Setup pool count to [1..3].
324 size_t poolCount = 1;
325 if (options & JitAllocator::kOptionUseMultiplePools)
326 poolCount = kJitAllocatorMultiPoolCount;;
327
328 // Setup block size [64kB..256MB].
329 if (blockSize < 64 * 1024 || blockSize > 256 * 1024 * 1024 || !Support::isPowerOf2(blockSize))
330 blockSize = vmInfo.pageGranularity;
331
332 // Setup granularity [64..256].
333 if (granularity < 64 || granularity > 256 || !Support::isPowerOf2(granularity))
334 granularity = kJitAllocatorBaseGranularity;
335
336 // Setup fill-pattern.
337 if (!(options & JitAllocator::kOptionCustomFillPattern))
338 fillPattern = JitAllocator_defaultFillPattern();
339
340 size_t size = sizeof(JitAllocatorPrivateImpl) + sizeof(JitAllocatorPool) * poolCount;
341 void* p = ::malloc(size);
342 if (ASMJIT_UNLIKELY(!p))
343 return nullptr;
344
345 JitAllocatorPool* pools = reinterpret_cast<JitAllocatorPool*>((uint8_t*)p + sizeof(JitAllocatorPrivateImpl));
346 JitAllocatorPrivateImpl* impl = new(p) JitAllocatorPrivateImpl(pools, poolCount);
347
348 impl->options = options;
349 impl->blockSize = blockSize;
350 impl->granularity = granularity;
351 impl->fillPattern = fillPattern;
352 impl->pageSize = vmInfo.pageSize;
353
354 for (size_t poolId = 0; poolId < poolCount; poolId++)
355 new(&pools[poolId]) JitAllocatorPool(granularity << poolId);
356
357 return impl;
358 }
359
//! Destroys an impl created by `JitAllocatorImpl_new` (explicit destructor
//! call + free, since impl and pools share one malloc'ed chunk).
static inline void JitAllocatorImpl_destroy(JitAllocatorPrivateImpl* impl) noexcept {
  impl->~JitAllocatorPrivateImpl();
  ::free(impl);
}
364
365 static inline size_t JitAllocatorImpl_sizeToPoolId(const JitAllocatorPrivateImpl* impl, size_t size) noexcept {
366 size_t poolId = impl->poolCount - 1;
367 size_t granularity = size_t(impl->granularity) << poolId;
368
369 while (poolId) {
370 if (Support::alignUp(size, granularity) == size)
371 break;
372 poolId--;
373 granularity >>= 1;
374 }
375
376 return poolId;
377 }
378
379 static inline size_t JitAllocatorImpl_bitVectorSizeToByteSize(uint32_t areaSize) noexcept {
380 using Support::kBitWordSizeInBits;
381 return ((areaSize + kBitWordSizeInBits - 1u) / kBitWordSizeInBits) * sizeof(Support::BitWord);
382 }
383
//! Computes the size of the next block to allocate in `pool`: the last block's
//! size doubled (capped at `kJitAllocatorMaxBlockSize`), or at least enough
//! for `allocationSize`. Returns 0 on arithmetic overflow.
static inline size_t JitAllocatorImpl_calculateIdealBlockSize(JitAllocatorPrivateImpl* impl, JitAllocatorPool* pool, size_t allocationSize) noexcept {
  JitAllocatorBlock* last = pool->blocks.last();
  size_t blockSize = last ? last->blockSize : size_t(impl->blockSize);

  // Grow geometrically until the maximum block size is reached.
  if (blockSize < kJitAllocatorMaxBlockSize)
    blockSize *= 2u;

  if (allocationSize > blockSize) {
    blockSize = Support::alignUp(allocationSize, impl->blockSize);
    if (ASMJIT_UNLIKELY(blockSize < allocationSize))
      return 0; // Overflown.
  }

  return blockSize;
}
399
400 ASMJIT_FAVOR_SPEED static void JitAllocatorImpl_fillPattern(void* mem, uint32_t pattern, size_t sizeInBytes) noexcept {
401 size_t n = sizeInBytes / 4u;
402 uint32_t* p = static_cast<uint32_t*>(mem);
403
404 for (size_t i = 0; i < n; i++)
405 p[i] = pattern;
406 }
407
// Allocate a new `JitAllocatorBlock` for the given `blockSize`.
//
// NOTE: The block doesn't have `kFlagEmpty` flag set, because the new block
// is only allocated when it's actually needed, so it would be cleared anyway.
//
// Returns nullptr if either the block header, the bit-vectors, or the virtual
// memory itself could not be allocated; partial allocations are released.
static JitAllocatorBlock* JitAllocatorImpl_newBlock(JitAllocatorPrivateImpl* impl, JitAllocatorPool* pool, size_t blockSize) noexcept {
  using Support::BitWord;
  using Support::kBitWordSizeInBits;

  uint32_t areaSize = uint32_t((blockSize + pool->granularity - 1) >> pool->granularityLog2);
  uint32_t numBitWords = (areaSize + kBitWordSizeInBits - 1u) / kBitWordSizeInBits;

  JitAllocatorBlock* block = static_cast<JitAllocatorBlock*>(::malloc(sizeof(JitAllocatorBlock)));
  BitWord* bitWords = nullptr;
  VirtMem::DualMapping virtMem {};
  Error err = kErrorOutOfMemory;

  // One allocation covers both bit-vectors (used + stop), hence `* 2`.
  if (block != nullptr)
    bitWords = static_cast<BitWord*>(::malloc(size_t(numBitWords) * 2 * sizeof(BitWord)));

  uint32_t blockFlags = 0;
  if (bitWords != nullptr) {
    if (impl->options & JitAllocator::kOptionUseDualMapping) {
      err = VirtMem::allocDualMapping(&virtMem, blockSize, VirtMem::kAccessReadWrite | VirtMem::kAccessExecute);
      blockFlags |= JitAllocatorBlock::kFlagDualMapped;
    }
    else {
      // Single mapping - RX and RW pointers are identical.
      err = VirtMem::alloc(&virtMem.ro, blockSize, VirtMem::kAccessReadWrite | VirtMem::kAccessExecute);
      virtMem.rw = virtMem.ro;
    }
  }

  // Out of memory.
  if (ASMJIT_UNLIKELY(!block || !bitWords || err != kErrorOk)) {
    if (bitWords) ::free(bitWords);
    if (block) ::free(block);
    return nullptr;
  }

  // Fill the memory if the secure mode is enabled.
  if (impl->options & JitAllocator::kOptionFillUnusedMemory)
    JitAllocatorImpl_fillPattern(virtMem.rw, impl->fillPattern, blockSize);

  memset(bitWords, 0, size_t(numBitWords) * 2 * sizeof(BitWord));
  return new(block) JitAllocatorBlock(pool, virtMem, blockSize, blockFlags, bitWords, bitWords + numBitWords, areaSize);
}
453
//! Releases a block's virtual memory, bit-vectors, and header.
//!
//! NOTE: The caller is responsible for unlinking the block from the pool and
//! RB-tree first (see `JitAllocatorImpl_removeBlock`).
static void JitAllocatorImpl_deleteBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
  ASMJIT_UNUSED(impl);

  if (block->flags & JitAllocatorBlock::kFlagDualMapped)
    VirtMem::releaseDualMapping(&block->mapping, block->blockSize);
  else
    VirtMem::release(block->mapping.ro, block->blockSize);

  // `usedBitVector` is the start of the single allocation holding both vectors.
  ::free(block->usedBitVector);
  ::free(block);
}
465
//! Links `block` into its pool's list and the global RB-tree and updates
//! pool statistics.
static void JitAllocatorImpl_insertBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
  JitAllocatorPool* pool = block->pool;

  if (!pool->cursor)
    pool->cursor = block;

  // Add to RBTree and List.
  impl->tree.insert(block);
  pool->blocks.append(block);

  // Update statistics.
  pool->blockCount++;
  pool->totalAreaSize += block->areaSize;
  pool->totalOverheadBytes += sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(block->areaSize) * 2u;
}
481
//! Unlinks `block` from its pool's list and the global RB-tree and updates
//! pool statistics; the block itself is not freed.
static void JitAllocatorImpl_removeBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
  JitAllocatorPool* pool = block->pool;

  // Remove from RBTree and List.
  if (pool->cursor == block)
    pool->cursor = block->hasPrev() ? block->prev() : block->next();

  impl->tree.remove(block);
  pool->blocks.unlink(block);

  // Update statistics.
  pool->blockCount--;
  pool->totalAreaSize -= block->areaSize;
  pool->totalOverheadBytes -= sizeof(JitAllocatorBlock) + JitAllocatorImpl_bitVectorSizeToByteSize(block->areaSize) * 2u;
}
497
//! Returns `block` to a pristine empty state: optionally re-fills all used
//! regions with the fill pattern, clears both bit-vectors, and resets the
//! block's accounting. No-op if the block is already marked empty.
static void JitAllocatorImpl_wipeOutBlock(JitAllocatorPrivateImpl* impl, JitAllocatorBlock* block) noexcept {
  JitAllocatorPool* pool = block->pool;

  if (block->hasFlag(JitAllocatorBlock::kFlagEmpty))
    return;

  uint32_t areaSize = block->areaSize;
  uint32_t granularity = pool->granularity;
  size_t numBitWords = pool->bitWordCountFromAreaSize(areaSize);

  if (impl->options & JitAllocator::kOptionFillUnusedMemory) {
    // Walk the used bit-vector; each nextAndFlip() pair yields the [start, end)
    // range of one contiguous used region.
    BitFlipIterator<Support::BitWord> it(block->usedBitVector, numBitWords);

    while (it.hasNext()) {
      uint32_t start = uint32_t(it.nextAndFlip());
      uint32_t end = areaSize;

      if (it.hasNext())
        end = uint32_t(it.nextAndFlip());

      JitAllocatorImpl_fillPattern(block->rwPtr() + start * granularity, impl->fillPattern, (end - start) * granularity);
    }
  }

  memset(block->usedBitVector, 0, size_t(numBitWords) * sizeof(Support::BitWord));
  memset(block->stopBitVector, 0, size_t(numBitWords) * sizeof(Support::BitWord));

  block->areaUsed = 0;
  block->largestUnusedArea = areaSize;
  block->searchStart = 0;
  block->searchEnd = areaSize;
  block->addFlags(JitAllocatorBlock::kFlagEmpty);
  block->clearFlags(JitAllocatorBlock::kFlagDirty);
}
532
533 // ============================================================================
534 // [asmjit::JitAllocator - Construction / Destruction]
535 // ============================================================================
536
//! Creates the allocator; on allocation failure falls back to the shared
//! "none" impl sentinel, which every public API checks for.
JitAllocator::JitAllocator(const CreateParams* params) noexcept {
  _impl = JitAllocatorImpl_new(params);
  if (ASMJIT_UNLIKELY(!_impl))
    _impl = const_cast<JitAllocator::Impl*>(&JitAllocatorImpl_none);
}
542
//! Destroys the allocator; the shared "none" sentinel is never freed.
JitAllocator::~JitAllocator() noexcept {
  if (_impl == &JitAllocatorImpl_none)
    return;

  reset(Globals::kResetHard);
  JitAllocatorImpl_destroy(static_cast<JitAllocatorPrivateImpl*>(_impl));
}
550
551 // ============================================================================
552 // [asmjit::JitAllocator - Reset]
553 // ============================================================================
554
555 void JitAllocator::reset(uint32_t resetPolicy) noexcept {
556 if (_impl == &JitAllocatorImpl_none)
557 return;
558
559 JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
560 impl->tree.reset();
561 size_t poolCount = impl->poolCount;
562
563 for (size_t poolId = 0; poolId < poolCount; poolId++) {
564 JitAllocatorPool& pool = impl->pools[poolId];
565 JitAllocatorBlock* block = pool.blocks.first();
566
567 JitAllocatorBlock* blockToKeep = nullptr;
568 if (resetPolicy != Globals::kResetHard && !(impl->options & kOptionImmediateRelease)) {
569 blockToKeep = block;
570 block = block->next();
571 }
572
573 while (block) {
574 JitAllocatorBlock* next = block->next();
575 JitAllocatorImpl_deleteBlock(impl, block);
576 block = next;
577 }
578
579 pool.reset();
580
581 if (blockToKeep) {
582 blockToKeep->_listNodes[0] = nullptr;
583 blockToKeep->_listNodes[1] = nullptr;
584 JitAllocatorImpl_wipeOutBlock(impl, blockToKeep);
585 JitAllocatorImpl_insertBlock(impl, blockToKeep);
586 pool.emptyBlockCount = 1;
587 }
588 }
589 }
590
591 // ============================================================================
592 // [asmjit::JitAllocator - Statistics]
593 // ============================================================================
594
595 JitAllocator::Statistics JitAllocator::statistics() const noexcept {
596 Statistics statistics;
597 statistics.reset();
598
599 if (ASMJIT_LIKELY(_impl != &JitAllocatorImpl_none)) {
600 JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
601 LockGuard guard(impl->lock);
602
603 size_t poolCount = impl->poolCount;
604 for (size_t poolId = 0; poolId < poolCount; poolId++) {
605 const JitAllocatorPool& pool = impl->pools[poolId];
606 statistics._blockCount += size_t(pool.blockCount);
607 statistics._reservedSize += size_t(pool.totalAreaSize) * pool.granularity;
608 statistics._usedSize += size_t(pool.totalAreaUsed) * pool.granularity;
609 statistics._overheadSize += size_t(pool.totalOverheadBytes);
610 }
611 }
612
613 return statistics;
614 }
615
616 // ============================================================================
617 // [asmjit::JitAllocator - Alloc / Release]
618 // ============================================================================
619
// Allocates `size` bytes of virtual memory and returns two views of it:
// `*roPtrOut` (read-only/executable) and `*rwPtrOut` (read-write). The two
// pointers come from the block's `roPtr()`/`rwPtr()`; whether they alias
// depends on how the block was mapped (dual mapping) -- callers must write
// through `rwPtrOut` and execute through `roPtrOut`.
//
// Thread-safe: the search/commit section runs under `impl->lock`.
Error JitAllocator::alloc(void** roPtrOut, void** rwPtrOut, size_t size) noexcept {
  if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
    return DebugUtils::errored(kErrorNotInitialized);

  JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
  // Marker meaning "no suitable hole found yet".
  constexpr uint32_t kNoIndex = std::numeric_limits<uint32_t>::max();

  *roPtrOut = nullptr;
  *rwPtrOut = nullptr;

  // Align to the minimum granularity by default.
  size = Support::alignUp<size_t>(size, impl->granularity);
  if (ASMJIT_UNLIKELY(size == 0))
    return DebugUtils::errored(kErrorInvalidArgument);

  if (ASMJIT_UNLIKELY(size > std::numeric_limits<uint32_t>::max() / 2))
    return DebugUtils::errored(kErrorTooLarge);

  LockGuard guard(impl->lock);
  JitAllocatorPool* pool = &impl->pools[JitAllocatorImpl_sizeToPoolId(impl, size)];

  uint32_t areaIndex = kNoIndex;
  // Requested size expressed in granularity units (bits of the bit-vectors).
  uint32_t areaSize = uint32_t(pool->areaSizeFromByteSize(size));

  // Try to find the requested memory area in existing blocks.
  JitAllocatorBlock* block = pool->blocks.first();
  if (block) {
    JitAllocatorBlock* initial = block;
    do {
      // Circular traversal of the pool's block list starting at `initial`.
      JitAllocatorBlock* next = block->hasNext() ? block->next() : pool->blocks.first();
      if (block->areaAvailable() >= areaSize) {
        // Only scan the block if its cached `largestUnusedArea` is big enough
        // or the cache is stale (kFlagDirty set by release()/shrink()).
        if (block->hasFlag(JitAllocatorBlock::kFlagDirty) || block->largestUnusedArea >= areaSize) {
          uint32_t blockAreaSize = block->areaSize;
          uint32_t searchStart = block->searchStart;
          uint32_t searchEnd = block->searchEnd;

          // Iterates transitions (used<->free) of the 'used' bit-vector,
          // restricted to the cached search window [searchStart, searchEnd).
          BitFlipIterator<Support::BitWord> it(
            block->usedBitVector,
            pool->bitWordCountFromAreaSize(searchEnd),
            searchStart,
            Support::allOnes<Support::BitWord>());

          // If there is unused area available then there has to be at least one match.
          ASMJIT_ASSERT(it.hasNext());

          // Best-fit scan: track the smallest hole that still fits
          // (`bestArea`) and the largest hole that doesn't win (`largestArea`,
          // cached afterwards for the next traversal).
          uint32_t bestArea = blockAreaSize;
          uint32_t largestArea = 0;
          uint32_t holeIndex = uint32_t(it.peekNext());
          uint32_t holeEnd = holeIndex;

          searchStart = holeIndex;
          do {
            holeIndex = uint32_t(it.nextAndFlip());
            if (holeIndex >= searchEnd) break;

            holeEnd = it.hasNext() ? Support::min(searchEnd, uint32_t(it.nextAndFlip())) : searchEnd;
            uint32_t holeSize = holeEnd - holeIndex;

            if (holeSize >= areaSize && bestArea >= holeSize) {
              largestArea = Support::max(largestArea, bestArea);
              bestArea = holeSize;
              areaIndex = holeIndex;
            }
            else {
              largestArea = Support::max(largestArea, holeSize);
            }
          } while (it.hasNext());
          searchEnd = holeEnd;

          // Because we have traversed the entire block, we can now mark the
          // largest unused area that can be used to cache the next traversal.
          block->searchStart = searchStart;
          block->searchEnd = searchEnd;
          block->largestUnusedArea = largestArea;
          block->clearFlags(JitAllocatorBlock::kFlagDirty);

          if (areaIndex != kNoIndex) {
            // If the winning hole starts at the search window, advance it
            // past the area we are about to occupy.
            if (searchStart == areaIndex)
              block->searchStart += areaSize;
            break;
          }
        }
      }

      block = next;
    } while (block != initial);
  }

  // Allocate a new block if there is no region of a required width.
  if (areaIndex == kNoIndex) {
    size_t blockSize = JitAllocatorImpl_calculateIdealBlockSize(impl, pool, size);
    if (ASMJIT_UNLIKELY(!blockSize))
      return DebugUtils::errored(kErrorOutOfMemory);

    block = JitAllocatorImpl_newBlock(impl, pool, blockSize);

    if (ASMJIT_UNLIKELY(!block))
      return DebugUtils::errored(kErrorOutOfMemory);

    JitAllocatorImpl_insertBlock(impl, block);
    // The new allocation occupies the front of the fresh block.
    areaIndex = 0;
    block->searchStart = areaSize;
    block->largestUnusedArea = block->areaSize - areaSize;
  }

  // Update statistics.
  block->increaseUsedArea(areaSize);

  // Handle special cases.
  if (block->hasFlag(JitAllocatorBlock::kFlagEmpty)) {
    pool->emptyBlockCount--;
    block->clearFlags(JitAllocatorBlock::kFlagEmpty);
  }

  if (block->areaAvailable() == 0) {
    // The whole block is filled.
    block->searchStart = block->areaSize;
    block->searchEnd = 0;
    block->largestUnusedArea = 0;
    block->clearFlags(JitAllocatorBlock::kFlagDirty);
  }

  // Mark the newly allocated space as occupied and also the sentinel.
  Support::bitVectorFill(block->usedBitVector, areaIndex, areaSize);
  Support::bitVectorSetBit(block->stopBitVector, areaIndex + areaSize - 1, true);

  // Return a pointer to the allocated memory.
  size_t offset = pool->byteSizeFromAreaSize(areaIndex);
  ASMJIT_ASSERT(offset <= block->blockSize - size);

  *roPtrOut = block->roPtr() + offset;
  *rwPtrOut = block->rwPtr() + offset;
  return kErrorOk;
}
754
// Releases a region previously returned by `alloc()`, identified by its
// read-only pointer `ro`. The owning block is found via the RB-tree, the
// allocation size is recovered from the 'stop' sentinel bit-vector.
//
// Thread-safe: runs under `impl->lock`.
Error JitAllocator::release(void* ro) noexcept {
  if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
    return DebugUtils::errored(kErrorNotInitialized);

  if (ASMJIT_UNLIKELY(!ro))
    return DebugUtils::errored(kErrorInvalidArgument);

  JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
  LockGuard guard(impl->lock);

  // Locate the block that contains `ro`.
  JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(ro));
  if (ASMJIT_UNLIKELY(!block))
    return DebugUtils::errored(kErrorInvalidState);

  // Offset relative to the start of the block.
  JitAllocatorPool* pool = block->pool;
  size_t offset = (size_t)((uint8_t*)ro - block->roPtr());

  // The first bit representing the allocated area and its size.
  // The allocation's extent is delimited by the sentinel bit set in
  // `stopBitVector` at the last area unit by alloc().
  uint32_t areaIndex = uint32_t(offset >> pool->granularityLog2);
  uint32_t areaLast = uint32_t(Support::bitVectorIndexOf(block->stopBitVector, areaIndex, true));
  uint32_t areaSize = areaLast - areaIndex + 1;

  // Update the search region and statistics.
  block->searchStart = Support::min(block->searchStart, areaIndex);
  block->searchEnd = Support::max(block->searchEnd, areaLast + 1);
  block->addFlags(JitAllocatorBlock::kFlagDirty);
  block->decreaseUsedArea(areaSize);

  // Clear all occupied bits and also the sentinel.
  Support::bitVectorClear(block->usedBitVector, areaIndex, areaSize);
  Support::bitVectorSetBit(block->stopBitVector, areaLast, false);

  // Fill the released memory if the secure mode is enabled.
  if (impl->options & kOptionFillUnusedMemory)
    JitAllocatorImpl_fillPattern(block->rwPtr() + areaIndex * pool->granularity, impl->fillPattern, areaSize * pool->granularity);

  // Release the whole block if it became empty.
  if (block->areaUsed == 0) {
    // Keep at most one empty block per pool (unless immediate release is on)
    // to avoid map/unmap churn in alloc()/release() cycles.
    if (pool->emptyBlockCount || (impl->options & kOptionImmediateRelease)) {
      JitAllocatorImpl_removeBlock(impl, block);
      JitAllocatorImpl_deleteBlock(impl, block);
    }
    else {
      pool->emptyBlockCount++;
      block->largestUnusedArea = areaSize;
      block->searchStart = 0;
      block->searchEnd = areaSize;
      block->addFlags(JitAllocatorBlock::kFlagEmpty);
      block->clearFlags(JitAllocatorBlock::kFlagDirty);
    }
  }

  return kErrorOk;
}
810
811 Error JitAllocator::shrink(void* ro, size_t newSize) noexcept {
812 if (ASMJIT_UNLIKELY(_impl == &JitAllocatorImpl_none))
813 return DebugUtils::errored(kErrorNotInitialized);
814
815 if (ASMJIT_UNLIKELY(!ro))
816 return DebugUtils::errored(kErrorInvalidArgument);
817
818 if (ASMJIT_UNLIKELY(newSize == 0))
819 return release(ro);
820
821 JitAllocatorPrivateImpl* impl = static_cast<JitAllocatorPrivateImpl*>(_impl);
822 LockGuard guard(impl->lock);
823 JitAllocatorBlock* block = impl->tree.get(static_cast<uint8_t*>(ro));
824
825 if (ASMJIT_UNLIKELY(!block))
826 return DebugUtils::errored(kErrorInvalidArgument);
827
828 // Offset relative to the start of the block.
829 JitAllocatorPool* pool = block->pool;
830 size_t offset = (size_t)((uint8_t*)ro - block->roPtr());
831
832 // The first bit representing the allocated area and its size.
833 uint32_t areaIndex = uint32_t(offset >> pool->granularityLog2);
834 uint32_t areaOldSize = uint32_t(Support::bitVectorIndexOf(block->stopBitVector, areaIndex, true)) + 1 - areaIndex;
835 uint32_t areaNewSize = pool->areaSizeFromByteSize(newSize);
836
837 if (ASMJIT_UNLIKELY(areaNewSize > areaOldSize))
838 return DebugUtils::errored(kErrorInvalidState);
839
840 uint32_t areaDiff = areaOldSize - areaNewSize;
841 if (!areaDiff)
842 return kErrorOk;
843
844 // Update the search region and statistics.
845 block->searchStart = Support::min(block->searchStart, areaIndex + areaNewSize);
846 block->searchEnd = Support::max(block->searchEnd, areaIndex + areaOldSize);
847 block->addFlags(JitAllocatorBlock::kFlagDirty);
848 block->decreaseUsedArea(areaDiff);
849
850 // Unmark the released space and move the sentinel.
851 Support::bitVectorClear(block->usedBitVector, areaIndex + areaNewSize, areaDiff);
852 Support::bitVectorSetBit(block->stopBitVector, areaIndex + areaOldSize - 1, false);
853 Support::bitVectorSetBit(block->stopBitVector, areaIndex + areaNewSize - 1, true);
854
855 // Fill released memory if the secure mode is enabled.
856 if (impl->options & kOptionFillUnusedMemory)
857 JitAllocatorImpl_fillPattern(
858 block->rwPtr() + (areaIndex + areaOldSize) * pool->granularity,
859 fillPattern(),
860 areaDiff * pool->granularity);
861
862 return kErrorOk;
863 }
864
865 // ============================================================================
866 // [asmjit::JitAllocator - Unit]
867 // ============================================================================
868
869 #if defined(ASMJIT_TEST)
// A xorshift128+ pseudo random number generator implemented after the paper
// by Sebastiano Vigna:
//   http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
class Random {
public:
  // Shift constants, suggested as `23/18/5`.
  enum Steps : uint32_t {
    kStep1_SHL = 23,
    kStep2_SHR = 18,
    kStep3_SHR = 5
  };

  inline explicit Random(uint64_t seed = 0) noexcept { reset(seed); }
  inline Random(const Random& other) noexcept = default;

  // Re-seeds the generator. Both state words are derived from `seed` by the
  // splitmix64 mixer so that even trivial seeds produce usable state, and a
  // zero word is replaced by an arbitrary non-zero constant.
  inline void reset(uint64_t seed = 0) noexcept {
    // The number is arbitrary, it means nothing.
    constexpr uint64_t kZeroSeed = 0x1F0A2BE71D163FA0u;

    // Generate the state data by using splitmix64.
    for (uint64_t* statePtr = _state; statePtr != _state + 2; statePtr++) {
      seed += 0x9E3779B97F4A7C15u;
      uint64_t mixed = seed;
      mixed = (mixed ^ (mixed >> 30)) * 0xBF58476D1CE4E5B9u;
      mixed = (mixed ^ (mixed >> 27)) * 0x94D049BB133111EBu;
      mixed ^= mixed >> 31;
      *statePtr = mixed != 0 ? mixed : kZeroSeed;
    }
  }

  // Returns the high 32 bits of the next 64-bit output.
  inline uint32_t nextUInt32() noexcept {
    return uint32_t(nextUInt64() >> 32);
  }

  // Advances the state and returns the next 64-bit output.
  inline uint64_t nextUInt64() noexcept {
    uint64_t s0 = _state[0];
    uint64_t s1 = _state[1];

    s0 ^= s0 << kStep1_SHL;
    s1 ^= s1 >> kStep3_SHR;
    s0 ^= s0 >> kStep2_SHR;
    s0 ^= s1;

    _state[0] = s1;
    _state[1] = s0;
    return s0 + s1;
  }

  uint64_t _state[2];
};
919
920 // Helper class to verify that JitAllocator doesn't return addresses that overlap.
// Wraps a `JitAllocator` and records every allocated [addr, addr+size) range
// in an RB-tree so overlapping allocations are detected and reported via
// EXPECT. Records live in a zone-backed heap to keep the test self-contained.
class JitAllocatorWrapper {
public:
  explicit inline JitAllocatorWrapper(const JitAllocator::CreateParams* params) noexcept
    : _zone(1024 * 1024),
      _heap(&_zone),
      _allocator(params) {}

  // Address to a memory region of a given size.
  class Range {
  public:
    inline Range(uint8_t* addr, size_t size) noexcept
      : addr(addr),
        size(size) {}
    uint8_t* addr;
    size_t size;
  };

  // Based on JitAllocator::Block, serves our purpose well...
  class Record : public ZoneTreeNodeT<Record>,
                 public Range {
  public:
    inline Record(uint8_t* addr, size_t size)
      : ZoneTreeNodeT<Record>(),
        Range(addr, size) {}

    // Node-vs-node ordering used when inserting into the tree.
    inline bool operator<(const Record& other) const noexcept { return addr < other.addr; }
    inline bool operator>(const Record& other) const noexcept { return addr > other.addr; }

    // Node-vs-key ordering: a key "matches" a record when it falls inside
    // [addr, addr+size), which is how overlap lookups below work.
    inline bool operator<(const uint8_t* key) const noexcept { return addr + size <= key; }
    inline bool operator>(const uint8_t* key) const noexcept { return addr > key; }
  };

  // Registers [p_, p_+size); fails the test if either endpoint falls inside
  // an already-recorded range.
  void _insert(void* p_, size_t size) noexcept {
    uint8_t* p = static_cast<uint8_t*>(p_);
    uint8_t* pEnd = p + size - 1;

    Record* record;

    // NOTE(review): the `if (record)` guards look redundant since
    // EXPECT(record == nullptr) fails exactly when `record` is non-null;
    // presumably they ensure the message arguments (which dereference
    // `record`) are only formed on an actual collision - confirm EXPECT's
    // evaluation semantics before simplifying.
    record = _records.get(p);
    if (record)
      EXPECT(record == nullptr,
             "Address [%p:%p] collides with a newly allocated [%p:%p]\n", record->addr, record->addr + record->size, p, p + size);

    record = _records.get(pEnd);
    if (record)
      EXPECT(record == nullptr,
             "Address [%p:%p] collides with a newly allocated [%p:%p]\n", record->addr, record->addr + record->size, p, p + size);

    record = _heap.newT<Record>(p, size);
    EXPECT(record != nullptr,
           "Out of memory, cannot allocate 'Record'");

    _records.insert(record);
  }

  // Unregisters the range starting at `p`; fails the test if unknown.
  void _remove(void* p) noexcept {
    Record* record = _records.get(static_cast<uint8_t*>(p));
    EXPECT(record != nullptr,
           "Address [%p] doesn't exist\n", p);

    _records.remove(record);
    _heap.release(record, sizeof(Record));
  }

  // Allocates `size` bytes, records the returned read-only range, and
  // returns the read-only pointer (the rw pointer is not tracked here).
  void* alloc(size_t size) noexcept {
    void* roPtr;
    void* rwPtr;

    Error err = _allocator.alloc(&roPtr, &rwPtr, size);
    EXPECT(err == kErrorOk,
           "JitAllocator failed to allocate '%u' bytes\n", unsigned(size));

    _insert(roPtr, size);
    return roPtr;
  }

  // Unregisters and releases a pointer previously returned by `alloc()`.
  void release(void* p) noexcept {
    _remove(p);
    EXPECT(_allocator.release(p) == kErrorOk,
           "JitAllocator failed to release '%p'\n", p);
  }

  Zone _zone;
  ZoneAllocator _heap;
  ZoneTree<Record> _records;
  JitAllocator _allocator;
};
1008
1009 static void JitAllocatorTest_shuffle(void** ptrArray, size_t count, Random& prng) noexcept {
1010 for (size_t i = 0; i < count; ++i)
1011 std::swap(ptrArray[i], ptrArray[size_t(prng.nextUInt32() % count)]);
1012 }
1013
// Prints the allocator's current statistics (block count, reserved/used
// virtual memory, heap overhead) through the test-framework INFO macro.
static void JitAllocatorTest_usage(JitAllocator& allocator) noexcept {
  JitAllocator::Statistics stats = allocator.statistics();
  INFO("  Block Count       : %9llu [Blocks]"        , (unsigned long long)(stats.blockCount()));
  INFO("  Reserved (VirtMem): %9llu [Bytes]"         , (unsigned long long)(stats.reservedSize()));
  INFO("  Used     (VirtMem): %9llu [Bytes] (%.1f%%)", (unsigned long long)(stats.usedSize()), stats.usedSizeAsPercent());
  INFO("  Overhead (HeapMem): %9llu [Bytes] (%.1f%%)", (unsigned long long)(stats.overheadSize()), stats.overheadSizeAsPercent());
}
1021
1022 UNIT(jit_allocator) {
1023 size_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 100000;
1024
1025 struct TestParams {
1026 const char* name;
1027 uint32_t options;
1028 uint32_t blockSize;
1029 uint32_t granularity;
1030 };
1031
1032 #define OPT(OPTION) JitAllocator::OPTION
1033 static TestParams testParams[] = {
1034 { "Default", 0, 0, 0 },
1035 { "16MB blocks", 0, 16 * 1024 * 1024, 0 },
1036 { "256B granularity", 0, 0, 256 },
1037 { "kOptionUseDualMapping", OPT(kOptionUseDualMapping), 0, 0 },
1038 { "kOptionUseMultiplePools", OPT(kOptionUseMultiplePools), 0, 0 },
1039 { "kOptionFillUnusedMemory", OPT(kOptionFillUnusedMemory), 0, 0 },
1040 { "kOptionImmediateRelease", OPT(kOptionImmediateRelease), 0, 0 },
1041 { "kOptionUseDualMapping | kOptionFillUnusedMemory", OPT(kOptionUseDualMapping) | OPT(kOptionFillUnusedMemory), 0, 0 }
1042 };
1043 #undef OPT
1044
1045 INFO("BitFlipIterator<uint32_t>");
1046 {
1047 static const uint32_t bits[] = { 0x80000000u, 0x80000000u, 0x00000000u, 0x80000000u };
1048 BitFlipIterator<uint32_t> it(bits, ASMJIT_ARRAY_SIZE(bits));
1049
1050 EXPECT(it.hasNext());
1051 EXPECT(it.nextAndFlip() == 31);
1052 EXPECT(it.hasNext());
1053 EXPECT(it.nextAndFlip() == 32);
1054 EXPECT(it.hasNext());
1055 EXPECT(it.nextAndFlip() == 63);
1056 EXPECT(it.hasNext());
1057 EXPECT(it.nextAndFlip() == 64);
1058 EXPECT(it.hasNext());
1059 EXPECT(it.nextAndFlip() == 127);
1060 EXPECT(!it.hasNext());
1061 }
1062
1063 INFO("BitFlipIterator<uint64_t>");
1064 {
1065 static const uint64_t bits[] = { 0xFFFFFFFFFFFFFFFFu, 0xFFFFFFFFFFFFFFFF, 0, 0 };
1066 BitFlipIterator<uint64_t> it(bits, ASMJIT_ARRAY_SIZE(bits));
1067
1068 EXPECT(it.hasNext());
1069 EXPECT(it.nextAndFlip() == 0);
1070 EXPECT(it.hasNext());
1071 EXPECT(it.nextAndFlip() == 128);
1072 EXPECT(!it.hasNext());
1073 }
1074
1075 for (uint32_t testId = 0; testId < ASMJIT_ARRAY_SIZE(testParams); testId++) {
1076 INFO("Testing JitAllocator: %s", testParams[testId].name);
1077
1078 JitAllocator::CreateParams params {};
1079 params.options = testParams[testId].options;
1080 params.blockSize = testParams[testId].blockSize;
1081 params.granularity = testParams[testId].granularity;
1082
1083 JitAllocatorWrapper wrapper(&params);
1084 Random prng(100);
1085
1086 size_t i;
1087
1088 INFO(" Memory alloc/release test - %d allocations", kCount);
1089
1090 void** ptrArray = (void**)::malloc(sizeof(void*) * size_t(kCount));
1091 EXPECT(ptrArray != nullptr,
1092 "Couldn't allocate '%u' bytes for pointer-array", unsigned(sizeof(void*) * size_t(kCount)));
1093
1094 INFO(" Allocating virtual memory...");
1095 for (i = 0; i < kCount; i++)
1096 ptrArray[i] = wrapper.alloc((prng.nextUInt32() % 1024) + 8);
1097 JitAllocatorTest_usage(wrapper._allocator);
1098
1099 INFO(" Releasing virtual memory...");
1100 for (i = 0; i < kCount; i++)
1101 wrapper.release(ptrArray[i]);
1102 JitAllocatorTest_usage(wrapper._allocator);
1103
1104 INFO(" Allocating virtual memory...", kCount);
1105 for (i = 0; i < kCount; i++)
1106 ptrArray[i] = wrapper.alloc((prng.nextUInt32() % 1024) + 8);
1107 JitAllocatorTest_usage(wrapper._allocator);
1108
1109 INFO(" Shuffling...");
1110 JitAllocatorTest_shuffle(ptrArray, unsigned(kCount), prng);
1111
1112 INFO(" Releasing 50%% blocks...");
1113 for (i = 0; i < kCount / 2; i++)
1114 wrapper.release(ptrArray[i]);
1115 JitAllocatorTest_usage(wrapper._allocator);
1116
1117 INFO(" Allocating 50%% blocks again...");
1118 for (i = 0; i < kCount / 2; i++)
1119 ptrArray[i] = wrapper.alloc((prng.nextUInt32() % 1024) + 8);
1120 JitAllocatorTest_usage(wrapper._allocator);
1121
1122 INFO(" Releasing virtual memory...");
1123 for (i = 0; i < kCount; i++)
1124 wrapper.release(ptrArray[i]);
1125 JitAllocatorTest_usage(wrapper._allocator);
1126
1127 ::free(ptrArray);
1128 }
1129 }
1130 #endif
1131
1132 ASMJIT_END_NAMESPACE
1133
1134 #endif
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_JITALLOCATOR_H
7 #define _ASMJIT_CORE_JITALLOCATOR_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_JIT
11
12 #include "../core/globals.h"
13 #include "../core/virtmem.h"
14
15 ASMJIT_BEGIN_NAMESPACE
16
17 //! \addtogroup asmjit_jit
18 //! \{
19
20 // ============================================================================
21 // [asmjit::JitAllocator]
22 // ============================================================================
23
24 //! A simple implementation of memory manager that uses `asmjit::VirtMem`
25 //! functions to manage virtual memory for JIT compiled code.
26 //!
27 //! Implementation notes:
28 //!
29 //! - Granularity of allocated blocks is different than granularity for a typical
30 //! C malloc. In addition, the allocator can use several memory pools having a
31 //! different granularity to minimize the maintenance overhead. Multiple pools
32 //! feature requires `kFlagUseMultiplePools` flag to be set.
33 //!
34 //! - The allocator doesn't store any information in executable memory, instead,
35 //! the implementation uses two bit-vectors to manage allocated memory of each
36 //! allocator-block. The first bit-vector called 'used' is used to track used
37 //! memory (where each bit represents memory size defined by granularity) and
38 //! the second bit vector called 'stop' is used as a sentinel to mark where
39 //! the allocated area ends.
40 //!
41 //! - Internally, the allocator also uses RB tree to keep track of all blocks
42 //! across all pools. Each inserted block is added to the tree so it can be
43 //! matched fast during `release()` and `shrink()`.
44 class JitAllocator {
45 public:
46 ASMJIT_NONCOPYABLE(JitAllocator)
47
48 struct Impl {
49 //! Allocator options, see \ref JitAllocator::Options.
50 uint32_t options;
51 //! Base block size (0 if the allocator is not initialized).
52 uint32_t blockSize;
53 //! Base granularity (0 if the allocator is not initialized).
54 uint32_t granularity;
55 //! A pattern that is used to fill unused memory if secure mode is enabled.
56 uint32_t fillPattern;
57 };
58
59 //! Allocator implementation (private).
60 Impl* _impl;
61
62 enum Options : uint32_t {
63 //! Enables the use of an anonymous memory-mapped memory that is mapped into
64 //! two buffers having a different pointer. The first buffer has read and
65 //! execute permissions and the second buffer has read+write permissions.
66 //!
67 //! See \ref VirtMem::allocDualMapping() for more details about this feature.
68 kOptionUseDualMapping = 0x00000001u,
69
70 //! Enables the use of multiple pools with increasing granularity instead of
71 //! a single pool. This flag would enable 3 internal pools in total having
72 //! 64, 128, and 256 bytes granularity.
73 //!
74 //! This feature is only recommended for users that generate a lot of code
75 //! and would like to minimize the overhead of `JitAllocator` itself by
76 //! having blocks of different allocation granularities. Using this feature
77 //! only for few allocations won't pay off as the allocator may need to
78 //! create more blocks initially before it can take the advantage of
79 //! variable block granularity.
80 kOptionUseMultiplePools = 0x00000002u,
81
82 //! Always fill reserved memory by a fill-pattern.
83 //!
84 //! Causes a new block to be cleared by the fill pattern and freshly
85 //! released memory to be cleared before making it ready for another use.
86 kOptionFillUnusedMemory = 0x00000004u,
87
88 //! When this flag is set the allocator would immediately release unused
89 //! blocks during `release()` or `reset()`. When this flag is not set the
90 //! allocator would keep one empty block in each pool to prevent excessive
91 //! virtual memory allocations and deallocations in border cases, which
92 //! involve constantly allocating and deallocating a single block caused
93 //! by repetitive calling `alloc()` and `release()` when the allocator has
94 //! either no blocks or have all blocks fully occupied.
95 kOptionImmediateRelease = 0x00000008u,
96
97 //! Use a custom fill pattern, must be combined with `kFlagFillUnusedMemory`.
98 kOptionCustomFillPattern = 0x10000000u
99 };
100
101 //! \name Construction & Destruction
102 //! \{
103
104 //! Parameters that can be passed to `JitAllocator` constructor.
105 //!
106 //! Use it like this:
107 //!
108 //! ```
109 //! // Zero initialize (zero means the default value) and change what you need.
110 //! JitAllocator::CreateParams params {};
111 //! params.blockSize = 1024 * 1024;
112 //!
113 //! // Create the allocator.
114 //! JitAllocator allocator(&params);
115 //! ```
116 struct CreateParams {
117 // Reset the content of `CreateParams`.
118 inline void reset() noexcept { memset(this, 0, sizeof(*this)); }
119
120 //! Allocator options, see \ref JitAllocator::Options.
121 //!
122 //! No options are used by default.
123 uint32_t options;
124
125 //! Base size of a single block in bytes (default 64kB).
126 //!
127 //! \remarks Block size must be equal or greater to page size and must be
128 //! power of 2. If the input is not valid then the default block size will
129 //! be used instead.
130 uint32_t blockSize;
131
132 //! Base granularity (and also natural alignment) of allocations in bytes
133 //! (default 64).
134 //!
135 //! Since the `JitAllocator` uses bit-arrays to mark used memory the
136 //! granularity also specifies how many bytes correspond to a single bit in
137 //! such bit-array. Higher granularity means more waste of virtual memory
138 //! (as it increases the natural alignment), but smaller bit-arrays as less
139 //! bits would be required per a single block.
140 uint32_t granularity;
141
142 //! Patter to use to fill unused memory.
143 //!
144 //! Only used if \ref kOptionCustomFillPattern is set.
145 uint32_t fillPattern;
146 };
147
148 //! Creates a `JitAllocator` instance.
149 explicit ASMJIT_API JitAllocator(const CreateParams* params = nullptr) noexcept;
150 //! Destroys the `JitAllocator` instance and release all blocks held.
151 ASMJIT_API ~JitAllocator() noexcept;
152
153 inline bool isInitialized() const noexcept { return _impl->blockSize == 0; }
154
155 //! Free all allocated memory - makes all pointers returned by `alloc()` invalid.
156 //!
157 //! \remarks This function is not thread-safe as it's designed to be used when
158 //! nobody else is using allocator. The reason is that there is no point of
159 //1 calling `reset()` when the allocator is still in use.
160 ASMJIT_API void reset(uint32_t resetPolicy = Globals::kResetSoft) noexcept;
161
162 //! \}
163
164 //! \name Accessors
165 //! \{
166
167 //! Returns allocator options, see `Flags`.
168 inline uint32_t options() const noexcept { return _impl->options; }
169 //! Tests whether the allocator has the given `option` set.
170 inline bool hasOption(uint32_t option) const noexcept { return (_impl->options & option) != 0; }
171
172 //! Returns a base block size (a minimum size of block that the allocator would allocate).
173 inline uint32_t blockSize() const noexcept { return _impl->blockSize; }
174 //! Returns granularity of the allocator.
175 inline uint32_t granularity() const noexcept { return _impl->granularity; }
176 //! Returns pattern that is used to fill unused memory if `kFlagUseFillPattern` is set.
177 inline uint32_t fillPattern() const noexcept { return _impl->fillPattern; }
178
179 //! \}
180
181 //! \name Alloc & Release
182 //! \{
183
184 //! Allocate `size` bytes of virtual memory.
185 //!
186 //! \remarks This function is thread-safe.
187 ASMJIT_API Error alloc(void** roPtrOut, void** rwPtrOut, size_t size) noexcept;
188
189 //! Release a memory returned by `alloc()`.
190 //!
191 //! \remarks This function is thread-safe.
192 ASMJIT_API Error release(void* ro) noexcept;
193
194 //! Free extra memory allocated with `p` by restricting it to `newSize` size.
195 //!
196 //! \remarks This function is thread-safe.
197 ASMJIT_API Error shrink(void* ro, size_t newSize) noexcept;
198
199 //! \}
200
201 //! \name Statistics
202 //! \{
203
204 //! Statistics about `JitAllocator`.
205 struct Statistics {
206 inline void reset() noexcept {
207 _blockCount = 0;
208 _usedSize = 0;
209 _reservedSize = 0;
210 _overheadSize = 0;
211 }
212
213 //! Returns count of blocks managed by `JitAllocator` at the moment.
214 inline size_t blockCount() const noexcept { return _blockCount; }
215
216 //! Returns how many bytes are currently used.
217 inline size_t usedSize() const noexcept { return _usedSize; }
218 //! Returns the number of bytes unused by the allocator at the moment.
219 inline size_t unusedSize() const noexcept { return _reservedSize - _usedSize; }
220 //! Returns the total number of bytes bytes reserved by the allocator (sum of sizes of all blocks).
221 inline size_t reservedSize() const noexcept { return _reservedSize; }
222 //! Returns the number of bytes the allocator needs to manage the allocated memory.
223 inline size_t overheadSize() const noexcept { return _overheadSize; }
224
225 inline double usedSizeAsPercent() const noexcept {
226 return (double(usedSize()) / (double(reservedSize()) + 1e-16)) * 100.0;
227 }
228
229 inline double unusedSizeAsPercent() const noexcept {
230 return (double(unusedSize()) / (double(reservedSize()) + 1e-16)) * 100.0;
231 }
232
233 inline double overheadSizeAsPercent() const noexcept {
234 return (double(overheadSize()) / (double(reservedSize()) + 1e-16)) * 100.0;
235 }
236
237 //! Number of blocks `JitAllocator` maintains.
238 size_t _blockCount;
239 //! How many bytes are currently used / allocated.
240 size_t _usedSize;
241 //! How many bytes are currently reserved by the allocator.
242 size_t _reservedSize;
243 //! Allocation overhead (in bytes) required to maintain all blocks.
244 size_t _overheadSize;
245 };
246
247 //! Returns JIT allocator statistics.
248 //!
249 //! \remarks This function is thread-safe.
250 ASMJIT_API Statistics statistics() const noexcept;
251
252 //! \}
253 };
254
255 //! \}
256
257 ASMJIT_END_NAMESPACE
258
259 #endif
260 #endif
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_JIT
8
9 #include "../core/cpuinfo.h"
10 #include "../core/jitruntime.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 // ============================================================================
15 // [asmjit::JitRuntime - Utilities]
16 // ============================================================================
17
18 // Only useful on non-x86 architectures.
// Flushes the CPU instruction cache for [p, p+size) after new code was
// written. Only Windows on non-X86 targets gets an explicit flush here
// (see the note above this function); everywhere else it's a no-op.
static inline void JitRuntime_flushInstructionCache(const void* p, size_t size) noexcept {
#if defined(_WIN32) && !ASMJIT_ARCH_X86
  // Windows has a built-in support in `kernel32.dll`.
  ::FlushInstructionCache(::GetCurrentProcess(), p, size);
#else
  // No-op branch - silence unused-parameter warnings.
  ASMJIT_UNUSED(p);
  ASMJIT_UNUSED(size);
#endif
}
28
29 // X86 Target
30 // ----------
31 //
32 // - 32-bit - Linux, OSX, BSD, and apparently also Haiku guarantee 16-byte
33 // stack alignment. Other operating systems are assumed to have
34 // 4-byte alignment by default for safety reasons.
35 // - 64-bit - stack must be aligned to 16 bytes.
36 //
37 // ARM Target
38 // ----------
39 //
40 // - 32-bit - Stack must be aligned to 8 bytes.
41 // - 64-bit - Stack must be aligned to 16 bytes (hardware requirement).
// Returns the natural stack alignment (in bytes) of the host target,
// resolved entirely at compile time. See the X86/ARM notes above the
// function for the rationale behind each branch.
static inline uint32_t JitRuntime_detectNaturalStackAlignment() noexcept {
#if ASMJIT_ARCH_BITS == 64 || \
    defined(__APPLE__    ) || \
    defined(__DragonFly__) || \
    defined(__HAIKU__    ) || \
    defined(__FreeBSD__  ) || \
    defined(__NetBSD__   ) || \
    defined(__OpenBSD__  ) || \
    defined(__bsdi__     ) || \
    defined(__linux__    )
  // All 64-bit targets and the listed 32-bit OSes guarantee 16-byte alignment.
  return 16;
#elif ASMJIT_ARCH_ARM
  // 32-bit ARM requires 8-byte stack alignment.
  return 8;
#else
  // Conservative fallback: assume pointer-sized alignment only.
  return uint32_t(sizeof(uintptr_t));
#endif
}
59
60 // ============================================================================
61 // [asmjit::JitRuntime - Construction / Destruction]
62 // ============================================================================
63
// Creates a `JitRuntime` whose embedded `JitAllocator` is constructed with
// the optional `params`, and configures code-generation properties from the
// host environment.
JitRuntime::JitRuntime(const JitAllocator::CreateParams* params) noexcept
  : _allocator(params) {

  // Setup target properties.
  _targetType = kTargetJit;
  // Architecture, stack alignment, and calling conventions reflect the host.
  _codeInfo._archInfo       = CpuInfo::host().archInfo();
  _codeInfo._stackAlignment = uint8_t(JitRuntime_detectNaturalStackAlignment());
  _codeInfo._cdeclCallConv  = CallConv::kIdHostCDecl;
  _codeInfo._stdCallConv    = CallConv::kIdHostStdCall;
  _codeInfo._fastCallConv   = CallConv::kIdHostFastCall;
}
75 JitRuntime::~JitRuntime() noexcept {}
76
77 // ============================================================================
78 // [asmjit::JitRuntime - Interface]
79 // ============================================================================
80
// Allocates executable memory for the code stored in `code`, relocates the
// code to the allocated base address, and copies it there. On success `*dst`
// receives the executable address; on failure `*dst` stays null and an error
// code is returned.
Error JitRuntime::_add(void** dst, CodeHolder* code) noexcept {
  *dst = nullptr;

  // Give the code a contiguous layout and resolve all pending cross-section
  // links before the final size can be computed.
  ASMJIT_PROPAGATE(code->flatten());
  ASMJIT_PROPAGATE(code->resolveUnresolvedLinks());

  size_t estimatedCodeSize = code->codeSize();
  if (ASMJIT_UNLIKELY(estimatedCodeSize == 0))
    return DebugUtils::errored(kErrorNoCodeGenerated);

  // `ro` is the address the code will execute from, `rw` is the writable
  // mapping used for copying - the allocator may hand out two distinct
  // mappings of the same underlying memory.
  uint8_t* ro;
  uint8_t* rw;
  ASMJIT_PROPAGATE(_allocator.alloc((void**)&ro, (void**)&rw, estimatedCodeSize));

  // Relocate the code.
  Error err = code->relocateToBase(uintptr_t((void*)ro));
  if (ASMJIT_UNLIKELY(err)) {
    // Don't leak the allocation when relocation fails.
    _allocator.release(ro);
    return err;
  }

  // Recalculate the final code size and shrink the memory we allocated for it
  // in case that some relocations didn't require records in an address table.
  size_t codeSize = code->codeSize();

  // Copy each section's buffered data through the writable mapping and
  // zero-fill any trailing part where the virtual size exceeds the buffer.
  for (Section* section : code->_sections) {
    size_t offset = size_t(section->offset());
    size_t bufferSize = size_t(section->bufferSize());
    size_t virtualSize = size_t(section->virtualSize());

    ASMJIT_ASSERT(offset + bufferSize <= codeSize);
    memcpy(rw + offset, section->data(), bufferSize);

    if (virtualSize > bufferSize) {
      ASMJIT_ASSERT(offset + virtualSize <= codeSize);
      memset(rw + offset + bufferSize, 0, virtualSize - bufferSize);
    }
  }

  if (codeSize < estimatedCodeSize)
    _allocator.shrink(ro, codeSize);

  // Make the copied code visible to the instruction fetch unit (no-op on X86).
  flush(ro, codeSize);
  *dst = ro;

  return kErrorOk;
}
128
// Returns memory previously obtained from `_add()` back to the allocator.
Error JitRuntime::_release(void* p) noexcept {
  return _allocator.release(p);
}
132
// Flushes the instruction cache for `[p, p + size)` - see
// `JitRuntime_flushInstructionCache` for per-platform behavior.
void JitRuntime::flush(const void* p, size_t size) noexcept {
  JitRuntime_flushInstructionCache(p, size);
}
136
137 ASMJIT_END_NAMESPACE
138
139 #endif
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_JITRUNTIME_H
7 #define _ASMJIT_CORE_JITRUNTIME_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_JIT
11
12 #include "../core/codeholder.h"
13 #include "../core/jitallocator.h"
14 #include "../core/target.h"
15
16 ASMJIT_BEGIN_NAMESPACE
17
18 class CodeHolder;
19
20 //! \addtogroup asmjit_jit
21 //! \{
22
23 // ============================================================================
24 // [asmjit::JitRuntime]
25 // ============================================================================
26
//! JIT execution runtime is a special `Target` that is designed to store and
//! execute the generated code.
class ASMJIT_VIRTAPI JitRuntime : public Target {
public:
  ASMJIT_NONCOPYABLE(JitRuntime)

  //! Virtual memory allocator.
  JitAllocator _allocator;

  //! \name Construction & Destruction
  //! \{

  //! Creates a `JitRuntime` instance.
  explicit ASMJIT_API JitRuntime(const JitAllocator::CreateParams* params = nullptr) noexcept;
  //! Destroys the `JitRuntime` instance.
  ASMJIT_API virtual ~JitRuntime() noexcept;

  //! Resets the runtime by resetting the underlying `JitAllocator` with the
  //! given `resetPolicy` (defaults to `Globals::kResetSoft`).
  inline void reset(uint32_t resetPolicy = Globals::kResetSoft) noexcept {
    _allocator.reset(resetPolicy);
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the associated `JitAllocator`.
  inline JitAllocator* allocator() const noexcept { return const_cast<JitAllocator*>(&_allocator); }

  //! \}

  //! \name Utilities
  //! \{

  // NOTE: To allow passing function pointers to `add()` and `release()` the
  // virtual methods are prefixed with `_` and called from templates instead.

  //! Allocates memory needed for a code stored in the `CodeHolder` and relocates
  //! the code to the pointer allocated.
  //!
  //! The beginning of the memory allocated for the function is returned in `dst`.
  //! If failed `Error` code is returned and `dst` is explicitly set to `nullptr`
  //! (this means that you don't have to set it to null before calling `add()`).
  template<typename Func>
  inline Error add(Func* dst, CodeHolder* code) noexcept {
    return _add(Support::ptr_cast_impl<void**, Func*>(dst), code);
  }

  //! Releases `p` which was obtained by calling `add()`.
  template<typename Func>
  inline Error release(Func p) noexcept {
    return _release(Support::ptr_cast_impl<void*, Func>(p));
  }

  //! Type-unsafe version of `add()`.
  ASMJIT_API virtual Error _add(void** dst, CodeHolder* code) noexcept;

  //! Type-unsafe version of `release()`.
  ASMJIT_API virtual Error _release(void* p) noexcept;

  //! Flushes an instruction cache.
  //!
  //! This member function is called after the code has been copied to the
  //! destination buffer. It is only useful for JIT code generation as it
  //! causes a flush of the processor's cache.
  //!
  //! Flushing is basically a NOP under X86, but is needed by architectures
  //! that do not have a transparent instruction cache like ARM.
  //!
  //! This function can also be overridden to improve compatibility with tools
  //! such as Valgrind, however, it's not an official part of AsmJit.
  ASMJIT_API virtual void flush(const void* p, size_t size) noexcept;

  //! \}
};
102
103 //! \}
104
105 ASMJIT_END_NAMESPACE
106
107 #endif
108 #endif
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_LOGGING
8
9 #include "../core/builder.h"
10 #include "../core/codeholder.h"
11 #include "../core/compiler.h"
12 #include "../core/emitter.h"
13 #include "../core/logging.h"
14 #include "../core/string.h"
15 #include "../core/support.h"
16 #include "../core/type.h"
17
18 #ifdef ASMJIT_BUILD_X86
19 #include "../x86/x86logging_p.h"
20 #endif
21
22 #ifdef ASMJIT_BUILD_ARM
23 #include "../arm/armlogging_p.h"
24 #endif
25
26 ASMJIT_BEGIN_NAMESPACE
27
28 #if defined(ASMJIT_NO_COMPILER)
29 class VirtReg;
30 #endif
31
32 // ============================================================================
33 // [asmjit::Logger - Construction / Destruction]
34 // ============================================================================
35
// Creates a `Logger` with default (zeroed) format options.
Logger::Logger() noexcept
  : _options() {}
Logger::~Logger() noexcept {}
39
40 // ============================================================================
41 // [asmjit::Logger - Logging]
42 // ============================================================================
43
44 Error Logger::logf(const char* fmt, ...) noexcept {
45 Error err;
46 va_list ap;
47
48 va_start(ap, fmt);
49 err = logv(fmt, ap);
50 va_end(ap);
51
52 return err;
53 }
54
// Formats `fmt` with `ap` into a temporary string (2kB on-stack buffer, grows
// on demand) and forwards the result to `log()`.
Error Logger::logv(const char* fmt, va_list ap) noexcept {
  StringTmp<2048> sb;
  ASMJIT_PROPAGATE(sb.appendVFormat(fmt, ap));
  return log(sb);
}
60
61 Error Logger::logBinary(const void* data, size_t size) noexcept {
62 static const char prefix[] = "db ";
63
64 StringTmp<256> sb;
65 sb.appendString(prefix, ASMJIT_ARRAY_SIZE(prefix) - 1);
66
67 size_t i = size;
68 const uint8_t* s = static_cast<const uint8_t*>(data);
69
70 while (i) {
71 uint32_t n = uint32_t(Support::min<size_t>(i, 16));
72 sb.truncate(ASMJIT_ARRAY_SIZE(prefix) - 1);
73 sb.appendHex(s, n);
74 sb.appendChar('\n');
75 ASMJIT_PROPAGATE(log(sb));
76 s += n;
77 i -= n;
78 }
79
80 return kErrorOk;
81 }
82
83 // ============================================================================
84 // [asmjit::FileLogger - Construction / Destruction]
85 // ============================================================================
86
// Creates a `FileLogger` writing to `file`; pass null to create a disabled
// logger that can be attached to a stream later via `setFile()`.
FileLogger::FileLogger(FILE* file) noexcept
  : _file(nullptr) { setFile(file); }
FileLogger::~FileLogger() noexcept {}
90
91 // ============================================================================
92 // [asmjit::FileLogger - Logging]
93 // ============================================================================
94
95 Error FileLogger::_log(const char* data, size_t size) noexcept {
96 if (!_file)
97 return kErrorOk;
98
99 if (size == SIZE_MAX)
100 size = strlen(data);
101
102 fwrite(data, 1, size, _file);
103 return kErrorOk;
104 }
105
106 // ============================================================================
107 // [asmjit::StringLogger - Construction / Destruction]
108 // ============================================================================
109
// Creates a `StringLogger` with an empty content buffer.
StringLogger::StringLogger() noexcept {}
StringLogger::~StringLogger() noexcept {}
112
113 // ============================================================================
114 // [asmjit::StringLogger - Logging]
115 // ============================================================================
116
// Appends the logged text to the internal `_content` string.
Error StringLogger::_log(const char* data, size_t size) noexcept {
  return _content.appendString(data, size);
}
120
121 // ============================================================================
122 // [asmjit::Logging]
123 // ============================================================================
124
125 Error Logging::formatLabel(
126 String& sb,
127 uint32_t flags,
128 const BaseEmitter* emitter,
129 uint32_t labelId) noexcept {
130
131 ASMJIT_UNUSED(flags);
132
133 const LabelEntry* le = emitter->code()->labelEntry(labelId);
134 if (ASMJIT_UNLIKELY(!le))
135 return sb.appendFormat("InvalidLabel[Id=%u]", labelId);
136
137 if (le->hasName()) {
138 if (le->hasParent()) {
139 uint32_t parentId = le->parentId();
140 const LabelEntry* pe = emitter->code()->labelEntry(parentId);
141
142 if (ASMJIT_UNLIKELY(!pe))
143 ASMJIT_PROPAGATE(sb.appendFormat("InvalidLabel[Id=%u]", labelId));
144 else if (ASMJIT_UNLIKELY(!pe->hasName()))
145 ASMJIT_PROPAGATE(sb.appendFormat("L%u", parentId));
146 else
147 ASMJIT_PROPAGATE(sb.appendString(pe->name()));
148
149 ASMJIT_PROPAGATE(sb.appendChar('.'));
150 }
151 return sb.appendString(le->name());
152 }
153 else {
154 return sb.appendFormat("L%u", labelId);
155 }
156 }
157
// Formats a register into `sb`, dispatching to the backend that matches
// `archId`. Returns `kErrorInvalidArch` when no compiled-in backend handles
// the architecture.
Error Logging::formatRegister(
  String& sb,
  uint32_t flags,
  const BaseEmitter* emitter,
  uint32_t archId,
  uint32_t regType,
  uint32_t regId) noexcept {

#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId))
    return x86::LoggingInternal::formatRegister(sb, flags, emitter, archId, regType, regId);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (ArchInfo::isArmFamily(archId))
    return arm::LoggingInternal::formatRegister(sb, flags, emitter, archId, regType, regId);
#endif

  return kErrorInvalidArch;
}
178
// Formats an operand into `sb`, dispatching to the backend that matches
// `archId`. Returns `kErrorInvalidArch` when no compiled-in backend handles
// the architecture.
Error Logging::formatOperand(
  String& sb,
  uint32_t flags,
  const BaseEmitter* emitter,
  uint32_t archId,
  const Operand_& op) noexcept {

#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId))
    return x86::LoggingInternal::formatOperand(sb, flags, emitter, archId, op);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (ArchInfo::isArmFamily(archId))
    return arm::LoggingInternal::formatOperand(sb, flags, emitter, archId, op);
#endif

  return kErrorInvalidArch;
}
198
// Formats an instruction (including its operands) into `sb`, dispatching to
// the backend that matches `archId`. Returns `kErrorInvalidArch` when no
// compiled-in backend handles the architecture.
Error Logging::formatInstruction(
  String& sb,
  uint32_t flags,
  const BaseEmitter* emitter,
  uint32_t archId,
  const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept {

#ifdef ASMJIT_BUILD_X86
  if (ArchInfo::isX86Family(archId))
    return x86::LoggingInternal::formatInstruction(sb, flags, emitter, archId, inst, operands, opCount);
#endif

#ifdef ASMJIT_BUILD_ARM
  if (ArchInfo::isArmFamily(archId))
    return arm::LoggingInternal::formatInstruction(sb, flags, emitter, archId, inst, operands, opCount);
#endif

  return kErrorInvalidArch;
}
218
219 Error Logging::formatTypeId(String& sb, uint32_t typeId) noexcept {
220 if (typeId == Type::kIdVoid)
221 return sb.appendString("void");
222
223 if (!Type::isValid(typeId))
224 return sb.appendString("unknown");
225
226 const char* typeName = "unknown";
227 uint32_t typeSize = Type::sizeOf(typeId);
228
229 uint32_t baseId = Type::baseOf(typeId);
230 switch (baseId) {
231 case Type::kIdIntPtr : typeName = "iptr" ; break;
232 case Type::kIdUIntPtr: typeName = "uptr" ; break;
233 case Type::kIdI8 : typeName = "i8" ; break;
234 case Type::kIdU8 : typeName = "u8" ; break;
235 case Type::kIdI16 : typeName = "i16" ; break;
236 case Type::kIdU16 : typeName = "u16" ; break;
237 case Type::kIdI32 : typeName = "i32" ; break;
238 case Type::kIdU32 : typeName = "u32" ; break;
239 case Type::kIdI64 : typeName = "i64" ; break;
240 case Type::kIdU64 : typeName = "u64" ; break;
241 case Type::kIdF32 : typeName = "f32" ; break;
242 case Type::kIdF64 : typeName = "f64" ; break;
243 case Type::kIdF80 : typeName = "f80" ; break;
244 case Type::kIdMask8 : typeName = "mask8" ; break;
245 case Type::kIdMask16 : typeName = "mask16"; break;
246 case Type::kIdMask32 : typeName = "mask32"; break;
247 case Type::kIdMask64 : typeName = "mask64"; break;
248 case Type::kIdMmx32 : typeName = "mmx32" ; break;
249 case Type::kIdMmx64 : typeName = "mmx64" ; break;
250 }
251
252 uint32_t baseSize = Type::sizeOf(baseId);
253 if (typeSize > baseSize) {
254 uint32_t count = typeSize / baseSize;
255 return sb.appendFormat("%sx%u", typeName, unsigned(count));
256 }
257 else {
258 return sb.appendString(typeName);
259 }
260
261 }
262
263 #ifndef ASMJIT_NO_BUILDER
// Formats a single `FuncValue` as "<type>[@reg][@[stackOffset]]" - the type
// is always printed; the register and/or stack location follow when assigned.
static Error formatFuncValue(String& sb, uint32_t flags, const BaseEmitter* emitter, FuncValue value) noexcept {
  uint32_t typeId = value.typeId();
  ASMJIT_PROPAGATE(Logging::formatTypeId(sb, typeId));

  if (value.isReg()) {
    ASMJIT_PROPAGATE(sb.appendChar('@'));
    ASMJIT_PROPAGATE(Logging::formatRegister(sb, flags, emitter, emitter->archId(), value.regType(), value.regId()));
  }

  if (value.isStack()) {
    ASMJIT_PROPAGATE(sb.appendFormat("@[%d]", int(value.stackOffset())));
  }

  return kErrorOk;
}
279
// Formats the function's return value(s) into `sb` as a comma-separated list,
// or "void" when the function returns nothing. `vRegs`, when non-null,
// supplies the virtual register assigned to each return value (compiler-only).
static Error formatFuncRets(
  String& sb,
  uint32_t flags,
  const BaseEmitter* emitter,
  const FuncDetail& fd,
  VirtReg* const* vRegs) noexcept {

  if (!fd.hasRet())
    return sb.appendString("void");

  for (uint32_t i = 0; i < fd.retCount(); i++) {
    if (i) ASMJIT_PROPAGATE(sb.appendString(", "));
    ASMJIT_PROPAGATE(formatFuncValue(sb, flags, emitter, fd.ret(i)));

#ifndef ASMJIT_NO_COMPILER
    if (vRegs) {
      static const char nullRet[] = "<none>";
      ASMJIT_PROPAGATE(sb.appendFormat(" %s", vRegs[i] ? vRegs[i]->name() : nullRet));
    }
#endif
  }

  return kErrorOk;
}
304
// Formats the function's arguments into `sb` as a comma-separated list, or
// "void" when the function takes none. `vRegs`, when non-null, supplies the
// virtual register assigned to each argument (compiler-only).
static Error formatFuncArgs(
  String& sb,
  uint32_t flags,
  const BaseEmitter* emitter,
  const FuncDetail& fd,
  VirtReg* const* vRegs) noexcept {

  uint32_t count = fd.argCount();
  if (!count)
    return sb.appendString("void");

  for (uint32_t i = 0; i < count; i++) {
    if (i) ASMJIT_PROPAGATE(sb.appendString(", "));
    ASMJIT_PROPAGATE(formatFuncValue(sb, flags, emitter, fd.arg(i)));

#ifndef ASMJIT_NO_COMPILER
    if (vRegs) {
      static const char nullArg[] = "<none>";
      ASMJIT_PROPAGATE(sb.appendFormat(" %s", vRegs[i] ? vRegs[i]->name() : nullArg));
    }
#endif
  }

  return kErrorOk;
}
330
// Formats a single builder/compiler node into `sb`, dispatching on the node
// type. Unknown (user-defined) node types are printed as "[User:<type>]".
Error Logging::formatNode(
  String& sb,
  uint32_t flags,
  const BaseBuilder* cb,
  const BaseNode* node_) noexcept {

  // Prepend the node's position when `kFlagPositions` is enabled.
  if (node_->hasPosition() && (flags & FormatOptions::kFlagPositions) != 0)
    ASMJIT_PROPAGATE(sb.appendFormat("<%05u> ", node_->position()));

  switch (node_->type()) {
    // Regular instruction - formatted by the target-specific backend.
    case BaseNode::kNodeInst: {
      const InstNode* node = node_->as<InstNode>();
      ASMJIT_PROPAGATE(
        Logging::formatInstruction(sb, flags, cb,
          cb->archId(),
          node->baseInst(), node->operands(), node->opCount()));
      break;
    }

    // Section switch - ".section <name>" (only when the section is valid).
    case BaseNode::kNodeSection: {
      const SectionNode* node = node_->as<SectionNode>();
      if (cb->_code->isSectionValid(node->id())) {
        const Section* section = cb->_code->sectionById(node->id());
        ASMJIT_PROPAGATE(sb.appendFormat(".section %s", section->name()));
      }
      break;
    }

    // Label definition - "<label>:".
    case BaseNode::kNodeLabel: {
      const LabelNode* node = node_->as<LabelNode>();
      ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id()));
      ASMJIT_PROPAGATE(sb.appendString(":"));
      break;
    }

    // Alignment directive - ".align <n> (code|data)".
    case BaseNode::kNodeAlign: {
      const AlignNode* node = node_->as<AlignNode>();
      ASMJIT_PROPAGATE(
        sb.appendFormat(".align %u (%s)",
          node->alignment(),
          node->alignMode() == kAlignCode ? "code" : "data"));
      break;
    }

    // Embedded raw data - only the size is shown, not the content.
    case BaseNode::kNodeEmbedData: {
      const EmbedDataNode* node = node_->as<EmbedDataNode>();
      ASMJIT_PROPAGATE(sb.appendFormat(".embed (%u bytes)", node->size()));
      break;
    }

    // Embedded label address - ".label <label>".
    case BaseNode::kNodeEmbedLabel: {
      const EmbedLabelNode* node = node_->as<EmbedLabelNode>();
      ASMJIT_PROPAGATE(sb.appendString(".label "));
      ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id()));
      break;
    }

    // Embedded difference of two labels - ".label (<a> - <b>)".
    case BaseNode::kNodeEmbedLabelDelta: {
      const EmbedLabelDeltaNode* node = node_->as<EmbedLabelDeltaNode>();
      ASMJIT_PROPAGATE(sb.appendString(".label ("));
      ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id()));
      ASMJIT_PROPAGATE(sb.appendString(" - "));
      ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->baseId()));
      ASMJIT_PROPAGATE(sb.appendString(")"));
      break;
    }

    // Standalone comment node.
    case BaseNode::kNodeComment: {
      const CommentNode* node = node_->as<CommentNode>();
      ASMJIT_PROPAGATE(sb.appendFormat("; %s", node->inlineComment()));
      break;
    }

    // Sentinel node - marks a position such as the end of a function.
    case BaseNode::kNodeSentinel: {
      const SentinelNode* node = node_->as<SentinelNode>();
      const char* sentinelName = nullptr;

      switch (node->sentinelType()) {
        case SentinelNode::kSentinelFuncEnd:
          sentinelName = "[FuncEnd]";
          break;

        default:
          sentinelName = "[Sentinel]";
          break;
      }

      ASMJIT_PROPAGATE(sb.appendString(sentinelName));
      break;
    }

#ifndef ASMJIT_NO_COMPILER
    // Function entry - "<label>: <rets> Func(<args>)".
    case BaseNode::kNodeFunc: {
      const FuncNode* node = node_->as<FuncNode>();

      ASMJIT_PROPAGATE(formatLabel(sb, flags, cb, node->id()));
      ASMJIT_PROPAGATE(sb.appendString(": "));

      ASMJIT_PROPAGATE(formatFuncRets(sb, flags, cb, node->detail(), nullptr));
      ASMJIT_PROPAGATE(sb.appendString(" Func("));
      ASMJIT_PROPAGATE(formatFuncArgs(sb, flags, cb, node->detail(), node->args()));
      ASMJIT_PROPAGATE(sb.appendString(")"));
      break;
    }

    // Function return - "[FuncRet]" followed by up to two return operands.
    case BaseNode::kNodeFuncRet: {
      const FuncRetNode* node = node_->as<FuncRetNode>();
      ASMJIT_PROPAGATE(sb.appendString("[FuncRet]"));

      for (uint32_t i = 0; i < 2; i++) {
        const Operand_& op = node->_opArray[i];
        if (!op.isNone()) {
          ASMJIT_PROPAGATE(sb.appendString(i == 0 ? " " : ", "));
          ASMJIT_PROPAGATE(formatOperand(sb, flags, cb, cb->archId(), op));
        }
      }
      break;
    }

    // Function call - formatted like a regular instruction.
    case BaseNode::kNodeFuncCall: {
      const FuncCallNode* node = node_->as<FuncCallNode>();
      ASMJIT_PROPAGATE(
        Logging::formatInstruction(sb, flags, cb,
          cb->archId(),
          node->baseInst(), node->operands(), node->opCount()));
      break;
    }
#endif

    // Unknown or user-defined node type.
    default: {
      ASMJIT_PROPAGATE(sb.appendFormat("[User:%u]", node_->type()));
      break;
    }
  }

  return kErrorOk;
}
468 #endif
469
470 Error Logging::formatLine(String& sb, const uint8_t* binData, size_t binSize, size_t dispSize, size_t immSize, const char* comment) noexcept {
471 size_t currentSize = sb.size();
472 size_t commentSize = comment ? Support::strLen(comment, Globals::kMaxCommentSize) : 0;
473
474 ASMJIT_ASSERT(binSize >= dispSize);
475 const size_t kNoBinSize = std::numeric_limits<size_t>::max();
476
477 if ((binSize != 0 && binSize != kNoBinSize) || commentSize) {
478 size_t align = kMaxInstLineSize;
479 char sep = ';';
480
481 for (size_t i = (binSize == kNoBinSize); i < 2; i++) {
482 size_t begin = sb.size();
483 ASMJIT_PROPAGATE(sb.padEnd(align));
484
485 if (sep) {
486 ASMJIT_PROPAGATE(sb.appendChar(sep));
487 ASMJIT_PROPAGATE(sb.appendChar(' '));
488 }
489
490 // Append binary data or comment.
491 if (i == 0) {
492 ASMJIT_PROPAGATE(sb.appendHex(binData, binSize - dispSize - immSize));
493 ASMJIT_PROPAGATE(sb.appendChars('.', dispSize * 2));
494 ASMJIT_PROPAGATE(sb.appendHex(binData + binSize - immSize, immSize));
495 if (commentSize == 0) break;
496 }
497 else {
498 ASMJIT_PROPAGATE(sb.appendString(comment, commentSize));
499 }
500
501 currentSize += sb.size() - begin;
502 align += kMaxBinarySize;
503 sep = '|';
504 }
505 }
506
507 return sb.appendChar('\n');
508 }
509
510 ASMJIT_END_NAMESPACE
511
512 #endif
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_LOGGING_H
7 #define _ASMJIT_CORE_LOGGING_H
8
9 #include "../core/inst.h"
10 #include "../core/string.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \addtogroup asmjit_core
15 //! \{
16
17 #ifndef ASMJIT_NO_LOGGING
18
19 // ============================================================================
20 // [Forward Declarations]
21 // ============================================================================
22
23 class BaseEmitter;
24 class BaseReg;
25 class Logger;
26 struct Operand_;
27
28 #ifndef ASMJIT_NO_BUILDER
29 class BaseBuilder;
30 class BaseNode;
31 #endif
32
33 // ============================================================================
34 // [asmjit::FormatOptions]
35 // ============================================================================
36
//! Formatting options used by `Logger` and the `Logging` helpers - a set of
//! format flags plus per-type indentation levels.
class FormatOptions {
public:
  //! Format flags, see `Flags`.
  uint32_t _flags;
  //! Indentation for each `IndentationType`.
  uint8_t _indentation[4];

  enum Flags : uint32_t {
    //! Show also binary form of each logged instruction (assembler).
    kFlagMachineCode = 0x00000001u,
    //! Show a text explanation of some immediate values.
    kFlagExplainImms = 0x00000002u,
    //! Use hexadecimal notation of immediate values.
    kFlagHexImms = 0x00000004u,
    //! Use hexadecimal notation of address offsets.
    kFlagHexOffsets = 0x00000008u,
    //! Show casts between virtual register types (compiler).
    kFlagRegCasts = 0x00000010u,
    //! Show positions associated with nodes (compiler).
    kFlagPositions = 0x00000020u,
    //! Annotate nodes that are lowered by passes.
    kFlagAnnotations = 0x00000040u,

    // TODO: These must go, keep this only for formatting.
    //! Show an additional output from passes.
    kFlagDebugPasses = 0x00000080u,
    //! Show an additional output from RA.
    kFlagDebugRA = 0x00000100u
  };

  enum IndentationType : uint32_t {
    //! Indentation used for instructions and directives.
    kIndentationCode = 0u,
    //! Indentation used for labels and function nodes.
    kIndentationLabel = 1u,
    //! Indentation used for comments (not inline comments).
    kIndentationComment = 2u,
    //! Reserved for future use.
    kIndentationReserved = 3u
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates default format options (no flags, zero indentation).
  constexpr FormatOptions() noexcept
    : _flags(0),
      _indentation { 0, 0, 0, 0 } {}

  constexpr FormatOptions(const FormatOptions& other) noexcept = default;
  inline FormatOptions& operator=(const FormatOptions& other) noexcept = default;

  //! Resets all flags and indentation levels to zero.
  inline void reset() noexcept {
    _flags = 0;
    _indentation[0] = 0;
    _indentation[1] = 0;
    _indentation[2] = 0;
    _indentation[3] = 0;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns all format flags.
  constexpr uint32_t flags() const noexcept { return _flags; }
  //! Tests whether the given `flag` is set.
  constexpr bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
  //! Replaces all format flags by `flags`.
  inline void setFlags(uint32_t flags) noexcept { _flags = flags; }
  //! Enables the given format `flags`.
  inline void addFlags(uint32_t flags) noexcept { _flags |= flags; }
  //! Disables the given format `flags`.
  inline void clearFlags(uint32_t flags) noexcept { _flags &= ~flags; }

  //! Returns the indentation of the given indentation `type`.
  constexpr uint8_t indentation(uint32_t type) const noexcept { return _indentation[type]; }
  //! Sets the indentation of the given indentation `type` to `n`.
  inline void setIndentation(uint32_t type, uint32_t n) noexcept { _indentation[type] = uint8_t(n); }
  //! Resets the indentation of the given indentation `type` to zero.
  inline void resetIndentation(uint32_t type) noexcept { _indentation[type] = uint8_t(0); }

  //! \}
};
110
111 // ============================================================================
112 // [asmjit::Logger]
113 // ============================================================================
114
//! Abstract logging interface and helpers.
//!
//! This class can be inherited and reimplemented to fit into your logging
//! subsystem. When reimplementing use `Logger::_log()` method to log into
//! a custom stream.
//!
//! There are two `Logger` implementations offered by AsmJit:
//!   - `FileLogger` - allows to log into `FILE*`.
//!   - `StringLogger` - logs into a `String`.
class ASMJIT_VIRTAPI Logger {
public:
  ASMJIT_BASE_CLASS(Logger)
  ASMJIT_NONCOPYABLE(Logger)

  //! Format options.
  FormatOptions _options;

  //! \name Construction & Destruction
  //! \{

  //! Creates a `Logger` instance.
  ASMJIT_API Logger() noexcept;
  //! Destroys the `Logger` instance.
  ASMJIT_API virtual ~Logger() noexcept;

  //! \}

  //! \name Format Options
  //! \{

  //! Returns the format options (mutable).
  inline FormatOptions& options() noexcept { return _options; }
  //! Returns the format options (immutable).
  inline const FormatOptions& options() const noexcept { return _options; }

  //! Returns the format flags, see `FormatOptions::Flags`.
  inline uint32_t flags() const noexcept { return _options.flags(); }
  //! Tests whether the given format `flag` is set.
  inline bool hasFlag(uint32_t flag) const noexcept { return _options.hasFlag(flag); }
  //! Replaces all format flags by `flags`.
  inline void setFlags(uint32_t flags) noexcept { _options.setFlags(flags); }
  //! Enables the given format `flags`.
  inline void addFlags(uint32_t flags) noexcept { _options.addFlags(flags); }
  //! Disables the given format `flags`.
  inline void clearFlags(uint32_t flags) noexcept { _options.clearFlags(flags); }

  //! Returns the indentation of the given `type`, see `FormatOptions::IndentationType`.
  inline uint32_t indentation(uint32_t type) const noexcept { return _options.indentation(type); }
  //! Sets the indentation of the given `type` to `n`.
  inline void setIndentation(uint32_t type, uint32_t n) noexcept { _options.setIndentation(type, n); }
  //! Resets the indentation of the given `type` to zero.
  inline void resetIndentation(uint32_t type) noexcept { _options.resetIndentation(type); }

  //! \}

  //! \name Logging Interface
  //! \{

  //! Logs `str` - must be reimplemented.
  virtual Error _log(const char* data, size_t size) noexcept = 0;

  //! Logs string `str`, which is either null terminated or having size `size`.
  inline Error log(const char* data, size_t size = SIZE_MAX) noexcept { return _log(data, size); }
  //! Logs content of a string `str`.
  inline Error log(const String& str) noexcept { return _log(str.data(), str.size()); }

  //! Formats the message by using `snprintf()` and then sends the result
  //! to `log()`.
  ASMJIT_API Error logf(const char* fmt, ...) noexcept;

  //! Formats the message by using `vsnprintf()` and then sends the result
  //! to `log()`.
  ASMJIT_API Error logv(const char* fmt, va_list ap) noexcept;

  //! Logs binary data.
  ASMJIT_API Error logBinary(const void* data, size_t size) noexcept;

  //! \}
};
184
185 // ============================================================================
186 // [asmjit::FileLogger]
187 // ============================================================================
188
//! Logger that can log to a `FILE*`.
class ASMJIT_VIRTAPI FileLogger : public Logger {
public:
  ASMJIT_NONCOPYABLE(FileLogger)

  //! Destination stream, null when no stream is attached (logging disabled).
  FILE* _file;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `FileLogger` that logs to `FILE*`.
  ASMJIT_API FileLogger(FILE* file = nullptr) noexcept;
  //! Destroys the `FileLogger`.
  ASMJIT_API virtual ~FileLogger() noexcept;

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the logging output stream or null if the logger has no output
  //! stream.
  inline FILE* file() const noexcept { return _file; }

  //! Sets the logging output stream to `stream` or null.
  //!
  //! \note If the `file` is null the logging will be disabled. When a logger
  //! is attached to `CodeHolder` or any emitter the logging API will always
  //! be called regardless of the output file. This means that if you really
  //! want to disable logging at emitter level you must not attach a logger
  //! to it.
  inline void setFile(FILE* file) noexcept { _file = file; }

  //! \}

  //! Implements `Logger::_log()` by writing to the attached `FILE*` stream
  //! (no-op when no stream is attached).
  ASMJIT_API Error _log(const char* data, size_t size = SIZE_MAX) noexcept override;
};
226
227 // ============================================================================
228 // [asmjit::StringLogger]
229 // ============================================================================
230
//! Logger that stores everything in an internal string buffer.
class ASMJIT_VIRTAPI StringLogger : public Logger {
public:
  ASMJIT_NONCOPYABLE(StringLogger)

  //! Logger data as string.
  String _content;

  //! \name Construction & Destruction
  //! \{

  //! Create new `StringLogger`.
  ASMJIT_API StringLogger() noexcept;
  //! Destroys the `StringLogger`.
  ASMJIT_API virtual ~StringLogger() noexcept;

  //! \}

  //! \name Logger Data Accessors
  //! \{

  //! Returns aggregated logger data as `char*` pointer.
  //!
  //! The pointer is owned by `StringLogger`, it can't be modified or freed.
  inline const char* data() const noexcept { return _content.data(); }
  //! Returns size of the data returned by `data()`.
  inline size_t dataSize() const noexcept { return _content.size(); }

  //! \}

  //! \name Logger Data Manipulation
  //! \{

  //! Clears the accumulated logger data.
  inline void clear() noexcept { _content.clear(); }

  //! \}

  //! Implements `Logger::_log()` by appending to the internal `_content` string.
  ASMJIT_API Error _log(const char* data, size_t size = SIZE_MAX) noexcept override;
};
271
272 // ============================================================================
273 // [asmjit::Logging]
274 // ============================================================================
275
//! Static helpers that format AsmJit entities (registers, operands, labels,
//! instructions, type-ids and builder nodes) into a `String`.
struct Logging {
  //! Formats a register, dispatching to the matching architecture backend.
  ASMJIT_API static Error formatRegister(
    String& sb,
    uint32_t flags,
    const BaseEmitter* emitter,
    uint32_t archId,
    uint32_t regType,
    uint32_t regId) noexcept;

  //! Formats a label (by name when named, as "L<id>" otherwise).
  ASMJIT_API static Error formatLabel(
    String& sb,
    uint32_t flags,
    const BaseEmitter* emitter,
    uint32_t labelId) noexcept;

  //! Formats an operand, dispatching to the matching architecture backend.
  ASMJIT_API static Error formatOperand(
    String& sb,
    uint32_t flags,
    const BaseEmitter* emitter,
    uint32_t archId,
    const Operand_& op) noexcept;

  //! Formats an instruction with operands, dispatching to the matching
  //! architecture backend.
  ASMJIT_API static Error formatInstruction(
    String& sb,
    uint32_t flags,
    const BaseEmitter* emitter,
    uint32_t archId,
    const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept;

  //! Formats a type-id (e.g. "i32", "f64x2").
  ASMJIT_API static Error formatTypeId(
    String& sb,
    uint32_t typeId) noexcept;

#ifndef ASMJIT_NO_BUILDER
  //! Formats a single builder/compiler node.
  ASMJIT_API static Error formatNode(
    String& sb,
    uint32_t flags,
    const BaseBuilder* cb,
    const BaseNode* node_) noexcept;
#endif

  // Only used by AsmJit internals, not available to users.
#ifdef ASMJIT_EXPORTS
  enum {
    // Has to be big to be able to hold all metadata compiler can assign to a
    // single instruction.
    kMaxInstLineSize = 44,
    kMaxBinarySize = 26
  };

  //! Finalizes a single log line by appending binary data and/or comment
  //! columns and a trailing newline.
  static Error formatLine(
    String& sb,
    const uint8_t* binData, size_t binSize, size_t dispSize, size_t immSize, const char* comment) noexcept;
#endif
};
331 #endif
332
333 //! \}
334
335 ASMJIT_END_NAMESPACE
336
#endif // _ASMJIT_CORE_LOGGING_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_MISC_P_H
7 #define _ASMJIT_CORE_MISC_P_H
8
9 #include "../core/api-config.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \cond INTERNAL
14 //! \addtogroup asmjit_support
15 //! \{
16
// Helpers for building compile-time lookup tables: `ASMJIT_LOOKUP_TABLE_N(T, I)`
// expands to N comma-separated applications of the macro/functor `T` to the
// consecutive indexes `I .. I+N-1`. Each wider variant doubles the previous one.
#define ASMJIT_LOOKUP_TABLE_8(T, I) T((I)), T((I+1)), T((I+2)), T((I+3)), T((I+4)), T((I+5)), T((I+6)), T((I+7))
#define ASMJIT_LOOKUP_TABLE_16(T, I) ASMJIT_LOOKUP_TABLE_8(T, I), ASMJIT_LOOKUP_TABLE_8(T, I + 8)
#define ASMJIT_LOOKUP_TABLE_32(T, I) ASMJIT_LOOKUP_TABLE_16(T, I), ASMJIT_LOOKUP_TABLE_16(T, I + 16)
#define ASMJIT_LOOKUP_TABLE_64(T, I) ASMJIT_LOOKUP_TABLE_32(T, I), ASMJIT_LOOKUP_TABLE_32(T, I + 32)
#define ASMJIT_LOOKUP_TABLE_128(T, I) ASMJIT_LOOKUP_TABLE_64(T, I), ASMJIT_LOOKUP_TABLE_64(T, I + 64)
#define ASMJIT_LOOKUP_TABLE_256(T, I) ASMJIT_LOOKUP_TABLE_128(T, I), ASMJIT_LOOKUP_TABLE_128(T, I + 128)
#define ASMJIT_LOOKUP_TABLE_512(T, I) ASMJIT_LOOKUP_TABLE_256(T, I), ASMJIT_LOOKUP_TABLE_256(T, I + 256)
#define ASMJIT_LOOKUP_TABLE_1024(T, I) ASMJIT_LOOKUP_TABLE_512(T, I), ASMJIT_LOOKUP_TABLE_512(T, I + 512)
25
26 //! \}
27 //! \endcond
28
29 ASMJIT_END_NAMESPACE
30
31 #endif // _ASMJIT_CORE_MISC_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/operand.h"
8
9 ASMJIT_BEGIN_NAMESPACE
10
11 // ============================================================================
12 // [asmjit::Operand - Unit]
13 // ============================================================================
14
#if defined(ASMJIT_TEST)
UNIT(operand) {
  INFO("Checking operand sizes");
  EXPECT(sizeof(Operand) == 16);
  EXPECT(sizeof(BaseReg) == 16);
  EXPECT(sizeof(BaseMem) == 16);
  EXPECT(sizeof(Imm) == 16);
  EXPECT(sizeof(Label) == 16);

  INFO("Checking basic functionality of Operand");
  Operand opA, opB;
  Operand noneOp;

  EXPECT(opA.isNone());
  EXPECT(!opA.isReg());
  EXPECT(!opA.isMem());
  EXPECT(!opA.isImm());
  EXPECT(!opA.isLabel());
  EXPECT(opA == opB);
  EXPECT(opA._data[0] == 0);
  EXPECT(opA._data[1] == 0);

  INFO("Checking basic functionality of Label");
  Label label;
  EXPECT(!label.isValid());
  EXPECT(label.id() == Globals::kInvalidId);

  INFO("Checking basic functionality of BaseReg");
  EXPECT(BaseReg().isReg());
  EXPECT(!BaseReg().isValid());
  EXPECT(BaseReg()._data[0] == 0);
  EXPECT(BaseReg()._data[1] == 0);
  EXPECT(!noneOp.as<BaseReg>().isValid());

  // Compose a signature of some register (not specific to any architecture).
  uint32_t regSig = Operand::kOpReg |
                    (1 << Operand::kSignatureRegTypeShift) |
                    (2 << Operand::kSignatureRegGroupShift) |
                    (8 << Operand::kSignatureSizeShift);
  BaseReg regA(regSig, 5);

  EXPECT(regA.isValid());
  EXPECT(regA.isReg());
  EXPECT(regA.isReg(1));
  EXPECT(regA.isPhysReg());
  EXPECT(!regA.isVirtReg());
  EXPECT(regA.signature() == regSig);
  EXPECT(regA.type() == 1);
  EXPECT(regA.group() == 2);
  EXPECT(regA.size() == 8);
  EXPECT(regA.id() == 5);
  EXPECT(regA.isReg(1, 5)); // Both RegType and Id must match.
  EXPECT(regA._data[0] == 0);
  EXPECT(regA._data[1] == 0);

  // A register of the same type, but having a different id.
  BaseReg regB(regA, 6);
  EXPECT(regB.isValid());
  EXPECT(regB.isReg());
  EXPECT(regB.isReg(1));
  EXPECT(regB.isPhysReg());
  EXPECT(!regB.isVirtReg());
  EXPECT(regB.signature() == regSig);
  EXPECT(regB.type() == regA.type());
  EXPECT(regB.group() == regA.group());
  EXPECT(regB.size() == regA.size());
  EXPECT(regB.id() == 6);
  EXPECT(regB.isReg(1, 6));

  regA.reset();
  EXPECT(!regA.isReg());
  EXPECT(!regA.isValid());

  INFO("Checking basic functionality of BaseMem");
  BaseMem mem;
  EXPECT(mem.isMem());
  EXPECT(mem == BaseMem());
  EXPECT(!mem.hasBase());
  EXPECT(!mem.hasIndex());
  EXPECT(!mem.hasOffset());
  EXPECT(mem.isOffset64Bit());
  EXPECT(mem.offset() == 0);

  mem.setOffset(-1);
  EXPECT(mem.offsetLo32() == -1);
  EXPECT(mem.offset() == -1);

  int64_t bigOffset = int64_t(0xFF00FF0000000001u);
  int32_t bigOffsetHi = int32_t(0xFF00FF00u);

  mem.setOffset(bigOffset);
  EXPECT(mem.offset() == bigOffset);
  EXPECT(mem.offsetLo32() == 1);
  EXPECT(mem.offsetHi32() == bigOffsetHi);

  INFO("Checking basic functionality of Imm");
  EXPECT(Imm(-1).i64() == int64_t(-1));
  EXPECT(imm(-1).i64() == int64_t(-1));
  EXPECT(imm(0xFFFFFFFF).i64() == int64_t(0xFFFFFFFF));
}
#endif
115
116 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_OPERAND_H
7 #define _ASMJIT_CORE_OPERAND_H
8
9 #include "../core/support.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 // ============================================================================
14 // [Macros]
15 // ============================================================================
16
//! Adds a template specialization for `REG_TYPE` into the local `RegTraits`.
//!
//! The specialization records the concrete register class (`REG`), its type,
//! group, size, count, and type-id, and precomputes `kSignature` by combining
//! the operand type with the register type, group, and size fields.
#define ASMJIT_DEFINE_REG_TRAITS(REG, REG_TYPE, GROUP, SIZE, COUNT, TYPE_ID) \
template<> \
struct RegTraits<REG_TYPE> { \
  typedef REG RegT; \
 \
  static constexpr uint32_t kValid = 1; \
  static constexpr uint32_t kCount = COUNT; \
  static constexpr uint32_t kTypeId = TYPE_ID; \
 \
  static constexpr uint32_t kType = REG_TYPE; \
  static constexpr uint32_t kGroup = GROUP; \
  static constexpr uint32_t kSize = SIZE; \
 \
  static constexpr uint32_t kSignature = \
    (Operand::kOpReg << Operand::kSignatureOpShift ) | \
    (kType << Operand::kSignatureRegTypeShift ) | \
    (kGroup << Operand::kSignatureRegGroupShift) | \
    (kSize << Operand::kSignatureSizeShift ) ; \
}
37
//! Adds constructors and member functions to a class that implements abstract
//! register. Abstract register is register that doesn't have type or signature
//! yet, it's a base class like `x86::Reg` or `arm::Reg`.
#define ASMJIT_DEFINE_ABSTRACT_REG(REG, BASE) \
public: \
  /*! Default constructor that only sets up basics. */ \
  constexpr REG() noexcept \
    : BASE(kSignature, kIdBad) {} \
 \
  /*! Makes a copy of the `other` register operand. */ \
  constexpr REG(const REG& other) noexcept \
    : BASE(other) {} \
 \
  /*! Makes a copy of the `other` register having id set to `rId`. */ \
  constexpr REG(const BaseReg& other, uint32_t rId) noexcept \
    : BASE(other, rId) {} \
 \
  /*! Creates a register based on `signature` and `rId`. */ \
  constexpr REG(uint32_t signature, uint32_t rId) noexcept \
    : BASE(signature, rId) {} \
 \
  /*! Creates a completely uninitialized REG register operand (garbage). */ \
  inline explicit REG(Globals::NoInit_) noexcept \
    : BASE(Globals::NoInit) {} \
 \
  /*! Creates a new register from register type and id. */ \
  static inline REG fromTypeAndId(uint32_t rType, uint32_t rId) noexcept { \
    return REG(signatureOf(rType), rId); \
  } \
 \
  /*! Clones the register operand. */ \
  constexpr REG clone() const noexcept { return REG(*this); } \
 \
  /*! Copy assignment (default member-wise copy). */ \
  inline REG& operator=(const REG& other) noexcept = default;
72
//! Adds constructors and member functions to a class that implements final
//! register. Final registers MUST HAVE a valid signature.
//!
//! The `kThis*` constants and `kSignature` are taken from `TRAITS` (a
//! `RegTraits` specialization, see `ASMJIT_DEFINE_REG_TRAITS`).
#define ASMJIT_DEFINE_FINAL_REG(REG, BASE, TRAITS) \
public: \
  static constexpr uint32_t kThisType = TRAITS::kType; \
  static constexpr uint32_t kThisGroup = TRAITS::kGroup; \
  static constexpr uint32_t kThisSize = TRAITS::kSize; \
  static constexpr uint32_t kSignature = TRAITS::kSignature; \
 \
  ASMJIT_DEFINE_ABSTRACT_REG(REG, BASE) \
 \
  /*! Creates a register operand having its id set to `rId`. */ \
  constexpr explicit REG(uint32_t rId) noexcept \
    : BASE(kSignature, rId) {}
87
88 //! \addtogroup asmjit_core
89 //! \{
90
91 // ============================================================================
92 // [asmjit::Operand_]
93 // ============================================================================
94
95 //! Constructor-less `Operand`.
96 //!
97 //! Contains no initialization code and can be used safely to define an array
98 //! of operands that won't be initialized. This is an `Operand` compatible
99 //! data structure designed to be statically initialized, static const, or to
100 //! be used by the user to define an array of operands without having them
101 //! default initialized.
102 //!
103 //! The key difference between `Operand` and `Operand_`:
104 //!
105 //! ```
106 //! Operand_ xArray[10]; // Not initialized, contains garbage.
107 //! Operand yArray[10]; // All operands initialized to none.
108 //! ```
struct Operand_ {
  //! Operand's signature that provides operand type and additional information.
  uint32_t _signature;
  //! Either base id as used by memory operand or any id as used by others.
  uint32_t _baseId;

  //! Data specific to the operand type.
  //!
  //! The reason we don't use union is that we have `constexpr` constructors that
  //! construct operands and other `constexpr` functions that return either another
  //! Operand or something else. These cannot generally work with unions so we also
  //! cannot use `union` if we want to be standard compliant.
  uint32_t _data[2];

  //! Indexes to `_data` array.
  enum DataIndex : uint32_t {
    kDataMemIndexId = 0,
    kDataMemOffsetLo = 1,

    kDataImmValueLo = ASMJIT_ARCH_LE ? 0 : 1,
    kDataImmValueHi = ASMJIT_ARCH_LE ? 1 : 0
  };

  /*
  //! Memory operand data.
  struct MemData {
    //! Index register id.
    uint32_t indexId;
    //! Low part of 64-bit offset (or 32-bit offset).
    uint32_t offsetLo32;
  };

  //! Additional data used by some operands.
  union {
    //! 32-bit data (used either by immediate or as a 32-bit view).
    uint32_t _data32[2];
    //! 64-bit data (used either by immediate or as a 64-bit view).
    uint64_t _data64;

    //! Memory address data.
    MemData _mem;
  };
  */

  //! Operand types that can be encoded in `Operand`.
  enum OpType : uint32_t {
    //! Not an operand or not initialized.
    kOpNone = 0,
    //! Operand is a register.
    kOpReg = 1,
    //! Operand is a memory.
    kOpMem = 2,
    //! Operand is an immediate value.
    kOpImm = 3,
    //! Operand is a label.
    kOpLabel = 4
  };
  static_assert(kOpMem == kOpReg + 1, "asmjit::Operand requires `kOpMem` to be `kOpReg+1`.");

  //! \cond INTERNAL
  enum SignatureBits : uint32_t {
    // Operand type (3 least significant bits).
    // |........|........|........|.....XXX|
    kSignatureOpShift = 0,
    kSignatureOpMask = 0x07u << kSignatureOpShift,

    // Register type (5 bits).
    // |........|........|........|XXXXX...|
    kSignatureRegTypeShift = 3,
    kSignatureRegTypeMask = 0x1Fu << kSignatureRegTypeShift,

    // Register group (4 bits).
    // |........|........|....XXXX|........|
    kSignatureRegGroupShift = 8,
    kSignatureRegGroupMask = 0x0Fu << kSignatureRegGroupShift,

    // Memory base type (5 bits).
    // |........|........|........|XXXXX...|
    kSignatureMemBaseTypeShift = 3,
    kSignatureMemBaseTypeMask = 0x1Fu << kSignatureMemBaseTypeShift,

    // Memory index type (5 bits).
    // |........|........|...XXXXX|........|
    kSignatureMemIndexTypeShift = 8,
    kSignatureMemIndexTypeMask = 0x1Fu << kSignatureMemIndexTypeShift,

    // Memory base+index combined (10 bits).
    // |........|........|...XXXXX|XXXXX...|
    kSignatureMemBaseIndexShift = 3,
    kSignatureMemBaseIndexMask = 0x3FFu << kSignatureMemBaseIndexShift,

    // Memory address type (2 bits).
    // |........|........|.XX.....|........|
    kSignatureMemAddrTypeShift = 13,
    kSignatureMemAddrTypeMask = 0x03u << kSignatureMemAddrTypeShift,

    // This memory operand represents a home-slot or stack (BaseCompiler).
    // |........|........|X.......|........|
    kSignatureMemRegHomeShift = 15,
    kSignatureMemRegHomeFlag = 0x01u << kSignatureMemRegHomeShift,

    // Operand size (8 most significant bits).
    // |XXXXXXXX|........|........|........|
    kSignatureSizeShift = 24,
    kSignatureSizeMask = 0xFFu << kSignatureSizeShift
  };
  //! \endcond

  //! \cond INTERNAL
  //! Constants useful for VirtId <-> Index translation.
  enum VirtIdConstants : uint32_t {
    //! Minimum valid packed-id.
    kVirtIdMin = 256,
    //! Maximum valid packed-id, excludes Globals::kInvalidId.
    kVirtIdMax = Globals::kInvalidId - 1,
    //! Count of valid packed-ids.
    kVirtIdCount = uint32_t(kVirtIdMax - kVirtIdMin + 1)
  };

  //! Tests whether the given `id` is a valid virtual register id. Since AsmJit
  //! supports both physical and virtual registers it must be able to distinguish
  //! between these two. The idea is that physical registers are always limited
  //! in size, so virtual identifiers start from `kVirtIdMin` and end at
  //! `kVirtIdMax`.
  static ASMJIT_INLINE bool isVirtId(uint32_t id) noexcept { return id - kVirtIdMin < uint32_t(kVirtIdCount); }
  //! Converts a real-id into a packed-id that can be stored in Operand.
  static ASMJIT_INLINE uint32_t indexToVirtId(uint32_t id) noexcept { return id + kVirtIdMin; }
  //! Converts a packed-id back to real-id.
  static ASMJIT_INLINE uint32_t virtIdToIndex(uint32_t id) noexcept { return id - kVirtIdMin; }
  //! \endcond

  //! \name Construction & Destruction
  //! \{

  //! \cond INTERNAL
  //! Initializes a `BaseReg` operand from `signature` and register `id`.
  inline void _initReg(uint32_t signature, uint32_t id) noexcept {
    _signature = signature;
    _baseId = id;
    _data[0] = 0;
    _data[1] = 0;
  }

  //! Initializes the operand from `other` (used by operator overloads).
  inline void copyFrom(const Operand_& other) noexcept { memcpy(this, &other, sizeof(Operand_)); }
  //! \endcond

  //! Resets the `Operand` to none.
  //!
  //! None operand is defined the following way:
  //!   - Its signature is zero (kOpNone, and the rest zero as well).
  //!   - Its id is `0`.
  //!   - The reserved8_4 field is set to `0`.
  //!   - The reserved12_4 field is set to zero.
  //!
  //! In other words, reset operands have all members set to zero. Reset operand
  //! must match the Operand state right after its construction. Alternatively,
  //! if you have an array of operands, you can simply use `memset()`.
  //!
  //! ```
  //! using namespace asmjit;
  //!
  //! Operand a;
  //! Operand b;
  //! assert(a == b);
  //!
  //! b = x86::eax;
  //! assert(a != b);
  //!
  //! b.reset();
  //! assert(a == b);
  //!
  //! memset(&b, 0, sizeof(Operand));
  //! assert(a == b);
  //! ```
  inline void reset() noexcept {
    _signature = 0;
    _baseId = 0;
    _data[0] = 0;
    _data[1] = 0;
  }

  //! \}

  //! \name Operator Overloads
  //! \{

  //! Tests whether this operand is fully equal to `other` (all 16 bytes), see `isEqual()`.
  constexpr bool operator==(const Operand_& other) const noexcept { return isEqual(other); }
  //! Tests whether this operand differs from `other` in any member, see `isEqual()`.
  constexpr bool operator!=(const Operand_& other) const noexcept { return !isEqual(other); }

  //! \}

  //! \name Cast
  //! \{

  //! Casts this operand to `T` type.
  template<typename T>
  inline T& as() noexcept { return static_cast<T&>(*this); }

  //! Casts this operand to `T` type (const).
  template<typename T>
  inline const T& as() const noexcept { return static_cast<const T&>(*this); }

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the operand matches the given signature `sign`.
  constexpr bool hasSignature(uint32_t signature) const noexcept { return _signature == signature; }
  //! Tests whether the operand matches the signature of the `other` operand.
  constexpr bool hasSignature(const Operand_& other) const noexcept { return _signature == other.signature(); }

  //! Returns operand signature as unsigned 32-bit integer.
  //!
  //! Signature is first 4 bytes of the operand data. It's used mostly for
  //! operand checking as it's much faster to check 4 bytes at once than having
  //! to check these bytes individually.
  constexpr uint32_t signature() const noexcept { return _signature; }

  //! Sets the operand signature, see `signature()`.
  //!
  //! \note Improper use of `setSignature()` can lead to hard-to-debug errors.
  inline void setSignature(uint32_t signature) noexcept { _signature = signature; }

  //! \cond INTERNAL
  //! Tests whether any bit of the signature field selected by `mask` is set.
  template<uint32_t mask>
  constexpr bool _hasSignaturePart() const noexcept {
    return (_signature & mask) != 0;
  }

  //! Extracts the signature field selected by `mask`, shifted down to bit 0.
  template<uint32_t mask>
  constexpr uint32_t _getSignaturePart() const noexcept {
    return (_signature >> Support::constCtz(mask)) & (mask >> Support::constCtz(mask));
  }

  //! Stores `value` into the signature field selected by `mask`.
  template<uint32_t mask>
  inline void _setSignaturePart(uint32_t value) noexcept {
    ASMJIT_ASSERT((value & ~(mask >> Support::constCtz(mask))) == 0);
    _signature = (_signature & ~mask) | (value << Support::constCtz(mask));
  }
  //! \endcond

  //! Returns the type of the operand, see `OpType`.
  constexpr uint32_t opType() const noexcept { return _getSignaturePart<kSignatureOpMask>(); }
  //! Tests whether the operand is none (`kOpNone`).
  constexpr bool isNone() const noexcept { return _signature == 0; }
  //! Tests whether the operand is a register (`kOpReg`).
  constexpr bool isReg() const noexcept { return opType() == kOpReg; }
  //! Tests whether the operand is a memory location (`kOpMem`).
  constexpr bool isMem() const noexcept { return opType() == kOpMem; }
  //! Tests whether the operand is an immediate (`kOpImm`).
  constexpr bool isImm() const noexcept { return opType() == kOpImm; }
  //! Tests whether the operand is a label (`kOpLabel`).
  constexpr bool isLabel() const noexcept { return opType() == kOpLabel; }

  //! Tests whether the operand is a physical register.
  constexpr bool isPhysReg() const noexcept { return isReg() && _baseId < 0xFFu; }
  //! Tests whether the operand is a virtual register.
  constexpr bool isVirtReg() const noexcept { return isReg() && _baseId > 0xFFu; }

  //! Tests whether the operand specifies a size (i.e. the size is not zero).
  constexpr bool hasSize() const noexcept { return _hasSignaturePart<kSignatureSizeMask>(); }
  //! Tests whether the size of the operand matches `size`.
  constexpr bool hasSize(uint32_t s) const noexcept { return size() == s; }

  //! Returns the size of the operand in bytes.
  //!
  //! The value returned depends on the operand type:
  //!   * None  - Should always return zero size.
  //!   * Reg   - Should always return the size of the register. If the register
  //!             size depends on architecture (like `x86::CReg` and `x86::DReg`)
  //!             the size returned should be the greatest possible (so it should
  //!             return 64-bit size in such case).
  //!   * Mem   - Size is optional and will be in most cases zero.
  //!   * Imm   - Should always return zero size.
  //!   * Label - Should always return zero size.
  constexpr uint32_t size() const noexcept { return _getSignaturePart<kSignatureSizeMask>(); }

  //! Returns the operand id.
  //!
  //! The value returned should be interpreted accordingly to the operand type:
  //!   * None  - Should be `0`.
  //!   * Reg   - Physical or virtual register id.
  //!   * Mem   - Multiple meanings - BASE address (register or label id), or
  //!             high value of a 64-bit absolute address.
  //!   * Imm   - Should be `0`.
  //!   * Label - Label id if it was created by using `newLabel()` or
  //!             `Globals::kInvalidId` if the label is invalid or not
  //!             initialized.
  constexpr uint32_t id() const noexcept { return _baseId; }

  //! Tests whether the operand is 100% equal to `other`.
  //!
  //! Uses non-short-circuiting `&` (instead of `&&`) on purpose so the four
  //! member comparisons can be combined without branching.
  constexpr bool isEqual(const Operand_& other) const noexcept {
    return (_signature == other._signature) &
           (_baseId    == other._baseId   ) &
           (_data[0]   == other._data[0]  ) &
           (_data[1]   == other._data[1]  ) ;
  }

  //! Tests whether the operand is a register matching `rType`.
  constexpr bool isReg(uint32_t rType) const noexcept {
    return (_signature & (kSignatureOpMask | kSignatureRegTypeMask)) ==
           ((kOpReg << kSignatureOpShift) | (rType << kSignatureRegTypeShift));
  }

  //! Tests whether the operand is register and of `rType` and `rId`.
  constexpr bool isReg(uint32_t rType, uint32_t rId) const noexcept {
    return isReg(rType) && id() == rId;
  }

  //! Tests whether the operand is a register or memory.
  constexpr bool isRegOrMem() const noexcept {
    return Support::isBetween<uint32_t>(opType(), kOpReg, kOpMem);
  }

  //! \}
};
427
428 // ============================================================================
429 // [asmjit::Operand]
430 // ============================================================================
431
432 //! Operand can contain register, memory location, immediate, or label.
433 class Operand : public Operand_ {
434 public:
435 //! \name Construction & Destruction
436 //! \{
437
438 //! Creates `kOpNone` operand having all members initialized to zero.
439 constexpr Operand() noexcept
440 : Operand_{ kOpNone, 0u, { 0u, 0u }} {}
441
442 //! Creates a cloned `other` operand.
443 constexpr Operand(const Operand& other) noexcept = default;
444
445 //! Creates a cloned `other` operand.
446 constexpr explicit Operand(const Operand_& other)
447 : Operand_(other) {}
448
449 //! Creates an operand initialized to raw `[u0, u1, u2, u3]` values.
450 constexpr Operand(Globals::Init_, uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) noexcept
451 : Operand_{ u0, u1, { u2, u3 }} {}
452
453 //! Creates an uninitialized operand (dangerous).
454 inline explicit Operand(Globals::NoInit_) noexcept {}
455
456 //! \}
457
458 //! \name Operator Overloads
459 //! \{
460
461 inline Operand& operator=(const Operand& other) noexcept = default;
462 inline Operand& operator=(const Operand_& other) noexcept { return operator=(static_cast<const Operand&>(other)); }
463
464 //! \}
465
466 //! \name Utilities
467 //! \{
468
469 //! Clones this operand and returns its copy.
470 constexpr Operand clone() const noexcept { return Operand(*this); }
471
472 //! \}
473 };
474
// Operand must stay a 16-byte value type (code such as `copyFrom()` relies on
// its exact size, and arrays of operands are memset/memcpy'd as raw bytes).
static_assert(sizeof(Operand) == 16, "asmjit::Operand must be exactly 16 bytes long");

namespace Globals {
  //! A default-constructed operand of `Operand_::kOpNone` type.
  static constexpr const Operand none;
}
481
482 // ============================================================================
483 // [asmjit::Label]
484 // ============================================================================
485
486 //! Label (jump target or data location).
487 //!
488 //! Label represents a location in code typically used as a jump target, but
489 //! may be also a reference to some data or a static variable. Label has to be
490 //! explicitly created by BaseEmitter.
491 //!
492 //! Example of using labels:
493 //!
494 //! ```
495 //! // Create some emitter (for example x86::Assembler).
496 //! x86::Assembler a;
497 //!
498 //! // Create Label instance.
499 //! Label L1 = a.newLabel();
500 //!
501 //! // ... your code ...
502 //!
503 //! // Using label.
504 //! a.jump(L1);
505 //!
506 //! // ... your code ...
507 //!
508 //! // Bind label to the current position, see `BaseEmitter::bind()`.
509 //! a.bind(L1);
510 //! ```
class Label : public Operand {
public:
  //! Type of the Label.
  enum LabelType : uint32_t {
    //! Anonymous (unnamed) label.
    kTypeAnonymous = 0,
    //! Local label (always has parentId).
    kTypeLocal = 1,
    //! Global label (never has parentId).
    kTypeGlobal = 2,
    //! Number of label types.
    kTypeCount = 3
  };

  // TODO: Find a better place, find a better name.
  enum {
    //! Label tag is used as a sub-type, forming a unique signature across all
    //! operand types as 0x1 is never associated with any register (reg-type).
    //! This means that a memory operand's BASE register can be constructed
    //! from virtually any operand (register vs. label) by just assigning its
    //! type (reg type or label-tag) and operand id.
    kLabelTag = 0x1
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a label operand without ID (you must set the ID to make it valid).
  constexpr Label() noexcept
    : Operand(Globals::Init, kOpLabel, Globals::kInvalidId, 0, 0) {}

  //! Creates a cloned label operand of `other`.
  constexpr Label(const Label& other) noexcept
    : Operand(other) {}

  //! Creates a label operand of the given `id`.
  constexpr explicit Label(uint32_t id) noexcept
    : Operand(Globals::Init, kOpLabel, id, 0, 0) {}

  //! Creates a completely uninitialized label operand (dangerous).
  inline explicit Label(Globals::NoInit_) noexcept
    : Operand(Globals::NoInit) {}

  //! Resets the label, will reset all properties and set its ID to `Globals::kInvalidId`.
  inline void reset() noexcept {
    _signature = kOpLabel;
    _baseId = Globals::kInvalidId;
    _data[0] = 0;
    _data[1] = 0;
  }

  //! \}

  //! \name Overloaded Operators
  //! \{

  //! Copy assignment.
  inline Label& operator=(const Label& other) noexcept = default;

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the label was created by CodeHolder and/or an attached emitter.
  constexpr bool isValid() const noexcept { return _baseId != Globals::kInvalidId; }
  //! Sets the label `id`.
  inline void setId(uint32_t id) noexcept { _baseId = id; }

  //! \}
};
580
581 // ============================================================================
582 // [asmjit::BaseRegTraits]
583 // ============================================================================
584
585 //! \cond INTERNAL
586 //! Default register traits.
struct BaseRegTraits {
  //! RegType is not valid by default.
  static constexpr uint32_t kValid = 0;
  //! Count of registers (0 if none).
  static constexpr uint32_t kCount = 0;
  //! Everything is void by default.
  static constexpr uint32_t kTypeId = 0;

  //! Zero type by default.
  static constexpr uint32_t kType = 0;
  //! Zero group by default.
  static constexpr uint32_t kGroup = 0;
  //! No size by default.
  static constexpr uint32_t kSize = 0;

  //! Empty signature by default - only the `kOpReg` operand-type bits are
  //! set; type, group, and size fields stay zero.
  static constexpr uint32_t kSignature = Operand::kOpReg;
};
605 //! \endcond
606
607 // ============================================================================
608 // [asmjit::BaseReg]
609 // ============================================================================
610
//! Structure that allows extracting register information based on the signature.
//!
//! This information is compatible with operand's signature (32-bit integer)
//! and `RegInfo` just provides easy way to access it.
struct RegInfo {
  //! Resets the info to an invalid (zero) signature.
  inline void reset() noexcept { _signature = 0; }
  //! Sets the signature this info wraps, see `Operand_::signature()`.
  inline void setSignature(uint32_t signature) noexcept { _signature = signature; }

  //! \cond INTERNAL
  //! Extracts the signature field selected by `mask`, shifted down to bit 0.
  template<uint32_t mask>
  constexpr uint32_t _getSignaturePart() const noexcept {
    return (_signature >> Support::constCtz(mask)) & (mask >> Support::constCtz(mask));
  }
  //! \endcond

  //! Tests whether the signature is non-zero (i.e. describes a valid register).
  constexpr bool isValid() const noexcept { return _signature != 0; }
  //! Returns the wrapped signature as a 32-bit integer.
  constexpr uint32_t signature() const noexcept { return _signature; }
  //! Returns the operand type, see `Operand_::OpType`.
  constexpr uint32_t opType() const noexcept { return _getSignaturePart<Operand::kSignatureOpMask>(); }
  //! Returns the register group.
  constexpr uint32_t group() const noexcept { return _getSignaturePart<Operand::kSignatureRegGroupMask>(); }
  //! Returns the register type.
  constexpr uint32_t type() const noexcept { return _getSignaturePart<Operand::kSignatureRegTypeMask>(); }
  //! Returns the register size in bytes (encoded in the signature).
  constexpr uint32_t size() const noexcept { return _getSignaturePart<Operand::kSignatureSizeMask>(); }

  //! The register signature (zero if not set).
  uint32_t _signature;
};
633
634 //! Physical/Virtual register operand.
635 class BaseReg : public Operand {
636 public:
637 //! Architecture neutral register types.
638 //!
639 //! These must be reused by any platform that contains that types. All GP
640 //! and VEC registers are also allowed by design to be part of a BASE|INDEX
641 //! of a memory operand.
642 enum RegType : uint32_t {
643 //! No register - unused, invalid, multiple meanings.
644 kTypeNone = 0,
645
646 // (1 is used as a LabelTag)
647
648 //! 8-bit low general purpose register (X86).
649 kTypeGp8Lo = 2,
650 //! 8-bit high general purpose register (X86).
651 kTypeGp8Hi = 3,
652 //! 16-bit general purpose register (X86).
653 kTypeGp16 = 4,
654 //! 32-bit general purpose register (X86|ARM).
655 kTypeGp32 = 5,
656 //! 64-bit general purpose register (X86|ARM).
657 kTypeGp64 = 6,
658 //! 32-bit view of a vector register (ARM).
659 kTypeVec32 = 7,
660 //! 64-bit view of a vector register (ARM).
661 kTypeVec64 = 8,
662 //! 128-bit view of a vector register (X86|ARM).
663 kTypeVec128 = 9,
664 //! 256-bit view of a vector register (X86).
665 kTypeVec256 = 10,
666 //! 512-bit view of a vector register (X86).
667 kTypeVec512 = 11,
668 //! 1024-bit view of a vector register (future).
669 kTypeVec1024 = 12,
670 //! Other0 register, should match `kOther0` group.
671 kTypeOther0 = 13,
672 //! Other1 register, should match `kOther1` group.
673 kTypeOther1 = 14,
674 //! Universal id of IP/PC register (if separate).
675 kTypeIP = 15,
676 //! Start of platform dependent register types (must be honored).
677 kTypeCustom = 16,
678 //! Maximum possible register id of all architectures.
679 kTypeMax = 31
680 };
681
682 //! Register group (architecture neutral), and some limits.
683 enum RegGroup : uint32_t {
684 //! General purpose register group compatible with all backends.
685 kGroupGp = 0,
686 //! Vector register group compatible with all backends.
687 kGroupVec = 1,
688 //! Group that is architecture dependent.
689 kGroupOther0 = 2,
690 //! Group that is architecture dependent.
691 kGroupOther1 = 3,
692 //! Count of register groups used by virtual registers.
693 kGroupVirt = 4,
694 //! Count of register groups used by physical registers.
695 kGroupCount = 16
696 };
697
698 enum Id : uint32_t {
699 //! None or any register (mostly internal).
700 kIdBad = 0xFFu
701 };
702
703 static constexpr uint32_t kSignature = kOpReg;
704
705 //! \name Construction & Destruction
706 //! \{
707
708 //! Creates a dummy register operand.
709 constexpr BaseReg() noexcept
710 : Operand(Globals::Init, kSignature, kIdBad, 0, 0) {}
711
712 //! Creates a new register operand which is the same as `other` .
713 constexpr BaseReg(const BaseReg& other) noexcept
714 : Operand(other) {}
715
716 //! Creates a new register operand compatible with `other`, but with a different `rId`.
717 constexpr BaseReg(const BaseReg& other, uint32_t rId) noexcept
718 : Operand(Globals::Init, other._signature, rId, 0, 0) {}
719
720 //! Creates a register initialized to `signature` and `rId`.
721 constexpr BaseReg(uint32_t signature, uint32_t rId) noexcept
722 : Operand(Globals::Init, signature, rId, 0, 0) {}
723
724 inline explicit BaseReg(Globals::NoInit_) noexcept
725 : Operand(Globals::NoInit) {}
726
727 //! \}
728
729 //! \name Overloaded Operators
730 //! \{
731
732 inline BaseReg& operator=(const BaseReg& other) noexcept = default;
733
734 //! \}
735
736 //! \name Accessors
737 //! \{
738
739 //! Tests whether this register is the same as `other`.
740 //!
741 //! This is just an optimization. Registers by default only use the first
742 //! 8 bytes of the Operand, so this method takes advantage of this knowledge
743 //! and only compares these 8 bytes. If both operands were created correctly
744 //! then `isEqual()` and `isSame()` should give the same answer, however, if
745 //! some one of the two operand contains a garbage or other metadata in the
746 //! upper 8 bytes then `isSame()` may return `true` in cases where `isEqual()`
747 //! returns false.
748 constexpr bool isSame(const BaseReg& other) const noexcept {
749 return (_signature == other._signature) &
750 (_baseId == other._baseId ) ;
751 }
752
753 //! Tests whether the register is valid (either virtual or physical).
754 constexpr bool isValid() const noexcept { return (_signature != 0) & (_baseId != kIdBad); }
755
756 //! Tests whether this is a physical register.
757 constexpr bool isPhysReg() const noexcept { return _baseId < kIdBad; }
758 //! Tests whether this is a virtual register.
759 constexpr bool isVirtReg() const noexcept { return _baseId > kIdBad; }
760
761 //! Tests whether the register type matches `type` - same as `isReg(type)`, provided for convenience.
762 constexpr bool isType(uint32_t type) const noexcept { return (_signature & kSignatureRegTypeMask) == (type << kSignatureRegTypeShift); }
763 //! Tests whether the register group matches `group`.
764 constexpr bool isGroup(uint32_t group) const noexcept { return (_signature & kSignatureRegGroupMask) == (group << kSignatureRegGroupShift); }
765
766 //! Tests whether the register is a general purpose register (any size).
767 constexpr bool isGp() const noexcept { return isGroup(kGroupGp); }
768 //! Tests whether the register is a vector register.
769 constexpr bool isVec() const noexcept { return isGroup(kGroupVec); }
770
771 using Operand_::isReg;
772
773 //! Same as `isType()`, provided for convenience.
774 constexpr bool isReg(uint32_t rType) const noexcept { return isType(rType); }
775 //! Tests whether the register type matches `type` and register id matches `rId`.
776 constexpr bool isReg(uint32_t rType, uint32_t rId) const noexcept { return isType(rType) && id() == rId; }
777
778 //! Returns the type of the register.
779 constexpr uint32_t type() const noexcept { return _getSignaturePart<kSignatureRegTypeMask>(); }
780 //! Returns the register group.
781 constexpr uint32_t group() const noexcept { return _getSignaturePart<kSignatureRegGroupMask>(); }
782
783 //! Clones the register operand.
784 constexpr BaseReg clone() const noexcept { return BaseReg(*this); }
785
786 //! Casts this register to `RegT` by also changing its signature.
787 //!
788 //! \note Improper use of `cloneAs()` can lead to hard-to-debug errors.
789 template<typename RegT>
790 constexpr RegT cloneAs() const noexcept { return RegT(RegT::kSignature, id()); }
791
792 //! Casts this register to `other` by also changing its signature.
793 //!
794 //! \note Improper use of `cloneAs()` can lead to hard-to-debug errors.
795 template<typename RegT>
796 constexpr RegT cloneAs(const RegT& other) const noexcept { return RegT(other.signature(), id()); }
797
798 //! Sets the register id to `rId`.
799 inline void setId(uint32_t rId) noexcept { _baseId = rId; }
800
801 //! Sets a 32-bit operand signature based on traits of `RegT`.
802 template<typename RegT>
803 inline void setSignatureT() noexcept { _signature = RegT::kSignature; }
804
805 //! Sets the register `signature` and `rId`.
806 inline void setSignatureAndId(uint32_t signature, uint32_t rId) noexcept {
807 _signature = signature;
808 _baseId = rId;
809 }
810
811 //! \}
812
813 //! \name Static Functions
814 //! \{
815
  //! Tests whether the `op` operand is a general purpose register.
  static inline bool isGp(const Operand_& op) noexcept {
    // Check operand type and register group. Not interested in register type and size.
    const uint32_t kSgn = (kOpReg << kSignatureOpShift ) |
                          (kGroupGp << kSignatureRegGroupShift) ;
    return (op.signature() & (kSignatureOpMask | kSignatureRegGroupMask)) == kSgn;
  }
822
  //! Tests whether the `op` operand is a vector register.
  static inline bool isVec(const Operand_& op) noexcept {
    // Check operand type and register group. Not interested in register type and size.
    const uint32_t kSgn = (kOpReg << kSignatureOpShift ) |
                          (kGroupVec << kSignatureRegGroupShift) ;
    return (op.signature() & (kSignatureOpMask | kSignatureRegGroupMask)) == kSgn;
  }
830
831 static inline bool isGp(const Operand_& op, uint32_t rId) noexcept { return isGp(op) & (op.id() == rId); }
832 static inline bool isVec(const Operand_& op, uint32_t rId) noexcept { return isVec(op) & (op.id() == rId); }
833
834 //! \}
835 };
836
837 // ============================================================================
838 // [asmjit::RegOnly]
839 // ============================================================================
840
//! RegOnly is an 8-byte version of `BaseReg` that can store either a register
//! or nothing.
//!
//! This class was designed to decrease the space consumed by each extra "operand"
//! in `BaseEmitter` and `InstNode` classes.
struct RegOnly {
  //! Type of the operand, either `kOpNone` or `kOpReg`.
  uint32_t _signature;
  //! Physical or virtual register id.
  uint32_t _id;

  //! \name Construction & Destruction
  //! \{

  //! Initializes the `RegOnly` instance to hold register `signature` and `id`.
  inline void init(uint32_t signature, uint32_t id) noexcept {
    _signature = signature;
    _id = id;
  }

  //! Initializes the `RegOnly` instance from a full register operand `reg`.
  inline void init(const BaseReg& reg) noexcept { init(reg.signature(), reg.id()); }
  //! Initializes the `RegOnly` instance from another `RegOnly` instance `reg`.
  inline void init(const RegOnly& reg) noexcept { init(reg.signature(), reg.id()); }

  //! Resets the `RegOnly` members to zeros (none).
  inline void reset() noexcept { init(0, 0); }

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether this ExtraReg is none (same as calling `Operand_::isNone()`).
  constexpr bool isNone() const noexcept { return _signature == 0; }
  //! Tests whether the register is valid (either virtual or physical).
  constexpr bool isReg() const noexcept { return _signature != 0; }

  //! Tests whether this is a physical register.
  constexpr bool isPhysReg() const noexcept { return _id < BaseReg::kIdBad; }
  //! Tests whether this is a virtual register (used by `BaseCompiler`).
  constexpr bool isVirtReg() const noexcept { return _id > BaseReg::kIdBad; }

  //! Returns the register signature or 0 if no register is assigned.
  constexpr uint32_t signature() const noexcept { return _signature; }
  //! Returns the register id.
  //!
  //! \note Always check whether the register is assigned before using the
  //! returned identifier as non-assigned `RegOnly` instance would return
  //! zero id, which is still a valid register id.
  constexpr uint32_t id() const noexcept { return _id; }

  //! Sets the register id.
  inline void setId(uint32_t id) noexcept { _id = id; }

  //! \cond INTERNAL
  //!
  //! Extracts information from operand's signature.
  template<uint32_t mask>
  constexpr uint32_t _getSignaturePart() const noexcept {
    return (_signature >> Support::constCtz(mask)) & (mask >> Support::constCtz(mask));
  }
  //! \endcond

  //! Returns the type of the register.
  constexpr uint32_t type() const noexcept { return _getSignaturePart<Operand::kSignatureRegTypeMask>(); }
  //! Returns the register group.
  constexpr uint32_t group() const noexcept { return _getSignaturePart<Operand::kSignatureRegGroupMask>(); }

  //! \}

  //! \name Utilities
  //! \{

  //! Converts this ExtraReg to a real `RegT` operand.
  template<typename RegT>
  constexpr RegT toReg() const noexcept { return RegT(_signature, _id); }

  //! \}
};
919
920 // ============================================================================
921 // [asmjit::BaseMem]
922 // ============================================================================
923
924 //! Base class for all memory operands.
925 //!
926 //! \note It's tricky to pack all possible cases that define a memory operand
927 //! into just 16 bytes. The `BaseMem` splits data into the following parts:
928 //!
929 //! BASE - Base register or label - requires 36 bits total. 4 bits are used to
930 //! encode the type of the BASE operand (label vs. register type) and
931 //! the remaining 32 bits define the BASE id, which can be a physical or
932 //! virtual register index. If BASE type is zero, which is never used as
933 //! a register-type and label doesn't use it as well then BASE field
934 //! contains a high DWORD of a possible 64-bit absolute address, which is
935 //! possible on X64.
936 //!
937 //! INDEX - Index register (or theoretically Label, which doesn't make sense).
938 //! Encoding is similar to BASE - it also requires 36 bits and splits
939 //! the encoding to INDEX type (4 bits defining the register type) and
940 //! id (32-bits).
941 //!
942 //! OFFSET - A relative offset of the address. Basically if BASE is specified
//! the relative displacement adjusts BASE and an optional INDEX. If
944 //! BASE is not specified then the OFFSET should be considered as ABSOLUTE
945 //! address (at least on X86). In that case its low 32 bits are stored in
946 //! DISPLACEMENT field and the remaining high 32 bits are stored in BASE.
947 //!
//! OTHER - The remaining 8 bits can be used for any purpose. The
949 //! x86::Mem operand uses these bits to store segment override prefix and
950 //! index shift (scale).
class BaseMem : public Operand {
public:
  //! Address type of the memory operand (default, absolute, or relative).
  enum AddrType : uint32_t {
    kAddrTypeDefault = 0,
    kAddrTypeAbs = 1,
    kAddrTypeRel = 2
  };

  // Shortcuts.
  enum SignatureMem : uint32_t {
    kSignatureMemAbs = kAddrTypeAbs << kSignatureMemAddrTypeShift,
    kSignatureMemRel = kAddrTypeRel << kSignatureMemAddrTypeShift
  };

  //! \cond INTERNAL
  //! Used internally to construct `BaseMem` operand from decomposed data.
  struct Decomposed {
    uint32_t baseType;
    uint32_t baseId;
    uint32_t indexType;
    uint32_t indexId;
    int32_t offset;
    uint32_t size;
    uint32_t flags;
  };
  //! \endcond

  //! \name Construction & Destruction
  //! \{

  //! Creates a default `BaseMem` operand, that points to [0].
  constexpr BaseMem() noexcept
    : Operand(Globals::Init, kOpMem, 0, 0, 0) {}

  //! Creates a `BaseMem` operand that is a clone of `other`.
  constexpr BaseMem(const BaseMem& other) noexcept
    : Operand(other) {}

  //! \cond INTERNAL

  //! Creates a `BaseMem` operand from 4 integers as used by `Operand_` struct.
  constexpr BaseMem(Globals::Init_, uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) noexcept
    : Operand(Globals::Init, u0, u1, u2, u3) {}

  //! Creates a `BaseMem` operand from decomposed BASE/INDEX/offset/size/flags data.
  constexpr BaseMem(const Decomposed& d) noexcept
    : Operand(Globals::Init,
              kOpMem | (d.baseType << kSignatureMemBaseTypeShift )
                     | (d.indexType << kSignatureMemIndexTypeShift)
                     | (d.size << kSignatureSizeShift )
                     | d.flags,
              d.baseId,
              d.indexId,
              uint32_t(d.offset)) {}

  //! \endcond

  //! Creates a completely uninitialized `BaseMem` operand.
  inline explicit BaseMem(Globals::NoInit_) noexcept
    : Operand(Globals::NoInit) {}

  //! Resets the memory operand - after the reset the memory points to [0].
  inline void reset() noexcept {
    _signature = kOpMem;
    _baseId = 0;
    _data[0] = 0;
    _data[1] = 0;
  }

  //! \}

  //! \name Overloaded Operators
  //! \{

  //! Copies the content of `other` into this memory operand.
  inline BaseMem& operator=(const BaseMem& other) noexcept { copyFrom(other); return *this; }

  //! \}

  //! \name Accessors
  //! \{

  //! Clones the memory operand.
  constexpr BaseMem clone() const noexcept { return BaseMem(*this); }

  //! Returns the address type, see `AddrType`.
  constexpr uint32_t addrType() const noexcept { return _getSignaturePart<kSignatureMemAddrTypeMask>(); }
  //! Sets the address type to `addrType`, see `AddrType`.
  inline void setAddrType(uint32_t addrType) noexcept { _setSignaturePart<kSignatureMemAddrTypeMask>(addrType); }
  //! Resets the address type to `kAddrTypeDefault`.
  inline void resetAddrType() noexcept { _setSignaturePart<kSignatureMemAddrTypeMask>(0); }

  //! Tests whether the address type is `kAddrTypeAbs`.
  constexpr bool isAbs() const noexcept { return addrType() == kAddrTypeAbs; }
  //! Sets the address type to `kAddrTypeAbs`.
  inline void setAbs() noexcept { setAddrType(kAddrTypeAbs); }

  //! Tests whether the address type is `kAddrTypeRel`.
  constexpr bool isRel() const noexcept { return addrType() == kAddrTypeRel; }
  //! Sets the address type to `kAddrTypeRel`.
  inline void setRel() noexcept { setAddrType(kAddrTypeRel); }

  //! Tests whether the memory operand is a register home (spill slot of a register).
  constexpr bool isRegHome() const noexcept { return _hasSignaturePart<kSignatureMemRegHomeFlag>(); }
  //! Marks this memory operand as register home.
  inline void setRegHome() noexcept { _signature |= kSignatureMemRegHomeFlag; }
  //! Clears the register home flag.
  inline void clearRegHome() noexcept { _signature &= ~kSignatureMemRegHomeFlag; }

  //! Tests whether the memory operand has a BASE register or label specified.
  constexpr bool hasBase() const noexcept { return (_signature & kSignatureMemBaseTypeMask) != 0; }
  //! Tests whether the memory operand has an INDEX register specified.
  constexpr bool hasIndex() const noexcept { return (_signature & kSignatureMemIndexTypeMask) != 0; }
  //! Tests whether the memory operand has BASE or INDEX register.
  constexpr bool hasBaseOrIndex() const noexcept { return (_signature & kSignatureMemBaseIndexMask) != 0; }
  //! Tests whether the memory operand has both BASE and INDEX register.
  constexpr bool hasBaseAndIndex() const noexcept { return (_signature & kSignatureMemBaseTypeMask) != 0 && (_signature & kSignatureMemIndexTypeMask) != 0; }

  //! Tests whether the BASE operand is a register (registers start after `kLabelTag`).
  constexpr bool hasBaseReg() const noexcept { return (_signature & kSignatureMemBaseTypeMask) > (Label::kLabelTag << kSignatureMemBaseTypeShift); }
  //! Tests whether the BASE operand is a label.
  constexpr bool hasBaseLabel() const noexcept { return (_signature & kSignatureMemBaseTypeMask) == (Label::kLabelTag << kSignatureMemBaseTypeShift); }
  //! Tests whether the INDEX operand is a register (registers start after `kLabelTag`).
  constexpr bool hasIndexReg() const noexcept { return (_signature & kSignatureMemIndexTypeMask) > (Label::kLabelTag << kSignatureMemIndexTypeShift); }

  //! Returns the type of the BASE register (0 if this memory operand doesn't
  //! use the BASE register).
  //!
  //! \note If the returned type is one (a value never associated to a register
  //! type) the BASE is not register, but it's a label. One equals to `kLabelTag`.
  //! You should always check `hasBaseLabel()` before using `baseId()` result.
  constexpr uint32_t baseType() const noexcept { return _getSignaturePart<kSignatureMemBaseTypeMask>(); }

  //! Returns the type of an INDEX register (0 if this memory operand doesn't
  //! use the INDEX register).
  constexpr uint32_t indexType() const noexcept { return _getSignaturePart<kSignatureMemIndexTypeMask>(); }

  //! Returns both BASE (4:0 bits) and INDEX (9:5 bits) types combined into a
  //! single value.
  //!
  //! \note This is used internally for BASE+INDEX validation.
  constexpr uint32_t baseAndIndexTypes() const noexcept { return _getSignaturePart<kSignatureMemBaseIndexMask>(); }

  //! Returns the id of the BASE register or label (if the BASE was specified
  //! as label).
  constexpr uint32_t baseId() const noexcept { return _baseId; }

  //! Returns the id of the INDEX register.
  constexpr uint32_t indexId() const noexcept { return _data[kDataMemIndexId]; }

  //! Sets the id of the BASE register (without modifying its type).
  inline void setBaseId(uint32_t rId) noexcept { _baseId = rId; }
  //! Sets the id of the INDEX register (without modifying its type).
  inline void setIndexId(uint32_t rId) noexcept { _data[kDataMemIndexId] = rId; }

  //! Sets the base register to type and id of the given `base` operand.
  inline void setBase(const BaseReg& base) noexcept { return _setBase(base.type(), base.id()); }
  //! Sets the index register to type and id of the given `index` operand.
  inline void setIndex(const BaseReg& index) noexcept { return _setIndex(index.type(), index.id()); }

  //! \cond INTERNAL
  //! Sets the BASE to raw register type `rType` and id `rId`.
  inline void _setBase(uint32_t rType, uint32_t rId) noexcept {
    _setSignaturePart<kSignatureMemBaseTypeMask>(rType);
    _baseId = rId;
  }

  //! Sets the INDEX to raw register type `rType` and id `rId`.
  inline void _setIndex(uint32_t rType, uint32_t rId) noexcept {
    _setSignaturePart<kSignatureMemIndexTypeMask>(rType);
    _data[kDataMemIndexId] = rId;
  }
  //! \endcond

  //! Resets the memory operand's BASE register or label.
  inline void resetBase() noexcept { _setBase(0, 0); }
  //! Resets the memory operand's INDEX register.
  inline void resetIndex() noexcept { _setIndex(0, 0); }

  //! Sets the memory operand size (in bytes).
  inline void setSize(uint32_t size) noexcept { _setSignaturePart<kSignatureSizeMask>(size); }

  //! Tests whether the memory operand has a 64-bit offset or absolute address.
  //!
  //! If this is true then `hasBase()` must always report false.
  constexpr bool isOffset64Bit() const noexcept { return baseType() == 0; }

  //! Tests whether the memory operand has a non-zero offset or absolute address.
  constexpr bool hasOffset() const noexcept {
    // The high 32 bits (stored in `_baseId`) only contribute when the offset
    // is 64-bit; the mask zeroes them out otherwise.
    return (_data[kDataMemOffsetLo] | uint32_t(_baseId & Support::bitMaskFromBool<uint32_t>(isOffset64Bit()))) != 0;
  }

  //! Returns either relative offset or absolute address as 64-bit integer.
  constexpr int64_t offset() const noexcept {
    return isOffset64Bit() ? int64_t(uint64_t(_data[kDataMemOffsetLo]) | (uint64_t(_baseId) << 32))
                           : int64_t(int32_t(_data[kDataMemOffsetLo])); // Sign extend 32-bit offset.
  }

  //! Returns a 32-bit low part of a 64-bit offset or absolute address.
  constexpr int32_t offsetLo32() const noexcept { return int32_t(_data[kDataMemOffsetLo]); }
  //! Returns a 32-bit high part of a 64-bit offset or absolute address.
  //!
  //! \note This function is UNSAFE and returns garbage if `isOffset64Bit()`
  //! returns false. Never use it blindly without checking it first.
  constexpr int32_t offsetHi32() const noexcept { return int32_t(_baseId); }

  //! Sets a 64-bit offset or an absolute address to `offset`.
  //!
  //! \note This functions attempts to set both high and low parts of a 64-bit
  //! offset, however, if the operand has a BASE register it will store only the
  //! low 32 bits of the offset / address as there is no way to store both BASE
  //! and 64-bit offset, and there is currently no architecture that has such
  //! capability targeted by AsmJit.
  inline void setOffset(int64_t offset) noexcept {
    uint32_t lo = uint32_t(uint64_t(offset) & 0xFFFFFFFFu);
    uint32_t hi = uint32_t(uint64_t(offset) >> 32);
    // `hiMsk` is all-ones when the offset is 64-bit, zero otherwise - the high
    // part is only written when `_baseId` is not used as a BASE id.
    uint32_t hiMsk = Support::bitMaskFromBool<uint32_t>(isOffset64Bit());

    _data[kDataMemOffsetLo] = lo;
    _baseId = (hi & hiMsk) | (_baseId & ~hiMsk);
  }
  //! Sets a low 32-bit offset to `offset` (don't use without knowing how BaseMem works).
  inline void setOffsetLo32(int32_t offset) noexcept { _data[kDataMemOffsetLo] = uint32_t(offset); }

  //! Adjusts the offset by a 64-bit `offset`.
  inline void addOffset(int64_t offset) noexcept {
    if (isOffset64Bit()) {
      int64_t result = offset + int64_t(uint64_t(_data[kDataMemOffsetLo]) | (uint64_t(_baseId) << 32));
      _data[kDataMemOffsetLo] = uint32_t(uint64_t(result) & 0xFFFFFFFFu);
      _baseId = uint32_t(uint64_t(result) >> 32);
    }
    else {
      _data[kDataMemOffsetLo] += uint32_t(uint64_t(offset) & 0xFFFFFFFFu);
    }
  }

  //! Adds `offset` to a low 32-bit offset part (don't use without knowing how
  //! BaseMem works).
  //!
  //! \note This is a fast function that doesn't use the HI 32-bits of a
  //! 64-bit offset. Use it only if you know that there is a BASE register
  //! and the offset is only 32 bits anyway.
  inline void addOffsetLo32(int32_t offset) noexcept { _data[kDataMemOffsetLo] += uint32_t(offset); }

  //! Resets the memory offset to zero.
  inline void resetOffset() noexcept { setOffset(0); }

  //! Resets the lo part of the memory offset to zero (don't use without knowing
  //! how BaseMem works).
  inline void resetOffsetLo32() noexcept { setOffsetLo32(0); }

  //! \}
};
1190
1191 // ============================================================================
1192 // [asmjit::Imm]
1193 // ============================================================================
1194
1195 //! Immediate operand.
1196 //!
1197 //! Immediate operand is usually part of instruction itself. It's inlined after
1198 //! or before the instruction opcode. Immediates can be only signed or unsigned
1199 //! integers.
1200 //!
1201 //! To create an immediate operand use `asmjit::imm()` helper, which can be used
1202 //! with any type, not just the default 64-bit int.
class Imm : public Operand {
public:
  //! \name Construction & Destruction
  //! \{

  //! Creates a new immediate value (initial value is 0).
  constexpr Imm() noexcept
    : Operand(Globals::Init, kOpImm, 0, 0, 0) {}

  //! Creates a new immediate value from `other`.
  constexpr Imm(const Imm& other) noexcept
    : Operand(other) {}

  //! Creates a new signed immediate value, assigning the value to `val`.
  constexpr explicit Imm(int64_t val) noexcept
    : Operand(Globals::Init, kOpImm, 0, Support::unpackU32At0(val), Support::unpackU32At1(val)) {}

  //! Creates a completely uninitialized immediate operand.
  inline explicit Imm(Globals::NoInit_) noexcept
    : Operand(Globals::NoInit) {}

  //! \}

  //! \name Overloaded Operators
  //! \{

  //! Assigns the value of the `other` operand to this immediate.
  inline Imm& operator=(const Imm& other) noexcept { copyFrom(other); return *this; }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns immediate value as 8-bit signed integer, possibly cropped.
  constexpr int8_t i8() const noexcept { return int8_t(_data[kDataImmValueLo] & 0xFFu); }
  //! Returns immediate value as 8-bit unsigned integer, possibly cropped.
  constexpr uint8_t u8() const noexcept { return uint8_t(_data[kDataImmValueLo] & 0xFFu); }
  //! Returns immediate value as 16-bit signed integer, possibly cropped.
  constexpr int16_t i16() const noexcept { return int16_t(_data[kDataImmValueLo] & 0xFFFFu);}
  //! Returns immediate value as 16-bit unsigned integer, possibly cropped.
  constexpr uint16_t u16() const noexcept { return uint16_t(_data[kDataImmValueLo] & 0xFFFFu);}
  //! Returns immediate value as 32-bit signed integer, possibly cropped.
  constexpr int32_t i32() const noexcept { return int32_t(_data[kDataImmValueLo]); }
  //! Returns low 32-bit signed integer.
  constexpr int32_t i32Lo() const noexcept { return int32_t(_data[kDataImmValueLo]); }
  //! Returns high 32-bit signed integer.
  constexpr int32_t i32Hi() const noexcept { return int32_t(_data[kDataImmValueHi]); }
  //! Returns immediate value as 32-bit unsigned integer, possibly cropped.
  constexpr uint32_t u32() const noexcept { return _data[kDataImmValueLo]; }
  //! Returns low 32-bit unsigned integer.
  constexpr uint32_t u32Lo() const noexcept { return _data[kDataImmValueLo]; }
  //! Returns high 32-bit unsigned integer.
  constexpr uint32_t u32Hi() const noexcept { return _data[kDataImmValueHi]; }
  //! Returns immediate value as 64-bit signed integer.
  constexpr int64_t i64() const noexcept { return int64_t((uint64_t(_data[kDataImmValueHi]) << 32) | _data[kDataImmValueLo]); }
  //! Returns immediate value as 64-bit unsigned integer.
  constexpr uint64_t u64() const noexcept { return uint64_t(i64()); }
  //! Returns immediate value as `intptr_t`, possibly cropped if size of `intptr_t` is 32 bits.
  constexpr intptr_t iptr() const noexcept { return (sizeof(intptr_t) == sizeof(int64_t)) ? intptr_t(i64()) : intptr_t(i32()); }
  //! Returns immediate value as `uintptr_t`, possibly cropped if size of `uintptr_t` is 32 bits.
  constexpr uintptr_t uptr() const noexcept { return (sizeof(uintptr_t) == sizeof(uint64_t)) ? uintptr_t(u64()) : uintptr_t(u32()); }

  //! Tests whether the immediate can be cast to 8-bit signed integer.
  constexpr bool isInt8() const noexcept { return Support::isInt8(i64()); }
  //! Tests whether the immediate can be cast to 8-bit unsigned integer.
  constexpr bool isUInt8() const noexcept { return Support::isUInt8(i64()); }
  //! Tests whether the immediate can be cast to 16-bit signed integer.
  constexpr bool isInt16() const noexcept { return Support::isInt16(i64()); }
  //! Tests whether the immediate can be cast to 16-bit unsigned integer.
  constexpr bool isUInt16() const noexcept { return Support::isUInt16(i64()); }
  //! Tests whether the immediate can be cast to 32-bit signed integer.
  constexpr bool isInt32() const noexcept { return Support::isInt32(i64()); }
  //! Tests whether the immediate can be cast to 32-bit unsigned integer.
  constexpr bool isUInt32() const noexcept { return _data[kDataImmValueHi] == 0; }

  //! Sets immediate value to 8-bit signed integer `val`.
  inline void setI8(int8_t val) noexcept { setI64(val); }
  //! Sets immediate value to 8-bit unsigned integer `val`.
  inline void setU8(uint8_t val) noexcept { setU64(val); }
  //! Sets immediate value to 16-bit signed integer `val`.
  inline void setI16(int16_t val) noexcept { setI64(val); }
  //! Sets immediate value to 16-bit unsigned integer `val`.
  inline void setU16(uint16_t val) noexcept { setU64(val); }
  //! Sets immediate value to 32-bit signed integer `val`.
  inline void setI32(int32_t val) noexcept { setI64(val); }
  //! Sets immediate value to 32-bit unsigned integer `val`.
  inline void setU32(uint32_t val) noexcept { setU64(val); }
  //! Sets immediate value to 64-bit signed integer `val`.
  inline void setI64(int64_t val) noexcept {
    _data[kDataImmValueHi] = uint32_t(uint64_t(val) >> 32);
    _data[kDataImmValueLo] = uint32_t(uint64_t(val) & 0xFFFFFFFFu);
  }
  //! Sets immediate value to 64-bit unsigned integer `val`.
  inline void setU64(uint64_t val) noexcept { setI64(int64_t(val)); }
  //! Sets immediate value to intptr_t `val`.
  inline void setIPtr(intptr_t val) noexcept { setI64(val); }
  //! Sets immediate value to uintptr_t `val`.
  inline void setUPtr(uintptr_t val) noexcept { setU64(val); }

  //! Sets immediate value to `val`.
  template<typename T>
  inline void setValue(T val) noexcept { setI64(int64_t(Support::asNormalized(val))); }

  //! Sets immediate value to the 64-bit bit pattern of the double `d`.
  inline void setDouble(double d) noexcept { setU64(Support::bitCast<uint64_t>(d)); }

  //! \}

  //! \name Utilities
  //! \{

  //! Clones the immediate operand.
  constexpr Imm clone() const noexcept { return Imm(*this); }

  //! Sign-extends the immediate from its low 8 bits to 64 bits.
  inline void signExtend8Bits() noexcept { setI64(int64_t(i8())); }
  //! Sign-extends the immediate from its low 16 bits to 64 bits.
  inline void signExtend16Bits() noexcept { setI64(int64_t(i16())); }
  //! Sign-extends the immediate from its low 32 bits to 64 bits.
  inline void signExtend32Bits() noexcept { setI64(int64_t(i32())); }

  //! Zero-extends the immediate from its low 8 bits to 64 bits.
  inline void zeroExtend8Bits() noexcept { setU64(u8()); }
  //! Zero-extends the immediate from its low 16 bits to 64 bits.
  inline void zeroExtend16Bits() noexcept { setU64(u16()); }
  //! Zero-extends the immediate by clearing its high 32 bits.
  inline void zeroExtend32Bits() noexcept { _data[kDataImmValueHi] = 0u; }

  //! \}
};
1326
1327 //! Creates a new immediate operand.
1328 //!
1329 //! Using `imm(x)` is much nicer than using `Imm(x)` as this is a template
1330 //! which can accept any integer including pointers and function pointers.
template<typename T>
static constexpr Imm imm(T val) noexcept {
  // Unsigned values are widened through `uint64_t` first so the conversion to
  // `int64_t` doesn't sign-extend them.
  return Imm(std::is_signed<T>::value ? int64_t(val) : int64_t(uint64_t(val)));
}
1335
1336 //! \}
1337
1338 ASMJIT_END_NAMESPACE
1339
1340 #endif // _ASMJIT_CORE_OPERAND_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/osutils.h"
8 #include "../core/support.h"
9
10 #if defined(_WIN32)
11 #include <atomic>
12 #elif defined(__APPLE__)
13 #include <mach/mach_time.h>
14 #else
15 #include <time.h>
16 #include <unistd.h>
17 #endif
18
19 ASMJIT_BEGIN_NAMESPACE
20
21 // ============================================================================
22 // [asmjit::OSUtils - GetTickCount]
23 // ============================================================================
24
// Returns the current tick count in milliseconds (wraps around at 2^32), used
// for benchmarking. Each platform uses its monotonic clock when available and
// returns 0 when the clock cannot be queried.
uint32_t OSUtils::getTickCount() noexcept {
#if defined(_WIN32)
  enum HiResStatus : uint32_t {
    kHiResUnknown = 0,
    kHiResAvailable = 1,
    kHiResNotAvailable = 2
  };

  static std::atomic<uint32_t> _hiResStatus(kHiResUnknown);
  // NOTE(review): `volatile` is not a synchronization primitive - concurrent
  // first calls may race on this write. Presumably benign since every racer
  // computes the same frequency; confirm this is the intent.
  static volatile double _hiResFreq(0);

  uint32_t status = _hiResStatus.load();
  LARGE_INTEGER now, qpf;

  if (status != kHiResNotAvailable && ::QueryPerformanceCounter(&now)) {
    double freq = _hiResFreq;
    if (status == kHiResUnknown) {
      // Detects the availability of high resolution counter.
      if (::QueryPerformanceFrequency(&qpf)) {
        // Pre-divide by 1000 so the frequency is expressed in ticks per millisecond.
        freq = double(qpf.QuadPart) / 1000.0;
        _hiResFreq = freq;
        _hiResStatus.compare_exchange_strong(status, kHiResAvailable);
        status = kHiResAvailable;
      }
      else {
        // High resolution not available.
        _hiResStatus.compare_exchange_strong(status, kHiResNotAvailable);
      }
    }

    if (status == kHiResAvailable)
      return uint32_t(uint64_t(int64_t(double(now.QuadPart) / freq)) & 0xFFFFFFFFu);
  }

  // Bail to `GetTickCount()` if we cannot use high resolution.
  return ::GetTickCount();
#elif defined(__APPLE__)
  // See Apple's QA1398. `mach_absolute_time()` counts in timebase ticks that
  // are converted to real time via the numer/denom ratio queried below.
  static mach_timebase_info_data_t _machTime;

  // Lazily query the timebase on first use (denom == 0 means uninitialized).
  uint32_t denom = _machTime.denom;
  if (ASMJIT_UNLIKELY(!denom)) {
    if (mach_timebase_info(&_machTime) != KERN_SUCCESS || !(denom = _machTime.denom))
      return 0;
  }

  // Divide ticks down to the millisecond scale first, then apply the timebase
  // ratio. NOTE(review): dividing before multiplying avoids 64-bit overflow
  // but loses sub-ratio precision when numer/denom != 1 - presumably fine for
  // a 1ms-resolution tick counter; confirm against QA1398.
  uint64_t t = mach_absolute_time() / 1000000u;
  t = (t * _machTime.numer) / _machTime.denom;
  return uint32_t(t & 0xFFFFFFFFu);
#elif defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0
  struct timespec ts;
  if (ASMJIT_UNLIKELY(clock_gettime(CLOCK_MONOTONIC, &ts) != 0))
    return 0;

  // Combine seconds and nanoseconds into milliseconds.
  uint64_t t = (uint64_t(ts.tv_sec ) * 1000u) + (uint64_t(ts.tv_nsec) / 1000000u);
  return uint32_t(t & 0xFFFFFFFFu);
#else
#pragma message("asmjit::OSUtils::getTickCount() doesn't have implementation for the target OS.")
  return 0;
#endif
}
87
88 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_OSUTILS_H
7 #define _ASMJIT_CORE_OSUTILS_H
8
9 #include "../core/globals.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_support
14 //! \{
15
16 // ============================================================================
17 // [asmjit::OSUtils]
18 // ============================================================================
19
20 //! Operating system utilities.
21 namespace OSUtils {
22 //! Gets the current CPU tick count, used for benchmarking (1ms resolution).
23 ASMJIT_API uint32_t getTickCount() noexcept;
24 };
25
26 // ============================================================================
27 // [asmjit::Lock]
28 // ============================================================================
29
30 //! \cond INTERNAL
31
//! Lock.
//!
//! Lock is internal, it cannot be used outside of AsmJit, however, its internal
//! layout is exposed as it's used by some other public classes.
class Lock {
public:
  ASMJIT_NONCOPYABLE(Lock)

#if defined(_WIN32)
#pragma pack(push, 8)
  //! Mirrors the layout of Win32 `CRITICAL_SECTION` so this header doesn't
  //! have to include `<windows.h>` (layout and alignment are verified by
  //! static_asserts in the implementation).
  struct ASMJIT_MAY_ALIAS Handle {
    void* DebugInfo;
    long LockCount;
    long RecursionCount;
    void* OwningThread;
    void* LockSemaphore;
    unsigned long* SpinCount;
  };
  //! Native handle.
  Handle _handle;
#pragma pack(pop)
#elif !defined(__EMSCRIPTEN__)
  //! PThread mutex used as the native handle.
  typedef pthread_mutex_t Handle;
  //! Native handle.
  Handle _handle;
#endif

  //! Creates and initializes the lock (implementation is platform specific).
  inline Lock() noexcept;
  //! Destroys the lock.
  inline ~Lock() noexcept;

  //! Acquires the lock.
  inline void lock() noexcept;
  //! Releases the lock.
  inline void unlock() noexcept;
};
63
// The inline implementations below are only compiled when building AsmJit
// itself (ASMJIT_EXPORTS); users of the library never see them.
#ifdef ASMJIT_EXPORTS
#if defined(_WIN32)

// Win32 implementation.
static_assert(sizeof(Lock::Handle) == sizeof(CRITICAL_SECTION), "asmjit::Lock::Handle layout must match CRITICAL_SECTION");
static_assert(alignof(Lock::Handle) == alignof(CRITICAL_SECTION), "asmjit::Lock::Handle alignment must match CRITICAL_SECTION");

inline Lock::Lock() noexcept { InitializeCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }
inline Lock::~Lock() noexcept { DeleteCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }
inline void Lock::lock() noexcept { EnterCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }
inline void Lock::unlock() noexcept { LeaveCriticalSection(reinterpret_cast<CRITICAL_SECTION*>(&_handle)); }

#elif !defined(__EMSCRIPTEN__)

// PThread implementation.
inline Lock::Lock() noexcept { pthread_mutex_init(&_handle, nullptr); }
inline Lock::~Lock() noexcept { pthread_mutex_destroy(&_handle); }
inline void Lock::lock() noexcept { pthread_mutex_lock(&_handle); }
inline void Lock::unlock() noexcept { pthread_mutex_unlock(&_handle); }

#else

// Dummy implementation - Emscripten or other unsupported platform.
inline Lock::Lock() noexcept {}
inline Lock::~Lock() noexcept {}
inline void Lock::lock() noexcept {}
inline void Lock::unlock() noexcept {}

#endif
#endif
94
95 //! \endcond
96
97 // ============================================================================
98 // [asmjit::LockGuard]
99 // ============================================================================
100
101 #ifdef ASMJIT_EXPORTS
102 //! \cond INTERNAL
103
//! Scoped lock (RAII) - acquires `target` on construction and releases it on
//! destruction, so the lock is always released even on early return.
struct LockGuard {
  ASMJIT_NONCOPYABLE(LockGuard)

  //! The guarded lock.
  Lock& _target;

  //! Acquires `target` for the lifetime of this guard.
  inline LockGuard(Lock& target) noexcept : _target(target) { _target.lock(); }
  //! Releases the guarded lock.
  inline ~LockGuard() noexcept { _target.unlock(); }
};
113
114 //! \endcond
115 #endif
116
117 //! \}
118
119 ASMJIT_END_NAMESPACE
120
121 #endif // _ASMJIT_CORE_OSUTILS_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_RAASSIGNMENT_P_H
7 #define _ASMJIT_CORE_RAASSIGNMENT_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/radefs_p.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \cond INTERNAL
17 //! \addtogroup asmjit_ra
18 //! \{
19
20 // ============================================================================
21 // [asmjit::RAAssignment]
22 // ============================================================================
23
//! Register assignment - a bidirectional mapping between work registers (the
//! register allocator's view of virtual registers) and physical registers,
//! together with `assigned` and `dirty` bit-masks per register group.
class RAAssignment {
  ASMJIT_NONCOPYABLE(RAAssignment)

public:
  enum Ids : uint32_t {
    //! Marks an unassigned entry in `WorkToPhysMap` (ids are 8-bit).
    kPhysNone = 0xFF,
    //! Marks an unassigned entry in `PhysToWorkMap` (ids are 32-bit).
    kWorkNone = RAWorkReg::kIdNone
  };

  enum DirtyBit : uint32_t {
    //! Register content is in sync with its memory location.
    kClean = 0,
    //! Register content is out of sync with its spill slot (or has none).
    kDirty = 1
  };

  //! Describes the number and per-group layout of physical and work registers.
  struct Layout {
    //! Resets the layout to a zero/empty state.
    inline void reset() noexcept {
      physIndex.reset();
      physCount.reset();
      physTotal = 0;
      workCount = 0;
      workRegs = nullptr;
    }

    RARegIndex physIndex;       //!< Index of architecture registers per group.
    RARegCount physCount;       //!< Count of architecture registers per group.
    uint32_t physTotal;         //!< Count of physical registers of all groups.
    uint32_t workCount;         //!< Count of work registers.
    const RAWorkRegs* workRegs; //!< WorkRegs data (vector).
  };

  //! PhysReg to WorkReg mapping together with assigned/dirty masks.
  //!
  //! Allocated with a trailing variable-length `workIds[]` array, hence the
  //! `sizeOf()` helper that computes the allocation size for `count` entries.
  struct PhysToWorkMap {
    //! Returns the allocation size (in bytes) of the map for `count` entries.
    static inline size_t sizeOf(uint32_t count) noexcept {
      return sizeof(PhysToWorkMap) - sizeof(uint32_t) + size_t(count) * sizeof(uint32_t);
    }

    //! Resets the map - clears both masks and unassigns all `count` entries.
    inline void reset(uint32_t count) noexcept {
      assigned.reset();
      dirty.reset();

      for (uint32_t i = 0; i < count; i++)
        workIds[i] = kWorkNone;
    }

    //! Copies the content of `other` (sized for `count` entries) into this map.
    inline void copyFrom(const PhysToWorkMap* other, uint32_t count) noexcept {
      size_t size = sizeOf(count);
      memcpy(this, other, size);
    }

    RARegMask assigned;            //!< Assigned registers (each bit represents one physical reg).
    RARegMask dirty;               //!< Dirty registers (spill slot out of sync or no spill slot).
    uint32_t workIds[1 /* ... */]; //!< PhysReg to WorkReg mapping.
  };

  //! WorkReg to PhysReg mapping (one byte per work register).
  struct WorkToPhysMap {
    //! Returns the allocation size (in bytes) of the map for `count` entries.
    static inline size_t sizeOf(uint32_t count) noexcept {
      return size_t(count) * sizeof(uint8_t);
    }

    //! Resets the map - unassigns all `count` entries.
    inline void reset(uint32_t count) noexcept {
      for (uint32_t i = 0; i < count; i++)
        physIds[i] = kPhysNone;
    }

    //! Copies the content of `other` (sized for `count` entries) into this map.
    inline void copyFrom(const WorkToPhysMap* other, uint32_t count) noexcept {
      size_t size = sizeOf(count);
      // NOTE: `size` can be zero (no work registers), so guard the memcpy.
      if (ASMJIT_LIKELY(size))
        memcpy(this, other, size);
    }

    uint8_t physIds[1 /* ... */]; //!< WorkReg to PhysReg mapping
  };

  //! Physical registers layout.
  Layout _layout;
  //! WorkReg to PhysReg mapping.
  WorkToPhysMap* _workToPhysMap;
  //! PhysReg to WorkReg mapping and assigned/dirty bits.
  PhysToWorkMap* _physToWorkMap;
  //! Optimization to translate PhysRegs to WorkRegs faster.
  uint32_t* _physToWorkIds[BaseReg::kGroupVirt];

  //! \name Construction & Destruction
  //! \{

  //! Creates an uninitialized assignment - `initLayout()` and `initMaps()`
  //! must be called before it's used.
  inline RAAssignment() noexcept {
    _layout.reset();
    resetMaps();
  }

  //! Initializes the layout from physical register counts and work registers.
  //! Must be called before `initMaps()`.
  inline void initLayout(const RARegCount& physCount, const RAWorkRegs& workRegs) noexcept {
    // Layout must be initialized before data.
    ASMJIT_ASSERT(_physToWorkMap == nullptr);
    ASMJIT_ASSERT(_workToPhysMap == nullptr);

    _layout.physIndex.buildIndexes(physCount);
    _layout.physCount = physCount;
    // Total = start index of the last group + count of registers in that group.
    _layout.physTotal = uint32_t(_layout.physIndex[BaseReg::kGroupVirt - 1]) +
                        uint32_t(_layout.physCount[BaseReg::kGroupVirt - 1]) ;
    _layout.workCount = workRegs.size();
    _layout.workRegs = &workRegs;
  }

  //! Attaches the backing storage of both maps (allocated by the caller to
  //! match the current layout) and caches per-group pointers into
  //! `physToWorkMap->workIds` for faster translation.
  inline void initMaps(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept {
    _physToWorkMap = physToWorkMap;
    _workToPhysMap = workToPhysMap;
    for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
      _physToWorkIds[group] = physToWorkMap->workIds + _layout.physIndex.get(group);
  }

  //! Detaches both maps (doesn't free them - the storage is managed elsewhere).
  inline void resetMaps() noexcept {
    _physToWorkMap = nullptr;
    _workToPhysMap = nullptr;
    for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
      _physToWorkIds[group] = nullptr;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the PhysReg to WorkReg map.
  inline PhysToWorkMap* physToWorkMap() const noexcept { return _physToWorkMap; }
  //! Returns the WorkReg to PhysReg map.
  inline WorkToPhysMap* workToPhysMap() const noexcept { return _workToPhysMap; }

  //! Returns masks of assigned registers (all groups).
  inline RARegMask& assigned() noexcept { return _physToWorkMap->assigned; }
  //! \overload
  inline const RARegMask& assigned() const noexcept { return _physToWorkMap->assigned; }
  //! Returns the mask of assigned registers of the given `group`.
  inline uint32_t assigned(uint32_t group) const noexcept { return _physToWorkMap->assigned[group]; }

  //! Returns masks of dirty registers (all groups).
  inline RARegMask& dirty() noexcept { return _physToWorkMap->dirty; }
  //! \overload
  inline const RARegMask& dirty() const noexcept { return _physToWorkMap->dirty; }
  //! Returns the mask of dirty registers of the given `group`.
  inline uint32_t dirty(uint32_t group) const noexcept { return _physToWorkMap->dirty[group]; }

  //! Returns the physical register id assigned to `workId` or `kPhysNone`.
  inline uint32_t workToPhysId(uint32_t group, uint32_t workId) const noexcept {
    ASMJIT_UNUSED(group);
    ASMJIT_ASSERT(workId != kWorkNone);
    ASMJIT_ASSERT(workId < _layout.workCount);
    return _workToPhysMap->physIds[workId];
  }

  //! Returns the work register id assigned to `physId` of `group` or `kWorkNone`.
  inline uint32_t physToWorkId(uint32_t group, uint32_t physId) const noexcept {
    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
    return _physToWorkIds[group][physId];
  }

  //! Tests whether `physId` of `group` is currently assigned to a work register.
  inline bool isPhysAssigned(uint32_t group, uint32_t physId) const noexcept {
    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
    return Support::bitTest(_physToWorkMap->assigned[group], physId);
  }

  //! Tests whether `physId` of `group` is dirty.
  inline bool isPhysDirty(uint32_t group, uint32_t physId) const noexcept {
    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
    return Support::bitTest(_physToWorkMap->dirty[group], physId);
  }

  //! \}

  //! \name Assignment
  //! \{

  // These are low-level allocation helpers that are used to update the current
  // mappings between physical and virt/work registers and also to update masks
  // that represent allocated and dirty registers. These functions don't emit
  // any code; they are only used to update and keep all mappings in sync.

  //! Assign [VirtReg/WorkReg] to a physical register.
  ASMJIT_INLINE void assign(uint32_t group, uint32_t workId, uint32_t physId, uint32_t dirty) noexcept {
    // Both sides must currently be unassigned.
    ASMJIT_ASSERT(workToPhysId(group, workId) == kPhysNone);
    ASMJIT_ASSERT(physToWorkId(group, physId) == kWorkNone);
    ASMJIT_ASSERT(!isPhysAssigned(group, physId));
    ASMJIT_ASSERT(!isPhysDirty(group, physId));

    _workToPhysMap->physIds[workId] = uint8_t(physId);
    _physToWorkIds[group][physId] = workId;

    uint32_t regMask = Support::bitMask(physId);
    _physToWorkMap->assigned[group] |= regMask;
    // The dirty bit is only set when `dirty` is non-zero - the bool is
    // expanded into an all-ones/all-zeros mask and ANDed with `regMask`.
    _physToWorkMap->dirty[group] |= regMask & Support::bitMaskFromBool<uint32_t>(dirty);

    verify();
  }

  //! Reassign [VirtReg/WorkReg] to `dstPhysId` from `srcPhysId`.
  ASMJIT_INLINE void reassign(uint32_t group, uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
    ASMJIT_ASSERT(dstPhysId != srcPhysId);
    ASMJIT_ASSERT(workToPhysId(group, workId) == srcPhysId);
    ASMJIT_ASSERT(physToWorkId(group, srcPhysId) == workId);
    ASMJIT_ASSERT(isPhysAssigned(group, srcPhysId) == true);
    ASMJIT_ASSERT(isPhysAssigned(group, dstPhysId) == false);

    _workToPhysMap->physIds[workId] = uint8_t(dstPhysId);
    _physToWorkIds[group][srcPhysId] = kWorkNone;
    _physToWorkIds[group][dstPhysId] = workId;

    uint32_t srcMask = Support::bitMask(srcPhysId);
    uint32_t dstMask = Support::bitMask(dstPhysId);

    uint32_t dirty = (_physToWorkMap->dirty[group] & srcMask) != 0;
    uint32_t regMask = dstMask | srcMask;

    // XOR clears the bits of `srcPhysId` and sets the bits of `dstPhysId`
    // (src was assigned and dst was not - see the assertions above). The same
    // trick moves the dirty bit, but only when src was dirty.
    _physToWorkMap->assigned[group] ^= regMask;
    _physToWorkMap->dirty[group] ^= regMask & Support::bitMaskFromBool<uint32_t>(dirty);

    verify();
  }

  //! Swaps the assignments of two work registers assigned to `aPhysId` and `bPhysId`.
  ASMJIT_INLINE void swap(uint32_t group, uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
    ASMJIT_ASSERT(aPhysId != bPhysId);
    ASMJIT_ASSERT(workToPhysId(group, aWorkId) == aPhysId);
    ASMJIT_ASSERT(workToPhysId(group, bWorkId) == bPhysId);
    ASMJIT_ASSERT(physToWorkId(group, aPhysId) == aWorkId);
    ASMJIT_ASSERT(physToWorkId(group, bPhysId) == bWorkId);
    ASMJIT_ASSERT(isPhysAssigned(group, aPhysId));
    ASMJIT_ASSERT(isPhysAssigned(group, bPhysId));

    _workToPhysMap->physIds[aWorkId] = uint8_t(bPhysId);
    _workToPhysMap->physIds[bWorkId] = uint8_t(aPhysId);
    _physToWorkIds[group][aPhysId] = bWorkId;
    _physToWorkIds[group][bPhysId] = aWorkId;

    uint32_t aMask = Support::bitMask(aPhysId);
    uint32_t bMask = Support::bitMask(bPhysId);

    // Both registers stay assigned; the dirty bits are flipped only when the
    // two dirty states differ (otherwise swapping them is a no-op).
    uint32_t flipMask = Support::bitMaskFromBool<uint32_t>(
      ((_physToWorkMap->dirty[group] & aMask) != 0) ^
      ((_physToWorkMap->dirty[group] & bMask) != 0));

    uint32_t regMask = aMask | bMask;
    _physToWorkMap->dirty[group] ^= regMask & flipMask;

    verify();
  }

  //! Unassign [VirtReg/WorkReg] from a physical register.
  ASMJIT_INLINE void unassign(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    ASMJIT_ASSERT(physId < Globals::kMaxPhysRegs);
    ASMJIT_ASSERT(workToPhysId(group, workId) == physId);
    ASMJIT_ASSERT(physToWorkId(group, physId) == workId);
    ASMJIT_ASSERT(isPhysAssigned(group, physId));

    _workToPhysMap->physIds[workId] = kPhysNone;
    _physToWorkIds[group][physId] = kWorkNone;

    // Clears both the assigned and the dirty bit.
    uint32_t regMask = Support::bitMask(physId);
    _physToWorkMap->assigned[group] &= ~regMask;
    _physToWorkMap->dirty[group] &= ~regMask;

    verify();
  }

  //! Clears the dirty bit of `physId` (`workId` is unused, kept for API symmetry).
  inline void makeClean(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    ASMJIT_UNUSED(workId);

    uint32_t regMask = Support::bitMask(physId);
    _physToWorkMap->dirty[group] &= ~regMask;
  }

  //! Sets the dirty bit of `physId` (`workId` is unused, kept for API symmetry).
  inline void makeDirty(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    ASMJIT_UNUSED(workId);

    uint32_t regMask = Support::bitMask(physId);
    _physToWorkMap->dirty[group] |= regMask;
  }

  //! \}

  //! \name Utilities
  //! \{

  //! Swaps the whole content (map pointers and cached per-group pointers) with `other`.
  inline void swap(RAAssignment& other) noexcept {
    std::swap(_workToPhysMap, other._workToPhysMap);
    std::swap(_physToWorkMap, other._physToWorkMap);

    for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
      std::swap(_physToWorkIds[group], other._physToWorkIds[group]);
  }

  //! Copies both maps from raw map pointers; they must match the current layout.
  inline void copyFrom(const PhysToWorkMap* physToWorkMap, const WorkToPhysMap* workToPhysMap) noexcept {
    memcpy(_physToWorkMap, physToWorkMap, PhysToWorkMap::sizeOf(_layout.physTotal));
    memcpy(_workToPhysMap, workToPhysMap, WorkToPhysMap::sizeOf(_layout.workCount));
  }

  //! Copies both maps from `other`; its layout must match the current layout.
  inline void copyFrom(const RAAssignment& other) noexcept {
    copyFrom(other.physToWorkMap(), other.workToPhysMap());
  }

  // Not really useful outside of debugging.
  //! Tests whether this assignment equals `other` (layout, both maps, and masks).
  bool equals(const RAAssignment& other) const noexcept {
    // Layout should always match.
    if (_layout.physIndex != other._layout.physIndex ||
        _layout.physCount != other._layout.physCount ||
        _layout.physTotal != other._layout.physTotal ||
        _layout.workCount != other._layout.workCount ||
        _layout.workRegs != other._layout.workRegs)
      return false;

    uint32_t physTotal = _layout.physTotal;
    uint32_t workCount = _layout.workCount;

    for (uint32_t physId = 0; physId < physTotal; physId++) {
      uint32_t thisWorkId = _physToWorkMap->workIds[physId];
      uint32_t otherWorkId = other._physToWorkMap->workIds[physId];
      if (thisWorkId != otherWorkId)
        return false;
    }

    for (uint32_t workId = 0; workId < workCount; workId++) {
      uint32_t thisPhysId = _workToPhysMap->physIds[workId];
      uint32_t otherPhysId = other._workToPhysMap->physIds[workId];
      if (thisPhysId != otherPhysId)
        return false;
    }

    if (_physToWorkMap->assigned != other._physToWorkMap->assigned ||
        _physToWorkMap->dirty != other._physToWorkMap->dirty )
      return false;

    return true;
  }

#if defined(ASMJIT_BUILD_DEBUG)
  //! Verifies that both maps are consistent with each other - every assigned
  //! work register must point to the physical register that refers back to it
  //! and vice versa. Debug builds only; called after every mutation above.
  ASMJIT_NOINLINE void verify() noexcept {
    // Verify WorkToPhysMap.
    {
      for (uint32_t workId = 0; workId < _layout.workCount; workId++) {
        uint32_t physId = _workToPhysMap->physIds[workId];
        if (physId != kPhysNone) {
          const RAWorkReg* workReg = _layout.workRegs->at(workId);
          uint32_t group = workReg->group();
          ASMJIT_ASSERT(_physToWorkIds[group][physId] == workId);
        }
      }
    }

    // Verify PhysToWorkMap.
    {
      for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
        uint32_t physCount = _layout.physCount[group];
        for (uint32_t physId = 0; physId < physCount; physId++) {
          uint32_t workId = _physToWorkIds[group][physId];
          if (workId != kWorkNone) {
            ASMJIT_ASSERT(_workToPhysMap->physIds[workId] == physId);
          }
        }
      }
    }
  }
#else
  //! No-op in release builds.
  inline void verify() noexcept {}
#endif

  //! \}
};
376
377 //! \}
378 //! \endcond
379
380 ASMJIT_END_NAMESPACE
381
382 #endif // !ASMJIT_NO_COMPILER
383 #endif // _ASMJIT_CORE_RAASSIGNMENT_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_RABUILDERS_P_H
7 #define _ASMJIT_CORE_RABUILDERS_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/rapass_p.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \cond INTERNAL
17 //! \addtogroup asmjit_ra
18 //! \{
19
20 // ============================================================================
21 // [asmjit::RACFGBuilder]
22 // ============================================================================
23
//! Control flow graph (CFG) builder.
//!
//! Iterates over the function's node list and partitions it into basic blocks
//! (`RABlock`), wiring successor edges as it goes. This is a CRTP base class -
//! `This` is the architecture-specific builder that provides `onInst()`,
//! `onCall()`, `onRet()`, `onBeforeCall()`, and `onBeforeRet()`.
template<typename This>
class RACFGBuilder {
public:
  //! The register allocation pass that owns the blocks being created.
  RAPass* _pass;
  //! Compiler that owns the node list being traversed.
  BaseCompiler* _cc;
  //! Block currently being constructed (null when the current code is unreachable).
  RABlock* _curBlock;
  //! Block that represents the function's exit.
  RABlock* _retBlock;

  // NOTE: This is a bit hacky. There are some nodes which are processed twice
  // (see `onBeforeCall()` and `onBeforeRet()`) as they can insert some nodes
  // around them. Since we don't have any flags to mark these we just use their
  // position that is [at that time] unassigned.
  static constexpr uint32_t kNodePositionDidOnBefore = 0xFFFFFFFFu;

  //! Creates the builder for the given `pass`.
  inline RACFGBuilder(RAPass* pass) noexcept
    : _pass(pass),
      _cc(pass->cc()),
      _curBlock(nullptr),
      _retBlock(nullptr) {}

  //! Returns the compiler.
  inline BaseCompiler* cc() const noexcept { return _cc; }

  //! Builds the CFG - walks every node of the function, creates blocks and
  //! successor edges, removes unreachable instructions, and returns `kErrorOk`
  //! on success or an error code on failure.
  Error run() noexcept {
#ifndef ASMJIT_NO_LOGGING
    Logger* logger = _pass->debugLogger();
    uint32_t flags = FormatOptions::kFlagPositions;
    RABlock* lastPrintedBlock = nullptr;
    StringTmp<512> sb;
#endif

    ASMJIT_RA_LOG_FORMAT("[RAPass::BuildCFG]\n");

    FuncNode* func = _pass->func();
    BaseNode* node = nullptr;

    // Create entry and exit blocks.
    _retBlock = _pass->newBlockOrExistingAt(func->exitNode(), &node);
    if (ASMJIT_UNLIKELY(!_retBlock))
      return DebugUtils::errored(kErrorOutOfMemory);
    ASMJIT_PROPAGATE(_pass->addExitBlock(_retBlock));

    if (node != func) {
      _curBlock = _pass->newBlock();
      if (ASMJIT_UNLIKELY(!_curBlock))
        return DebugUtils::errored(kErrorOutOfMemory);
    }
    else {
      // Function that has no code at all.
      _curBlock = _retBlock;
    }

    ASMJIT_PROPAGATE(_pass->addBlock(_curBlock));

    RARegsStats blockRegStats;
    blockRegStats.reset();
    RAInstBuilder ib;

    bool hasCode = false;
    uint32_t exitLabelId = func->exitNode()->id();

    ASMJIT_RA_LOG_COMPLEX({
      flags |= logger->flags();

      Logging::formatNode(sb, flags, cc(), func);
      logger->logf(" %s\n", sb.data());

      lastPrintedBlock = _curBlock;
      logger->logf(" {#%u}\n", lastPrintedBlock->blockId());
    });

    node = func->next();
    if (ASMJIT_UNLIKELY(!node))
      return DebugUtils::errored(kErrorInvalidState);

    _curBlock->setFirst(node);
    _curBlock->setLast(node);

    // Main traversal loop - classifies each node as instruction, label, or
    // other, and updates the current block accordingly.
    for (;;) {
      BaseNode* next = node->next();
      ASMJIT_ASSERT(node->position() == 0 || node->position() == kNodePositionDidOnBefore);

      if (node->isInst()) {
        if (ASMJIT_UNLIKELY(!_curBlock)) {
          // If this code is unreachable then it has to be removed.
          ASMJIT_RA_LOG_COMPLEX({
            sb.clear();
            Logging::formatNode(sb, flags, cc(), node);
            logger->logf(" <Removed> %s\n", sb.data());
          });
          cc()->removeNode(node);
          node = next;
          continue;
        }
        else {
          // Handle `InstNode`, `FuncCallNode`, and `FuncRetNode`. All of
          // these share the `InstNode` interface and contain operands.
          hasCode = true;

          if (node->type() != BaseNode::kNodeInst) {
            if (node->position() != kNodePositionDidOnBefore) {
              // Call and Reg are complicated as they may insert some surrounding
              // code around them. The simplest approach is to get the previous
              // node, call the `onBefore()` handlers and then check whether
              // anything changed and restart if so. By restart we mean that the
              // current `node` would go back to the first possible inserted node
              // by `onBeforeCall()` or `onBeforeRet()`.
              BaseNode* prev = node->prev();
              if (node->type() == BaseNode::kNodeFuncCall) {
                ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeCall(node->as<FuncCallNode>()));
              }
              else if (node->type() == BaseNode::kNodeFuncRet) {
                ASMJIT_PROPAGATE(static_cast<This*>(this)->onBeforeRet(node->as<FuncRetNode>()));
              }

              if (prev != node->prev()) {
                // If this was the first node in the block and something was
                // inserted before it then we have to update the first block.
                if (_curBlock->first() == node)
                  _curBlock->setFirst(prev->next());

                node->setPosition(kNodePositionDidOnBefore);
                node = prev->next();

                // `onBeforeCall()` and `onBeforeRet()` can only insert instructions.
                ASMJIT_ASSERT(node->isInst());
              }

              // Necessary if something was inserted after `node`, but nothing before.
              next = node->next();
            }
            else {
              // Change the position back to its original value.
              node->setPosition(0);
            }
          }

          InstNode* inst = node->as<InstNode>();
          ASMJIT_RA_LOG_COMPLEX({
            sb.clear();
            Logging::formatNode(sb, flags, cc(), node);
            logger->logf(" %s\n", sb.data());
          });

          uint32_t controlType = BaseInst::kControlNone;
          ib.reset();
          ASMJIT_PROPAGATE(static_cast<This*>(this)->onInst(inst, controlType, ib));

          if (node->type() != BaseNode::kNodeInst) {
            if (node->type() == BaseNode::kNodeFuncCall) {
              ASMJIT_PROPAGATE(static_cast<This*>(this)->onCall(inst->as<FuncCallNode>(), ib));
            }
            else if (node->type() == BaseNode::kNodeFuncRet) {
              ASMJIT_PROPAGATE(static_cast<This*>(this)->onRet(inst->as<FuncRetNode>(), ib));
              controlType = BaseInst::kControlReturn;
            }
          }

          ASMJIT_PROPAGATE(_pass->assignRAInst(inst, _curBlock, ib));
          blockRegStats.combineWith(ib._stats);

          if (controlType != BaseInst::kControlNone) {
            // Support for conditional and unconditional jumps.
            if (controlType == BaseInst::kControlJump || controlType == BaseInst::kControlBranch) {
              _curBlock->setLast(node);
              _curBlock->addFlags(RABlock::kFlagHasTerminator);
              _curBlock->makeConstructed(blockRegStats);

              if (!(inst->instOptions() & BaseInst::kOptionUnfollow)) {
                // Jmp/Jcc/Call/Loop/etc...
                uint32_t opCount = inst->opCount();
                const Operand* opArray = inst->operands();

                // The last operand must be label (this supports also instructions
                // like jecx in explicit form).
                if (ASMJIT_UNLIKELY(opCount == 0 || !opArray[opCount - 1].isLabel()))
                  return DebugUtils::errored(kErrorInvalidState);

                LabelNode* cbLabel;
                ASMJIT_PROPAGATE(cc()->labelNodeOf(&cbLabel, opArray[opCount - 1].as<Label>()));

                RABlock* targetBlock = _pass->newBlockOrExistingAt(cbLabel);
                if (ASMJIT_UNLIKELY(!targetBlock))
                  return DebugUtils::errored(kErrorOutOfMemory);

                ASMJIT_PROPAGATE(_curBlock->appendSuccessor(targetBlock));
              }

              if (controlType == BaseInst::kControlJump) {
                // Unconditional jump makes the code after the jump unreachable,
                // which will be removed instantly during the CFG construction;
                // as we cannot allocate registers for instructions that are not
                // part of any block. Of course we can leave these instructions
                // as they are, however, that would only postpone the problem as
                // assemblers can't encode instructions that use virtual registers.
                _curBlock = nullptr;
              }
              else {
                // Conditional branch - the fall-through path continues in a
                // consecutive block that starts at the next node.
                node = next;
                if (ASMJIT_UNLIKELY(!node))
                  return DebugUtils::errored(kErrorInvalidState);

                RABlock* consecutiveBlock;
                if (node->type() == BaseNode::kNodeLabel) {
                  if (node->hasPassData()) {
                    consecutiveBlock = node->passData<RABlock>();
                  }
                  else {
                    consecutiveBlock = _pass->newBlock(node);
                    if (ASMJIT_UNLIKELY(!consecutiveBlock))
                      return DebugUtils::errored(kErrorOutOfMemory);
                    node->setPassData<RABlock>(consecutiveBlock);
                  }
                }
                else {
                  consecutiveBlock = _pass->newBlock(node);
                  if (ASMJIT_UNLIKELY(!consecutiveBlock))
                    return DebugUtils::errored(kErrorOutOfMemory);
                }

                _curBlock->addFlags(RABlock::kFlagHasConsecutive);
                ASMJIT_PROPAGATE(_curBlock->prependSuccessor(consecutiveBlock));

                _curBlock = consecutiveBlock;
                hasCode = false;
                blockRegStats.reset();

                if (_curBlock->isConstructed())
                  break;
                ASMJIT_PROPAGATE(_pass->addBlock(consecutiveBlock));

                ASMJIT_RA_LOG_COMPLEX({
                  lastPrintedBlock = _curBlock;
                  logger->logf(" {#%u}\n", lastPrintedBlock->blockId());
                });

                continue;
              }
            }

            if (controlType == BaseInst::kControlReturn) {
              _curBlock->setLast(node);
              _curBlock->makeConstructed(blockRegStats);
              ASMJIT_PROPAGATE(_curBlock->appendSuccessor(_retBlock));

              _curBlock = nullptr;
            }
          }
        }
      }
      else if (node->type() == BaseNode::kNodeLabel) {
        if (!_curBlock) {
          // If the current code is unreachable the label makes it reachable again.
          _curBlock = node->passData<RABlock>();
          if (_curBlock) {
            // If the label has a block assigned we can either continue with
            // it or skip it if the block has been constructed already.
            if (_curBlock->isConstructed())
              break;
          }
          else {
            // No block assigned, to create a new one, and assign it.
            _curBlock = _pass->newBlock(node);
            if (ASMJIT_UNLIKELY(!_curBlock))
              return DebugUtils::errored(kErrorOutOfMemory);
            node->setPassData<RABlock>(_curBlock);
          }

          hasCode = false;
          blockRegStats.reset();
          ASMJIT_PROPAGATE(_pass->addBlock(_curBlock));
        }
        else {
          if (node->hasPassData()) {
            RABlock* consecutive = node->passData<RABlock>();
            if (_curBlock == consecutive) {
              // The label currently processed is part of the current block. This
              // is only possible for multiple labels that are right next to each
              // other, or are separated by non-code nodes like directives and comments.
              if (ASMJIT_UNLIKELY(hasCode))
                return DebugUtils::errored(kErrorInvalidState);
            }
            else {
              // Label makes the current block constructed. There is a chance that the
              // Label is not used, but we don't know that at this point. In the worst
              // case there would be two blocks next to each other, it's just fine.
              ASMJIT_ASSERT(_curBlock->last() != node);
              _curBlock->setLast(node->prev());
              _curBlock->addFlags(RABlock::kFlagHasConsecutive);
              _curBlock->makeConstructed(blockRegStats);

              ASMJIT_PROPAGATE(_curBlock->appendSuccessor(consecutive));
              ASMJIT_PROPAGATE(_pass->addBlock(consecutive));

              _curBlock = consecutive;
              hasCode = false;
              blockRegStats.reset();
            }
          }
          else {
            // First time we see this label.
            if (hasCode) {
              // Cannot continue the current block if it already contains some
              // code. We need to create a new block and make it a successor.
              ASMJIT_ASSERT(_curBlock->last() != node);
              _curBlock->setLast(node->prev());
              _curBlock->addFlags(RABlock::kFlagHasConsecutive);
              _curBlock->makeConstructed(blockRegStats);

              RABlock* consecutive = _pass->newBlock(node);
              if (ASMJIT_UNLIKELY(!consecutive))
                return DebugUtils::errored(kErrorOutOfMemory);

              ASMJIT_PROPAGATE(_curBlock->appendSuccessor(consecutive));
              ASMJIT_PROPAGATE(_pass->addBlock(consecutive));

              _curBlock = consecutive;
              hasCode = false;
              blockRegStats.reset();
            }

            node->setPassData<RABlock>(_curBlock);
          }
        }

        ASMJIT_RA_LOG_COMPLEX({
          if (_curBlock && _curBlock != lastPrintedBlock) {
            lastPrintedBlock = _curBlock;
            logger->logf(" {#%u}\n", lastPrintedBlock->blockId());
          }

          sb.clear();
          Logging::formatNode(sb, flags, cc(), node);
          logger->logf(" %s\n", sb.data());
        });

        // Unlikely: Assume that the exit label is reached only once per function.
        if (ASMJIT_UNLIKELY(node->as<LabelNode>()->id() == exitLabelId)) {
          _curBlock->setLast(node);
          _curBlock->makeConstructed(blockRegStats);
          ASMJIT_PROPAGATE(_pass->addExitBlock(_curBlock));

          _curBlock = nullptr;
        }
      }
      else {
        ASMJIT_RA_LOG_COMPLEX({
          sb.clear();
          Logging::formatNode(sb, flags, cc(), node);
          logger->logf(" %s\n", sb.data());
        });

        if (node->type() == BaseNode::kNodeSentinel) {
          if (node == func->endNode()) {
            // Make sure we didn't flow here if this is the end of the function sentinel.
            if (ASMJIT_UNLIKELY(_curBlock))
              return DebugUtils::errored(kErrorInvalidState);
            break;
          }
        }
        else if (node->type() == BaseNode::kNodeFunc) {
          // RAPass can only compile a single function at a time. If we
          // encountered a function it must be the current one, bail if not.
          if (ASMJIT_UNLIKELY(node != func))
            return DebugUtils::errored(kErrorInvalidState);
          // PASS if this is the first node.
        }
        else {
          // PASS if this is a non-interesting or unknown node.
        }
      }

      // Advance to the next node.
      node = next;

      // NOTE: We cannot encounter a NULL node, because every function must be
      // terminated by a sentinel (`stop`) node. If we encountered a NULL node it
      // means that something went wrong and this node list is corrupted; bail in
      // such case.
      if (ASMJIT_UNLIKELY(!node))
        return DebugUtils::errored(kErrorInvalidState);
    }

    if (_pass->hasDanglingBlocks())
      return DebugUtils::errored(kErrorInvalidState);

    return kErrorOk;
  }
};
412
413 //! \}
414 //! \endcond
415
416 ASMJIT_END_NAMESPACE
417
418 #endif // !ASMJIT_NO_COMPILER
419 #endif // _ASMJIT_CORE_RABUILDERS_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_RADEFS_P_H
7 #define _ASMJIT_CORE_RADEFS_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/compiler.h"
13 #include "../core/logging.h"
14 #include "../core/support.h"
15 #include "../core/zone.h"
16 #include "../core/zonevector.h"
17
18 ASMJIT_BEGIN_NAMESPACE
19
20 //! \cond INTERNAL
21 //! \addtogroup asmjit_ra
22 //! \{
23
24 // ============================================================================
25 // [Logging]
26 // ============================================================================
27
#ifndef ASMJIT_NO_LOGGING
// Logs a printf-style formatted message through a local `logger` variable
// (which must be in scope at the expansion site), only when it's non-null.
# define ASMJIT_RA_LOG_FORMAT(...) \
  do { \
    if (logger) \
      logger->logf(__VA_ARGS__); \
  } while (0)
// Executes an arbitrary statement-list passed as the macro argument, only
// when the local `logger` (which must be in scope) is non-null.
# define ASMJIT_RA_LOG_COMPLEX(...) \
  do { \
    if (logger) { \
      __VA_ARGS__ \
    } \
  } while (0)
#else
// Logging disabled - both macros expand to no-ops.
# define ASMJIT_RA_LOG_FORMAT(...) ((void)0)
# define ASMJIT_RA_LOG_COMPLEX(...) ((void)0)
#endif
44
45 // ============================================================================
46 // [Forward Declarations]
47 // ============================================================================
48
49 class RAPass;
50 class RABlock;
51 struct RAStackSlot;
52
53 typedef ZoneVector<RABlock*> RABlocks;
54 typedef ZoneVector<RAWorkReg*> RAWorkRegs;
55
56 // ============================================================================
57 // [asmjit::RAStrategy]
58 // ============================================================================
59
//! Register allocation strategy, selected per register group.
struct RAStrategy {
  enum StrategyType : uint32_t {
    //! Simple strategy.
    kStrategySimple = 0,
    //! Complex strategy.
    kStrategyComplex = 1
  };

  //! Current strategy type, see \ref StrategyType.
  uint8_t _type;

  //! Creates a defaulted strategy (\ref kStrategySimple).
  inline RAStrategy() noexcept { reset(); }

  //! Resets the strategy back to \ref kStrategySimple.
  inline void reset() noexcept {
    _type = uint8_t(kStrategySimple);
  }

  //! Returns the current strategy type.
  inline uint32_t type() const noexcept { return _type; }
  //! Sets the strategy type to `type`.
  inline void setType(uint32_t type) noexcept { _type = uint8_t(type); }

  //! Tests whether the simple strategy is selected.
  inline bool isSimple() const noexcept { return _type == kStrategySimple; }
  //! Tests whether the complex strategy is selected.
  inline bool isComplex() const noexcept { return !isSimple(); }
};
77
78 // ============================================================================
79 // [asmjit::RAArchTraits]
80 // ============================================================================
81
82 //! Traits.
83 struct RAArchTraits {
84 enum Flags : uint32_t {
85 //! Registers can be swapped by a single instruction.
86 kHasSwap = 0x01u
87 };
88
89 uint8_t _flags[BaseReg::kGroupVirt];
90
91 //! \name Construction & Destruction
92 //! \{
93
94 inline RAArchTraits() noexcept { reset(); }
95 inline void reset() noexcept { memset(_flags, 0, sizeof(_flags)); }
96
97 //! \}
98
99 //! \name Accessors
100 //! \{
101
102 inline bool hasFlag(uint32_t group, uint32_t flag) const noexcept { return (_flags[group] & flag) != 0; }
103 inline bool hasSwap(uint32_t group) const noexcept { return hasFlag(group, kHasSwap); }
104
105 inline uint8_t& operator[](uint32_t group) noexcept {
106 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
107 return _flags[group];
108 }
109
110 inline const uint8_t& operator[](uint32_t group) const noexcept {
111 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
112 return _flags[group];
113 }
114
115 //! \}
116 };
117
118 // ============================================================================
119 // [asmjit::RARegCount]
120 // ============================================================================
121
122 //! Count of virtual or physical registers per group.
123 //!
124 //! \note This class uses 8-bit integers to represent counters, it's only used
125 //! in places where this is sufficient - for example total count of machine's
126 //! physical registers, count of virtual registers per instruction, etc. There
127 //! is also `RALiveCount`, which uses 32-bit integers and is indeed much safer.
128 struct RARegCount {
129 union {
130 uint8_t _regs[4];
131 uint32_t _packed;
132 };
133
134 //! \name Construction & Destruction
135 //! \{
136
137 //! Resets all counters to zero.
138 inline void reset() noexcept { _packed = 0; }
139
140 //! \}
141
142 //! \name Overloaded Operators
143 //! \{
144
145 inline uint8_t& operator[](uint32_t index) noexcept {
146 ASMJIT_ASSERT(index < BaseReg::kGroupVirt);
147 return _regs[index];
148 }
149
150 inline const uint8_t& operator[](uint32_t index) const noexcept {
151 ASMJIT_ASSERT(index < BaseReg::kGroupVirt);
152 return _regs[index];
153 }
154
155 inline RARegCount& operator=(const RARegCount& other) noexcept = default;
156
157 inline bool operator==(const RARegCount& other) const noexcept { return _packed == other._packed; }
158 inline bool operator!=(const RARegCount& other) const noexcept { return _packed != other._packed; }
159
160 //! \}
161
162 //! \name Utilities
163 //! \{
164
165 //! Returns the count of registers by the given register `group`.
166 inline uint32_t get(uint32_t group) const noexcept {
167 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
168
169 uint32_t shift = Support::byteShiftOfDWordStruct(group);
170 return (_packed >> shift) & uint32_t(0xFF);
171 }
172
173 //! Sets the register count by a register `group`.
174 inline void set(uint32_t group, uint32_t n) noexcept {
175 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
176 ASMJIT_ASSERT(n <= 0xFF);
177
178 uint32_t shift = Support::byteShiftOfDWordStruct(group);
179 _packed = (_packed & ~uint32_t(0xFF << shift)) + (n << shift);
180 }
181
182 //! Adds the register count by a register `group`.
183 inline void add(uint32_t group, uint32_t n = 1) noexcept {
184 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
185 ASMJIT_ASSERT(0xFF - uint32_t(_regs[group]) >= n);
186
187 uint32_t shift = Support::byteShiftOfDWordStruct(group);
188 _packed += n << shift;
189 }
190
191 //! \}
192 };
193
194 // ============================================================================
195 // [asmjit::RARegIndex]
196 // ============================================================================
197
198 struct RARegIndex : public RARegCount {
199 //! Build register indexes based on the given `count` of registers.
200 inline void buildIndexes(const RARegCount& count) noexcept {
201 uint32_t x = uint32_t(count._regs[0]);
202 uint32_t y = uint32_t(count._regs[1]) + x;
203 uint32_t z = uint32_t(count._regs[2]) + y;
204
205 ASMJIT_ASSERT(y <= 0xFF);
206 ASMJIT_ASSERT(z <= 0xFF);
207 _packed = Support::bytepack32_4x8(0, x, y, z);
208 }
209 };
210
211 // ============================================================================
212 // [asmjit::RARegMask]
213 // ============================================================================
214
215 //! Registers mask.
216 struct RARegMask {
217 uint32_t _masks[BaseReg::kGroupVirt];
218
219 //! \name Construction & Destruction
220 //! \{
221
222 inline void init(const RARegMask& other) noexcept {
223 for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++)
224 _masks[i] = other._masks[i];
225 }
226
227 //! Reset all register masks to zero.
228 inline void reset() noexcept {
229 for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++)
230 _masks[i] = 0;
231 }
232
233 //! \}
234
235 //! \name Overloaded Operators
236 //! \{
237
238 inline RARegMask& operator=(const RARegMask& other) noexcept = default;
239
240 inline bool operator==(const RARegMask& other) const noexcept {
241 return _masks[0] == other._masks[0] &&
242 _masks[1] == other._masks[1] &&
243 _masks[2] == other._masks[2] &&
244 _masks[3] == other._masks[3] ;
245 }
246
247 inline bool operator!=(const RARegMask& other) const noexcept {
248 return !operator==(other);
249 }
250
251 inline uint32_t& operator[](uint32_t index) noexcept {
252 ASMJIT_ASSERT(index < BaseReg::kGroupVirt);
253 return _masks[index];
254 }
255
256 inline const uint32_t& operator[](uint32_t index) const noexcept {
257 ASMJIT_ASSERT(index < BaseReg::kGroupVirt);
258 return _masks[index];
259 }
260
261 //! \}
262
263 //! \name Utilities
264 //! \{
265
266 //! Tests whether all register masks are zero (empty).
267 inline bool empty() const noexcept {
268 uint32_t m = 0;
269 for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++)
270 m |= _masks[i];
271 return m == 0;
272 }
273
274 inline bool has(uint32_t group, uint32_t mask = 0xFFFFFFFFu) const noexcept {
275 ASMJIT_ASSERT(group < BaseReg::kGroupVirt);
276 return (_masks[group] & mask) != 0;
277 }
278
279 template<class Operator>
280 inline void op(const RARegMask& other) noexcept {
281 for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++)
282 _masks[i] = Operator::op(_masks[i], other._masks[i]);
283 }
284
285 template<class Operator>
286 inline void op(uint32_t group, uint32_t input) noexcept {
287 _masks[group] = Operator::op(_masks[group], input);
288 }
289
290 //! \}
291 };
292
293 // ============================================================================
294 // [asmjit::RARegsStats]
295 // ============================================================================
296
297 //! Information associated with each instruction, propagated to blocks, loops,
298 //! and the whole function. This information can be used to do minor decisions
299 //! before the register allocator tries to do its job. For example to use fast
300 //! register allocation inside a block or loop it cannot have clobbered and/or
301 //! fixed registers, etc...
struct RARegsStats {
  //! Packed statistics - one bit per register group in each of the three
  //! 8-bit fields (used / fixed / clobbered).
  uint32_t _packed;

  enum Index : uint32_t {
    kIndexUsed = 0,
    kIndexFixed = 8,
    kIndexClobbered = 16
  };

  enum Mask : uint32_t {
    kMaskUsed = 0xFFu << kIndexUsed,
    kMaskFixed = 0xFFu << kIndexFixed,
    kMaskClobbered = 0xFFu << kIndexClobbered
  };

  //! Clears all statistics.
  inline void reset() noexcept { _packed = 0; }
  //! Merges (ORs) the statistics of `other` into this one.
  inline void combineWith(const RARegsStats& other) noexcept { _packed |= other._packed; }

  //! Tests whether any register group has a used register.
  inline bool hasUsed() const noexcept { return (_packed & kMaskUsed) != 0u; }
  //! Tests whether the given register `group` has a used register.
  inline bool hasUsed(uint32_t group) const noexcept { return (_packed & (uint32_t(1) << (kIndexUsed + group))) != 0u; }
  //! Marks the given register `group` as used.
  inline void makeUsed(uint32_t group) noexcept { _packed |= uint32_t(1) << (kIndexUsed + group); }

  //! Tests whether any register group has a fixed register.
  inline bool hasFixed() const noexcept { return (_packed & kMaskFixed) != 0u; }
  //! Tests whether the given register `group` has a fixed register.
  inline bool hasFixed(uint32_t group) const noexcept { return (_packed & (uint32_t(1) << (kIndexFixed + group))) != 0u; }
  //! Marks the given register `group` as fixed.
  inline void makeFixed(uint32_t group) noexcept { _packed |= uint32_t(1) << (kIndexFixed + group); }

  //! Tests whether any register group has a clobbered register.
  inline bool hasClobbered() const noexcept { return (_packed & kMaskClobbered) != 0u; }
  //! Tests whether the given register `group` has a clobbered register.
  inline bool hasClobbered(uint32_t group) const noexcept { return (_packed & (uint32_t(1) << (kIndexClobbered + group))) != 0u; }
  //! Marks the given register `group` as clobbered.
  inline void makeClobbered(uint32_t group) noexcept { _packed |= uint32_t(1) << (kIndexClobbered + group); }
};
332
333 // ============================================================================
334 // [asmjit::RALiveCount]
335 // ============================================================================
336
337 //! Count of live registers, per group.
338 class RALiveCount {
339 public:
340 uint32_t n[BaseReg::kGroupVirt];
341
342 //! \name Construction & Destruction
343 //! \{
344
345 inline RALiveCount() noexcept { reset(); }
346 inline RALiveCount(const RALiveCount& other) noexcept = default;
347
348 inline void init(const RALiveCount& other) noexcept {
349 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
350 n[group] = other.n[group];
351 }
352
353 inline void reset() noexcept {
354 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
355 n[group] = 0;
356 }
357
358 //! \}
359
360 //! \name Overloaded Operators
361 //! \{
362
363 inline RALiveCount& operator=(const RALiveCount& other) noexcept = default;
364
365 inline uint32_t& operator[](uint32_t group) noexcept { return n[group]; }
366 inline const uint32_t& operator[](uint32_t group) const noexcept { return n[group]; }
367
368 //! \}
369
370 //! \name Utilities
371 //! \{
372
373 template<class Operator>
374 inline void op(const RALiveCount& other) noexcept {
375 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
376 n[group] = Operator::op(n[group], other.n[group]);
377 }
378
379 //! \}
380 };
381
382 // ============================================================================
383 // [asmjit::LiveInterval]
384 // ============================================================================
385
//! A simple `[a, b)` interval used to describe a single live range.
struct LiveInterval {
  uint32_t a, b;

  enum Misc : uint32_t {
    kNaN = 0,
    kInf = 0xFFFFFFFFu
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a zero (invalid) interval.
  inline LiveInterval() noexcept : a(0), b(0) {}
  //! Creates an interval spanning `[start, end)`.
  inline LiveInterval(uint32_t start, uint32_t end) noexcept : a(start), b(end) {}
  inline LiveInterval(const LiveInterval& other) noexcept = default;

  //! Assigns both endpoints at once.
  inline void init(uint32_t start, uint32_t end) noexcept {
    a = start;
    b = end;
  }
  //! Copies the endpoints of `other`.
  inline void init(const LiveInterval& other) noexcept { init(other.a, other.b); }
  //! Resets the interval back to the zero (invalid) state.
  inline void reset() noexcept { init(0, 0); }

  //! \}

  //! \name Overloaded Operators
  //! \{

  inline LiveInterval& operator=(const LiveInterval& other) = default;

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the interval is non-empty (start precedes end).
  inline bool isValid() const noexcept { return b > a; }
  //! Returns the number of positions the interval covers.
  inline uint32_t width() const noexcept { return b - a; }

  //! \}
};
425
426 // ============================================================================
427 // [asmjit::RALiveSpan<T>]
428 // ============================================================================
429
430 template<typename T>
431 class RALiveSpan : public LiveInterval, public T {
432 public:
433 typedef T DataType;
434
435 //! \name Construction & Destruction
436 //! \{
437
438 inline RALiveSpan() noexcept : LiveInterval(), T() {}
439 inline RALiveSpan(const RALiveSpan<T>& other) noexcept : LiveInterval(other), T() {}
440 inline RALiveSpan(const LiveInterval& interval, const T& data) noexcept : LiveInterval(interval), T(data) {}
441 inline RALiveSpan(uint32_t a, uint32_t b) noexcept : LiveInterval(a, b), T() {}
442 inline RALiveSpan(uint32_t a, uint32_t b, const T& data) noexcept : LiveInterval(a, b), T(data) {}
443
444 inline void init(const RALiveSpan<T>& other) noexcept {
445 LiveInterval::init(static_cast<const LiveInterval&>(other));
446 T::init(static_cast<const T&>(other));
447 }
448
449 inline void init(const RALiveSpan<T>& span, const T& data) noexcept {
450 LiveInterval::init(static_cast<const LiveInterval&>(span));
451 T::init(data);
452 }
453
454 inline void init(const LiveInterval& interval, const T& data) noexcept {
455 LiveInterval::init(interval);
456 T::init(data);
457 }
458
459 //! \}
460
461 //! \name Overloaded Operators
462 //! \{
463
464 inline RALiveSpan& operator=(const RALiveSpan& other) {
465 init(other);
466 return *this;
467 }
468
469 //! \}
470 };
471
472 // ============================================================================
473 // [asmjit::RALiveSpans<T>]
474 // ============================================================================
475
//! An ordered collection of `RALiveSpan<T>` spans; `openAt()` only appends
//! or extends the last span, so spans stay sorted by their start position.
template<typename T>
class RALiveSpans {
public:
  ASMJIT_NONCOPYABLE(RALiveSpans<T>)

  typedef typename T::DataType DataType;
  //! Span storage.
  ZoneVector<T> _data;

  //! \name Construction & Destruction
  //! \{

  //! Creates an empty span collection.
  inline RALiveSpans() noexcept : _data() {}

  //! Resets the collection to a default-constructed state.
  inline void reset() noexcept { _data.reset(); }
  //! Releases the span storage back to `allocator`.
  inline void release(ZoneAllocator* allocator) noexcept { _data.release(allocator); }

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the collection has no spans.
  inline bool empty() const noexcept { return _data.empty(); }
  //! Returns the number of spans.
  inline uint32_t size() const noexcept { return _data.size(); }

  inline T* data() noexcept { return _data.data(); }
  inline const T* data() const noexcept { return _data.data(); }

  //! Tests whether the last span is still open (its end is `LiveInterval::kInf`).
  inline bool isOpen() const noexcept {
    uint32_t size = _data.size();
    return size > 0 && _data[size - 1].b == LiveInterval::kInf;
  }

  //! \}

  //! \name Utilities
  //! \{

  //! Swaps span storage with `other`.
  inline void swap(RALiveSpans<T>& other) noexcept { _data.swap(other._data); }

  //! Open the current live span.
  ASMJIT_INLINE Error openAt(ZoneAllocator* allocator, uint32_t start, uint32_t end) noexcept {
    bool wasOpen;
    return openAt(allocator, start, end, wasOpen);
  }

  //! Opens a `[start, end)` span, extending the last span instead of
  //! appending when the last span already reaches `start`.
  //!
  //! `wasOpen` is set to true when the last span extended past `start`,
  //! i.e. the span was still open at that position.
  ASMJIT_INLINE Error openAt(ZoneAllocator* allocator, uint32_t start, uint32_t end, bool& wasOpen) noexcept {
    uint32_t size = _data.size();
    wasOpen = false;

    if (size > 0) {
      T& last = _data[size - 1];
      if (last.b >= start) {
        wasOpen = last.b > start;
        last.b = end;
        return kErrorOk;
      }
    }

    return _data.append(allocator, T(start, end));
  }

  //! Closes the last span at the given `end` position (must not be empty).
  inline void closeAt(uint32_t end) noexcept {
    ASMJIT_ASSERT(!empty());

    uint32_t size = _data.size();
    _data[size - 1].b = end;
  }

  //! Returns the sum of width of all spans.
  //!
  //! \note Don't overuse, this iterates over all spans so it's O(N).
  //! It should be only called once and then cached.
  ASMJIT_INLINE uint32_t width() const noexcept {
    uint32_t width = 0;
    for (const T& span : _data)
      width += span.width();
    return width;
  }

  inline T& operator[](uint32_t index) noexcept { return _data[index]; }
  inline const T& operator[](uint32_t index) const noexcept { return _data[index]; }

  //! Tests whether this span collection intersects with `other`.
  inline bool intersects(const RALiveSpans<T>& other) const noexcept {
    return intersects(*this, other);
  }

  //! Builds a union of `x` and `y` into this object, assigning `yData` to
  //! every span copied from `y`.
  //!
  //! Returns `kErrorOk` on success; returns `0xFFFFFFFFu` (a sentinel, not a
  //! regular error code) when `x` and `y` intersect, in which case a
  //! non-overlapping union cannot be formed.
  ASMJIT_INLINE Error nonOverlappingUnionOf(ZoneAllocator* allocator, const RALiveSpans<T>& x, const RALiveSpans<T>& y, const DataType& yData) noexcept {
    uint32_t finalSize = x.size() + y.size();
    ASMJIT_PROPAGATE(_data.reserve(allocator, finalSize));

    T* dstPtr = _data.data();
    const T* xSpan = x.data();
    const T* ySpan = y.data();

    const T* xEnd = xSpan + x.size();
    const T* yEnd = ySpan + y.size();

    // Loop until we have intersection or either `xSpan == xEnd` or `ySpan == yEnd`,
    // which means that there is no intersection. We advance either `xSpan` or `ySpan`
    // depending on their ranges.
    if (xSpan != xEnd && ySpan != yEnd) {
      uint32_t xa, ya;
      xa = xSpan->a;
      for (;;) {
        while (ySpan->b <= xa) {
          dstPtr->init(*ySpan, yData);
          dstPtr++;
          if (++ySpan == yEnd)
            goto Done;
        }

        ya = ySpan->a;
        while (xSpan->b <= ya) {
          *dstPtr++ = *xSpan;
          if (++xSpan == xEnd)
            goto Done;
        }

        // We know that `xSpan->b > ySpan->a`, so check if `ySpan->b > xSpan->a`.
        xa = xSpan->a;
        if (ySpan->b > xa)
          return 0xFFFFFFFFu;
      }
    }

    // Copy whatever remains of `x` and `y` - at most one tail is non-empty.
  Done:
    while (xSpan != xEnd) {
      *dstPtr++ = *xSpan++;
    }

    while (ySpan != yEnd) {
      dstPtr->init(*ySpan, yData);
      dstPtr++;
      ySpan++;
    }

    // Fix the vector's length to the number of spans actually written.
    _data._setEndPtr(dstPtr);
    return kErrorOk;
  }

  //! Tests whether `x` and `y` have at least one pair of overlapping spans.
  static ASMJIT_INLINE bool intersects(const RALiveSpans<T>& x, const RALiveSpans<T>& y) noexcept {
    const T* xSpan = x.data();
    const T* ySpan = y.data();

    const T* xEnd = xSpan + x.size();
    const T* yEnd = ySpan + y.size();

    // Loop until we have intersection or either `xSpan == xEnd` or `ySpan == yEnd`,
    // which means that there is no intersection. We advance either `xSpan` or `ySpan`
    // depending on their end positions.
    if (xSpan == xEnd || ySpan == yEnd)
      return false;

    uint32_t xa, ya;
    xa = xSpan->a;

    for (;;) {
      while (ySpan->b <= xa)
        if (++ySpan == yEnd)
          return false;

      ya = ySpan->a;
      while (xSpan->b <= ya)
        if (++xSpan == xEnd)
          return false;

      // We know that `xSpan->b > ySpan->a`, so check if `ySpan->b > xSpan->a`.
      xa = xSpan->a;
      if (ySpan->b > xa)
        return true;
    }
  }

  //! \}
};
651
652 // ============================================================================
653 // [asmjit::RALiveStats]
654 // ============================================================================
655
//! Statistics about register liveness.
class RALiveStats {
public:
  //! Total width covered by the register's live spans.
  uint32_t _width = 0;
  //! Frequency (meaning assigned by the register allocator).
  float _freq = 0.0f;
  //! Allocation priority (meaning assigned by the register allocator).
  float _priority = 0.0f;

  //! \name Construction & Destruction
  //! \{

  //! Creates zeroed statistics.
  inline RALiveStats() noexcept {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the total width of all live spans.
  inline uint32_t width() const noexcept { return _width; }
  //! Returns the frequency.
  inline float freq() const noexcept { return _freq; }
  //! Returns the priority.
  inline float priority() const noexcept { return _priority; }

  //! \}
};
682
683 // ============================================================================
684 // [asmjit::LiveRegData]
685 // ============================================================================
686
687 struct LiveRegData {
688 uint32_t id;
689
690 inline explicit LiveRegData(uint32_t id = BaseReg::kIdBad) noexcept : id(id) {}
691 inline LiveRegData(const LiveRegData& other) noexcept : id(other.id) {}
692
693 inline void init(const LiveRegData& other) noexcept { id = other.id; }
694
695 inline bool operator==(const LiveRegData& other) const noexcept { return id == other.id; }
696 inline bool operator!=(const LiveRegData& other) const noexcept { return id != other.id; }
697 };
698
699 typedef RALiveSpan<LiveRegData> LiveRegSpan;
700 typedef RALiveSpans<LiveRegSpan> LiveRegSpans;
701
702 // ============================================================================
703 // [asmjit::RATiedReg]
704 // ============================================================================
705
//! Tied register merges one or more register operands into a single entity. It
707 //! contains information about its access (Read|Write) and allocation slots
708 //! (Use|Out) that are used by the register allocator and liveness analysis.
709 struct RATiedReg {
710 //! WorkReg id.
711 uint32_t _workId;
712 //! Allocation flags.
713 uint32_t _flags;
714 //! Registers where input {R|X} can be allocated to.
715 uint32_t _allocableRegs;
716 //! Indexes used to rewrite USE regs.
717 uint32_t _useRewriteMask;
718 //! Indexes used to rewrite OUT regs.
719 uint32_t _outRewriteMask;
720
721 union {
722 struct {
723 //! How many times the VirtReg is referenced in all operands.
724 uint8_t _refCount;
725 //! Physical register for use operation (ReadOnly / ReadWrite).
726 uint8_t _useId;
727 //! Physical register for out operation (WriteOnly).
728 uint8_t _outId;
729 //! Reserved for future use (padding).
730 uint8_t _rmSize;
731 };
732 //! Packed data.
733 uint32_t _packed;
734 };
735
736 //! Flags.
737 //!
738 //! Register access information is encoded in 4 flags in total:
739 //!
740 //! - `kRead` - Register is Read (ReadWrite if combined with `kWrite`).
741 //! - `kWrite` - Register is Written (ReadWrite if combined with `kRead`).
742 //! - `kUse` - Encoded as Read or ReadWrite.
743 //! - `kOut` - Encoded as WriteOnly.
744 //!
745 //! Let's describe all of these on two X86 instructions:
746 //!
747 //! - ADD x{R|W|Use}, x{R|Use} -> {x:R|W|Use }
748 //! - LEA x{ W|Out}, [x{R|Use} + x{R|Out}] -> {x:R|W|Use|Out }
749 //! - ADD x{R|W|Use}, y{R|Use} -> {x:R|W|Use y:R|Use}
750 //! - LEA x{ W|Out}, [x{R|Use} + y{R|Out}] -> {x:R|W|Use|Out y:R|Use}
751 //!
752 //! It should be obvious from the example above how these flags get created.
753 //! Each operand contains READ/WRITE information, which is then merged to
754 //! RATiedReg's flags. However, we also need to represent the possitility to
755 //! use see the operation as two independent operations - USE and OUT, because
756 //! the register allocator will first allocate USE registers, and then assign
757 //! OUT registers independently of USE registers.
758 enum Flags : uint32_t {
759 kRead = OpRWInfo::kRead, //!< Register is read.
760 kWrite = OpRWInfo::kWrite, //!< Register is written.
761 kRW = OpRWInfo::kRW, //!< Register both read and written.
762
763 kUse = 0x00000100u, //!< Register has a USE slot (read/rw).
764 kOut = 0x00000200u, //!< Register has an OUT slot (write-only).
765 kUseRM = 0x00000400u, //!< Register in USE slot can be patched to memory.
766 kOutRM = 0x00000800u, //!< Register in OUT slot can be patched to memory.
767
768 kUseFixed = 0x00001000u, //!< Register has a fixed USE slot.
769 kOutFixed = 0x00002000u, //!< Register has a fixed OUT slot.
770 kUseDone = 0x00004000u, //!< Register USE slot has been allocated.
771 kOutDone = 0x00008000u, //!< Register OUT slot has been allocated.
772
773 kDuplicate = 0x00010000u, //!< Register must be duplicated (function call only).
774 kLast = 0x00020000u, //!< Last occurrence of this VirtReg in basic block.
775 kKill = 0x00040000u, //!< Kill this VirtReg after use.
776
777 // Architecture specific flags are used during RATiedReg building to ensure
778 // that architecture-specific constraints are handled properly. These flags
779 // are not really needed after RATiedReg[] is built and copied to `RAInst`.
780
781 kX86Gpb = 0x01000000u //!< This RATiedReg references GPB-LO or GPB-HI.
782 };
783
784 static_assert(kRead == 0x1, "RATiedReg::kRead flag must be 0x1");
785 static_assert(kWrite == 0x2, "RATiedReg::kWrite flag must be 0x2");
786 static_assert(kRW == 0x3, "RATiedReg::kRW combination must be 0x3");
787
788 //! \name Construction & Destruction
789 //! \{
790
791 ASMJIT_INLINE void init(uint32_t workId, uint32_t flags, uint32_t allocableRegs, uint32_t useId, uint32_t useRewriteMask, uint32_t outId, uint32_t outRewriteMask, uint32_t rmSize = 0) noexcept {
792 _workId = workId;
793 _flags = flags;
794 _allocableRegs = allocableRegs;
795 _useRewriteMask = useRewriteMask;
796 _outRewriteMask = outRewriteMask;
797 _refCount = 1;
798 _useId = uint8_t(useId);
799 _outId = uint8_t(outId);
800 _rmSize = uint8_t(rmSize);
801 }
802
803 //! \}
804
805 //! \name Overloaded Operators
806 //! \{
807
808 inline RATiedReg& operator=(const RATiedReg& other) noexcept = default;
809
810 //! \}
811
812 //! \name Accessors
813 //! \{
814
815 //! Returns the associated WorkReg id.
816 inline uint32_t workId() const noexcept { return _workId; }
817
818 //! Checks if the given `flag` is set, see `Flags`.
819 inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
820
821 //! Returns TiedReg flags, see `RATiedReg::Flags`.
822 inline uint32_t flags() const noexcept { return _flags; }
823 //! Adds tied register flags, see `Flags`.
824 inline void addFlags(uint32_t flags) noexcept { _flags |= flags; }
825
826 //! Tests whether the register is read (writes `true` also if it's Read/Write).
827 inline bool isRead() const noexcept { return hasFlag(kRead); }
828 //! Tests whether the register is written (writes `true` also if it's Read/Write).
829 inline bool isWrite() const noexcept { return hasFlag(kWrite); }
830 //! Tests whether the register is read only.
831 inline bool isReadOnly() const noexcept { return (_flags & kRW) == kRead; }
832 //! Tests whether the register is write only.
833 inline bool isWriteOnly() const noexcept { return (_flags & kRW) == kWrite; }
834 //! Tests whether the register is read and written.
835 inline bool isReadWrite() const noexcept { return (_flags & kRW) == kRW; }
836
837 //! Tests whether the tied register has use operand (Read/ReadWrite).
838 inline bool isUse() const noexcept { return hasFlag(kUse); }
839 //! Tests whether the tied register has out operand (Write).
840 inline bool isOut() const noexcept { return hasFlag(kOut); }
841
842 //! Tests whether the USE slot can be patched to memory operand.
843 inline bool hasUseRM() const noexcept { return hasFlag(kUseRM); }
844 //! Tests whether the OUT slot can be patched to memory operand.
845 inline bool hasOutRM() const noexcept { return hasFlag(kOutRM); }
846
847 inline uint32_t rmSize() const noexcept { return _rmSize; }
848
849 inline void makeReadOnly() noexcept {
850 _flags = (_flags & ~(kOut | kWrite)) | kUse;
851 _useRewriteMask |= _outRewriteMask;
852 _outRewriteMask = 0;
853 }
854
855 inline void makeWriteOnly() noexcept {
856 _flags = (_flags & ~(kUse | kRead)) | kOut;
857 _outRewriteMask |= _useRewriteMask;
858 _useRewriteMask = 0;
859 }
860
861 //! Tests whether the register would duplicate.
862 inline bool isDuplicate() const noexcept { return hasFlag(kDuplicate); }
863
864 //! Tests whether the register (and the instruction it's part of) appears last in the basic block.
865 inline bool isLast() const noexcept { return hasFlag(kLast); }
866 //! Tests whether the register should be killed after USEd and/or OUTed.
867 inline bool isKill() const noexcept { return hasFlag(kKill); }
868
869 //! Tests whether the register is OUT or KILL (used internally by local register allocator).
870 inline bool isOutOrKill() const noexcept { return hasFlag(kOut | kKill); }
871
872 inline uint32_t allocableRegs() const noexcept { return _allocableRegs; }
873
874 inline uint32_t refCount() const noexcept { return _refCount; }
875 inline void addRefCount(uint32_t n = 1) noexcept { _refCount = uint8_t(_refCount + n); }
876
877 //! Tests whether the register must be allocated to a fixed physical register before it's used.
878 inline bool hasUseId() const noexcept { return _useId != BaseReg::kIdBad; }
879 //! Tests whether the register must be allocated to a fixed physical register before it's written.
880 inline bool hasOutId() const noexcept { return _outId != BaseReg::kIdBad; }
881
882 //! Returns a physical register id used for 'use' operation.
883 inline uint32_t useId() const noexcept { return _useId; }
884 //! Returns a physical register id used for 'out' operation.
885 inline uint32_t outId() const noexcept { return _outId; }
886
887 inline uint32_t useRewriteMask() const noexcept { return _useRewriteMask; }
888 inline uint32_t outRewriteMask() const noexcept { return _outRewriteMask; }
889
890 //! Sets a physical register used for 'use' operation.
891 inline void setUseId(uint32_t index) noexcept { _useId = uint8_t(index); }
892 //! Sets a physical register used for 'out' operation.
893 inline void setOutId(uint32_t index) noexcept { _outId = uint8_t(index); }
894
895 inline bool isUseDone() const noexcept { return hasFlag(kUseDone); }
896 inline bool isOutDone() const noexcept { return hasFlag(kUseDone); }
897
898 inline void markUseDone() noexcept { addFlags(kUseDone); }
899 inline void markOutDone() noexcept { addFlags(kUseDone); }
900
901 //! \}
902 };
903
904 // ============================================================================
905 // [asmjit::RAWorkReg]
906 // ============================================================================
907
//! Register allocator's view of a single `VirtReg` - holds the state the
//! allocator needs during analysis and allocation: flags, live spans and
//! statistics, allocated/clobbered masks, home/hint registers, and an
//! optional stack slot.
class RAWorkReg {
public:
  ASMJIT_NONCOPYABLE(RAWorkReg)

  //! RAPass specific ID used during analysis and allocation.
  uint32_t _workId;
  //! Copy of ID used by `VirtReg`.
  uint32_t _virtId;

  //! Permanent association with `VirtReg`.
  VirtReg* _virtReg;
  //! Temporary association with `RATiedReg`.
  RATiedReg* _tiedReg;
  //! Stack slot associated with the register.
  RAStackSlot* _stackSlot;

  //! Copy of a signature used by `VirtReg`.
  RegInfo _info;
  //! RAPass specific flags used during analysis and allocation.
  uint32_t _flags;
  //! IDs of all physical registers this WorkReg has been allocated to.
  uint32_t _allocatedMask;
  //! IDs of all physical registers that are clobbered during the lifetime of
  //! this WorkReg.
  //!
  //! This mask should be updated by `RAPass::buildLiveness()`, because it's
  //! global and should be updated after unreachable code has been removed.
  uint32_t _clobberSurvivalMask;

  //! A byte-mask where each bit represents one valid byte of the register.
  uint64_t _regByteMask;

  //! Argument index (or `kNoArgIndex` if none).
  uint8_t _argIndex;
  //! Global home register ID (if any, assigned by RA).
  uint8_t _homeRegId;
  //! Global hint register ID (provided by RA or user).
  uint8_t _hintRegId;

  //! Live spans of the `VirtReg`.
  LiveRegSpans _liveSpans;
  //! Live statistics.
  RALiveStats _liveStats;

  //! All nodes that read/write this VirtReg/WorkReg.
  ZoneVector<BaseNode*> _refs;
  //! All nodes that write to this VirtReg/WorkReg.
  ZoneVector<BaseNode*> _writes;

  enum Ids : uint32_t {
    kIdNone = 0xFFFFFFFFu
  };

  enum Flags : uint32_t {
    //! Has been coalesced to another WorkReg.
    kFlagCoalesced = 0x00000001u,
    //! Stack slot has to be allocated.
    kFlagStackUsed = 0x00000002u,
    //! Stack allocation is preferred.
    kFlagStackPreferred = 0x00000004u,
    //! Marked for stack argument reassignment.
    kFlagStackArgToStack = 0x00000008u,

    // TODO: Used?
    kFlagDirtyStats = 0x80000000u
  };

  enum ArgIndex : uint32_t {
    kNoArgIndex = 0xFFu
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a work register that wraps `vReg` and is identified by the
  //! allocator-local id `workId`.
  //!
  //! NOTE(review): `_writes` is missing from the initializer list - it relies
  //! on `ZoneVector`'s default constructor; confirm that's intended.
  ASMJIT_INLINE RAWorkReg(VirtReg* vReg, uint32_t workId) noexcept
    : _workId(workId),
      _virtId(vReg->id()),
      _virtReg(vReg),
      _tiedReg(nullptr),
      _stackSlot(nullptr),
      _info(vReg->info()),
      _flags(kFlagDirtyStats),
      _allocatedMask(0),
      _clobberSurvivalMask(0),
      _regByteMask(0),
      _argIndex(kNoArgIndex),
      _homeRegId(BaseReg::kIdBad),
      _hintRegId(BaseReg::kIdBad),
      _liveSpans(),
      _liveStats(),
      _refs() {}

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the RAPass-specific work id.
  inline uint32_t workId() const noexcept { return _workId; }
  //! Returns the id of the wrapped `VirtReg`.
  inline uint32_t virtId() const noexcept { return _virtId; }

  //! Returns the name of the wrapped `VirtReg`.
  inline const char* name() const noexcept { return _virtReg->name(); }
  //! Returns the size of the wrapped `VirtReg`'s name.
  inline uint32_t nameSize() const noexcept { return _virtReg->nameSize(); }

  //! Returns the type id of the wrapped `VirtReg`.
  inline uint32_t typeId() const noexcept { return _virtReg->typeId(); }

  //! Tests whether the given `flag` is set, see `Flags`.
  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
  //! Returns all flags, see `Flags`.
  inline uint32_t flags() const noexcept { return _flags; }
  //! Adds the given `flags`, see `Flags`.
  inline void addFlags(uint32_t flags) noexcept { _flags |= flags; }

  //! Tests whether a stack slot has to be allocated for this register.
  inline bool isStackUsed() const noexcept { return hasFlag(kFlagStackUsed); }
  //! Marks this register as requiring a stack slot.
  inline void markStackUsed() noexcept { addFlags(kFlagStackUsed); }

  //! Tests whether stack allocation is preferred for this register.
  inline bool isStackPreferred() const noexcept { return hasFlag(kFlagStackPreferred); }
  //! Marks stack allocation as preferred for this register.
  inline void markStackPreferred() noexcept { addFlags(kFlagStackPreferred); }

  //! Tests whether this RAWorkReg has been coalesced with another one (cannot be used anymore).
  inline bool isCoalesced() const noexcept { return hasFlag(kFlagCoalesced); }

  //! Returns the register information (copied from the `VirtReg`).
  inline const RegInfo& info() const noexcept { return _info; }
  //! Returns the register group.
  inline uint32_t group() const noexcept { return _info.group(); }
  //! Returns the register signature.
  inline uint32_t signature() const noexcept { return _info.signature(); }

  //! Returns the wrapped `VirtReg`.
  inline VirtReg* virtReg() const noexcept { return _virtReg; }

  //! Tests whether a `RATiedReg` is temporarily associated with this register.
  inline bool hasTiedReg() const noexcept { return _tiedReg != nullptr; }
  //! Returns the temporarily associated `RATiedReg` (may be null).
  inline RATiedReg* tiedReg() const noexcept { return _tiedReg; }
  //! Associates the given `tiedReg` with this register.
  inline void setTiedReg(RATiedReg* tiedReg) noexcept { _tiedReg = tiedReg; }
  //! Clears the `RATiedReg` association.
  inline void resetTiedReg() noexcept { _tiedReg = nullptr; }

  //! Tests whether a stack slot has been assigned to this register.
  inline bool hasStackSlot() const noexcept { return _stackSlot != nullptr; }
  //! Returns the assigned stack slot (may be null).
  inline RAStackSlot* stackSlot() const noexcept { return _stackSlot; }

  //! Returns the live spans of this register.
  inline LiveRegSpans& liveSpans() noexcept { return _liveSpans; }
  inline const LiveRegSpans& liveSpans() const noexcept { return _liveSpans; }

  //! Returns the liveness statistics of this register.
  inline RALiveStats& liveStats() noexcept { return _liveStats; }
  inline const RALiveStats& liveStats() const noexcept { return _liveStats; }

  //! Tests whether this register represents a function argument.
  inline bool hasArgIndex() const noexcept { return _argIndex != kNoArgIndex; }
  //! Returns the function argument index (`kNoArgIndex` if none).
  inline uint32_t argIndex() const noexcept { return _argIndex; }
  //! Sets the function argument index.
  inline void setArgIndex(uint32_t index) noexcept { _argIndex = uint8_t(index); }

  //! Tests whether a global home register id has been assigned.
  inline bool hasHomeRegId() const noexcept { return _homeRegId != BaseReg::kIdBad; }
  //! Returns the global home register id (`BaseReg::kIdBad` if none).
  inline uint32_t homeRegId() const noexcept { return _homeRegId; }
  //! Sets the global home register id.
  inline void setHomeRegId(uint32_t physId) noexcept { _homeRegId = uint8_t(physId); }

  //! Tests whether a global hint register id has been provided.
  inline bool hasHintRegId() const noexcept { return _hintRegId != BaseReg::kIdBad; }
  //! Returns the global hint register id (`BaseReg::kIdBad` if none).
  inline uint32_t hintRegId() const noexcept { return _hintRegId; }
  //! Sets the global hint register id.
  inline void setHintRegId(uint32_t physId) noexcept { _hintRegId = uint8_t(physId); }

  //! Returns the mask of physical registers this register has been allocated to.
  inline uint32_t allocatedMask() const noexcept { return _allocatedMask; }
  //! Adds physical register ids to the allocated mask.
  inline void addAllocatedMask(uint32_t mask) noexcept { _allocatedMask |= mask; }

  //! Returns the clobber-survival mask (see the `_clobberSurvivalMask` member).
  inline uint32_t clobberSurvivalMask() const noexcept { return _clobberSurvivalMask; }
  //! Adds physical register ids to the clobber-survival mask.
  inline void addClobberSurvivalMask(uint32_t mask) noexcept { _clobberSurvivalMask |= mask; }

  //! Returns the byte-mask of valid bytes of this register.
  inline uint64_t regByteMask() const noexcept { return _regByteMask; }
  //! Sets the byte-mask of valid bytes of this register.
  inline void setRegByteMask(uint64_t mask) noexcept { _regByteMask = mask; }

  //! \}
};
1069
1070 //! \}
1071 //! \endcond
1072
1073 ASMJIT_END_NAMESPACE
1074
1075 #endif // !ASMJIT_NO_COMPILER
1076 #endif // _ASMJIT_CORE_RADEFS_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_COMPILER
8
9 #include "../core/ralocal_p.h"
10 #include "../core/support.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 // ============================================================================
15 // [asmjit::RALocalAllocator - Utilities]
16 // ============================================================================
17
18 static ASMJIT_INLINE RATiedReg* RALocal_findTiedRegByWorkId(RATiedReg* tiedRegs, size_t count, uint32_t workId) noexcept {
19 for (size_t i = 0; i < count; i++)
20 if (tiedRegs[i].workId() == workId)
21 return &tiedRegs[i];
22 return nullptr;
23 }
24
25 // ============================================================================
26 // [asmjit::RALocalAllocator - Init / Reset]
27 // ============================================================================
28
29 Error RALocalAllocator::init() noexcept {
30 PhysToWorkMap* physToWorkMap;
31 WorkToPhysMap* workToPhysMap;
32
33 physToWorkMap = _pass->newPhysToWorkMap();
34 workToPhysMap = _pass->newWorkToPhysMap();
35 if (!physToWorkMap || !workToPhysMap)
36 return DebugUtils::errored(kErrorOutOfMemory);
37
38 _curAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
39 _curAssignment.initMaps(physToWorkMap, workToPhysMap);
40
41 physToWorkMap = _pass->newPhysToWorkMap();
42 workToPhysMap = _pass->newWorkToPhysMap();
43 if (!physToWorkMap || !workToPhysMap)
44 return DebugUtils::errored(kErrorOutOfMemory);
45
46 _tmpAssignment.initLayout(_pass->_physRegCount, _pass->workRegs());
47 _tmpAssignment.initMaps(physToWorkMap, workToPhysMap);
48
49 return kErrorOk;
50 }
51
52 // ============================================================================
53 // [asmjit::RALocalAllocator - Assignment]
54 // ============================================================================
55
// Creates the initial register assignment of function arguments that are live
// on entry. Runs in up to two iterations: the first tries to place each live
// argument into its home register id; if any argument could not be placed,
// a second iteration places remaining arguments into any free register or,
// when none is available, into a stack slot.
Error RALocalAllocator::makeInitialAssignment() noexcept {
  FuncNode* func = _pass->func();
  RABlock* entry = _pass->entryBlock();

  ZoneBitVector& liveIn = entry->liveIn();
  uint32_t argCount = func->argCount();
  uint32_t numIter = 1;

  for (uint32_t iter = 0; iter < numIter; iter++) {
    for (uint32_t i = 0; i < argCount; i++) {
      // Unassigned argument.
      VirtReg* virtReg = func->arg(i);
      if (!virtReg) continue;

      // Unreferenced argument.
      RAWorkReg* workReg = virtReg->workReg();
      if (!workReg) continue;

      // Overwritten argument.
      uint32_t workId = workReg->workId();
      if (!liveIn.bitAt(workId))
        continue;

      // Skip arguments already assigned (by the first iteration).
      uint32_t group = workReg->group();
      if (_curAssignment.workToPhysId(group, workId) != RAAssignment::kPhysNone)
        continue;

      uint32_t allocableRegs = _availableRegs[group] & ~_curAssignment.assigned(group);
      if (iter == 0) {
        // First iteration: Try to allocate to home RegId.
        if (workReg->hasHomeRegId()) {
          uint32_t physId = workReg->homeRegId();
          if (Support::bitTest(allocableRegs, physId)) {
            _curAssignment.assign(group, workId, physId, true);
            _pass->_argsAssignment.assignReg(i, workReg->info().type(), physId, workReg->typeId());
            continue;
          }
        }

        // Couldn't use the home id - a second iteration is required.
        numIter = 2;
      }
      else {
        // Second iteration: Pick any other unassigned register or assign to stack.
        if (allocableRegs) {
          uint32_t physId = Support::ctz(allocableRegs);
          _curAssignment.assign(group, workId, physId, true);
          _pass->_argsAssignment.assignReg(i, workReg->info().type(), physId, workReg->typeId());
        }
        else {
          // This register will definitely need stack, create the slot now and assign also `argIndex`
          // to it. We will patch `_argsAssignment` later after RAStackAllocator finishes.
          RAStackSlot* slot = _pass->getOrCreateStackSlot(workReg);
          if (ASMJIT_UNLIKELY(!slot))
            return DebugUtils::errored(kErrorOutOfMemory);

          // This means STACK_ARG may be moved to STACK.
          workReg->addFlags(RAWorkReg::kFlagStackArgToStack);
          _pass->_numStackArgsToStackSlots++;
        }
      }
    }
  }

  return kErrorOk;
}
121
122 Error RALocalAllocator::replaceAssignment(
123 const PhysToWorkMap* physToWorkMap,
124 const WorkToPhysMap* workToPhysMap) noexcept {
125
126 _curAssignment.copyFrom(physToWorkMap, workToPhysMap);
127 return kErrorOk;
128 }
129
// Emits moves, swaps, loads, spills, and kills so that the current assignment
// (`_curAssignment`) becomes equal to the destination assignment described by
// `dstPhysToWorkMap` / `dstWorkToPhysMap` (typically a block entry assignment).
//
// NOTE(review): When `tryMode` is true this function returns immediately after
// initializing `dst`, which makes all subsequent `tryMode` branches below
// unreachable. The try-mode code path is kept in place - presumably for later
// use - confirm against upstream history before relying on it.
Error RALocalAllocator::switchToAssignment(
  PhysToWorkMap* dstPhysToWorkMap,
  WorkToPhysMap* dstWorkToPhysMap,
  const ZoneBitVector& liveIn,
  bool dstReadOnly,
  bool tryMode) noexcept {

  RAAssignment dst;
  RAAssignment& cur = _curAssignment;

  dst.initLayout(_pass->_physRegCount, _pass->workRegs());
  dst.initMaps(dstPhysToWorkMap, dstWorkToPhysMap);

  if (tryMode)
    return kErrorOk;

  for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
    // ------------------------------------------------------------------------
    // STEP 1:
    //   - KILL all registers that are not live at `dst`,
    //   - SPILL all registers that are not assigned at `dst`.
    // ------------------------------------------------------------------------

    if (!tryMode) {
      Support::BitWordIterator<uint32_t> it(cur.assigned(group));
      while (it.hasNext()) {
        uint32_t physId = it.next();
        uint32_t workId = cur.physToWorkId(group, physId);

        // Must be true as we iterate over assigned registers.
        ASMJIT_ASSERT(workId != RAAssignment::kWorkNone);

        // KILL if it's not live on entry.
        if (!liveIn.bitAt(workId)) {
          onKillReg(group, workId, physId);
          continue;
        }

        // SPILL if it's not assigned on entry.
        uint32_t altId = dst.workToPhysId(group, workId);
        if (altId == RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
        }
      }
    }

    // ------------------------------------------------------------------------
    // STEP 2:
    //   - MOVE and SWAP registers from their current assignments into their
    //     DST assignments.
    //   - Build `willLoadRegs` mask of registers scheduled for `onLoadReg()`.
    // ------------------------------------------------------------------------

    // Current run-id (1 means more aggressive decisions).
    int32_t runId = -1;
    // Remaining registers scheduled for `onLoadReg()`.
    uint32_t willLoadRegs = 0;
    // Remaining registers to be allocated in this loop.
    uint32_t affectedRegs = dst.assigned(group);

    while (affectedRegs) {
      if (++runId == 2) {
        if (!tryMode)
          return DebugUtils::errored(kErrorInvalidState);

        // Stop in `tryMode` if we haven't done anything in past two rounds.
        break;
      }

      Support::BitWordIterator<uint32_t> it(affectedRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();
        uint32_t physMask = Support::bitMask(physId);

        uint32_t curWorkId = cur.physToWorkId(group, physId);
        uint32_t dstWorkId = dst.physToWorkId(group, physId);

        // The register must have assigned `dstWorkId` as we only iterate over assigned regs.
        ASMJIT_ASSERT(dstWorkId != RAAssignment::kWorkNone);

        if (curWorkId != RAAssignment::kWorkNone) {
          // Both assigned.
          if (curWorkId != dstWorkId) {
            // Wait a bit if this is the first run, we may avoid this if `curWorkId` moves out.
            if (runId <= 0)
              continue;

            uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
            if (altPhysId == RAAssignment::kPhysNone)
              continue;

            // Reset as we will do some changes to the current assignment.
            runId = -1;

            if (_archTraits.hasSwap(group)) {
              ASMJIT_PROPAGATE(onSwapReg(group, curWorkId, physId, dstWorkId, altPhysId));
            }
            else {
              // SPILL the reg if it's not dirty in DST, otherwise try to MOVE.
              if (!cur.isPhysDirty(group, physId)) {
                ASMJIT_PROPAGATE(onKillReg(group, curWorkId, physId));
              }
              else {
                uint32_t allocableRegs = _pass->_availableRegs[group] & ~cur.assigned(group);

                // If possible don't conflict with assigned regs at DST.
                if (allocableRegs & ~dst.assigned(group))
                  allocableRegs &= ~dst.assigned(group);

                if (allocableRegs) {
                  // MOVE is possible, thus preferred.
                  uint32_t tmpPhysId = Support::ctz(allocableRegs);

                  ASMJIT_PROPAGATE(onMoveReg(group, curWorkId, tmpPhysId, physId));
                  _pass->_clobberedRegs[group] |= Support::bitMask(tmpPhysId);
                }
                else {
                  // MOVE is impossible, must SPILL.
                  ASMJIT_PROPAGATE(onSpillReg(group, curWorkId, physId));
                }
              }

              // The physical register was just vacated - fall through to assign DST into it.
              goto Cleared;
            }
          }
        }
        else {
Cleared:
          // DST assigned, CUR unassigned.
          uint32_t altPhysId = cur.workToPhysId(group, dstWorkId);
          if (altPhysId == RAAssignment::kPhysNone) {
            if (liveIn.bitAt(dstWorkId))
              willLoadRegs |= physMask; // Scheduled for `onLoadReg()`.
            affectedRegs &= ~physMask;  // Unaffected from now.
            continue;
          }
          ASMJIT_PROPAGATE(onMoveReg(group, dstWorkId, physId, altPhysId));
        }

        // Both DST and CUR assigned to the same reg or CUR just moved to DST.
        if ((dst.dirty(group) & physMask) != (cur.dirty(group) & physMask)) {
          if ((dst.dirty(group) & physMask) == 0) {
            // CUR dirty, DST not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(!dst.isPhysDirty(group, physId) && cur.isPhysDirty(group, physId));

            // If `dstReadOnly` is true it means that the block was already
            // processed and we cannot change from CLEAN to DIRTY. In that case
            // the register has to be saved as it cannot enter the block DIRTY.
            if (dstReadOnly)
              ASMJIT_PROPAGATE(onSaveReg(group, dstWorkId, physId));
            else
              dst.makeDirty(group, dstWorkId, physId);
          }
          else {
            // DST dirty, CUR not dirty (the assert is just to visualize the condition).
            ASMJIT_ASSERT(dst.isPhysDirty(group, physId) && !cur.isPhysDirty(group, physId));

            cur.makeDirty(group, dstWorkId, physId);
          }
        }

        // Must match now...
        ASMJIT_ASSERT(dst.physToWorkId(group, physId) == cur.physToWorkId(group, physId));
        ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));

        runId = -1;
        affectedRegs &= ~physMask;
      }
    }

    // ------------------------------------------------------------------------
    // STEP 3:
    //   - Load registers specified by `willLoadRegs`.
    // ------------------------------------------------------------------------

    {
      Support::BitWordIterator<uint32_t> it(willLoadRegs);
      while (it.hasNext()) {
        uint32_t physId = it.next();

        if (!cur.isPhysAssigned(group, physId)) {
          uint32_t workId = dst.physToWorkId(group, physId);

          // The algorithm is broken if it tries to load a register that is not in LIVE-IN.
          ASMJIT_ASSERT(liveIn.bitAt(workId) == true);

          ASMJIT_PROPAGATE(onLoadReg(group, workId, physId));
          if (dst.isPhysDirty(group, physId))
            cur.makeDirty(group, workId, physId);
          ASMJIT_ASSERT(dst.isPhysDirty(group, physId) == cur.isPhysDirty(group, physId));
        }
        else {
          // Not possible otherwise.
          ASMJIT_ASSERT(tryMode == true);
        }
      }
    }
  }

  if (!tryMode) {
    // Here is code that dumps the conflicting part if something fails here:
    // if (!dst.equals(cur)) {
    //   uint32_t physTotal = dst._layout.physTotal;
    //   uint32_t workCount = dst._layout.workCount;
    //
    //   for (uint32_t physId = 0; physId < physTotal; physId++) {
    //     uint32_t dstWorkId = dst._physToWorkMap->workIds[physId];
    //     uint32_t curWorkId = cur._physToWorkMap->workIds[physId];
    //     if (dstWorkId != curWorkId)
    //       fprintf(stderr, "[PhysIdWork] PhysId=%u WorkId[DST(%u) != CUR(%u)]\n", physId, dstWorkId, curWorkId);
    //   }
    //
    //   for (uint32_t workId = 0; workId < workCount; workId++) {
    //     uint32_t dstPhysId = dst._workToPhysMap->physIds[workId];
    //     uint32_t curPhysId = cur._workToPhysMap->physIds[workId];
    //     if (dstPhysId != curPhysId)
    //       fprintf(stderr, "[WorkToPhys] WorkId=%u PhysId[DST(%u) != CUR(%u)]\n", workId, dstPhysId, curPhysId);
    //   }
    // }
    ASMJIT_ASSERT(dst.equals(cur));
  }

  return kErrorOk;
}
354
355 // ============================================================================
356 // [asmjit::RALocalAllocator - Allocation]
357 // ============================================================================
358
// Allocates registers for a single instruction `node`. Works per register
// group in eight steps: collect USE/OUT masks, decide USE assignments, free
// registers that must be freed, allocate/shuffle USE registers, kill OUT/KILL
// registers, spill CLOBBERed ones, handle duplicates, and finally assign OUT
// registers.
Error RALocalAllocator::allocInst(InstNode* node) noexcept {
  RAInst* raInst = node->passData<RAInst>();

  RATiedReg* outTiedRegs[Globals::kMaxPhysRegs];
  RATiedReg* dupTiedRegs[Globals::kMaxPhysRegs];

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  _node = node;
  _raInst = raInst;
  _tiedTotal = raInst->_tiedTotal;
  _tiedCount = raInst->_tiedCount;

  // Whether we already replaced register operand with memory operand.
  bool rmAllocated = false;

  for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
    uint32_t i, count = this->tiedCount(group);
    RATiedReg* tiedRegs = this->tiedRegs(group);

    uint32_t willUse = _raInst->_usedRegs[group];
    uint32_t willOut = _raInst->_clobberedRegs[group];
    uint32_t willFree = 0;
    uint32_t usePending = count;

    uint32_t outTiedCount = 0;
    uint32_t dupTiedCount = 0;

    // ------------------------------------------------------------------------
    // STEP 1:
    //
    // Calculate `willUse` and `willFree` masks based on tied registers we have.
    //
    // We don't do any assignment decisions at this stage as we just need to
    // collect some information first. Then, after we populate all masks needed
    // we can finally make some decisions in the second loop. The main reason
    // for this is that we really need `willFree` to make assignment decisions
    // for `willUse`, because if we mark some registers that will be freed, we
    // can consider them in decision making afterwards.
    // ------------------------------------------------------------------------

    for (i = 0; i < count; i++) {
      RATiedReg* tiedReg = &tiedRegs[i];

      // Add OUT and KILL to `outPending` for CLOBBERing and/or OUT assignment.
      if (tiedReg->isOutOrKill())
        outTiedRegs[outTiedCount++] = tiedReg;

      if (tiedReg->isDuplicate())
        dupTiedRegs[dupTiedCount++] = tiedReg;

      if (!tiedReg->isUse()) {
        tiedReg->markUseDone();
        usePending--;
        continue;
      }

      uint32_t workId = tiedReg->workId();
      uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

      if (tiedReg->hasUseId()) {
        // If the register has `useId` it means it can only be allocated in that register.
        uint32_t useMask = Support::bitMask(tiedReg->useId());

        // RAInstBuilder must have collected `usedRegs` on-the-fly.
        ASMJIT_ASSERT((willUse & useMask) != 0);

        if (assignedId == tiedReg->useId()) {
          // If the register is already allocated in this one, mark it done and continue.
          tiedReg->markUseDone();
          if (tiedReg->isWrite())
            _curAssignment.makeDirty(group, workId, assignedId);
          usePending--;
          willUse |= useMask;
        }
        else {
          willFree |= useMask & _curAssignment.assigned(group);
        }
      }
      else {
        // Check if the register must be moved to `allocableRegs`.
        uint32_t allocableRegs = tiedReg->allocableRegs();
        if (assignedId != RAAssignment::kPhysNone) {
          uint32_t assignedMask = Support::bitMask(assignedId);
          if ((allocableRegs & ~willUse) & assignedMask) {
            tiedReg->setUseId(assignedId);
            tiedReg->markUseDone();
            if (tiedReg->isWrite())
              _curAssignment.makeDirty(group, workId, assignedId);
            usePending--;
            willUse |= assignedMask;
          }
          else {
            willFree |= assignedMask;
          }
        }
      }
    }

    // ------------------------------------------------------------------------
    // STEP 2:
    //
    // Do some decision making to find the best candidates of registers that
    // need to be assigned, moved, and/or spilled. Only USE registers are
    // considered here, OUT will be decided later after all CLOBBERed and OUT
    // registers are unassigned.
    // ------------------------------------------------------------------------

    if (usePending) {
      // TODO: Not sure `liveRegs` should be used, maybe willUse and willFree would be enough and much more clear.

      // All registers that are currently alive without registers that will be freed.
      uint32_t liveRegs = _curAssignment.assigned(group) & ~willFree;

      for (i = 0; i < count; i++) {
        RATiedReg* tiedReg = &tiedRegs[i];
        if (tiedReg->isUseDone()) continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        // REG/MEM: Patch register operand to memory operand if not allocated.
        if (!rmAllocated && tiedReg->hasUseRM()) {
          if (assignedId == RAAssignment::kPhysNone && Support::isPowerOf2(tiedReg->useRewriteMask())) {
            RAWorkReg* workReg = workRegById(tiedReg->workId());
            uint32_t opIndex = Support::ctz(tiedReg->useRewriteMask()) / uint32_t(sizeof(Operand) / sizeof(uint32_t));
            uint32_t rmSize = tiedReg->rmSize();

            if (rmSize <= workReg->virtReg()->virtSize()) {
              Operand& op = node->operands()[opIndex];
              op = _pass->workRegAsMem(workReg);
              op.as<BaseMem>().setSize(rmSize);
              tiedReg->_useRewriteMask = 0;

              tiedReg->markUseDone();
              usePending--;

              rmAllocated = true;
              continue;
            }
          }
        }

        if (!tiedReg->hasUseId()) {
          uint32_t allocableRegs = tiedReg->allocableRegs() & ~(willFree | willUse);

          // DECIDE where to assign the USE register.
          uint32_t useId = decideOnAssignment(group, workId, assignedId, allocableRegs);
          uint32_t useMask = Support::bitMask(useId);

          willUse |= useMask;
          willFree |= useMask & liveRegs;
          tiedReg->setUseId(useId);

          if (assignedId != RAAssignment::kPhysNone) {
            uint32_t assignedMask = Support::bitMask(assignedId);

            willFree |= assignedMask;
            liveRegs &= ~assignedMask;

            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, useId, assignedId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }
          else {
            // OPTIMIZATION: Assign the USE register here if it's possible.
            if (!(liveRegs & useMask)) {
              ASMJIT_PROPAGATE(onLoadReg(group, workId, useId));
              tiedReg->markUseDone();
              if (tiedReg->isWrite())
                _curAssignment.makeDirty(group, workId, useId);
              usePending--;
            }
          }

          liveRegs |= useMask;
        }
      }
    }

    // Initially all used regs will be marked clobbered.
    uint32_t clobberedByInst = willUse | willOut;

    // ------------------------------------------------------------------------
    // STEP 3:
    //
    // Free all registers that we marked as `willFree`. Only registers that are not
    // USEd by the instruction are considered as we don't want to free regs we need.
    // ------------------------------------------------------------------------

    if (willFree) {
      uint32_t allocableRegs = _availableRegs[group] & ~(_curAssignment.assigned(group) | willFree | willUse | willOut);
      Support::BitWordIterator<uint32_t> it(willFree);

      do {
        uint32_t assignedId = it.next();
        if (_curAssignment.isPhysAssigned(group, assignedId)) {
          uint32_t workId = _curAssignment.physToWorkId(group, assignedId);

          // DECIDE whether to MOVE or SPILL.
          if (allocableRegs) {
            uint32_t reassignedId = decideOnUnassignment(group, workId, assignedId, allocableRegs);
            if (reassignedId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onMoveReg(group, workId, reassignedId, assignedId));
              allocableRegs ^= Support::bitMask(reassignedId);
              continue;
            }
          }

          ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
        }
      } while (it.hasNext());
    }

    // ------------------------------------------------------------------------
    // STEP 4:
    //
    // ALLOCATE / SHUFFLE all registers that we marked as `willUse` and weren't
    // allocated yet. This is a bit complicated as the allocation is iterative.
    // In some cases we have to wait before allocating a particular physical
    // register as it's still occupied by some other one, which we need to move
    // before we can use it. In this case we skip it and allocate some other
    // register instead (making it free for another iteration).
    //
    // NOTE: Iterations are mostly important for complicated allocations like
    // function calls, where there can be up to N registers used at once. Asm
    // instructions won't run the loop more than once in 99.9% of cases as they
    // use 2..3 registers in average.
    // ------------------------------------------------------------------------

    if (usePending) {
      bool mustSwap = false;
      do {
        uint32_t oldPending = usePending;

        for (i = 0; i < count; i++) {
          RATiedReg* thisTiedReg = &tiedRegs[i];
          if (thisTiedReg->isUseDone()) continue;

          uint32_t thisWorkId = thisTiedReg->workId();
          uint32_t thisPhysId = _curAssignment.workToPhysId(group, thisWorkId);

          // This would be a bug, fatal one!
          uint32_t targetPhysId = thisTiedReg->useId();
          ASMJIT_ASSERT(targetPhysId != thisPhysId);

          uint32_t targetWorkId = _curAssignment.physToWorkId(group, targetPhysId);
          if (targetWorkId != RAAssignment::kWorkNone) {
            RAWorkReg* targetWorkReg = workRegById(targetWorkId);

            // Swapping two registers can solve two allocation tasks by emitting
            // just a single instruction. However, swap is only available on few
            // architectures and it's definitely not available for each register
            // group. Calling `onSwapReg()` before checking these would be fatal.
            if (_archTraits.hasSwap(group) && thisPhysId != RAAssignment::kPhysNone) {
              ASMJIT_PROPAGATE(onSwapReg(group, thisWorkId, thisPhysId, targetWorkId, targetPhysId));

              thisTiedReg->markUseDone();
              if (thisTiedReg->isWrite())
                _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
              usePending--;

              // Double-hit.
              RATiedReg* targetTiedReg = RALocal_findTiedRegByWorkId(tiedRegs, count, targetWorkReg->workId());
              if (targetTiedReg && targetTiedReg->useId() == thisPhysId) {
                targetTiedReg->markUseDone();
                if (targetTiedReg->isWrite())
                  _curAssignment.makeDirty(group, targetWorkId, thisPhysId);
                usePending--;
              }
              continue;
            }

            if (!mustSwap)
              continue;

            // Only branched here if the previous iteration did nothing. This is
            // essentially a SWAP operation without having a dedicated instruction
            // for that purpose (vector registers, etc). The simplest way to
            // handle such case is to SPILL the target register.
            ASMJIT_PROPAGATE(onSpillReg(group, targetWorkId, targetPhysId));
          }

          if (thisPhysId != RAAssignment::kPhysNone) {
            ASMJIT_PROPAGATE(onMoveReg(group, thisWorkId, targetPhysId, thisPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
          else {
            ASMJIT_PROPAGATE(onLoadReg(group, thisWorkId, targetPhysId));

            thisTiedReg->markUseDone();
            if (thisTiedReg->isWrite())
              _curAssignment.makeDirty(group, thisWorkId, targetPhysId);
            usePending--;
          }
        }

        mustSwap = (oldPending == usePending);
      } while (usePending);
    }

    // ------------------------------------------------------------------------
    // STEP 5:
    //
    // KILL registers marked as KILL/OUT.
    // ------------------------------------------------------------------------

    uint32_t outPending = outTiedCount;
    if (outTiedCount) {
      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];

        uint32_t workId = tiedReg->workId();
        uint32_t physId = _curAssignment.workToPhysId(group, workId);

        // Must check if it's allocated as KILL can be related to OUT (like KILL
        // immediately after OUT, which could mean the register is not assigned).
        if (physId != RAAssignment::kPhysNone) {
          ASMJIT_PROPAGATE(onKillReg(group, workId, physId));
          willOut &= ~Support::bitMask(physId);
        }

        // We still maintain number of pending registers for OUT assignment.
        // So, if this is only KILL, not OUT, we can safely decrement it.
        outPending -= !tiedReg->isOut();
      }
    }

    // ------------------------------------------------------------------------
    // STEP 6:
    //
    // SPILL registers that will be CLOBBERed. Since OUT and KILL were
    // already processed this is used mostly to handle function CALLs.
    // ------------------------------------------------------------------------

    if (willOut) {
      Support::BitWordIterator<uint32_t> it(willOut);
      do {
        uint32_t physId = it.next();
        uint32_t workId = _curAssignment.physToWorkId(group, physId);

        if (workId == RAAssignment::kWorkNone)
          continue;

        ASMJIT_PROPAGATE(onSpillReg(group, workId, physId));
      } while (it.hasNext());
    }

    // ------------------------------------------------------------------------
    // STEP 7:
    //
    // Duplication.
    // ------------------------------------------------------------------------

    for (i = 0; i < dupTiedCount; i++) {
      RATiedReg* tiedReg = dupTiedRegs[i];
      uint32_t workId = tiedReg->workId();
      uint32_t srcId = tiedReg->useId();

      Support::BitWordIterator<uint32_t> it(tiedReg->_allocableRegs);
      while (it.hasNext()) {
        uint32_t dstId = it.next();
        if (dstId == srcId)
          continue;
        _pass->onEmitMove(workId, dstId, srcId);
      }
    }

    // ------------------------------------------------------------------------
    // STEP 8:
    //
    // Assign OUT registers.
    // ------------------------------------------------------------------------

    if (outPending) {
      // Live registers, we need a separate variable (outside of `_curAssignment`)
      // to hold these because of KILLed registers. If we KILL a register here it
      // will go out from `_curAssignment`, but we cannot assign to it in here.
      uint32_t liveRegs = _curAssignment.assigned(group);

      // Must avoid as they have been already OUTed (added during the loop).
      uint32_t outRegs = 0;

      // Must avoid as they collide with already allocated ones.
      uint32_t avoidRegs = willUse & ~clobberedByInst;

      for (i = 0; i < outTiedCount; i++) {
        RATiedReg* tiedReg = outTiedRegs[i];
        if (!tiedReg->isOut()) continue;

        uint32_t workId = tiedReg->workId();
        uint32_t assignedId = _curAssignment.workToPhysId(group, workId);

        if (assignedId != RAAssignment::kPhysNone)
          ASMJIT_PROPAGATE(onKillReg(group, workId, assignedId));

        uint32_t physId = tiedReg->outId();
        if (physId == RAAssignment::kPhysNone) {
          uint32_t allocableRegs = _availableRegs[group] & ~(outRegs | avoidRegs);

          if (!(allocableRegs & ~liveRegs)) {
            // There are no more registers, decide which one to spill.
            uint32_t spillWorkId;
            physId = decideOnSpillFor(group, workId, allocableRegs & liveRegs, &spillWorkId);
            ASMJIT_PROPAGATE(onSpillReg(group, spillWorkId, physId));
          }
          else {
            physId = decideOnAssignment(group, workId, RAAssignment::kPhysNone, allocableRegs & ~liveRegs);
          }
        }

        // OUTs are CLOBBERed thus cannot be ASSIGNed right now.
        ASMJIT_ASSERT(!_curAssignment.isPhysAssigned(group, physId));

        if (!tiedReg->isKill())
          ASMJIT_PROPAGATE(onAssignReg(group, workId, physId, true));

        tiedReg->setOutId(physId);
        tiedReg->markOutDone();

        outRegs |= Support::bitMask(physId);
        liveRegs &= ~Support::bitMask(physId);
        outPending--;
      }

      clobberedByInst |= outRegs;
      ASMJIT_ASSERT(outPending == 0);
    }

    _clobberedRegs[group] |= clobberedByInst;
  }

  return kErrorOk;
}
803
804 Error RALocalAllocator::spillAfterAllocation(InstNode* node) noexcept {
805 // This is experimental feature that would spill registers that don't have
806 // home-id and are last in this basic block. This prevents saving these regs
807 // in other basic blocks and then restoring them (mostly relevant for loops).
808 RAInst* raInst = node->passData<RAInst>();
809 uint32_t count = raInst->tiedCount();
810
811 for (uint32_t i = 0; i < count; i++) {
812 RATiedReg* tiedReg = raInst->tiedAt(i);
813 if (tiedReg->isLast()) {
814 uint32_t workId = tiedReg->workId();
815 RAWorkReg* workReg = workRegById(workId);
816 if (!workReg->hasHomeRegId()) {
817 uint32_t group = workReg->group();
818 uint32_t assignedId = _curAssignment.workToPhysId(group, workId);
819 if (assignedId != RAAssignment::kPhysNone) {
820 _cc->_setCursor(node);
821 ASMJIT_PROPAGATE(onSpillReg(group, workId, assignedId));
822 }
823 }
824 }
825 }
826
827 return kErrorOk;
828 }
829
// Allocates registers for a branch instruction `node` that jumps to `target`.
// If `target` already has an entry assignment the current state is switched
// to it; when extra instructions are needed for the switch they are moved
// into a trampoline block and the branch is patched to jump there instead.
Error RALocalAllocator::allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept {
  // TODO: This should be used to make the branch allocation better.
  ASMJIT_UNUSED(cont);

  // The cursor must point to the previous instruction for a possible instruction insertion.
  _cc->_setCursor(node->prev());

  // Use TryMode of `switchToAssignment()` if possible.
  if (target->hasEntryAssignment()) {
    ASMJIT_PROPAGATE(switchToAssignment(
      target->entryPhysToWorkMap(),
      target->entryWorkToPhysMap(),
      target->liveIn(),
      target->isAllocated(),
      true));
  }

  ASMJIT_PROPAGATE(allocInst(node));

  if (target->hasEntryAssignment()) {
    BaseNode* injectionPoint = _pass->extraBlock()->prev();
    BaseNode* prevCursor = _cc->setCursor(injectionPoint);

    _tmpAssignment.copyFrom(_curAssignment);
    ASMJIT_PROPAGATE(switchToAssignment(
      target->entryPhysToWorkMap(),
      target->entryWorkToPhysMap(),
      target->liveIn(),
      target->isAllocated(),
      false));

    BaseNode* curCursor = _cc->cursor();
    if (curCursor != injectionPoint) {
      // Additional instructions emitted to switch from the current state to
      // the `target`'s state. This means that we have to move these instructions
      // into an independent code block and patch the jump location.
      Operand& targetOp(node->opType(node->opCount() - 1));
      if (ASMJIT_UNLIKELY(!targetOp.isLabel()))
        return DebugUtils::errored(kErrorInvalidState);

      Label trampoline = _cc->newLabel();
      Label savedTarget = targetOp.as<Label>();

      // Patch `target` to point to the `trampoline` we just created.
      targetOp = trampoline;

      // Clear a possible SHORT form as we have no clue now if the SHORT form would
      // be encodable after patching the target to `trampoline` (X86 specific).
      node->clearInstOptions(BaseInst::kOptionShortForm);

      // Finalize the switch assignment sequence.
      ASMJIT_PROPAGATE(_pass->onEmitJump(savedTarget));
      _cc->_setCursor(injectionPoint);
      _cc->bind(trampoline);
    }

    // Restore the previous cursor and the assignment saved before the switch.
    _cc->_setCursor(prevCursor);
    _curAssignment.swap(_tmpAssignment);
  }
  else {
    ASMJIT_PROPAGATE(_pass->setBlockEntryAssignment(target, block(), _curAssignment));
  }

  return kErrorOk;
}
895
896 // ============================================================================
897 // [asmjit::RALocalAllocator - Decision Making]
898 // ============================================================================
899
900 uint32_t RALocalAllocator::decideOnAssignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
901 ASMJIT_UNUSED(group);
902 ASMJIT_UNUSED(physId);
903 ASMJIT_ASSERT(allocableRegs != 0);
904
905 RAWorkReg* workReg = workRegById(workId);
906
907 // HIGHEST PRIORITY: Home register id.
908 if (workReg->hasHomeRegId()) {
909 uint32_t homeId = workReg->homeRegId();
910 if (Support::bitTest(allocableRegs, homeId))
911 return homeId;
912 }
913
914 // HIGH PRIORITY: Register IDs used upon block entries.
915 uint32_t previouslyAssignedRegs = workReg->allocatedMask();
916 if (allocableRegs & previouslyAssignedRegs)
917 allocableRegs &= previouslyAssignedRegs;
918
919 if (Support::isPowerOf2(allocableRegs))
920 return Support::ctz(allocableRegs);
921
922 // TODO: This is not finished.
923 return Support::ctz(allocableRegs);
924 }
925
926 uint32_t RALocalAllocator::decideOnUnassignment(uint32_t group, uint32_t workId, uint32_t physId, uint32_t allocableRegs) const noexcept {
927 ASMJIT_ASSERT(allocableRegs != 0);
928
929 // TODO:
930 ASMJIT_UNUSED(allocableRegs);
931 ASMJIT_UNUSED(group);
932 ASMJIT_UNUSED(workId);
933 ASMJIT_UNUSED(physId);
934
935 // if (!_curAssignment.isPhysDirty(group, physId)) {
936 // }
937
938 // Decided to SPILL.
939 return RAAssignment::kPhysNone;
940 }
941
942 uint32_t RALocalAllocator::decideOnSpillFor(uint32_t group, uint32_t workId, uint32_t spillableRegs, uint32_t* spillWorkId) const noexcept {
943 // May be used in the future to decide which register would be best to spill so `workId` can be assigned.
944 ASMJIT_UNUSED(workId);
945 ASMJIT_ASSERT(spillableRegs != 0);
946
947 Support::BitWordIterator<uint32_t> it(spillableRegs);
948 uint32_t bestPhysId = it.next();
949 uint32_t bestWorkId = _curAssignment.physToWorkId(group, bestPhysId);
950
951 // Avoid calculating the cost model if there is only one spillable register.
952 if (it.hasNext()) {
953 uint32_t bestCost = calculateSpillCost(group, bestWorkId, bestPhysId);
954 do {
955 uint32_t localPhysId = it.next();
956 uint32_t localWorkId = _curAssignment.physToWorkId(group, localPhysId);
957 uint32_t localCost = calculateSpillCost(group, localWorkId, localPhysId);
958
959 if (localCost < bestCost) {
960 bestCost = localCost;
961 bestPhysId = localPhysId;
962 bestWorkId = localWorkId;
963 }
964 } while (it.hasNext());
965 }
966
967 *spillWorkId = bestWorkId;
968 return bestPhysId;
969 }
970
971 ASMJIT_END_NAMESPACE
972
973 #endif // !ASMJIT_NO_COMPILER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_RALOCAL_P_H
7 #define _ASMJIT_CORE_RALOCAL_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/raassignment_p.h"
13 #include "../core/radefs_p.h"
14 #include "../core/rapass_p.h"
15 #include "../core/support.h"
16
17 ASMJIT_BEGIN_NAMESPACE
18
19 //! \cond INTERNAL
20 //! \addtogroup asmjit_ra
21 //! \{
22
23 // ============================================================================
24 // [asmjit::RALocalAllocator]
25 // ============================================================================
26
//! Local register allocator.
//!
//! Assigns physical registers to work registers while walking a single basic
//! block, and emits the moves/loads/saves required to switch between stored
//! block-entry assignments.
class RALocalAllocator {
public:
  ASMJIT_NONCOPYABLE(RALocalAllocator)

  typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
  typedef RAAssignment::WorkToPhysMap WorkToPhysMap;

  //! Link to `RAPass`.
  RAPass* _pass;
  //! Link to `BaseCompiler`.
  BaseCompiler* _cc;

  //! Architecture traits.
  RAArchTraits _archTraits;
  //! Registers available to the allocator.
  RARegMask _availableRegs;
  //! Registers clobbered by the allocator.
  RARegMask _clobberedRegs;

  //! Register assignment (current).
  RAAssignment _curAssignment;
  //! Register assignment used temporarily during assignment switches.
  RAAssignment _tmpAssignment;

  //! Link to the current `RABlock`.
  RABlock* _block;
  //! Currently processed `InstNode`.
  InstNode* _node;
  //! RA data associated with the currently processed `InstNode`.
  RAInst* _raInst;

  //! Count of all TiedReg's.
  uint32_t _tiedTotal;
  //! TiedReg's total counter.
  RARegCount _tiedCount;

  //! \name Construction & Destruction
  //! \{

  // (`_tmpAssignment` is not listed - it's default-constructed.)
  inline RALocalAllocator(RAPass* pass) noexcept
    : _pass(pass),
      _cc(pass->cc()),
      _archTraits(pass->_archTraits),
      _availableRegs(pass->_availableRegs),
      _clobberedRegs(),
      _curAssignment(),
      _block(nullptr),
      _node(nullptr),
      _raInst(nullptr),
      _tiedTotal(),
      _tiedCount() {}

  Error init() noexcept;

  //! \}

  //! \name Accessors
  //! \{

  inline RAWorkReg* workRegById(uint32_t workId) const noexcept { return _pass->workRegById(workId); }
  inline PhysToWorkMap* physToWorkMap() const noexcept { return _curAssignment.physToWorkMap(); }
  inline WorkToPhysMap* workToPhysMap() const noexcept { return _curAssignment.workToPhysMap(); }

  //! Returns the currently processed block.
  inline RABlock* block() const noexcept { return _block; }
  //! Sets the currently processed block.
  inline void setBlock(RABlock* block) noexcept { _block = block; }

  //! Returns the currently processed `InstNode`.
  inline InstNode* node() const noexcept { return _node; }
  //! Returns the currently processed `RAInst`.
  inline RAInst* raInst() const noexcept { return _raInst; }

  //! Returns all tied regs as `RATiedReg` array.
  inline RATiedReg* tiedRegs() const noexcept { return _raInst->tiedRegs(); }
  //! Returns tied registers grouped by the given `group`.
  inline RATiedReg* tiedRegs(uint32_t group) const noexcept { return _raInst->tiedRegs(group); }

  //! Returns count of all TiedRegs used by the instruction.
  inline uint32_t tiedCount() const noexcept { return _tiedTotal; }
  //! Returns count of TiedRegs used by the given register `group`.
  inline uint32_t tiedCount(uint32_t group) const noexcept { return _tiedCount.get(group); }

  //! Tests whether the current instruction uses any register of `group`.
  inline bool isGroupUsed(uint32_t group) const noexcept { return _tiedCount[group] != 0; }

  //! \}

  //! \name Assignment
  //! \{

  Error makeInitialAssignment() noexcept;

  Error replaceAssignment(
    const PhysToWorkMap* physToWorkMap,
    const WorkToPhysMap* workToPhysMap) noexcept;

  //! Switch to the given assignment by reassigning all register and emitting
  //! code that reassigns them. This is always used to switch to a previously
  //! stored assignment.
  //!
  //! If `tryMode` is true then the final assignment doesn't have to be exactly
  //! same as specified by `dstPhysToWorkMap` and `dstWorkToPhysMap`. This mode
  //! is only used before conditional jumps that already have assignment to
  //! generate a code sequence that is always executed regardless of the flow.
  Error switchToAssignment(
    PhysToWorkMap* dstPhysToWorkMap,
    WorkToPhysMap* dstWorkToPhysMap,
    const ZoneBitVector& liveIn,
    bool dstReadOnly,
    bool tryMode) noexcept;

  //! \}

  //! \name Allocation
  //! \{

  Error allocInst(InstNode* node) noexcept;
  Error spillAfterAllocation(InstNode* node) noexcept;

  Error allocBranch(InstNode* node, RABlock* target, RABlock* cont) noexcept;

  //! \}

  //! \name Decision Making
  //! \{

  //! Cost constants used by `calculateSpillCost()` - frequency dominates,
  //! a dirty register adds a fraction of one frequency unit.
  enum CostModel : uint32_t {
    kCostOfFrequency = 1048576,
    kCostOfDirtyFlag = kCostOfFrequency / 4
  };

  //! Converts a liveness frequency into an integer cost.
  inline uint32_t costByFrequency(float freq) const noexcept {
    return uint32_t(int32_t(freq * float(kCostOfFrequency)));
  }

  //! Returns the cost of spilling `workId` currently assigned to `assignedId`;
  //! dirty registers cost more as they must be saved first.
  inline uint32_t calculateSpillCost(uint32_t group, uint32_t workId, uint32_t assignedId) const noexcept {
    RAWorkReg* workReg = workRegById(workId);
    uint32_t cost = costByFrequency(workReg->liveStats().freq());

    if (_curAssignment.isPhysDirty(group, assignedId))
      cost += kCostOfDirtyFlag;

    return cost;
  }

  //! Decides on register assignment.
  uint32_t decideOnAssignment(uint32_t group, uint32_t workId, uint32_t assignedId, uint32_t allocableRegs) const noexcept;

  //! Decides on whether to MOVE or SPILL the given WorkReg.
  //!
  //! The function must return either `RAAssignment::kPhysNone`, which means that
  //! the WorkReg should be spilled, or a valid physical register ID, which means
  //! that the register should be moved to that physical register instead.
  uint32_t decideOnUnassignment(uint32_t group, uint32_t workId, uint32_t assignedId, uint32_t allocableRegs) const noexcept;

  //! Decides on best spill given a register mask `spillableRegs`
  uint32_t decideOnSpillFor(uint32_t group, uint32_t workId, uint32_t spillableRegs, uint32_t* spillWorkId) const noexcept;

  //! \}

  //! \name Emit
  //! \{

  //! Emits a move between a destination and source register, and fixes the
  //! register assignment.
  inline Error onMoveReg(uint32_t group, uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
    if (dstPhysId == srcPhysId) return kErrorOk;
    _curAssignment.reassign(group, workId, dstPhysId, srcPhysId);
    return _pass->onEmitMove(workId, dstPhysId, srcPhysId);
  }

  //! Emits a swap between two physical registers and fixes their assignment.
  //!
  //! \note Target must support this operation otherwise this would ASSERT.
  inline Error onSwapReg(uint32_t group, uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
    _curAssignment.swap(group, aWorkId, aPhysId, bWorkId, bPhysId);
    return _pass->onEmitSwap(aWorkId, aPhysId, bWorkId, bPhysId);
  }

  //! Emits a load from [VirtReg/WorkReg]'s spill slot to a physical register
  //! and makes it assigned and clean.
  inline Error onLoadReg(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    _curAssignment.assign(group, workId, physId, RAAssignment::kClean);
    return _pass->onEmitLoad(workId, physId);
  }

  //! Emits a save a physical register to a [VirtReg/WorkReg]'s spill slot,
  //! keeps it assigned, and makes it clean.
  inline Error onSaveReg(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    ASMJIT_ASSERT(_curAssignment.workToPhysId(group, workId) == physId);
    ASMJIT_ASSERT(_curAssignment.physToWorkId(group, physId) == workId);

    _curAssignment.makeClean(group, workId, physId);
    return _pass->onEmitSave(workId, physId);
  }

  //! Assigns a register, the content of it is undefined at this point.
  inline Error onAssignReg(uint32_t group, uint32_t workId, uint32_t physId, uint32_t dirty) noexcept {
    _curAssignment.assign(group, workId, physId, dirty);
    return kErrorOk;
  }

  //! Spills a variable/register, saves the content to the memory-home if modified.
  inline Error onSpillReg(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    if (_curAssignment.isPhysDirty(group, physId))
      ASMJIT_PROPAGATE(onSaveReg(group, workId, physId));
    return onKillReg(group, workId, physId);
  }

  //! Marks the assigned register as dirty (modified since load).
  inline Error onDirtyReg(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    _curAssignment.makeDirty(group, workId, physId);
    return kErrorOk;
  }

  //! Unassigns the register without saving it (content is discarded).
  inline Error onKillReg(uint32_t group, uint32_t workId, uint32_t physId) noexcept {
    _curAssignment.unassign(group, workId, physId);
    return kErrorOk;
  }

  //! \}
};
249
250 //! \}
251 //! \endcond
252
253 ASMJIT_END_NAMESPACE
254
255 #endif // !ASMJIT_NO_COMPILER
256 #endif // _ASMJIT_CORE_RALOCAL_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_COMPILER
8
9 #include "../core/ralocal_p.h"
10 #include "../core/rapass_p.h"
11 #include "../core/support.h"
12 #include "../core/type.h"
13 #include "../core/zonestack.h"
14
15 ASMJIT_BEGIN_NAMESPACE
16
17 // ============================================================================
18 // [asmjit::RABlock - Control Flow]
19 // ============================================================================
20
21 Error RABlock::appendSuccessor(RABlock* successor) noexcept {
22 RABlock* predecessor = this;
23
24 if (predecessor->_successors.contains(successor))
25 return kErrorOk;
26 ASMJIT_ASSERT(!successor->_predecessors.contains(predecessor));
27
28 ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
29 ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
30
31 predecessor->_successors.appendUnsafe(successor);
32 successor->_predecessors.appendUnsafe(predecessor);
33
34 return kErrorOk;
35 }
36
37 Error RABlock::prependSuccessor(RABlock* successor) noexcept {
38 RABlock* predecessor = this;
39
40 if (predecessor->_successors.contains(successor))
41 return kErrorOk;
42 ASMJIT_ASSERT(!successor->_predecessors.contains(predecessor));
43
44 ASMJIT_PROPAGATE(successor->_predecessors.willGrow(allocator()));
45 ASMJIT_PROPAGATE(predecessor->_successors.willGrow(allocator()));
46
47 predecessor->_successors.prependUnsafe(successor);
48 successor->_predecessors.prependUnsafe(predecessor);
49
50 return kErrorOk;
51 }
52
53 // ============================================================================
54 // [asmjit::RAPass - Construction / Destruction]
55 // ============================================================================
56
// Constructs the pass with all members zero/empty. The real per-function
// initialization happens in `runOnFunction()` via `RAPass_reset()`/`onInit()`.
RAPass::RAPass() noexcept
  : FuncPass("RAPass"),
    _allocator(),
    _logger(nullptr),
    _debugLogger(nullptr),
    _loggerFlags(0),
    _func(nullptr),
    _stop(nullptr),
    _extraBlock(nullptr),
    _blocks(),
    _exits(),
    _pov(),
    _instructionCount(0),
    _createdBlockCount(0),
    _lastTimestamp(0),
    _archRegsInfo(nullptr),
    _archTraits(),
    _physRegIndex(),
    _physRegCount(),
    _physRegTotal(0),
    _availableRegs(),
    _availableRegCount(),
    _clobberedRegs(),
    _globalMaxLiveCount(),
    _globalLiveSpans {},
    _temporaryMem(),
    _sp(),
    _fp(),
    _stackAllocator(),
    _argsAssignment(),
    _numStackArgsToStackSlots(0),
    _maxWorkRegNameSize(0) {}
RAPass::~RAPass() noexcept {}
90
91 // ============================================================================
92 // [asmjit::RAPass - RunOnFunction]
93 // ============================================================================
94
95 static void RAPass_reset(RAPass* self, FuncDetail* funcDetail) noexcept {
96 ZoneAllocator* allocator = self->allocator();
97
98 self->_blocks.reset();
99 self->_exits.reset();
100 self->_pov.reset();
101 self->_workRegs.reset();
102 self->_instructionCount = 0;
103 self->_createdBlockCount = 0;
104 self->_lastTimestamp = 0;
105
106 self->_archRegsInfo = nullptr;
107 self->_archTraits.reset();
108 self->_physRegIndex.reset();
109 self->_physRegCount.reset();
110 self->_physRegTotal = 0;
111
112 self->_availableRegs.reset();
113 self->_availableRegCount.reset();
114 self->_clobberedRegs.reset();
115
116 self->_workRegs.reset();
117 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
118 self->_workRegsOfGroup[group].reset();
119 self->_strategy[group].reset();
120 self->_globalLiveSpans[group] = nullptr;
121 }
122 self->_globalMaxLiveCount.reset();
123 self->_temporaryMem.reset();
124
125 self->_stackAllocator.reset(allocator);
126 self->_argsAssignment.reset(funcDetail);
127 self->_numStackArgsToStackSlots = 0;
128 self->_maxWorkRegNameSize = 0;
129 }
130
131 static void RAPass_resetVirtRegData(RAPass* self) noexcept {
132 // Zero everything so it cannot be used by accident.
133 for (RAWorkReg* wReg : self->_workRegs) {
134 VirtReg* vReg = wReg->virtReg();
135 vReg->_workReg = nullptr;
136 }
137 }
138
// Entry point of the pass - allocates registers for a single `func`. All
// state lives only for the duration of this call; everything is torn down
// before returning regardless of success or failure.
Error RAPass::runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept {
  _allocator.reset(zone);

#ifndef ASMJIT_NO_LOGGING
  _logger = logger;
  _debugLogger = nullptr;

  if (logger) {
    _loggerFlags = logger->flags();
    if (_loggerFlags & FormatOptions::kFlagDebugPasses)
      _debugLogger = logger;
  }
#endif

  // Initialize all core structures to use `zone` and `func`.
  BaseNode* end = func->endNode();
  _func = func;
  _stop = end->next();
  _extraBlock = end;

  RAPass_reset(this, &_func->_funcDetail);

  // Initialize architecture-specific members.
  onInit();

  // Perform all allocation steps required.
  Error err = onPerformAllSteps();

  // Must be called regardless of the allocation status.
  onDone();

  // TODO: I don't like this...
  // Reset possible connections introduced by the register allocator.
  RAPass_resetVirtRegData(this);

  // Reset all core structures and everything that depends on the passed `Zone`.
  RAPass_reset(this, nullptr);
  _allocator.reset(nullptr);

#ifndef ASMJIT_NO_LOGGING
  _logger = nullptr;
  _debugLogger = nullptr;
  _loggerFlags = 0;
#endif

  _func = nullptr;
  _stop = nullptr;
  _extraBlock = nullptr;

  // Reset `Zone` as nothing should persist between `runOnFunction()` calls.
  zone->reset();

  // We alter the compiler cursor, because it doesn't make sense to reference
  // it after the compilation - some nodes may disappear and the old cursor
  // can go out anyway.
  cc()->_setCursor(cc()->lastNode());

  return err;
}
198
// Runs all register-allocation phases in order; the first failing phase
// short-circuits and its error is propagated to the caller.
Error RAPass::onPerformAllSteps() noexcept {
  // Phase 1: Build the control-flow graph and drop unreachable blocks.
  ASMJIT_PROPAGATE(buildCFG());
  ASMJIT_PROPAGATE(buildViews());
  ASMJIT_PROPAGATE(removeUnreachableBlocks());

  // Phase 2: Analysis - dominators, liveness, function arguments.
  ASMJIT_PROPAGATE(buildDominators());
  ASMJIT_PROPAGATE(buildLiveness());
  ASMJIT_PROPAGATE(assignArgIndexToWorkRegs());

#ifndef ASMJIT_NO_LOGGING
  if (logger() && logger()->hasFlag(FormatOptions::kFlagAnnotations))
    ASMJIT_PROPAGATE(annotateCode());
#endif

  // Phase 3: Allocation - global (across blocks), then local (within blocks).
  ASMJIT_PROPAGATE(runGlobalAllocator());
  ASMJIT_PROPAGATE(runLocalAllocator());

  // Phase 4: Finalization - stack frame, prolog/epilog, and final rewrite.
  ASMJIT_PROPAGATE(updateStackFrame());
  ASMJIT_PROPAGATE(insertPrologEpilog());

  ASMJIT_PROPAGATE(rewrite());

  return kErrorOk;
}
223
224 // ============================================================================
225 // [asmjit::RAPass - CFG - Basic Block Management]
226 // ============================================================================
227
228 RABlock* RAPass::newBlock(BaseNode* initialNode) noexcept {
229 RABlock* block = zone()->newT<RABlock>(this);
230 if (ASMJIT_UNLIKELY(!block))
231 return nullptr;
232
233 block->setFirst(initialNode);
234 block->setLast(initialNode);
235
236 _createdBlockCount++;
237 return block;
238 }
239
// Returns the block associated with `cbLabel`, creating one when necessary.
// Consecutive labels (possibly separated by non-code nodes) are coalesced
// into a single block. When `stoppedAt` is non-null it receives the node at
// which the backward scan terminated.
RABlock* RAPass::newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt) noexcept {
  if (cbLabel->hasPassData())
    return cbLabel->passData<RABlock>();

  FuncNode* func = this->func();
  BaseNode* node = cbLabel->prev();
  RABlock* block = nullptr;

  // Try to find some label, but terminate the loop on any code. We try hard to
  // coalesce code that contains two consecutive labels or a combination of
  // non-code nodes between 2 or more labels.
  //
  // Possible cases that would share the same basic block:
  //
  //   1. Two or more consecutive labels:
  //        Label1:
  //        Label2:
  //
  //   2. Two or more labels separated by non-code nodes:
  //        Label1:
  //        ; Some comment...
  //        .align 16
  //        Label2:
  size_t nPendingLabels = 0;

  while (node) {
    if (node->type() == BaseNode::kNodeLabel) {
      // Function has a different NodeType, just make sure this was not messed
      // up as we must never associate BasicBlock with a `func` itself.
      ASMJIT_ASSERT(node != func);

      block = node->passData<RABlock>();
      if (block) {
        // Exit node has always a block associated with it. If we went here it
        // means that `cbLabel` passed here is after the end of the function
        // and cannot be merged with the function exit block.
        if (node == func->exitNode())
          block = nullptr;
        break;
      }

      nPendingLabels++;
    }
    else if (node->type() == BaseNode::kNodeAlign) {
      // Align node is fine.
    }
    else {
      break;
    }

    node = node->prev();
  }

  if (stoppedAt)
    *stoppedAt = node;

  if (!block) {
    block = newBlock();
    if (ASMJIT_UNLIKELY(!block))
      return nullptr;
  }

  cbLabel->setPassData<RABlock>(block);
  node = cbLabel;

  // Associate the same block with every pending label found while walking back.
  while (nPendingLabels) {
    node = node->prev();
    for (;;) {
      if (node->type() == BaseNode::kNodeLabel) {
        node->setPassData<RABlock>(block);
        nPendingLabels--;
        break;
      }

      node = node->prev();
      ASMJIT_ASSERT(node != nullptr);
    }
  }

  // A freshly created block - its boundaries cover all coalesced labels.
  if (!block->first()) {
    block->setFirst(node);
    block->setLast(cbLabel);
  }

  return block;
}
326
327 Error RAPass::addBlock(RABlock* block) noexcept {
328 ASMJIT_PROPAGATE(_blocks.willGrow(allocator()));
329
330 block->_blockId = blockCount();
331 _blocks.appendUnsafe(block);
332 return kErrorOk;
333 }
334
335 // ============================================================================
336 // [asmjit::RAPass - CFG - Views Order]
337 // ============================================================================
338
339 class RABlockVisitItem {
340 public:
341 inline RABlockVisitItem(RABlock* block, uint32_t index) noexcept
342 : _block(block),
343 _index(index) {}
344
345 inline RABlockVisitItem(const RABlockVisitItem& other) noexcept
346 : _block(other._block),
347 _index(other._index) {}
348
349 inline RABlockVisitItem& operator=(const RABlockVisitItem& other) noexcept = default;
350
351 inline RABlock* block() const noexcept { return _block; }
352 inline uint32_t index() const noexcept { return _index; }
353
354 RABlock* _block;
355 uint32_t _index;
356 };
357
// Builds the post-order view (`_pov`) of the CFG using an iterative DFS from
// the entry block, marking every visited block as reachable and recording its
// post-order index in `_povOrder`.
Error RAPass::buildViews() noexcept {
#ifndef ASMJIT_NO_LOGGING
  Logger* logger = debugLogger();
#endif

  ASMJIT_RA_LOG_FORMAT("[RAPass::BuildViews]\n");

  uint32_t count = blockCount();
  if (ASMJIT_UNLIKELY(!count)) return kErrorOk;

  ASMJIT_PROPAGATE(_pov.reserve(allocator(), count));

  // Explicit DFS stack of (block, next-successor-index) pairs.
  ZoneStack<RABlockVisitItem> stack;
  ASMJIT_PROPAGATE(stack.init(allocator()));

  ZoneBitVector visited;
  ASMJIT_PROPAGATE(visited.resize(allocator(), count));

  RABlock* current = _blocks[0];
  uint32_t i = 0;

  for (;;) {
    for (;;) {
      if (i >= current->successors().size())
        break;

      // Skip if already visited.
      RABlock* child = current->successors()[i++];
      if (visited.bitAt(child->blockId()))
        continue;

      // Mark as visited to prevent visiting the same block multiple times.
      visited.setBit(child->blockId(), true);

      // Add the current block on the stack, we will get back to it later.
      ASMJIT_PROPAGATE(stack.append(RABlockVisitItem(current, i)));
      current = child;
      i = 0;
    }

    // All successors processed - emit `current` in post-order.
    current->makeReachable();
    current->_povOrder = _pov.size();
    _pov.appendUnsafe(current);

    if (stack.empty())
      break;

    // Resume the parent block at the successor index where we left off.
    RABlockVisitItem top = stack.pop();
    current = top.block();
    i = top.index();
  }

  visited.release(allocator());
  return kErrorOk;
}
413
414 // ============================================================================
415 // [asmjit::RAPass - CFG - Dominators]
416 // ============================================================================
417
418 static ASMJIT_INLINE RABlock* intersectBlocks(RABlock* b1, RABlock* b2) noexcept {
419 while (b1 != b2) {
420 while (b2->povOrder() > b1->povOrder()) b1 = b1->iDom();
421 while (b1->povOrder() > b2->povOrder()) b2 = b2->iDom();
422 }
423 return b1;
424 }
425
// Based on "A Simple, Fast Dominance Algorithm".
//
// Iterates over blocks in reverse post-order, refining each block's immediate
// dominator from its predecessors until a fixed point is reached.
Error RAPass::buildDominators() noexcept {
#ifndef ASMJIT_NO_LOGGING
  Logger* logger = debugLogger();
#endif

  ASMJIT_RA_LOG_FORMAT("[RAPass::BuildDominators]\n");

  if (_blocks.empty())
    return kErrorOk;

  // The entry block dominates itself by definition.
  RABlock* entryBlock = this->entryBlock();
  entryBlock->setIDom(entryBlock);

  bool changed = true;
  uint32_t nIters = 0;

  while (changed) {
    nIters++;
    changed = false;

    // `_pov` is post-order, so iterating it backwards is reverse post-order.
    uint32_t i = _pov.size();
    while (i) {
      RABlock* block = _pov[--i];
      if (block == entryBlock)
        continue;

      RABlock* iDom = nullptr;
      const RABlocks& preds = block->predecessors();

      // Intersect the dominators of all already-processed predecessors.
      uint32_t j = preds.size();
      while (j) {
        RABlock* p = preds[--j];
        if (!p->iDom()) continue;
        iDom = !iDom ? p : intersectBlocks(iDom, p);
      }

      if (block->iDom() != iDom) {
        ASMJIT_RA_LOG_FORMAT("  IDom of #%u -> #%u\n", block->blockId(), iDom->blockId());
        block->setIDom(iDom);
        changed = true;
      }
    }
  }

  ASMJIT_RA_LOG_FORMAT("  Done (%u iterations)\n", nIters);
  return kErrorOk;
}
474
475 bool RAPass::_strictlyDominates(const RABlock* a, const RABlock* b) const noexcept {
476 ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
477 ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
478 ASMJIT_ASSERT(a != b); // Checked by `dominates()` and `strictlyDominates()`.
479
480 // Nothing strictly dominates the entry block.
481 const RABlock* entryBlock = this->entryBlock();
482 if (a == entryBlock)
483 return false;
484
485 const RABlock* iDom = b->iDom();
486 while (iDom != a && iDom != entryBlock)
487 iDom = iDom->iDom();
488
489 return iDom != entryBlock;
490 }
491
// Returns the nearest block dominating both `a` and `b`. Uses a timestamp to
// mark `a`s dominator chain, then walks `b`s chain until a marked block is
// found; falls back to the entry block, which dominates everything.
const RABlock* RAPass::_nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept {
  ASMJIT_ASSERT(a != nullptr); // There must be at least one block if this function is
  ASMJIT_ASSERT(b != nullptr); // called, as both `a` and `b` must be valid blocks.
  ASMJIT_ASSERT(a != b);       // Checked by `dominates()` and `properlyDominates()`.

  if (a == b)
    return a;

  // If `a` strictly dominates `b` then `a` is the nearest common dominator.
  if (_strictlyDominates(a, b))
    return a;

  // If `b` strictly dominates `a` then `b` is the nearest common dominator.
  if (_strictlyDominates(b, a))
    return b;

  const RABlock* entryBlock = this->entryBlock();
  uint64_t timestamp = nextTimestamp();

  // Mark all A's dominators.
  const RABlock* block = a->iDom();
  while (block != entryBlock) {
    block->setTimestamp(timestamp);
    block = block->iDom();
  }

  // Check all B's dominators against marked dominators of A.
  block = b->iDom();
  while (block != entryBlock) {
    if (block->hasTimestamp(timestamp))
      return block;
    block = block->iDom();
  }

  return entryBlock;
}
528
529 // ============================================================================
530 // [asmjit::RAPass - CFG - Utilities]
531 // ============================================================================
532
// Removes code of all blocks that `buildViews()` did not mark as reachable,
// and shrinks (or empties) each such block's node range accordingly.
Error RAPass::removeUnreachableBlocks() noexcept {
  uint32_t numAllBlocks = blockCount();
  uint32_t numReachableBlocks = reachableBlockCount();

  // All reachable -> nothing to do.
  if (numAllBlocks == numReachableBlocks)
    return kErrorOk;

#ifndef ASMJIT_NO_LOGGING
  Logger* logger = debugLogger();
#endif
  ASMJIT_RA_LOG_FORMAT("[RAPass::RemoveUnreachableBlocks (%u of %u unreachable)]\n", numAllBlocks - numReachableBlocks, numAllBlocks);

  for (uint32_t i = 0; i < numAllBlocks; i++) {
    RABlock* block = _blocks[i];
    if (block->isReachable())
      continue;

    ASMJIT_RA_LOG_FORMAT("  Removing block {%u}\n", i);
    BaseNode* first = block->first();
    BaseNode* last = block->last();

    // Remember the nodes surrounding the block so its (possibly empty)
    // remaining range can be re-established after removal.
    BaseNode* beforeFirst = first->prev();
    BaseNode* afterLast = last->next();

    BaseNode* node = first;
    while (node != afterLast) {
      BaseNode* next = node->next();

      // Keep nodes that are neither code nor removable (e.g. labels that may
      // still be referenced).
      if (node->isCode() || node->isRemovable())
        cc()->removeNode(node);
      node = next;
    }

    if (beforeFirst->next() == afterLast) {
      // Everything was removed - the block is now empty.
      block->setFirst(nullptr);
      block->setLast(nullptr);
    }
    else {
      // Some nodes survived - shrink the block range around them.
      block->setFirst(beforeFirst->next());
      block->setLast(afterLast->prev());
    }
  }

  return kErrorOk;
}
579
580 BaseNode* RAPass::findSuccessorStartingAt(BaseNode* node) noexcept {
581 while (node && (node->isInformative() || node->hasNoEffect()))
582 node = node->next();
583 return node;
584 }
585
586 bool RAPass::isNextTo(BaseNode* node, BaseNode* target) noexcept {
587 for (;;) {
588 node = node->next();
589 if (node == target)
590 return true;
591
592 if (!node)
593 return false;
594
595 if (node->isCode() || node->isData())
596 return false;
597 }
598 }
599
600 // ============================================================================
601 // [asmjit::RAPass - ?]
602 // ============================================================================
603
// Creates an `RAWorkReg` for `vReg`, links the two, registers the WorkReg in
// both the global and the per-group work-register lists, and stores it in
// `out`. Called by `asWorkReg()` only when `vReg` has no WorkReg yet.
Error RAPass::_asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept {
  // Checked by `asWorkReg()` - must be true.
  ASMJIT_ASSERT(vReg->_workReg == nullptr);

  uint32_t group = vReg->group();
  ASMJIT_ASSERT(group < BaseReg::kGroupVirt);

  RAWorkRegs& wRegs = workRegs();
  RAWorkRegs& wRegsByGroup = workRegs(group);

  // Reserve space first so both unchecked appends below cannot fail.
  ASMJIT_PROPAGATE(wRegs.willGrow(allocator()));
  ASMJIT_PROPAGATE(wRegsByGroup.willGrow(allocator()));

  RAWorkReg* wReg = zone()->newT<RAWorkReg>(vReg, wRegs.size());
  if (ASMJIT_UNLIKELY(!wReg))
    return DebugUtils::errored(kErrorOutOfMemory);

  vReg->setWorkReg(wReg);
  if (!vReg->isStack())
    wReg->setRegByteMask(Support::lsbMask<uint64_t>(vReg->virtSize()));
  wRegs.appendUnsafe(wReg);
  wRegsByGroup.appendUnsafe(wReg);

  // Only used by RA logging.
  _maxWorkRegNameSize = Support::max(_maxWorkRegNameSize, vReg->nameSize());

  *out = wReg;
  return kErrorOk;
}
633
// Allocates a `WorkToPhysMap` sized by the current work-register count and
// resets it. Returns nullptr on out-of-memory.
RAAssignment::WorkToPhysMap* RAPass::newWorkToPhysMap() noexcept {
  uint32_t count = workRegCount();
  size_t size = WorkToPhysMap::sizeOf(count);

  // If no registers are used it could be zero, in that case return a dummy
  // map instead of NULL.
  if (ASMJIT_UNLIKELY(!size)) {
    // NOTE: The shared dummy is zero-sized, so callers have nothing to write
    // into it - casting the const away is safe here.
    static const RAAssignment::WorkToPhysMap nullMap = {{ 0 }};
    return const_cast<RAAssignment::WorkToPhysMap*>(&nullMap);
  }

  WorkToPhysMap* map = zone()->allocT<WorkToPhysMap>(size);
  if (ASMJIT_UNLIKELY(!map))
    return nullptr;

  map->reset(count);
  return map;
}
652
653 RAAssignment::PhysToWorkMap* RAPass::newPhysToWorkMap() noexcept {
654 uint32_t count = physRegTotal();
655 size_t size = PhysToWorkMap::sizeOf(count);
656
657 PhysToWorkMap* map = zone()->allocT<PhysToWorkMap>(size);
658 if (ASMJIT_UNLIKELY(!map))
659 return nullptr;
660
661 map->reset(count);
662 return map;
663 }
664
665 // ============================================================================
666 // [asmjit::RAPass - Registers - Liveness Analysis and Statistics]
667 // ============================================================================
668
// Bit-vector helpers for the iterative liveness data-flow computation.
namespace LiveOps {
  typedef ZoneBitVector::BitWord BitWord;

  //! Data-flow operator computing liveness `IN` bits: IN = (OUT | GEN) & ~KILL.
  struct In {
    static ASMJIT_INLINE BitWord op(BitWord dst, BitWord out, BitWord gen, BitWord kill) noexcept {
      ASMJIT_UNUSED(dst);
      return (out | gen) & ~kill;
    }
  };

  //! Applies a unary-source `Operator` to each of the `n` words of `dst` and
  //! `a`; returns true when any word of `dst` changed.
  template<typename Operator>
  static ASMJIT_INLINE bool op(BitWord* dst, const BitWord* a, uint32_t n) noexcept {
    BitWord changed = 0;

    for (uint32_t i = 0; i < n; i++) {
      BitWord before = dst[i];
      BitWord after = Operator::op(before, a[i]);

      dst[i] = after;
      changed |= (before ^ after);
    }

    return changed != 0;
  }

  //! Two-source overload - `Operator::op(dst, a, b)` per word.
  template<typename Operator>
  static ASMJIT_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, uint32_t n) noexcept {
    BitWord changed = 0;

    for (uint32_t i = 0; i < n; i++) {
      BitWord before = dst[i];
      BitWord after = Operator::op(before, a[i], b[i]);

      dst[i] = after;
      changed |= (before ^ after);
    }

    return changed != 0;
  }

  //! Three-source overload - `Operator::op(dst, a, b, c)` per word.
  template<typename Operator>
  static ASMJIT_INLINE bool op(BitWord* dst, const BitWord* a, const BitWord* b, const BitWord* c, uint32_t n) noexcept {
    BitWord changed = 0;

    for (uint32_t i = 0; i < n; i++) {
      BitWord before = dst[i];
      BitWord after = Operator::op(before, a[i], b[i], c[i]);

      dst[i] = after;
      changed |= (before ^ after);
    }

    return changed != 0;
  }

  //! Recomputes `liveOut` and `liveIn` of `block` from its successors;
  //! returns true when either set changed (or when `initial` forces a pass).
  static ASMJIT_INLINE bool recalcInOut(RABlock* block, uint32_t numBitWords, bool initial = false) noexcept {
    bool changed = initial;

    const RABlocks& successors = block->successors();
    uint32_t numSuccessors = successors.size();

    // Calculate `OUT` based on `IN` of all successors.
    for (uint32_t i = 0; i < numSuccessors; i++)
      changed |= op<Support::Or>(block->liveOut().data(), successors[i]->liveIn().data(), numBitWords);

    // Calculate `IN` based on `OUT`, `GEN`, and `KILL` bits.
    if (changed)
      changed = op<In>(block->liveIn().data(), block->liveOut().data(), block->gen().data(), block->kill().data(), numBitWords);

    return changed;
  }
}
741
// Builds liveness information for the whole function in four phases:
//   1. GEN/KILL per block - each reachable block is scanned backwards.
//   2. IN/OUT fixed point - classic work-list data-flow over the CFG.
//   3. Position assignment - instructions get even positions (each occupies
//      two slots: USE at `pos`, OUT at `pos + 1`), and LiveCount/LiveSpans
//      of all WorkRegs are built.
//   4. Per-WorkReg statistics (width, use frequency, allocation priority).
ASMJIT_FAVOR_SPEED Error RAPass::buildLiveness() noexcept {
#ifndef ASMJIT_NO_LOGGING
  Logger* logger = debugLogger();
  StringTmp<512> sb;
#endif

  ASMJIT_RA_LOG_FORMAT("[RAPass::BuildLiveness]\n");

  uint32_t i;

  uint32_t numAllBlocks = blockCount();
  uint32_t numReachableBlocks = reachableBlockCount();

  // Visit counter starts at the number of reachable blocks because the
  // initial `recalcInOut()` pass below visits every reachable block once.
  uint32_t numVisits = numReachableBlocks;
  uint32_t numWorkRegs = workRegCount();
  uint32_t numBitWords = ZoneBitVector::_wordsPerBits(numWorkRegs);

  // Nothing to do if the function doesn't use any virtual registers.
  if (!numWorkRegs) {
    ASMJIT_RA_LOG_FORMAT("  Done (no virtual registers)\n");
    return kErrorOk;
  }

  ZoneVector<uint32_t> nUsesPerWorkReg; // Number of USEs of each RAWorkReg.
  ZoneVector<uint32_t> nOutsPerWorkReg; // Number of OUTs of each RAWorkReg.
  ZoneVector<uint32_t> nInstsPerBlock;  // Number of instructions of each RABlock.

  ASMJIT_PROPAGATE(nUsesPerWorkReg.resize(allocator(), numWorkRegs));
  ASMJIT_PROPAGATE(nOutsPerWorkReg.resize(allocator(), numWorkRegs));
  ASMJIT_PROPAGATE(nInstsPerBlock.resize(allocator(), numAllBlocks));

  // --------------------------------------------------------------------------
  // Calculate GEN/KILL of each block.
  // --------------------------------------------------------------------------

  // Blocks are scanned backwards (last -> first instruction) so that the
  // first write-only access seen for a WorkReg (i.e. the last one in program
  // order not preceded by a later read) ends up as KILL.
  for (i = 0; i < numReachableBlocks; i++) {
    RABlock* block = _pov[i];
    ASMJIT_PROPAGATE(block->resizeLiveBits(numWorkRegs));

    BaseNode* node = block->last();
    BaseNode* stop = block->first();

    uint32_t nInsts = 0;
    for (;;) {
      if (node->isInst()) {
        InstNode* inst = node->as<InstNode>();
        RAInst* raInst = inst->passData<RAInst>();
        ASMJIT_ASSERT(raInst != nullptr);

        RATiedReg* tiedRegs = raInst->tiedRegs();
        uint32_t count = raInst->tiedCount();

        for (uint32_t j = 0; j < count; j++) {
          RATiedReg* tiedReg = &tiedRegs[j];
          uint32_t workId = tiedReg->workId();

          // Update `nUses` and `nOuts`.
          nUsesPerWorkReg[workId] += 1u;
          nOutsPerWorkReg[workId] += uint32_t(tiedReg->isWrite());

          // Mark as:
          //   KILL - if this VirtReg is killed afterwards.
          //   LAST - if this VirtReg is last in this basic block.
          if (block->kill().bitAt(workId))
            tiedReg->addFlags(RATiedReg::kKill);
          else if (!block->gen().bitAt(workId))
            tiedReg->addFlags(RATiedReg::kLast);

          if (tiedReg->isWriteOnly()) {
            // KILL.
            block->kill().setBit(workId, true);
          }
          else {
            // GEN - a read before any (earlier) write clears KILL, because
            // the value now flows into the block from outside.
            block->kill().setBit(workId, false);
            block->gen().setBit(workId, true);
          }
        }

        nInsts++;
      }

      if (node == stop)
        break;

      node = node->prev();
      ASMJIT_ASSERT(node != nullptr);
    }

    nInstsPerBlock[block->blockId()] = nInsts;
  }

  // --------------------------------------------------------------------------
  // Calculate IN/OUT of each block.
  // --------------------------------------------------------------------------

  // Work-list algorithm: start with all reachable blocks queued (post-order
  // from `_pov`), and whenever a block's liveIn changes re-queue every
  // predecessor that is not already queued (tracked via `workBits`).
  {
    ZoneStack<RABlock*> workList;
    ZoneBitVector workBits;

    ASMJIT_PROPAGATE(workList.init(allocator()));
    ASMJIT_PROPAGATE(workBits.resize(allocator(), blockCount(), true));

    for (i = 0; i < numReachableBlocks; i++) {
      RABlock* block = _pov[i];
      LiveOps::recalcInOut(block, numBitWords, true);
      ASMJIT_PROPAGATE(workList.append(block));
    }

    while (!workList.empty()) {
      RABlock* block = workList.popFirst();
      uint32_t blockId = block->blockId();

      workBits.setBit(blockId, false);
      if (LiveOps::recalcInOut(block, numBitWords)) {
        const RABlocks& predecessors = block->predecessors();
        uint32_t numPredecessors = predecessors.size();

        for (uint32_t j = 0; j < numPredecessors; j++) {
          RABlock* pred = predecessors[j];
          if (!workBits.bitAt(pred->blockId())) {
            workBits.setBit(pred->blockId(), true);
            ASMJIT_PROPAGATE(workList.append(pred));
          }
        }
      }
      numVisits++;
    }

    workList.reset();
    workBits.release(allocator());
  }

  ASMJIT_RA_LOG_COMPLEX({
    logger->logf("  LiveIn/Out Done (%u visits)\n", numVisits);
    for (i = 0; i < numAllBlocks; i++) {
      RABlock* block = _blocks[i];

      ASMJIT_PROPAGATE(sb.assignFormat("  {#%u}\n", block->blockId()));
      ASMJIT_PROPAGATE(_dumpBlockLiveness(sb, block));

      logger->log(sb);
    }
  });

  // --------------------------------------------------------------------------
  // Reserve the space in each `RAWorkReg` for references.
  // --------------------------------------------------------------------------

  // Pre-reserving exact capacities (counted in the GEN/KILL pass) allows the
  // position-assignment loop below to use the non-allocating appendUnsafe().
  for (i = 0; i < numWorkRegs; i++) {
    RAWorkReg* workReg = workRegById(i);
    ASMJIT_PROPAGATE(workReg->_refs.reserve(allocator(), nUsesPerWorkReg[i]));
    ASMJIT_PROPAGATE(workReg->_writes.reserve(allocator(), nOutsPerWorkReg[i]));
  }

  // --------------------------------------------------------------------------
  // Assign block and instruction positions, build LiveCount and LiveSpans.
  // --------------------------------------------------------------------------

  // Positions start at 2 and advance by 2 per instruction; the odd slot in
  // between distinguishes a USE (read) from an OUT (write) at the same
  // instruction when live spans are opened/closed.
  uint32_t position = 2;
  for (i = 0; i < numAllBlocks; i++) {
    RABlock* block = _blocks[i];
    if (!block->isReachable())
      continue;

    BaseNode* node = block->first();
    BaseNode* stop = block->last();

    uint32_t endPosition = position + nInstsPerBlock[i] * 2;
    block->setFirstPosition(position);
    block->setEndPosition(endPosition);

    RALiveCount curLiveCount;
    RALiveCount maxLiveCount;

    // Process LIVE-IN.
    ZoneBitVector::ForEachBitSet it(block->liveIn());
    while (it.hasNext()) {
      RAWorkReg* workReg = _workRegs[uint32_t(it.next())];
      curLiveCount[workReg->group()]++;
      ASMJIT_PROPAGATE(workReg->liveSpans().openAt(allocator(), position, endPosition));
    }

    for (;;) {
      if (node->isInst()) {
        InstNode* inst = node->as<InstNode>();
        RAInst* raInst = inst->passData<RAInst>();
        ASMJIT_ASSERT(raInst != nullptr);

        RATiedReg* tiedRegs = raInst->tiedRegs();
        uint32_t count = raInst->tiedCount();

        inst->setPosition(position);
        raInst->_liveCount = curLiveCount;

        for (uint32_t j = 0; j < count; j++) {
          RATiedReg* tiedReg = &tiedRegs[j];
          uint32_t workId = tiedReg->workId();

          // Create refs and writes.
          RAWorkReg* workReg = workRegById(workId);
          workReg->_refs.appendUnsafe(node);
          if (tiedReg->isWrite())
            workReg->_writes.appendUnsafe(node);

          // We couldn't calculate this in previous steps, but since we know all LIVE-OUT
          // at this point it becomes trivial. If this is the last instruction that uses
          // this `workReg` and it's not LIVE-OUT then it is KILLed here.
          if (tiedReg->isLast() && !block->liveOut().bitAt(workId))
            tiedReg->addFlags(RATiedReg::kKill);

          // Write-only accesses start living at `position + 1` (OUT slot),
          // reads start at `position` (USE slot).
          LiveRegSpans& liveSpans = workReg->liveSpans();
          bool wasOpen;
          ASMJIT_PROPAGATE(liveSpans.openAt(allocator(), position + !tiedReg->isRead(), endPosition, wasOpen));

          uint32_t group = workReg->group();
          if (!wasOpen) {
            curLiveCount[group]++;
            raInst->_liveCount[group]++;
          }

          if (tiedReg->isKill()) {
            liveSpans.closeAt(position + !tiedReg->isRead() + 1);
            curLiveCount[group]--;
          }

          // Update `RAWorkReg::hintRegId`.
          if (tiedReg->hasUseId() && !workReg->hasHintRegId()) {
            uint32_t useId = tiedReg->useId();
            if (!(raInst->_clobberedRegs[group] & Support::bitMask(useId)))
              workReg->setHintRegId(useId);
          }

          // Update `RAWorkReg::clobberedSurvivalMask`.
          if (raInst->_clobberedRegs[group] && !tiedReg->isOutOrKill())
            workReg->addClobberSurvivalMask(raInst->_clobberedRegs[group]);
        }

        position += 2;
        maxLiveCount.op<Support::Max>(raInst->_liveCount);
      }

      if (node == stop)
        break;

      node = node->next();
      ASMJIT_ASSERT(node != nullptr);
    }

    block->_maxLiveCount = maxLiveCount;
    _globalMaxLiveCount.op<Support::Max>(maxLiveCount);
    ASMJIT_ASSERT(position == block->endPosition());
  }

  // --------------------------------------------------------------------------
  // Calculate WorkReg statistics.
  // --------------------------------------------------------------------------

  for (i = 0; i < numWorkRegs; i++) {
    RAWorkReg* workReg = _workRegs[i];

    LiveRegSpans& spans = workReg->liveSpans();
    uint32_t width = spans.width();
    // freq = references per position; priority additionally rewards VirtReg
    // weight so user-weighted registers pack first in binPack().
    float freq = width ? float(double(workReg->_refs.size()) / double(width)) : float(0);

    RALiveStats& stats = workReg->liveStats();
    stats._width = width;
    stats._freq = freq;
    stats._priority = freq + float(int(workReg->virtReg()->weight())) * 0.01f;
  }

  ASMJIT_RA_LOG_COMPLEX({
    sb.clear();
    _dumpLiveSpans(sb);
    logger->log(sb);
  });

  nUsesPerWorkReg.release(allocator());
  nOutsPerWorkReg.release(allocator());
  nInstsPerBlock.release(allocator());

  return kErrorOk;
}
1024
1025 Error RAPass::assignArgIndexToWorkRegs() noexcept {
1026 ZoneBitVector& liveIn = entryBlock()->liveIn();
1027 uint32_t argCount = func()->argCount();
1028
1029 for (uint32_t i = 0; i < argCount; i++) {
1030 // Unassigned argument.
1031 VirtReg* virtReg = func()->arg(i);
1032 if (!virtReg) continue;
1033
1034 // Unreferenced argument.
1035 RAWorkReg* workReg = virtReg->workReg();
1036 if (!workReg) continue;
1037
1038 // Overwritten argument.
1039 uint32_t workId = workReg->workId();
1040 if (!liveIn.bitAt(workId))
1041 continue;
1042
1043 workReg->setArgIndex(i);
1044
1045 const FuncValue& arg = func()->detail().arg(i);
1046 if (arg.isReg() && _archRegsInfo->regInfo[arg.regType()].group() == workReg->group()) {
1047 workReg->setHintRegId(arg.regId());
1048 }
1049 }
1050
1051 return kErrorOk;
1052 }
1053 // ============================================================================
1054 // [asmjit::RAPass - Allocation - Global]
1055 // ============================================================================
1056
1057 static void RAPass_dumpSpans(String& sb, uint32_t index, const LiveRegSpans& liveSpans) noexcept {
1058 sb.appendFormat(" %02u: ", index);
1059
1060 for (uint32_t i = 0; i < liveSpans.size(); i++) {
1061 const LiveRegSpan& liveSpan = liveSpans[i];
1062 if (i) sb.appendString(", ");
1063 sb.appendFormat("[%u:%u@%u]", liveSpan.a, liveSpan.b, liveSpan.id);
1064 }
1065
1066 sb.appendChar('\n');
1067 }
1068
1069 Error RAPass::runGlobalAllocator() noexcept {
1070 ASMJIT_PROPAGATE(initGlobalLiveSpans());
1071
1072 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
1073 ASMJIT_PROPAGATE(binPack(group));
1074 }
1075
1076 return kErrorOk;
1077 }
1078
1079 ASMJIT_FAVOR_SPEED Error RAPass::initGlobalLiveSpans() noexcept {
1080 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
1081 size_t physCount = _physRegCount[group];
1082 LiveRegSpans* liveSpans = allocator()->allocT<LiveRegSpans>(physCount * sizeof(LiveRegSpans));
1083
1084 if (ASMJIT_UNLIKELY(!liveSpans))
1085 return DebugUtils::errored(kErrorOutOfMemory);
1086
1087 for (size_t physId = 0; physId < physCount; physId++)
1088 new(&liveSpans[physId]) LiveRegSpans();
1089
1090 _globalLiveSpans[group] = liveSpans;
1091 }
1092
1093 return kErrorOk;
1094 }
1095
// Bin-packing register allocation for one register `group`: assigns each
// RAWorkReg a home physical register so that live spans sharing a physical
// register never overlap. Runs two passes - first WorkRegs with a register-id
// hint, then the rest. WorkRegs that cannot be packed are marked as
// stack-preferred and the group's strategy switches to complex.
//
// NOTE: `nonOverlappingUnionOf()` returns the special value 0xFFFFFFFF to
// signal "spans overlap" (not a real error); any other non-zero value is a
// genuine error and is propagated.
ASMJIT_FAVOR_SPEED Error RAPass::binPack(uint32_t group) noexcept {
  if (workRegCount(group) == 0)
    return kErrorOk;

#ifndef ASMJIT_NO_LOGGING
  Logger* logger = debugLogger();
  StringTmp<512> sb;
#endif

  ASMJIT_RA_LOG_FORMAT("[RAPass::BinPack] Available=%u (0x%08X) Count=%u\n",
    Support::popcnt(_availableRegs[group]),
    _availableRegs[group],
    workRegCount(group));

  uint32_t i;
  uint32_t physCount = _physRegCount[group];

  RAWorkRegs workRegs;
  LiveRegSpans tmpSpans;

  ASMJIT_PROPAGATE(workRegs.concat(allocator(), this->workRegs(group)));
  // Highest priority (freq + weight, computed in buildLiveness) packs first.
  workRegs.sort([](const RAWorkReg* a, const RAWorkReg* b) noexcept {
    return b->liveStats().priority() - a->liveStats().priority();
  });

  uint32_t numWorkRegs = workRegs.size();
  uint32_t availableRegs = _availableRegs[group];

  // First try to pack everything that provides register-id hint as these are
  // most likely function arguments and fixed (precolored) virtual registers.
  if (!workRegs.empty()) {
    uint32_t dstIndex = 0;

    for (i = 0; i < numWorkRegs; i++) {
      RAWorkReg* workReg = workRegs[i];
      if (workReg->hasHintRegId()) {
        uint32_t physId = workReg->hintRegId();
        if (availableRegs & Support::bitMask(physId)) {
          LiveRegSpans& live = _globalLiveSpans[group][physId];
          Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));

          if (err == kErrorOk) {
            workReg->setHomeRegId(physId);
            live.swap(tmpSpans);
            continue;
          }

          if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))
            return err;
        }
      }

      // Not packed - keep it for the second pass (in-place compaction).
      workRegs[dstIndex++] = workReg;
    }

    workRegs._setSize(dstIndex);
    numWorkRegs = dstIndex;
  }

  // Try to pack the rest.
  if (!workRegs.empty()) {
    uint32_t dstIndex = 0;

    for (i = 0; i < numWorkRegs; i++) {
      RAWorkReg* workReg = workRegs[i];
      uint32_t physRegs = availableRegs;

      while (physRegs) {
        uint32_t physId = Support::ctz(physRegs);
        if (workReg->clobberSurvivalMask()) {
          // Prefer a register that survives the clobbers this WorkReg lives
          // across, reducing the need to spill around calls.
          uint32_t preferredMask = physRegs & workReg->clobberSurvivalMask();
          if (preferredMask)
            physId = Support::ctz(preferredMask);
        }

        LiveRegSpans& live = _globalLiveSpans[group][physId];
        Error err = tmpSpans.nonOverlappingUnionOf(allocator(), live, workReg->liveSpans(), LiveRegData(workReg->virtId()));

        if (err == kErrorOk) {
          workReg->setHomeRegId(physId);
          live.swap(tmpSpans);
          break;
        }

        if (ASMJIT_UNLIKELY(err != 0xFFFFFFFFu))
          return err;

        // Overlap - exclude this physical register and try the next one.
        physRegs ^= Support::bitMask(physId);
      }

      // Keep it in `workRegs` if it was not allocated.
      if (!physRegs)
        workRegs[dstIndex++] = workReg;
    }

    workRegs._setSize(dstIndex);
    numWorkRegs = dstIndex;
  }

  ASMJIT_RA_LOG_COMPLEX({
    for (uint32_t physId = 0; physId < physCount; physId++) {
      LiveRegSpans& live = _globalLiveSpans[group][physId];
      if (live.empty())
        continue;

      sb.clear();
      RAPass_dumpSpans(sb, physId, live);
      logger->log(sb);
    }
  });

  if (workRegs.empty()) {
    ASMJIT_RA_LOG_FORMAT("  Completed.\n");
  }
  else {
    // Couldn't pack everything - switch to the complex strategy and prefer
    // stack homes for the remaining WorkRegs.
    _strategy[group].setType(RAStrategy::kStrategyComplex);
    for (RAWorkReg* workReg : workRegs)
      workReg->markStackPreferred();

    ASMJIT_RA_LOG_COMPLEX({
      uint32_t count = workRegs.size();
      sb.clear();
      sb.appendFormat("  Unassigned (%u): ", count);
      for (i = 0; i < numWorkRegs; i++) {
        RAWorkReg* workReg = workRegs[i];
        if (i) sb.appendString(", ");
        sb.appendString(workReg->name());
      }
      sb.appendChar('\n');
      logger->log(sb);
    });
  }

  return kErrorOk;
}
1231
1232 // ============================================================================
1233 // [asmjit::RAPass - Allocation - Local]
1234 // ============================================================================
1235
// Runs the local (per-block) register allocator over every reachable block.
// Blocks are visited starting from the entry block, preferring the
// consecutive successor of the block just finished; when that is not
// possible, the next reachable block that already has an entry assignment is
// picked (so the allocator always knows the register state on entry).
Error RAPass::runLocalAllocator() noexcept {
  RALocalAllocator lra(this);
  ASMJIT_PROPAGATE(lra.init());

  if (!blockCount())
    return kErrorOk;

  // The allocation is done when this reaches zero.
  uint32_t blocksRemaining = reachableBlockCount();

  // Current block.
  uint32_t blockId = 0;
  RABlock* block = _blocks[blockId];

  // The first block (entry) must always be reachable.
  ASMJIT_ASSERT(block->isReachable());

  // Assign function arguments for the initial block. The `lra` is valid now.
  lra.makeInitialAssignment();
  ASMJIT_PROPAGATE(setBlockEntryAssignment(block, block, lra._curAssignment));

  // The loop starts from the first block and iterates blocks in order, however,
  // the algorithm also allows to jump to any other block when finished if it's
  // a jump target. In-order iteration just makes sure that all blocks are visited.
  for (;;) {
    BaseNode* first = block->first();
    BaseNode* last = block->last();
    BaseNode* terminator = block->hasTerminator() ? last : nullptr;

    // Boundary nodes outside the block - the allocator may insert code before
    // the first and after the last node, so the block range is fixed up from
    // these after the block is processed.
    BaseNode* beforeFirst = first->prev();
    BaseNode* afterLast = last->next();

    bool unconditionalJump = false;
    RABlock* consecutive = nullptr;

    if (block->hasSuccessors())
      consecutive = block->successors()[0];

    lra.setBlock(block);
    block->makeAllocated();

    BaseNode* node = first;
    while (node != afterLast) {
      BaseNode* next = node->next();
      if (node->isInst()) {
        InstNode* inst = node->as<InstNode>();

        if (ASMJIT_UNLIKELY(inst == terminator)) {
          const RABlocks& successors = block->successors();
          if (block->hasConsecutive()) {
            // Conditional branch - allocate the branch target (last
            // successor) while keeping the consecutive (first successor)
            // reachable by fall-through.
            ASMJIT_PROPAGATE(lra.allocBranch(inst, successors.last(), successors.first()));

            node = next;
            continue;
          }
          else if (successors.size() > 1) {
            // TODO: Jump table.
            ASMJIT_ASSERT(false);
          }
          else {
            // Otherwise this is an unconditional jump, special handling isn't required.
            unconditionalJump = true;
          }
        }

        ASMJIT_PROPAGATE(lra.allocInst(inst));
        if (inst->type() == BaseNode::kNodeFuncCall)
          ASMJIT_PROPAGATE(onEmitPreCall(inst->as<FuncCallNode>()));
        else
          ASMJIT_PROPAGATE(lra.spillAfterAllocation(inst));
      }
      node = next;
    }

    if (consecutive) {
      if (consecutive->hasEntryAssignment()) {
        // The consecutive block already has an expected entry state - emit
        // the moves that transition into it. For an unconditional jump the
        // transition code must be placed before the jump itself.
        BaseNode* prev = afterLast ? afterLast->prev() : cc()->lastNode();
        cc()->_setCursor(unconditionalJump ? prev->prev() : prev);

        ASMJIT_PROPAGATE(
          lra.switchToAssignment(
            consecutive->entryPhysToWorkMap(),
            consecutive->entryWorkToPhysMap(),
            consecutive->liveIn(),
            consecutive->isAllocated(),
            false));
      }
      else {
        // First visitor of the consecutive block defines its entry state.
        ASMJIT_PROPAGATE(setBlockEntryAssignment(consecutive, block, lra._curAssignment));
        lra._curAssignment.copyFrom(consecutive->entryPhysToWorkMap(), consecutive->entryWorkToPhysMap());
      }
    }

    // Important as the local allocator can insert instructions before
    // and after any instruction within the basic block.
    block->setFirst(beforeFirst->next());
    block->setLast(afterLast ? afterLast->prev() : cc()->lastNode());

    if (--blocksRemaining == 0)
      break;

    // Switch to the next consecutive block, if any.
    if (consecutive) {
      block = consecutive;
      if (!block->isAllocated())
        continue;
    }

    // Get the next block.
    for (;;) {
      if (++blockId >= blockCount())
        blockId = 0;

      block = _blocks[blockId];
      if (!block->isReachable() || block->isAllocated() || !block->hasEntryAssignment())
        continue;

      break;
    }

    // If we switched to some block we have to update `lra`.
    lra.replaceAssignment(block->entryPhysToWorkMap(), block->entryWorkToPhysMap());
  }

  // Accumulate registers clobbered by the whole function.
  _clobberedRegs.op<Support::Or>(lra._clobberedRegs);
  return kErrorOk;
}
1363
// Records the register assignment `block` is entered with, derived from the
// state `fromAssignment` at the exit of `fromBlock`. Registers that are
// LIVE-OUT of `fromBlock` but not LIVE-IN of `block` are unassigned from the
// cloned maps. Returns kErrorOutOfMemory if the maps cannot be cloned.
Error RAPass::setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept {
  PhysToWorkMap* physToWorkMap = clonePhysToWorkMap(fromAssignment.physToWorkMap());
  WorkToPhysMap* workToPhysMap = cloneWorkToPhysMap(fromAssignment.workToPhysMap());

  if (ASMJIT_UNLIKELY(!physToWorkMap || !workToPhysMap))
    return DebugUtils::errored(kErrorOutOfMemory);

  block->setEntryAssignment(physToWorkMap, workToPhysMap);

  // Must be first block, otherwise impossible.
  if (block == fromBlock)
    return kErrorOk;

  const ZoneBitVector& liveOut = fromBlock->liveOut();
  const ZoneBitVector& liveIn = block->liveIn();

  // Wrap the cloned maps in a temporary RAAssignment so they can be edited
  // through the regular assignment API (the maps are modified in place).
  RAAssignment as;
  as.initLayout(_physRegCount, workRegs());
  as.initMaps(physToWorkMap, workToPhysMap);

  // It's possible that `fromBlock` has LIVE-OUT regs that `block` doesn't
  // have in LIVE-IN, these have to be unassigned.
  {
    // Iterates bits set in (liveOut & ~liveIn).
    ZoneBitVector::ForEachBitOp<Support::AndNot> it(liveOut, liveIn);
    while (it.hasNext()) {
      uint32_t workId = uint32_t(it.next());
      RAWorkReg* workReg = workRegById(workId);

      uint32_t group = workReg->group();
      uint32_t physId = as.workToPhysId(group, workId);

      if (physId != RAAssignment::kPhysNone)
        as.unassign(group, workId, physId);
    }
  }

  // Complex allocation strategy: Record register assignments upon block entry.
  {
    for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
      if (_strategy[group].isComplex()) {
        Support::BitWordIterator<uint32_t> it(as.assigned(group));
        while (it.hasNext()) {
          uint32_t physId = it.next();
          uint32_t workId = as.physToWorkId(group, physId);

          RAWorkReg* workReg = workRegById(workId);
          workReg->addAllocatedMask(Support::bitMask(physId));
        }
      }
    }
  }

  return kErrorOk;
}
1418
1419 // ============================================================================
1420 // [asmjit::RAPass - Allocation - Utilities]
1421 // ============================================================================
1422
1423 Error RAPass::useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept {
1424 ASMJIT_ASSERT(alignment <= 64);
1425
1426 if (_temporaryMem.isNone()) {
1427 ASMJIT_PROPAGATE(cc()->_newStack(_temporaryMem.as<BaseMem>(), size, alignment));
1428 }
1429 else {
1430 ASMJIT_ASSERT(_temporaryMem.as<BaseMem>().isRegHome());
1431 uint32_t virtId = _temporaryMem.as<BaseMem>().baseId();
1432
1433 VirtReg* virtReg = cc()->virtRegById(virtId);
1434 virtReg->_virtSize = Support::max(virtReg->virtSize(), size);
1435 virtReg->_alignment = uint8_t(Support::max(virtReg->alignment(), alignment));
1436 }
1437
1438 out = _temporaryMem.as<BaseMem>();
1439 return kErrorOk;
1440 }
1441
1442 // ============================================================================
1443 // [asmjit::RAPass - Allocation - Prolog / Epilog]
1444 // ============================================================================
1445
// Finalizes the function's stack frame after register allocation - merges
// clobbered registers into dirty regs, computes stack-slot offsets, the final
// local stack size, and fixes up stack-passed arguments.
Error RAPass::updateStackFrame() noexcept {
  // Update some StackFrame information that we updated during allocation. The
  // only information we don't have at the moment is final local stack size,
  // which is calculated last.
  FuncFrame& frame = func()->frame();
  for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
    frame.addDirtyRegs(group, _clobberedRegs[group]);
  frame.setLocalStackAlignment(_stackAllocator.alignment());

  // If there are stack arguments that are not assigned to registers upon entry
  // and the function doesn't require dynamic stack alignment we keep these
  // arguments where they are. This will also mark all stack slots that match
  // these arguments as allocated.
  if (_numStackArgsToStackSlots)
    ASMJIT_PROPAGATE(_markStackArgsToKeep());

  // Calculate offsets of all stack slots and update StackSize to reflect the calculated local stack size.
  ASMJIT_PROPAGATE(_stackAllocator.calculateStackFrame());
  frame.setLocalStackSize(_stackAllocator.stackSize());

  // Update the stack frame based on `_argsAssignment` and finalize it.
  // Finalization means to apply final calculation to the stack layout.
  ASMJIT_PROPAGATE(_argsAssignment.updateFuncFrame(frame));
  ASMJIT_PROPAGATE(frame.finalize());

  // StackAllocator allocates all slots starting from [0], adjust them when necessary.
  if (frame.localStackOffset() != 0)
    ASMJIT_PROPAGATE(_stackAllocator.adjustSlotOffsets(int32_t(frame.localStackOffset())));

  // Again, if there are stack arguments allocated in function's stack we have
  // to handle them. This handles all cases (either regular or dynamic stack
  // alignment).
  if (_numStackArgsToStackSlots)
    ASMJIT_PROPAGATE(_updateStackArgs());

  return kErrorOk;
}
1483
// For WorkRegs flagged `kFlagStackArgToStack` (stack-passed arguments that
// also live in a stack slot), decides whether the incoming argument can stay
// in its caller-provided location (marks the slot `kFlagStackArg`) or must be
// copied into a local slot (records a provisional stack offset in
// `_argsAssignment` so `updateFuncFrame()` accounts for the move).
Error RAPass::_markStackArgsToKeep() noexcept {
  FuncFrame& frame = func()->frame();
  // A stable SA (stack-arguments) base register exists when FP is preserved
  // or no dynamic alignment is needed.
  bool hasSAReg = frame.hasPreservedFP() || !frame.hasDynamicAlignment();

  RAWorkRegs& workRegs = _workRegs;
  uint32_t numWorkRegs = workRegCount();

  for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
    RAWorkReg* workReg = workRegs[workId];
    if (workReg->hasFlag(RAWorkReg::kFlagStackArgToStack)) {
      ASMJIT_ASSERT(workReg->hasArgIndex());
      const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());

      // If the register doesn't have stack slot then we failed. It doesn't
      // make much sense as it was marked as `kFlagStackArgToStack`, which
      // requires the WorkReg was live-in upon function entry.
      RAStackSlot* slot = workReg->stackSlot();
      if (ASMJIT_UNLIKELY(!slot))
        return DebugUtils::errored(kErrorInvalidState);

      // The argument can stay in-place only when it is addressable via a
      // stable base, passed directly on the stack, and the slot size matches
      // the argument's type size exactly.
      if (hasSAReg && srcArg.isStack() && !srcArg.isIndirect()) {
        uint32_t typeSize = Type::sizeOf(srcArg.typeId());
        if (typeSize == slot->size()) {
          slot->addFlags(RAStackSlot::kFlagStackArg);
          continue;
        }
      }

      // NOTE: Update StackOffset here so when `_argsAssignment.updateFuncFrame()`
      // is called it will take into consideration moving to stack slots. Without
      // this we may miss some scratch registers later.
      FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex());
      dstArg.assignStackOffset(0);
    }
  }

  return kErrorOk;
}
1522
// Second half of stack-argument handling, run after the frame is finalized:
// slots marked `kFlagStackArg` are re-based onto the caller's argument area
// (via FP or SP + SA offset), while the remaining arguments get their final
// destination offset recorded in `_argsAssignment`.
Error RAPass::_updateStackArgs() noexcept {
  FuncFrame& frame = func()->frame();
  RAWorkRegs& workRegs = _workRegs;
  uint32_t numWorkRegs = workRegCount();

  for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
    RAWorkReg* workReg = workRegs[workId];
    if (workReg->hasFlag(RAWorkReg::kFlagStackArgToStack)) {
      ASMJIT_ASSERT(workReg->hasArgIndex());
      RAStackSlot* slot = workReg->stackSlot();

      if (ASMJIT_UNLIKELY(!slot))
        return DebugUtils::errored(kErrorInvalidState);

      if (slot->isStackArg()) {
        // Argument stays in the caller's frame - address it relative to FP
        // when FP is preserved, otherwise relative to SP.
        const FuncValue& srcArg = _func->detail().arg(workReg->argIndex());
        if (frame.hasPreservedFP()) {
          slot->setBaseRegId(_fp.id());
          slot->setOffset(int32_t(frame.saOffsetFromSA()) + srcArg.stackOffset());
        }
        else {
          slot->setOffset(int32_t(frame.saOffsetFromSP()) + srcArg.stackOffset());
        }
      }
      else {
        // Argument is copied into a local slot - record its final offset.
        FuncValue& dstArg = _argsAssignment.arg(workReg->argIndex());
        dstArg.setStackOffset(slot->offset());
      }
    }
  }

  return kErrorOk;
}
1556
1557 Error RAPass::insertPrologEpilog() noexcept {
1558 FuncFrame& frame = _func->frame();
1559
1560 cc()->_setCursor(func());
1561 ASMJIT_PROPAGATE(cc()->emitProlog(frame));
1562 ASMJIT_PROPAGATE(cc()->emitArgsAssignment(frame, _argsAssignment));
1563
1564 cc()->_setCursor(func()->exitNode());
1565 ASMJIT_PROPAGATE(cc()->emitEpilog(frame));
1566
1567 return kErrorOk;
1568 }
1569
1570 // ============================================================================
1571 // [asmjit::RAPass - Rewriter]
1572 // ============================================================================
1573
// Rewrite phase entry point - delegates to `_rewrite()` over the whole
// function body. `logger` is consumed by the ASMJIT_RA_LOG_FORMAT macro.
Error RAPass::rewrite() noexcept {
#ifndef ASMJIT_NO_LOGGING
  Logger* logger = debugLogger();
#endif

  ASMJIT_RA_LOG_FORMAT("[RAPass::Rewrite]\n");
  return _rewrite(_func, _stop);
}
1582
// Walks nodes in [first, stop) and rewrites the allocation results into final
// machine form: virtual register ids become physical ids (driven by the
// per-instruction rewrite masks), FuncRet nodes are replaced by jumps to the
// function's exit, and reg-home memory operands become [base + offset]
// addresses of their stack slots.
ASMJIT_FAVOR_SPEED Error RAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept {
  uint32_t virtCount = cc()->_vRegArray.size();

  BaseNode* node = first;
  while (node != stop) {
    // Cache `next` upfront - the current node may be removed below.
    BaseNode* next = node->next();
    if (node->isInst()) {
      InstNode* inst = node->as<InstNode>();
      RAInst* raInst = node->passData<RAInst>();

      Operand* operands = inst->operands();
      uint32_t opCount = inst->opCount();
      uint32_t i;

      // Rewrite virtual registers into physical registers.
      if (ASMJIT_LIKELY(raInst)) {
        // If the instruction contains pass data (raInst) then it was a subject
        // for register allocation and must be rewritten to use physical regs.
        RATiedReg* tiedRegs = raInst->tiedRegs();
        uint32_t tiedCount = raInst->tiedCount();

        for (i = 0; i < tiedCount; i++) {
          RATiedReg* tiedReg = &tiedRegs[i];

          // Each set bit in the rewrite mask addresses one id slot inside the
          // instruction's operand array that must be patched.
          Support::BitWordIterator<uint32_t> useIt(tiedReg->useRewriteMask());
          uint32_t useId = tiedReg->useId();
          while (useIt.hasNext()) inst->rewriteIdAtIndex(useIt.next(), useId);

          Support::BitWordIterator<uint32_t> outIt(tiedReg->outRewriteMask());
          uint32_t outId = tiedReg->outId();
          while (outIt.hasNext()) inst->rewriteIdAtIndex(outIt.next(), outId);
        }

        // This data is allocated by Zone passed to `runOnFunction()`, which
        // will be reset after the RA pass finishes. So reset this data to
        // prevent having a dead pointer after RA pass is complete.
        node->resetPassData();

        if (ASMJIT_UNLIKELY(node->type() != BaseNode::kNodeInst)) {
          // FuncRet terminates the flow, it must either be removed if the exit
          // label is next to it (optimization) or patched to an architecture
          // dependent jump instruction that jumps to the function's exit before
          // the epilog.
          if (node->type() == BaseNode::kNodeFuncRet) {
            RABlock* block = raInst->block();
            if (!isNextTo(node, _func->exitNode())) {
              cc()->_setCursor(node->prev());
              ASMJIT_PROPAGATE(onEmitJump(_func->exitNode()->label()));
            }

            BaseNode* prev = node->prev();
            cc()->removeNode(node);
            block->setLast(prev);
          }
        }
      }

      // Rewrite stack slot addresses.
      for (i = 0; i < opCount; i++) {
        Operand& op = operands[i];
        if (op.isMem()) {
          BaseMem& mem = op.as<BaseMem>();
          if (mem.isRegHome()) {
            uint32_t virtIndex = Operand::virtIdToIndex(mem.baseId());
            if (ASMJIT_UNLIKELY(virtIndex >= virtCount))
              return DebugUtils::errored(kErrorInvalidVirtId);

            VirtReg* virtReg = cc()->virtRegByIndex(virtIndex);
            RAWorkReg* workReg = virtReg->workReg();
            ASMJIT_ASSERT(workReg != nullptr);

            // Replace the reg-home pseudo-address by the slot's real base
            // register and offset computed by the stack allocator.
            RAStackSlot* slot = workReg->stackSlot();
            int32_t offset = slot->offset();

            mem._setBase(_sp.type(), slot->baseRegId());
            mem.clearRegHome();
            mem.addOffsetLo32(offset);
          }
        }
      }
    }

    node = next;
  }

  return kErrorOk;
}
1670
1671 // ============================================================================
1672 // [asmjit::RAPass - Logging]
1673 // ============================================================================
1674
1675 #ifndef ASMJIT_NO_LOGGING
1676 static void RAPass_dumpRAInst(RAPass* pass, String& sb, const RAInst* raInst) noexcept {
1677 const RATiedReg* tiedRegs = raInst->tiedRegs();
1678 uint32_t tiedCount = raInst->tiedCount();
1679
1680 for (uint32_t i = 0; i < tiedCount; i++) {
1681 const RATiedReg& tiedReg = tiedRegs[i];
1682
1683 if (i != 0) sb.appendChar(' ');
1684
1685 sb.appendFormat("%s{", pass->workRegById(tiedReg.workId())->name());
1686 sb.appendChar(tiedReg.isReadWrite() ? 'X' :
1687 tiedReg.isRead() ? 'R' :
1688 tiedReg.isWrite() ? 'W' : '?');
1689
1690 if (tiedReg.hasUseId())
1691 sb.appendFormat("|Use=%u", tiedReg.useId());
1692 else if (tiedReg.isUse())
1693 sb.appendString("|Use");
1694
1695 if (tiedReg.hasOutId())
1696 sb.appendFormat("|Out=%u", tiedReg.outId());
1697 else if (tiedReg.isOut())
1698 sb.appendString("|Out");
1699
1700 if (tiedReg.isLast()) sb.appendString("|Last");
1701 if (tiedReg.isKill()) sb.appendString("|Kill");
1702
1703 sb.appendString("}");
1704 }
1705 }
1706
1707 ASMJIT_FAVOR_SIZE Error RAPass::annotateCode() noexcept {
1708 uint32_t loggerFlags = _loggerFlags;
1709 StringTmp<1024> sb;
1710
1711 for (const RABlock* block : _blocks) {
1712 BaseNode* node = block->first();
1713 if (!node) continue;
1714
1715 BaseNode* last = block->last();
1716 for (;;) {
1717 sb.clear();
1718 Logging::formatNode(sb, loggerFlags, cc(), node);
1719
1720 if ((loggerFlags & FormatOptions::kFlagDebugRA) != 0 && node->isInst() && node->hasPassData()) {
1721 const RAInst* raInst = node->passData<RAInst>();
1722 if (raInst->tiedCount() > 0) {
1723 sb.padEnd(40);
1724 sb.appendString(" | ");
1725 RAPass_dumpRAInst(this, sb, raInst);
1726 }
1727 }
1728
1729 node->setInlineComment(
1730 static_cast<char*>(
1731 cc()->_dataZone.dup(sb.data(), sb.size(), true)));
1732
1733 if (node == last)
1734 break;
1735 node = node->next();
1736 }
1737 }
1738
1739 return kErrorOk;
1740 }
1741
1742 ASMJIT_FAVOR_SIZE Error RAPass::_logBlockIds(const RABlocks& blocks) noexcept {
1743 // Can only be called if the `Logger` is present.
1744 ASMJIT_ASSERT(debugLogger());
1745
1746 StringTmp<1024> sb;
1747 sb.appendString(" [Succ] {");
1748
1749 for (uint32_t i = 0, size = blocks.size(); i < size; i++) {
1750 const RABlock* block = blocks[i];
1751 if (i != 0) sb.appendString(", ");
1752 sb.appendFormat("#%u", block->blockId());
1753 }
1754
1755 sb.appendString("}\n");
1756 return debugLogger()->log(sb.data(), sb.size());
1757 }
1758
1759 ASMJIT_FAVOR_SIZE Error RAPass::_dumpBlockLiveness(String& sb, const RABlock* block) noexcept {
1760 for (uint32_t liveType = 0; liveType < RABlock::kLiveCount; liveType++) {
1761 const char* bitsName = liveType == RABlock::kLiveIn ? "IN " :
1762 liveType == RABlock::kLiveOut ? "OUT " :
1763 liveType == RABlock::kLiveGen ? "GEN " : "KILL";
1764
1765 const ZoneBitVector& bits = block->_liveBits[liveType];
1766 uint32_t size = bits.size();
1767 ASMJIT_ASSERT(size <= workRegCount());
1768
1769 uint32_t n = 0;
1770 for (uint32_t workId = 0; workId < size; workId++) {
1771 if (bits.bitAt(workId)) {
1772 RAWorkReg* wReg = workRegById(workId);
1773
1774 if (!n)
1775 sb.appendFormat(" %s [", bitsName);
1776 else
1777 sb.appendString(", ");
1778
1779 sb.appendString(wReg->name());
1780 n++;
1781 }
1782 }
1783
1784 if (n)
1785 sb.appendString("]\n");
1786 }
1787
1788 return kErrorOk;
1789 }
1790
1791 ASMJIT_FAVOR_SIZE Error RAPass::_dumpLiveSpans(String& sb) noexcept {
1792 uint32_t numWorkRegs = _workRegs.size();
1793 uint32_t maxSize = _maxWorkRegNameSize;
1794
1795 for (uint32_t workId = 0; workId < numWorkRegs; workId++) {
1796 RAWorkReg* workReg = _workRegs[workId];
1797
1798 sb.appendString(" ");
1799
1800 size_t oldSize = sb.size();
1801 sb.appendString(workReg->name());
1802 sb.padEnd(oldSize + maxSize);
1803
1804 RALiveStats& stats = workReg->liveStats();
1805 sb.appendFormat(" {id:%04u width: %-4u freq: %0.4f priority=%0.4f}",
1806 workReg->virtId(),
1807 stats.width(),
1808 stats.freq(),
1809 stats.priority());
1810 sb.appendString(": ");
1811
1812 LiveRegSpans& liveSpans = workReg->liveSpans();
1813 for (uint32_t x = 0; x < liveSpans.size(); x++) {
1814 const LiveRegSpan& liveSpan = liveSpans[x];
1815 if (x) sb.appendString(", ");
1816 sb.appendFormat("[%u:%u]", liveSpan.a, liveSpan.b);
1817 }
1818
1819 sb.appendChar('\n');
1820 }
1821
1822 return kErrorOk;
1823 }
1824 #endif
1825
1826 ASMJIT_END_NAMESPACE
1827
1828 #endif // !ASMJIT_NO_COMPILER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_RAPASS_P_H
7 #define _ASMJIT_CORE_RAPASS_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/raassignment_p.h"
13 #include "../core/radefs_p.h"
14 #include "../core/rastack_p.h"
15 #include "../core/support.h"
16
17 ASMJIT_BEGIN_NAMESPACE
18
19 //! \cond INTERNAL
20 //! \addtogroup asmjit_ra
21 //! \{
22
23 // ============================================================================
24 // [asmjit::RABlock]
25 // ============================================================================
26
//! Basic block of the control-flow graph constructed and used by `RAPass`.
//!
//! A block owns a contiguous range of `BaseNode`s (`_first`..`_last`, both
//! inclusive), liveness bit-vectors, and the register assignment active on
//! entry to the block.
class RABlock {
public:
  ASMJIT_NONCOPYABLE(RABlock)

  typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
  typedef RAAssignment::WorkToPhysMap WorkToPhysMap;

  enum Id : uint32_t {
    //! Id used by blocks that haven't been assigned a real id yet, see `isAssigned()`.
    kUnassignedId = 0xFFFFFFFFu
  };

  enum Flags : uint32_t {
    //! Block has been constructed from nodes.
    kFlagIsConstructed = 0x00000001u,
    //! Block is reachable (set by `buildViews()`).
    kFlagIsReachable = 0x00000002u,
    //! Block has been allocated.
    kFlagIsAllocated = 0x00000004u,
    //! Block is a function-exit.
    kFlagIsFuncExit = 0x00000008u,

    //! Block has a terminator (jump, conditional jump, ret).
    kFlagHasTerminator = 0x00000010u,
    //! Block naturally flows to the next block.
    kFlagHasConsecutive = 0x00000020u,
    //! Block contains fixed registers (precolored).
    kFlagHasFixedRegs = 0x00000040u,
    //! Block contains function calls.
    kFlagHasFuncCalls = 0x00000080u
  };

  //! Register allocator pass.
  RAPass* _ra;

  //! Block id (indexed from zero).
  uint32_t _blockId;
  //! Block flags, see `Flags`.
  uint32_t _flags;

  //! First `BaseNode` of this block (inclusive).
  BaseNode* _first;
  //! Last `BaseNode` of this block (inclusive).
  BaseNode* _last;

  //! Initial position of this block (inclusive).
  uint32_t _firstPosition;
  //! End position of this block (exclusive).
  uint32_t _endPosition;

  //! Weight of this block (default 0, each loop adds one).
  uint32_t _weight;
  //! Post-order view order, used during POV construction.
  uint32_t _povOrder;
  //! Basic statistics about registers.
  RARegsStats _regsStats;
  //! Maximum live-count per register group.
  RALiveCount _maxLiveCount;

  //! Timestamp (used by block visitors).
  //!
  //! Declared `mutable` so const visitors can mark blocks via `setTimestamp()`.
  mutable uint64_t _timestamp;
  //! Immediate dominator of this block.
  RABlock* _idom;

  //! Block predecessors.
  RABlocks _predecessors;
  //! Block successors.
  RABlocks _successors;

  // TODO: Used?
  RABlocks _doms;

  //! Indexes into `_liveBits` - one bit-vector per liveness set.
  enum LiveType : uint32_t {
    kLiveIn = 0,
    kLiveOut = 1,
    kLiveGen = 2,
    kLiveKill = 3,
    kLiveCount = 4
  };

  //! Liveness in/out/use/kill.
  ZoneBitVector _liveBits[kLiveCount];

  //! Register assignment (PhysToWork) on entry.
  PhysToWorkMap* _entryPhysToWorkMap;
  //! Register assignment (WorkToPhys) on entry.
  WorkToPhysMap* _entryWorkToPhysMap;

  //! \name Construction & Destruction
  //! \{

  inline RABlock(RAPass* ra) noexcept
    : _ra(ra),
      _blockId(kUnassignedId),
      _flags(0),
      _first(nullptr),
      _last(nullptr),
      _firstPosition(0),
      _endPosition(0),
      _weight(0),
      _povOrder(kUnassignedId),
      _regsStats(),
      _maxLiveCount(),
      _timestamp(0),
      _idom(nullptr),
      _predecessors(),
      _successors(),
      _entryPhysToWorkMap(nullptr),
      _entryWorkToPhysMap(nullptr) {}

  //! \}

  //! \name Accessors
  //! \{

  inline RAPass* pass() const noexcept { return _ra; }
  inline ZoneAllocator* allocator() const noexcept;

  inline uint32_t blockId() const noexcept { return _blockId; }
  inline uint32_t flags() const noexcept { return _flags; }

  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
  inline void addFlags(uint32_t flags) noexcept { _flags |= flags; }

  //! Tests whether this block has a real id (i.e. it was added to the pass).
  inline bool isAssigned() const noexcept { return _blockId != kUnassignedId; }

  inline bool isConstructed() const noexcept { return hasFlag(kFlagIsConstructed); }
  inline bool isReachable() const noexcept { return hasFlag(kFlagIsReachable); }
  inline bool isAllocated() const noexcept { return hasFlag(kFlagIsAllocated); }
  inline bool isFuncExit() const noexcept { return hasFlag(kFlagIsFuncExit); }

  //! Marks the block constructed and merges `regStats` into the block's stats.
  inline void makeConstructed(const RARegsStats& regStats) noexcept {
    _flags |= kFlagIsConstructed;
    _regsStats.combineWith(regStats);
  }

  inline void makeReachable() noexcept { _flags |= kFlagIsReachable; }
  inline void makeAllocated() noexcept { _flags |= kFlagIsAllocated; }

  inline const RARegsStats& regsStats() const noexcept { return _regsStats; }

  inline bool hasTerminator() const noexcept { return hasFlag(kFlagHasTerminator); }
  inline bool hasConsecutive() const noexcept { return hasFlag(kFlagHasConsecutive); }

  inline bool hasPredecessors() const noexcept { return !_predecessors.empty(); }
  inline bool hasSuccessors() const noexcept { return !_successors.empty(); }

  inline const RABlocks& predecessors() const noexcept { return _predecessors; }
  inline const RABlocks& successors() const noexcept { return _successors; }

  inline BaseNode* first() const noexcept { return _first; }
  inline BaseNode* last() const noexcept { return _last; }

  inline void setFirst(BaseNode* node) noexcept { _first = node; }
  inline void setLast(BaseNode* node) noexcept { _last = node; }

  inline uint32_t firstPosition() const noexcept { return _firstPosition; }
  inline void setFirstPosition(uint32_t position) noexcept { _firstPosition = position; }

  inline uint32_t endPosition() const noexcept { return _endPosition; }
  inline void setEndPosition(uint32_t position) noexcept { _endPosition = position; }

  inline uint32_t povOrder() const noexcept { return _povOrder; }

  // The setters below are `const` on purpose - `_timestamp` is mutable so
  // const traversals can still mark visited blocks.
  inline uint64_t timestamp() const noexcept { return _timestamp; }
  inline bool hasTimestamp(uint64_t ts) const noexcept { return _timestamp == ts; }
  inline void setTimestamp(uint64_t ts) const noexcept { _timestamp = ts; }
  inline void resetTimestamp() const noexcept { _timestamp = 0; }

  //! Returns the natural-flow successor (always `_successors[0]`) or null.
  inline RABlock* consecutive() const noexcept { return hasConsecutive() ? _successors[0] : nullptr; }

  inline RABlock* iDom() noexcept { return _idom; }
  inline const RABlock* iDom() const noexcept { return _idom; }
  inline void setIDom(RABlock* block) noexcept { _idom = block; }

  inline ZoneBitVector& liveIn() noexcept { return _liveBits[kLiveIn]; }
  inline const ZoneBitVector& liveIn() const noexcept { return _liveBits[kLiveIn]; }

  inline ZoneBitVector& liveOut() noexcept { return _liveBits[kLiveOut]; }
  inline const ZoneBitVector& liveOut() const noexcept { return _liveBits[kLiveOut]; }

  inline ZoneBitVector& gen() noexcept { return _liveBits[kLiveGen]; }
  inline const ZoneBitVector& gen() const noexcept { return _liveBits[kLiveGen]; }

  inline ZoneBitVector& kill() noexcept { return _liveBits[kLiveKill]; }
  inline const ZoneBitVector& kill() const noexcept { return _liveBits[kLiveKill]; }

  //! Resizes all four liveness bit-vectors to `size` bits.
  inline Error resizeLiveBits(uint32_t size) noexcept {
    ASMJIT_PROPAGATE(_liveBits[kLiveIn ].resize(allocator(), size));
    ASMJIT_PROPAGATE(_liveBits[kLiveOut ].resize(allocator(), size));
    ASMJIT_PROPAGATE(_liveBits[kLiveGen ].resize(allocator(), size));
    ASMJIT_PROPAGATE(_liveBits[kLiveKill].resize(allocator(), size));
    return kErrorOk;
  }

  inline bool hasEntryAssignment() const noexcept { return _entryPhysToWorkMap != nullptr; }
  inline WorkToPhysMap* entryWorkToPhysMap() const noexcept { return _entryWorkToPhysMap; }
  inline PhysToWorkMap* entryPhysToWorkMap() const noexcept { return _entryPhysToWorkMap; }

  inline void setEntryAssignment(PhysToWorkMap* physToWorkMap, WorkToPhysMap* workToPhysMap) noexcept {
    _entryPhysToWorkMap = physToWorkMap;
    _entryWorkToPhysMap = workToPhysMap;
  }

  //! \}

  //! \name Utilities
  //! \{

  //! Adds a successor to this block, and predecessor to `successor`, making
  //! connection on both sides.
  //!
  //! This API must be used to manage successors and predecessors, never manage
  //! it manually.
  Error appendSuccessor(RABlock* successor) noexcept;

  //! Similar to `appendSuccessor()`, but does prepend instead append.
  //!
  //! This function is used to add a natural flow (always first) to the block.
  Error prependSuccessor(RABlock* successor) noexcept;

  //! \}
};
249
250 // ============================================================================
251 // [asmjit::RAInst]
252 // ============================================================================
253
//! Register allocator's data associated with each `InstNode`.
class RAInst {
public:
  ASMJIT_NONCOPYABLE(RAInst)

  //! Parent block.
  RABlock* _block;
  //! Instruction flags.
  uint32_t _flags;
  //! Total count of RATiedReg's.
  uint32_t _tiedTotal;
  //! Index of RATiedReg's per register group.
  RARegIndex _tiedIndex;
  //! Count of RATiedReg's per register group.
  RARegCount _tiedCount;
  //! Number of live, and thus interfering VirtReg's at this point.
  RALiveCount _liveCount;
  //! Fixed physical registers used.
  RARegMask _usedRegs;
  //! Clobbered registers (by a function call).
  RARegMask _clobberedRegs;
  //! Tied registers.
  //!
  //! NOTE: Declared as a 1-element array, but the instance is over-allocated
  //! to hold `_tiedTotal` items - see `sizeOf()`.
  RATiedReg _tiedRegs[1];

  enum Flags : uint32_t {
    //! The instruction terminates its basic block.
    kFlagIsTerminator = 0x00000001u
  };

  //! \name Construction & Destruction
  //! \{

  ASMJIT_INLINE RAInst(RABlock* block, uint32_t flags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
    _block = block;
    _flags = flags;
    _tiedTotal = tiedTotal;
    _tiedIndex.reset();
    _tiedCount.reset();
    _liveCount.reset();
    _usedRegs.reset();
    _clobberedRegs = clobberedRegs;
  }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the instruction flags.
  inline uint32_t flags() const noexcept { return _flags; }
  //! Tests whether the instruction has flag `flag`.
  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }
  //! Replaces the existing instruction flags with `flags`.
  inline void setFlags(uint32_t flags) noexcept { _flags = flags; }
  //! Adds instruction `flags` to this RAInst.
  inline void addFlags(uint32_t flags) noexcept { _flags |= flags; }
  //! Clears instruction `flags` from this RAInst.
  inline void clearFlags(uint32_t flags) noexcept { _flags &= ~flags; }

  //! Returns whether the RAInst represents an instruction that terminates this basic block.
  inline bool isTerminator() const noexcept { return hasFlag(kFlagIsTerminator); }

  //! Returns the associated block with this RAInst.
  inline RABlock* block() const noexcept { return _block; }

  //! Returns tied registers (all).
  inline RATiedReg* tiedRegs() const noexcept { return const_cast<RATiedReg*>(_tiedRegs); }
  //! Returns tied registers for a given `group`.
  inline RATiedReg* tiedRegs(uint32_t group) const noexcept { return const_cast<RATiedReg*>(_tiedRegs) + _tiedIndex.get(group); }

  //! Returns count of all tied registers.
  inline uint32_t tiedCount() const noexcept { return _tiedTotal; }
  //! Returns count of tied registers of a given `group`.
  inline uint32_t tiedCount(uint32_t group) const noexcept { return _tiedCount[group]; }

  //! Returns `RATiedReg` at the given `index`.
  inline RATiedReg* tiedAt(uint32_t index) const noexcept {
    ASMJIT_ASSERT(index < _tiedTotal);
    return tiedRegs() + index;
  }

  //! Returns `RATiedReg` at the given `index` of the given register `group`.
  inline RATiedReg* tiedOf(uint32_t group, uint32_t index) const noexcept {
    ASMJIT_ASSERT(index < _tiedCount._regs[group]);
    return tiedRegs(group) + index;
  }

  //! Copies `tied` into the tied-register array at the given `index`.
  inline void setTiedAt(uint32_t index, RATiedReg& tied) noexcept {
    ASMJIT_ASSERT(index < _tiedTotal);
    _tiedRegs[index] = tied;
  }

  //! \}

  //! \name Static Functions
  //! \{

  //! Returns the size in bytes of an `RAInst` that holds `tiedRegCount` tied registers.
  static inline size_t sizeOf(uint32_t tiedRegCount) noexcept {
    return sizeof(RAInst) - sizeof(RATiedReg) + tiedRegCount * sizeof(RATiedReg);
  }

  //! \}
};
354
355 // ============================================================================
356 // [asmjit::RAInstBuilder]
357 // ============================================================================
358
//! A helper class that is used to build an array of RATiedReg items that are
//! then copied to `RAInst`.
class RAInstBuilder {
public:
  ASMJIT_NONCOPYABLE(RAInstBuilder)

  //! Flags combined from all RATiedReg's.
  uint32_t _aggregatedFlags;
  //! Flags that will be cleared before storing the aggregated flags to `RAInst`.
  uint32_t _forbiddenFlags;
  //! Count of tied registers per register group.
  RARegCount _count;
  //! Aggregated register statistics (fixed/used per group).
  RARegsStats _stats;

  //! Fixed physical registers used (per group).
  RARegMask _used;
  //! Physical registers clobbered (per group).
  RARegMask _clobbered;

  //! Current tied register in `_tiedRegs`.
  RATiedReg* _cur;
  //! Array of temporary tied registers.
  RATiedReg _tiedRegs[128];

  //! \name Construction & Destruction
  //! \{

  inline RAInstBuilder() noexcept { reset(); }

  inline void init() noexcept { reset(); }
  inline void reset() noexcept {
    _aggregatedFlags = 0;
    _forbiddenFlags = 0;
    _count.reset();
    _stats.reset();
    _used.reset();
    _clobbered.reset();
    _cur = _tiedRegs;
  }

  //! \}

  //! \name Accessors
  //! \{

  inline uint32_t aggregatedFlags() const noexcept { return _aggregatedFlags; }
  inline uint32_t forbiddenFlags() const noexcept { return _forbiddenFlags; }

  inline void addAggregatedFlags(uint32_t flags) noexcept { _aggregatedFlags |= flags; }
  inline void addForbiddenFlags(uint32_t flags) noexcept { _forbiddenFlags |= flags; }

  //! Returns the number of tied registers added to the builder.
  inline uint32_t tiedRegCount() const noexcept { return uint32_t((size_t)(_cur - _tiedRegs)); }

  //! Returns `RATiedReg` at the given `index`.
  inline RATiedReg* operator[](uint32_t index) noexcept {
    ASMJIT_ASSERT(index < tiedRegCount());
    return &_tiedRegs[index];
  }

  //! Returns `RATiedReg` at the given `index`. (const).
  inline const RATiedReg* operator[](uint32_t index) const noexcept {
    ASMJIT_ASSERT(index < tiedRegCount());
    return &_tiedRegs[index];
  }

  //! \}

  //! \name Utilities
  //! \{

  //! Adds a tied register for `workReg` or merges into the one already added
  //! during the current instruction.
  //!
  //! `useId`/`outId` are fixed physical register ids (or `BaseReg::kIdBad`),
  //! `useRewriteMask`/`outRewriteMask` mark operand ids to rewrite, and
  //! `rmSize` is the reg/mem size. Returns `kErrorOverlappedRegs` when a
  //! fixed use/out id was already assigned to the existing tied register.
  ASMJIT_INLINE Error add(RAWorkReg* workReg, uint32_t flags, uint32_t allocable, uint32_t useId, uint32_t useRewriteMask, uint32_t outId, uint32_t outRewriteMask, uint32_t rmSize = 0) noexcept {
    uint32_t group = workReg->group();
    RATiedReg* tiedReg = workReg->tiedReg();

    if (useId != BaseReg::kIdBad) {
      _stats.makeFixed(group);
      _used[group] |= Support::bitMask(useId);
      flags |= RATiedReg::kUseFixed;
    }

    if (outId != BaseReg::kIdBad) {
      _clobbered[group] |= Support::bitMask(outId);
      flags |= RATiedReg::kOutFixed;
    }

    _aggregatedFlags |= flags;
    _stats.makeUsed(group);

    if (!tiedReg) {
      // Could happen when the builder is not reset properly after each instruction.
      ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));

      tiedReg = _cur++;
      tiedReg->init(workReg->workId(), flags, allocable, useId, useRewriteMask, outId, outRewriteMask, rmSize);
      workReg->setTiedReg(tiedReg);

      _count.add(group);
      return kErrorOk;
    }
    else {
      // Merge into the tied register created earlier for this instruction.
      if (useId != BaseReg::kIdBad) {
        if (ASMJIT_UNLIKELY(tiedReg->hasUseId()))
          return DebugUtils::errored(kErrorOverlappedRegs);
        tiedReg->setUseId(useId);
      }

      if (outId != BaseReg::kIdBad) {
        if (ASMJIT_UNLIKELY(tiedReg->hasOutId()))
          return DebugUtils::errored(kErrorOverlappedRegs);
        tiedReg->setOutId(outId);
        // TODO: ? _used[group] |= Support::bitMask(outId);
      }

      tiedReg->addRefCount();
      tiedReg->addFlags(flags);
      tiedReg->_allocableRegs &= allocable;
      tiedReg->_useRewriteMask |= useRewriteMask;
      tiedReg->_outRewriteMask |= outRewriteMask;
      tiedReg->_rmSize = uint8_t(Support::max<uint32_t>(tiedReg->rmSize(), rmSize));
      return kErrorOk;
    }
  }

  //! Adds a tied register describing `workReg` passed as a function-call
  //! argument in the fixed physical register `useId` (must be valid).
  //!
  //! When the register is already tied with a different use id the tied
  //! register is marked `kDuplicate` and `useId` is added to its allocable set.
  ASMJIT_INLINE Error addCallArg(RAWorkReg* workReg, uint32_t useId) noexcept {
    ASMJIT_ASSERT(useId != BaseReg::kIdBad);

    uint32_t flags = RATiedReg::kUse | RATiedReg::kRead | RATiedReg::kUseFixed;
    uint32_t group = workReg->group();
    uint32_t allocable = Support::bitMask(useId);

    _aggregatedFlags |= flags;
    _used[group] |= allocable;
    _stats.makeFixed(group);
    _stats.makeUsed(group);

    RATiedReg* tiedReg = workReg->tiedReg();
    if (!tiedReg) {
      // Could happen when the builder is not reset properly after each instruction.
      ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));

      tiedReg = _cur++;
      tiedReg->init(workReg->workId(), flags, allocable, useId, 0, BaseReg::kIdBad, 0);
      workReg->setTiedReg(tiedReg);

      _count.add(group);
      return kErrorOk;
    }
    else {
      if (tiedReg->hasUseId()) {
        flags |= RATiedReg::kDuplicate;
        tiedReg->_allocableRegs |= allocable;
      }
      else {
        tiedReg->setUseId(useId);
        tiedReg->_allocableRegs &= allocable;
      }

      tiedReg->addRefCount();
      tiedReg->addFlags(flags);
      return kErrorOk;
    }
  }

  //! Adds a tied register describing `workReg` receiving a function-call
  //! return value in the fixed physical register `outId` (must be valid).
  //!
  //! Returns `kErrorOverlappedRegs` when the register was already tied with
  //! an out id.
  ASMJIT_INLINE Error addCallRet(RAWorkReg* workReg, uint32_t outId) noexcept {
    ASMJIT_ASSERT(outId != BaseReg::kIdBad);

    uint32_t flags = RATiedReg::kOut | RATiedReg::kWrite | RATiedReg::kOutFixed;
    uint32_t group = workReg->group();
    uint32_t allocable = Support::bitMask(outId);

    _aggregatedFlags |= flags;
    _used[group] |= allocable;
    _stats.makeFixed(group);
    _stats.makeUsed(group);

    RATiedReg* tiedReg = workReg->tiedReg();
    if (!tiedReg) {
      // Could happen when the builder is not reset properly after each instruction.
      ASMJIT_ASSERT(tiedRegCount() < ASMJIT_ARRAY_SIZE(_tiedRegs));

      tiedReg = _cur++;
      tiedReg->init(workReg->workId(), flags, allocable, BaseReg::kIdBad, 0, outId, 0);
      workReg->setTiedReg(tiedReg);

      _count.add(group);
      return kErrorOk;
    }
    else {
      if (tiedReg->hasOutId())
        return DebugUtils::errored(kErrorOverlappedRegs);

      tiedReg->addRefCount();
      tiedReg->addFlags(flags);
      tiedReg->setOutId(outId);
      return kErrorOk;
    }
  }

  //! \}
};
557
558 // ============================================================================
559 // [asmjit::RAPass]
560 // ============================================================================
561
562 //! Register allocation pass used by `BaseCompiler`.
563 class RAPass : public FuncPass {
564 public:
565 ASMJIT_NONCOPYABLE(RAPass)
566 typedef FuncPass Base;
567
568 enum Weights : uint32_t {
569 kCallArgWeight = 80
570 };
571
572 typedef RAAssignment::PhysToWorkMap PhysToWorkMap;
573 typedef RAAssignment::WorkToPhysMap WorkToPhysMap;
574
575 //! Allocator that uses zone passed to `runOnFunction()`.
576 ZoneAllocator _allocator;
577 //! Logger, disabled if null.
578 Logger* _logger;
579 //! Debug logger, non-null only if `kOptionDebugPasses` option is set.
580 Logger* _debugLogger;
581 //! Logger flags.
582 uint32_t _loggerFlags;
583
584 //! Function being processed.
585 FuncNode* _func;
586 //! Stop node.
587 BaseNode* _stop;
588 //! Node that is used to insert extra code after the function body.
589 BaseNode* _extraBlock;
590
591 //! Blocks (first block is the entry, always exists).
592 RABlocks _blocks;
593 //! Function exit blocks (usually one, but can contain more).
594 RABlocks _exits;
595 //! Post order view (POV).
596 RABlocks _pov;
597
598 //! Number of instruction nodes.
599 uint32_t _instructionCount;
600 //! Number of created blocks (internal).
601 uint32_t _createdBlockCount;
602 //! Timestamp generator (incremental).
603 mutable uint64_t _lastTimestamp;
604
605 //!< Architecture registers information.
606 const ArchRegs* _archRegsInfo;
607 //! Architecture traits.
608 RAArchTraits _archTraits;
609 //! Index to physical registers in `RAAssignment::PhysToWorkMap`.
610 RARegIndex _physRegIndex;
611 //! Count of physical registers in `RAAssignment::PhysToWorkMap`.
612 RARegCount _physRegCount;
613 //! Total number of physical registers.
614 uint32_t _physRegTotal;
615
616 //! Registers available for allocation.
617 RARegMask _availableRegs;
618 //! Count of physical registers per group.
619 RARegCount _availableRegCount;
620 //! Registers clobbered by the function.
621 RARegMask _clobberedRegs;
622
623 //! Work registers (registers used by the function).
624 RAWorkRegs _workRegs;
625 //! Work registers per register group.
626 RAWorkRegs _workRegsOfGroup[BaseReg::kGroupVirt];
627
628 //! Register allocation strategy per register group.
629 RAStrategy _strategy[BaseReg::kGroupVirt];
630 //! Global max live-count (from all blocks) per register group.
631 RALiveCount _globalMaxLiveCount;
632 //! Global live spans per register group.
633 LiveRegSpans* _globalLiveSpans[BaseReg::kGroupVirt];
634 //! Temporary stack slot.
635 Operand _temporaryMem;
636
637 //! Stack pointer.
638 BaseReg _sp;
639 //! Frame pointer.
640 BaseReg _fp;
641 //! Stack manager.
642 RAStackAllocator _stackAllocator;
643 //! Function arguments assignment.
644 FuncArgsAssignment _argsAssignment;
645 //! Some StackArgs have to be assigned to StackSlots.
646 uint32_t _numStackArgsToStackSlots;
647
648 //! Maximum name-size computed from all WorkRegs.
649 uint32_t _maxWorkRegNameSize;
650 //! Temporary string builder used to format comments.
651 StringTmp<80> _tmpString;
652
653 //! \name Construction & Reset
654 //! \{
655
656 RAPass() noexcept;
657 virtual ~RAPass() noexcept;
658
659 //! \}
660
661 //! \name Accessors
662 //! \{
663
664 //! Returns `Logger` passed to `runOnFunction()`.
665 inline Logger* logger() const noexcept { return _logger; }
666 //! Returns `Logger` passed to `runOnFunction()` or null if `kOptionDebugPasses` is not set.
667 inline Logger* debugLogger() const noexcept { return _debugLogger; }
668
669 //! Returns `Zone` passed to `runOnFunction()`.
670 inline Zone* zone() const noexcept { return _allocator.zone(); }
671 //! Returns `ZoneAllocator` used by the register allocator.
672 inline ZoneAllocator* allocator() const noexcept { return const_cast<ZoneAllocator*>(&_allocator); }
673
674 //! Returns the current function node.
675 inline FuncNode* func() const noexcept { return _func; }
676 //! Returns the stop of the current function.
677 inline BaseNode* stop() const noexcept { return _stop; }
678
679 //! Returns an extra block used by the current function being processed.
680 inline BaseNode* extraBlock() const noexcept { return _extraBlock; }
681 //! Sets an extra block, see `extraBlock()`.
682 inline void setExtraBlock(BaseNode* node) noexcept { _extraBlock = node; }
683
684 inline uint32_t endPosition() const noexcept { return _instructionCount * 2; }
685
686 inline const RARegMask& availableRegs() const noexcept { return _availableRegs; }
687 inline const RARegMask& cloberredRegs() const noexcept { return _clobberedRegs; }
688
689 //! \}
690
691 //! \name Utilities
692 //! \{
693
694 inline void makeUnavailable(uint32_t group, uint32_t regId) noexcept {
695 _availableRegs[group] &= ~Support::bitMask(regId);
696 _availableRegCount[group]--;
697 }
698
699 //! Runs the register allocator for the given `func`.
700 Error runOnFunction(Zone* zone, Logger* logger, FuncNode* func) noexcept override;
701
702 //! Performs all allocation steps sequentially, called by `runOnFunction()`.
703 Error onPerformAllSteps() noexcept;
704
705 //! \}
706
707 //! \name Events
708 //! \{
709
710 //! Called by `runOnFunction()` before the register allocation to initialize
711 //! architecture-specific data and constraints.
712 virtual void onInit() noexcept = 0;
713
714 //! Called by `runOnFunction()` after register allocation to clean everything
715 //! up. Called even if the register allocation failed.
716 virtual void onDone() noexcept = 0;
717
718 //! \}
719
720 //! \name CFG - Basic-Block Management
721 //! \{
722
723 //! Returns the function's entry block.
724 inline RABlock* entryBlock() noexcept {
725 ASMJIT_ASSERT(!_blocks.empty());
726 return _blocks[0];
727 }
728
729 //! \overload
730 inline const RABlock* entryBlock() const noexcept {
731 ASMJIT_ASSERT(!_blocks.empty());
732 return _blocks[0];
733 }
734
735 //! Returns the count of basic blocks (returns size of `_blocks` array).
736 inline uint32_t blockCount() const noexcept { return _blocks.size(); }
737 //! Returns the count of reachable basic blocks (returns size of `_pov` array).
738 inline uint32_t reachableBlockCount() const noexcept { return _pov.size(); }
739
740 //! Tests whether the CFG has dangling blocks - these were created by `newBlock()`,
741 //! but not added to CFG through `addBlocks()`. If `true` is returned and the
742 //! CFG is constructed it means that something is missing and it's incomplete.
743 //!
744 //! \note This is only used to check if the number of created blocks matches
745 //! the number of added blocks.
746 inline bool hasDanglingBlocks() const noexcept { return _createdBlockCount != blockCount(); }
747
748 //! Gest a next timestamp to be used to mark CFG blocks.
749 inline uint64_t nextTimestamp() const noexcept { return ++_lastTimestamp; }
750
751 //! Createss a new `RABlock` instance.
752 //!
753 //! \note New blocks don't have ID assigned until they are added to the block
754 //! array by calling `addBlock()`.
755 RABlock* newBlock(BaseNode* initialNode = nullptr) noexcept;
756
757 //! Tries to find a neighboring LabelNode (without going through code) that is
758 //! already connected with `RABlock`. If no label is found then a new RABlock
759 //! is created and assigned to all possible labels in a backward direction.
760 RABlock* newBlockOrExistingAt(LabelNode* cbLabel, BaseNode** stoppedAt = nullptr) noexcept;
761
762 //! Adds the given `block` to the block list and assign it a unique block id.
763 Error addBlock(RABlock* block) noexcept;
764
765 inline Error addExitBlock(RABlock* block) noexcept {
766 block->addFlags(RABlock::kFlagIsFuncExit);
767 return _exits.append(allocator(), block);
768 }
769
  //! Allocates and constructs a new `RAInst` with space for `tiedRegCount`
  //! tied registers from the zone passed to `runOnFunction()`.
  //!
  //! Returns null when the zone allocation fails.
  ASMJIT_INLINE RAInst* newRAInst(RABlock* block, uint32_t flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
    void* p = zone()->alloc(RAInst::sizeOf(tiedRegCount));
    if (ASMJIT_UNLIKELY(!p))
      return nullptr;
    return new(p) RAInst(block, flags, tiedRegCount, clobberedRegs);
  }
776
  //! Creates a `RAInst` from the data collected by `ib` (RAInstBuilder) and
  //! attaches it to `node` as pass data. Returns `kErrorOutOfMemory` when the
  //! `RAInst` cannot be allocated.
  ASMJIT_INLINE Error assignRAInst(BaseNode* node, RABlock* block, RAInstBuilder& ib) noexcept {
    uint32_t tiedRegCount = ib.tiedRegCount();
    RAInst* raInst = newRAInst(block, ib.aggregatedFlags(), tiedRegCount, ib._clobbered);

    if (ASMJIT_UNLIKELY(!raInst))
      return DebugUtils::errored(kErrorOutOfMemory);

    RARegIndex index;
    uint32_t flagsFilter = ~ib.forbiddenFlags();

    // Tied registers are stored grouped by register group - `index` tracks the
    // write position of each group within `raInst->_tiedRegs`.
    index.buildIndexes(ib._count);
    raInst->_tiedIndex = index;
    raInst->_tiedCount = ib._count;

    for (uint32_t i = 0; i < tiedRegCount; i++) {
      RATiedReg* tiedReg = ib[i];
      RAWorkReg* workReg = workRegById(tiedReg->workId());

      workReg->resetTiedReg();
      uint32_t group = workReg->group();

      // A fixed USE register makes the whole block use fixed registers.
      if (tiedReg->hasUseId()) {
        block->addFlags(RABlock::kFlagHasFixedRegs);
        raInst->_usedRegs[group] |= Support::bitMask(tiedReg->useId());
      }

      // NOTE(review): unlike the USE case above, a fixed OUT id doesn't update
      // `_usedRegs` here - presumably handled elsewhere; confirm before relying on it.
      if (tiedReg->hasOutId()) {
        block->addFlags(RABlock::kFlagHasFixedRegs);
      }

      // Copy the tied register into its group slot, masking out forbidden flags.
      RATiedReg& dst = raInst->_tiedRegs[index[group]++];
      dst = *tiedReg;
      dst._flags &= flagsFilter;

      if (!tiedReg->isDuplicate())
        dst._allocableRegs &= ~ib._used[group];
    }

    node->setPassData<RAInst>(raInst);
    return kErrorOk;
  }
818
819 //! \}
820
821 //! \name CFG - Build CFG
822 //! \{
823
824 //! Traverse the whole function and do the following:
825 //!
826 //! 1. Construct CFG (represented by `RABlock`) by populating `_blocks` and
827 //! `_exits`. Blocks describe the control flow of the function and contain
828 //! some additional information that is used by the register allocator.
829 //!
830 //! 2. Remove unreachable code immediately. This is not strictly necessary
831 //! for BaseCompiler itself as the register allocator cannot reach such
832 //! nodes, but keeping instructions that use virtual registers would fail
833 //! during instruction encoding phase (Assembler).
834 //!
835 //! 3. `RAInst` is created for each `InstNode` or compatible. It contains
836 //! information that is essential for further analysis and register
837 //! allocation.
838 //!
839 //! Use `RACFGBuilder` template that provides the necessary boilerplate.
840 virtual Error buildCFG() noexcept = 0;
841
842 //! \}
843
844 //! \name CFG - Views Order
845 //! \{
846
847 //! Constructs CFG views (only POV at the moment).
848 Error buildViews() noexcept;
849
850 //! \}
851
852 //! \name CFG - Dominators
853 //! \{
854
855 // Terminology:
856 // - A node `X` dominates a node `Z` if any path from the entry point to
857 // `Z` has to go through `X`.
858 // - A node `Z` post-dominates a node `X` if any path from `X` to the end
859 // of the graph has to go through `Z`.
860
861 //! Constructs a dominator-tree from CFG.
862 Error buildDominators() noexcept;
863
864 bool _strictlyDominates(const RABlock* a, const RABlock* b) const noexcept;
865 const RABlock* _nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept;
866
867 //! Tests whether the basic block `a` dominates `b` - non-strict, returns true when `a == b`.
868 inline bool dominates(const RABlock* a, const RABlock* b) const noexcept { return a == b ? true : _strictlyDominates(a, b); }
869 //! Tests whether the basic block `a` dominates `b` - strict dominance check, returns false when `a == b`.
870 inline bool strictlyDominates(const RABlock* a, const RABlock* b) const noexcept { return a == b ? false : _strictlyDominates(a, b); }
871
872 //! Returns a nearest common dominator of `a` and `b`.
873 inline RABlock* nearestCommonDominator(RABlock* a, RABlock* b) const noexcept { return const_cast<RABlock*>(_nearestCommonDominator(a, b)); }
874 //! Returns a nearest common dominator of `a` and `b` (const).
875 inline const RABlock* nearestCommonDominator(const RABlock* a, const RABlock* b) const noexcept { return _nearestCommonDominator(a, b); }
876
877 //! \}
878
879 //! \name CFG - Utilities
880 //! \{
881
882 Error removeUnreachableBlocks() noexcept;
883
884 //! Returns `node` or some node after that is ideal for beginning a new block.
885 //! This function is mostly used after a conditional or unconditional jump to
886 //! select the successor node. In some cases the next node could be a label,
887 //! which means it could have assigned some block already.
888 BaseNode* findSuccessorStartingAt(BaseNode* node) noexcept;
889
890 //! Returns `true` of the `node` can flow to `target` without reaching code
891 //! nor data. It's used to eliminate jumps to labels that are next right to
892 //! them.
893 bool isNextTo(BaseNode* node, BaseNode* target) noexcept;
894
895 //! \}
896
  //! \name Virtual Register Management
  //! \{

  //! Returns a native size of the general-purpose register of the target architecture.
  inline uint32_t gpSize() const noexcept { return _sp.size(); }
  //! Returns the count of registers available for allocation in the given `group`.
  inline uint32_t availableRegCount(uint32_t group) const noexcept { return _availableRegCount[group]; }

  //! Returns the `RAWorkReg` associated with the given `workId`.
  inline RAWorkReg* workRegById(uint32_t workId) const noexcept { return _workRegs[workId]; }

  //! Returns all work registers of the function.
  inline RAWorkRegs& workRegs() noexcept { return _workRegs; }
  //! Returns work registers belonging to the given register `group`.
  inline RAWorkRegs& workRegs(uint32_t group) noexcept { return _workRegsOfGroup[group]; }

  //! Returns all work registers of the function (const).
  inline const RAWorkRegs& workRegs() const noexcept { return _workRegs; }
  //! Returns work registers belonging to the given register `group` (const).
  inline const RAWorkRegs& workRegs(uint32_t group) const noexcept { return _workRegsOfGroup[group]; }

  //! Returns the count of all work registers.
  inline uint32_t workRegCount() const noexcept { return _workRegs.size(); }
  //! Returns the count of work registers in the given `group`.
  inline uint32_t workRegCount(uint32_t group) const noexcept { return _workRegsOfGroup[group].size(); }

  //! Builds `_physRegIndex` (start of each group) and `_physRegTotal` from
  //! the per-group physical register counts.
  inline void _buildPhysIndex() noexcept {
    _physRegIndex.buildIndexes(_physRegCount);
    _physRegTotal = uint32_t(_physRegIndex[BaseReg::kGroupVirt - 1]) +
                    uint32_t(_physRegCount[BaseReg::kGroupVirt - 1]) ;
  }
  //! Returns the index of the first physical register of the given `group`.
  inline uint32_t physRegIndex(uint32_t group) const noexcept { return _physRegIndex[group]; }
  //! Returns the total count of physical registers across all groups.
  inline uint32_t physRegTotal() const noexcept { return _physRegTotal; }

  Error _asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept;

  //! Creates `RAWorkReg` data for the given `vReg`. The function does nothing
  //! if `vReg` already contains link to `RAWorkReg`. Called by `constructBlocks()`.
  inline Error asWorkReg(VirtReg* vReg, RAWorkReg** out) noexcept {
    *out = vReg->workReg();
    return *out ? kErrorOk : _asWorkReg(vReg, out);
  }

  //! Like `asWorkReg()`, but resolves the virtual register from its index and
  //! validates the index first.
  inline Error virtIndexAsWorkReg(uint32_t vIndex, RAWorkReg** out) noexcept {
    const ZoneVector<VirtReg*>& virtRegs = cc()->virtRegs();
    if (ASMJIT_UNLIKELY(vIndex >= virtRegs.size()))
      return DebugUtils::errored(kErrorInvalidVirtId);
    return asWorkReg(virtRegs[vIndex], out);
  }

  //! Returns the stack slot of `workReg`, creating it on first use.
  //!
  //! \note May return null if `newSlot()` fails to allocate (out-of-memory).
  inline RAStackSlot* getOrCreateStackSlot(RAWorkReg* workReg) noexcept {
    RAStackSlot* slot = workReg->stackSlot();
    if (slot) return slot;

    slot = _stackAllocator.newSlot(_sp.id(), workReg->virtReg()->virtSize(), workReg->virtReg()->alignment(), 0);
    workReg->_stackSlot = slot;
    workReg->markStackUsed();
    return slot;
  }

  //! Returns a memory operand addressing the (possibly just created) stack
  //! slot of `workReg` via its virtual id (reg-home memory).
  inline BaseMem workRegAsMem(RAWorkReg* workReg) noexcept {
    getOrCreateStackSlot(workReg);
    return BaseMem(BaseMem::Decomposed { _sp.type(), workReg->virtId(), BaseReg::kTypeNone, 0, 0, 0, BaseMem::kSignatureMemRegHomeFlag });
  }

  WorkToPhysMap* newWorkToPhysMap() noexcept;
  PhysToWorkMap* newPhysToWorkMap() noexcept;

  //! Clones `map` into the pass zone (sized by `_physRegTotal`).
  inline PhysToWorkMap* clonePhysToWorkMap(const PhysToWorkMap* map) noexcept {
    size_t size = PhysToWorkMap::sizeOf(_physRegTotal);
    return static_cast<PhysToWorkMap*>(zone()->dupAligned(map, size, sizeof(uint32_t)));
  }

  //! Clones `map` into the pass zone (sized by the work register count).
  //! A zero-sized map is returned as-is since there is nothing to copy.
  inline WorkToPhysMap* cloneWorkToPhysMap(const WorkToPhysMap* map) noexcept {
    size_t size = WorkToPhysMap::sizeOf(_workRegs.size());
    if (ASMJIT_UNLIKELY(size == 0))
      return const_cast<WorkToPhysMap*>(map);
    return static_cast<WorkToPhysMap*>(zone()->dup(map, size));
  }

  //! \}
968
  //! \name Liveness Analysis & Statistics
  //! \{

  //! 1. Calculates GEN/KILL/IN/OUT of each block.
  //! 2. Calculates live spans and basic statistics of each work register.
  Error buildLiveness() noexcept;

  //! Assigns argIndex to WorkRegs. Must be called after the liveness analysis
  //! finishes as it checks whether the argument is live upon entry.
  Error assignArgIndexToWorkRegs() noexcept;

  //! \}

  //! \name Register Allocation - Global
  //! \{

  //! Runs a global register allocator.
  Error runGlobalAllocator() noexcept;

  //! Initializes data structures used for global live spans.
  Error initGlobalLiveSpans() noexcept;

  //! Bin-packing step of the global allocator for the given register `group`.
  Error binPack(uint32_t group) noexcept;

  //! \}

  //! \name Register Allocation - Local
  //! \{

  //! Runs a local register allocator.
  Error runLocalAllocator() noexcept;

  //! Calculates the register assignment `block` should start with, based on
  //! `fromBlock` and its end assignment `fromAssignment`.
  Error setBlockEntryAssignment(RABlock* block, const RABlock* fromBlock, const RAAssignment& fromAssignment) noexcept;

  //! \}

  //! \name Register Allocation Utilities
  //! \{

  //! Makes a temporary stack memory area of at least `size` bytes aligned to
  //! `alignment` available through `out`.
  Error useTemporaryMem(BaseMem& out, uint32_t size, uint32_t alignment) noexcept;

  //! \}

  //! \name Function Prolog & Epilog
  //! \{

  Error updateStackFrame() noexcept;
  Error _markStackArgsToKeep() noexcept;
  Error _updateStackArgs() noexcept;
  Error insertPrologEpilog() noexcept;

  //! \}

  //! \name Instruction Rewriter
  //! \{

  //! Rewrites the annotated code - entry point; `_rewrite()` processes the
  //! node range `[first, stop)`.
  Error rewrite() noexcept;
  Error _rewrite(BaseNode* first, BaseNode* stop) noexcept;

  //! \}

#ifndef ASMJIT_NO_LOGGING
  //! \name Logging
  //! \{

  Error annotateCode() noexcept;

  Error _logBlockIds(const RABlocks& blocks) noexcept;
  Error _dumpBlockLiveness(String& sb, const RABlock* block) noexcept;
  Error _dumpLiveSpans(String& sb) noexcept;

  //! \}
#endif

  //! \name Emit
  //! \{

  // Pure virtual emit hooks implemented by a derived (architecture-specific) pass.

  //! Emits a register move of `workId` from `srcPhysId` to `dstPhysId`.
  virtual Error onEmitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept = 0;
  //! Emits a swap of the physical registers holding `aWorkId` and `bWorkId`.
  virtual Error onEmitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept = 0;

  //! Emits a load of the work register `workId` into `dstPhysId`.
  virtual Error onEmitLoad(uint32_t workId, uint32_t dstPhysId) noexcept = 0;
  //! Emits a save (spill) of the work register `workId` from `srcPhysId`.
  virtual Error onEmitSave(uint32_t workId, uint32_t srcPhysId) noexcept = 0;

  //! Emits a jump to the given `label`.
  virtual Error onEmitJump(const Label& label) noexcept = 0;
  //! Called before the given function `call` is emitted.
  virtual Error onEmitPreCall(FuncCallNode* call) noexcept = 0;

  //! \}
1055 };
1056
//! Defined out-of-line as it requires `RAPass` to be a complete type.
inline ZoneAllocator* RABlock::allocator() const noexcept { return _ra->allocator(); }
1058
1059 //! \}
1060 //! \endcond
1061
1062 ASMJIT_END_NAMESPACE
1063
1064 #endif // !ASMJIT_NO_COMPILER
1065 #endif // _ASMJIT_CORE_RAPASS_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_COMPILER
8
9 #include "../core/rastack_p.h"
10 #include "../core/support.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 // ============================================================================
15 // [asmjit::RAStackAllocator - Slots]
16 // ============================================================================
17
18 RAStackSlot* RAStackAllocator::newSlot(uint32_t baseRegId, uint32_t size, uint32_t alignment, uint32_t flags) noexcept {
19 if (ASMJIT_UNLIKELY(_slots.willGrow(allocator(), 1) != kErrorOk))
20 return nullptr;
21
22 RAStackSlot* slot = allocator()->allocT<RAStackSlot>();
23 if (ASMJIT_UNLIKELY(!slot))
24 return nullptr;
25
26 slot->_baseRegId = uint8_t(baseRegId);
27 slot->_alignment = uint8_t(Support::max<uint32_t>(alignment, 1));
28 slot->_reserved[0] = 0;
29 slot->_reserved[1] = 0;
30 slot->_useCount = 0;
31 slot->_size = size;
32 slot->_flags = flags;
33
34 slot->_weight = 0;
35 slot->_offset = 0;
36
37 _alignment = Support::max<uint32_t>(_alignment, alignment);
38 _slots.appendUnsafe(slot);
39 return slot;
40 }
41
42 // ============================================================================
43 // [asmjit::RAStackAllocator - Utilities]
44 // ============================================================================
45
//! A gap between two stack slots, introduced by alignment requirements.
//!
//! Gaps are collected by `RAStackAllocator::calculateStackFrame()` and reused
//! to place smaller slots that fit into them.
struct RAStackGap {
  inline RAStackGap() noexcept
    : offset(0),
      size(0) {}

  inline RAStackGap(uint32_t offset, uint32_t size) noexcept
    : offset(offset),
      size(size) {}

  // Rule of zero: the compiler-generated copy is identical to the member-wise
  // copy that was hand-written here before; defaulting it keeps the type
  // trivially copyable (friendlier to memcpy-based vector containers).
  inline RAStackGap(const RAStackGap& other) noexcept = default;

  //! Offset of the gap within the stack frame.
  uint32_t offset;
  //! Size of the gap in bytes.
  uint32_t size;
};
62
63 Error RAStackAllocator::calculateStackFrame() noexcept {
64 // Base weight added to all registers regardless of their size and alignment.
65 uint32_t kBaseRegWeight = 16;
66
67 // STEP 1:
68 //
69 // Update usage based on the size of the slot. We boost smaller slots in a way
70 // that 32-bit register has higher priority than a 128-bit register, however,
71 // if one 128-bit register is used 4 times more than some other 32-bit register
72 // it will overweight it.
73 for (RAStackSlot* slot : _slots) {
74 uint32_t alignment = slot->alignment();
75 ASMJIT_ASSERT(alignment > 0);
76
77 uint32_t power = Support::ctz(alignment);
78 uint64_t weight;
79
80 if (slot->isRegHome())
81 weight = kBaseRegWeight + (uint64_t(slot->useCount()) * (7 - power));
82 else
83 weight = power;
84
85 // If overflown, which has less chance of winning a lottery, just use max
86 // possible weight. In such case it probably doesn't matter at all.
87 if (weight > 0xFFFFFFFFu)
88 weight = 0xFFFFFFFFu;
89
90 slot->setWeight(uint32_t(weight));
91 }
92
93 // STEP 2:
94 //
95 // Sort stack slots based on their newly calculated weight (in descending order).
96 _slots.sort([](const RAStackSlot* a, const RAStackSlot* b) noexcept {
97 return a->weight() > b->weight() ? 1 :
98 a->weight() == b->weight() ? 0 : -1;
99 });
100
101 // STEP 3:
102 //
103 // Calculate offset of each slot. We start from the slot that has the highest
104 // weight and advance to slots with lower weight. It could look that offsets
105 // start from the first slot in our list and then simply increase, but it's
106 // not always the case as we also try to fill all gaps introduced by the fact
107 // that slots are sorted by weight and not by size & alignment, so when we need
108 // to align some slot we distribute the gap caused by the alignment to `gaps`.
109 uint32_t offset = 0;
110 ZoneVector<RAStackGap> gaps[kSizeCount - 1];
111
112 for (RAStackSlot* slot : _slots) {
113 if (slot->isStackArg()) continue;
114
115 uint32_t slotAlignment = slot->alignment();
116 uint32_t alignedOffset = Support::alignUp(offset, slotAlignment);
117
118 // Try to find a slot within gaps first, before advancing the `offset`.
119 bool foundGap = false;
120 uint32_t gapSize = 0;
121 uint32_t gapOffset = 0;
122
123 {
124 uint32_t slotSize = slot->size();
125 if (slotSize < (1u << uint32_t(ASMJIT_ARRAY_SIZE(gaps)))) {
126 // Iterate from the lowest to the highest possible.
127 uint32_t index = Support::ctz(slotSize);
128 do {
129 if (!gaps[index].empty()) {
130 RAStackGap gap = gaps[index].pop();
131
132 ASMJIT_ASSERT(Support::isAligned(gap.offset, slotAlignment));
133 slot->setOffset(int32_t(gap.offset));
134
135 gapSize = gap.size - slotSize;
136 gapOffset = gap.offset - slotSize;
137
138 foundGap = true;
139 break;
140 }
141 } while (++index < uint32_t(ASMJIT_ARRAY_SIZE(gaps)));
142 }
143 }
144
145 // No gap found, we may create a new one(s) if the current offset is not aligned.
146 if (!foundGap && offset != alignedOffset) {
147 gapSize = alignedOffset - offset;
148 gapOffset = alignedOffset;
149
150 offset = alignedOffset;
151 }
152
153 // True if we have found a gap and not filled all of it or we aligned the current offset.
154 if (gapSize) {
155 uint32_t gapEnd = gapSize + gapOffset;
156 while (gapOffset < gapEnd) {
157 uint32_t index = Support::ctz(gapOffset);
158 uint32_t slotSize = 1u << index;
159
160 // Weird case, better to bail...
161 if (gapEnd - gapOffset < slotSize)
162 break;
163
164 ASMJIT_PROPAGATE(gaps[index].append(allocator(), RAStackGap(gapOffset, slotSize)));
165 gapOffset += slotSize;
166 }
167 }
168
169 if (!foundGap) {
170 ASMJIT_ASSERT(Support::isAligned(offset, slotAlignment));
171 slot->setOffset(int32_t(offset));
172 offset += slot->size();
173 }
174 }
175
176 _stackSize = Support::alignUp(offset, _alignment);
177 return kErrorOk;
178 }
179
180 Error RAStackAllocator::adjustSlotOffsets(int32_t offset) noexcept {
181 for (RAStackSlot* slot : _slots)
182 if (!slot->isStackArg())
183 slot->_offset += offset;
184 return kErrorOk;
185 }
186
187 ASMJIT_END_NAMESPACE
188
189 #endif // !ASMJIT_NO_COMPILER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_RASTACK_P_H
7 #define _ASMJIT_CORE_RASTACK_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/radefs_p.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \cond INTERNAL
17 //! \addtogroup asmjit_ra
18 //! \{
19
20 // ============================================================================
21 // [asmjit::RAStackSlot]
22 // ============================================================================
23
//! Stack slot.
struct RAStackSlot {
  //! Stack slot flags.
  enum Flags : uint32_t {
    // TODO: kFlagRegHome is apparently not used, but isRegHome() is.
    kFlagRegHome = 0x00000001u, //!< Stack slot is register home slot.
    kFlagStackArg = 0x00000002u //!< Stack slot position matches argument passed via stack.
  };

  //! Sentinel used when a slot is not associated with a function argument.
  enum ArgIndex : uint32_t {
    kNoArgIndex = 0xFF
  };

  //! Base register used to address the stack.
  uint8_t _baseRegId;
  //! Minimum alignment required by the slot.
  uint8_t _alignment;
  //! Reserved for future use.
  uint8_t _reserved[2];
  //! Size of memory required by the slot.
  uint32_t _size;
  //! Slot flags.
  uint32_t _flags;

  //! Usage counter (one unit equals one memory access).
  uint32_t _useCount;
  //! Weight of the slot (calculated by `calculateStackFrame()`).
  uint32_t _weight;
  //! Stack offset (calculated by `calculateStackFrame()`).
  int32_t _offset;

  //! \name Accessors
  //! \{

  //! Returns the id of the base register used to address the slot.
  inline uint32_t baseRegId() const noexcept { return _baseRegId; }
  //! Sets the id of the base register used to address the slot.
  inline void setBaseRegId(uint32_t id) noexcept { _baseRegId = uint8_t(id); }

  //! Returns the size of the slot, in bytes.
  inline uint32_t size() const noexcept { return _size; }
  //! Returns the minimum alignment required by the slot.
  inline uint32_t alignment() const noexcept { return _alignment; }

  //! Returns slot flags, see `Flags`.
  inline uint32_t flags() const noexcept { return _flags; }
  //! Adds `flags` to the slot flags (bitwise OR).
  inline void addFlags(uint32_t flags) noexcept { _flags |= flags; }
  //! Tests whether the slot is a register home slot.
  inline bool isRegHome() const noexcept { return (_flags & kFlagRegHome) != 0; }
  //! Tests whether the slot matches an argument passed via stack.
  inline bool isStackArg() const noexcept { return (_flags & kFlagStackArg) != 0; }

  //! Returns the usage counter.
  inline uint32_t useCount() const noexcept { return _useCount; }
  //! Increments the usage counter by `n`.
  inline void addUseCount(uint32_t n = 1) noexcept { _useCount += n; }

  //! Returns the weight of the slot (calculated by `calculateStackFrame()`).
  inline uint32_t weight() const noexcept { return _weight; }
  //! Sets the weight of the slot.
  inline void setWeight(uint32_t weight) noexcept { _weight = weight; }

  //! Returns the stack offset of the slot (calculated by `calculateStackFrame()`).
  inline int32_t offset() const noexcept { return _offset; }
  //! Sets the stack offset of the slot.
  inline void setOffset(int32_t offset) noexcept { _offset = offset; }

  //! \}
};

//! Vector of stack slots, owned by `RAStackAllocator`.
typedef ZoneVector<RAStackSlot*> RAStackSlots;
81
82 // ============================================================================
83 // [asmjit::RAStackAllocator]
84 // ============================================================================
85
//! Stack allocator.
//!
//! Collects stack slots (register homes and stack-passed arguments, see
//! `RAStackSlot::Flags`) and computes the final stack frame layout via
//! `calculateStackFrame()`.
class RAStackAllocator {
public:
  ASMJIT_NONCOPYABLE(RAStackAllocator)

  //! Log2-based size classes used to bucket alignment gaps by size.
  enum Size : uint32_t {
    kSize1 = 0,
    kSize2 = 1,
    kSize4 = 2,
    kSize8 = 3,
    kSize16 = 4,
    kSize32 = 5,
    kSize64 = 6,
    kSizeCount = 7
  };

  //! Allocator used to allocate internal data.
  ZoneAllocator* _allocator;
  //! Count of bytes used by all slots.
  uint32_t _bytesUsed;
  //! Calculated stack size (can be a bit greater than `_bytesUsed`).
  uint32_t _stackSize;
  //! Minimum stack alignment.
  uint32_t _alignment;
  //! Stack slots vector.
  RAStackSlots _slots;

  //! \name Construction / Destruction
  //! \{

  inline RAStackAllocator() noexcept
    : _allocator(nullptr),
      _bytesUsed(0),
      _stackSize(0),
      _alignment(1),
      _slots() {}

  //! Resets the allocator to the default-constructed state and attaches `allocator`.
  inline void reset(ZoneAllocator* allocator) noexcept {
    _allocator = allocator;
    _bytesUsed = 0;
    _stackSize = 0;
    _alignment = 1;
    _slots.reset();
  }

  //! \}

  //! \name Accessors
  //! \{

  inline ZoneAllocator* allocator() const noexcept { return _allocator; }

  inline uint32_t bytesUsed() const noexcept { return _bytesUsed; }
  inline uint32_t stackSize() const noexcept { return _stackSize; }
  inline uint32_t alignment() const noexcept { return _alignment; }

  inline RAStackSlots& slots() noexcept { return _slots; }
  inline const RAStackSlots& slots() const noexcept { return _slots; }
  inline uint32_t slotCount() const noexcept { return _slots.size(); }

  //! \}

  //! \name Utilities
  //! \{

  RAStackSlot* newSlot(uint32_t baseRegId, uint32_t size, uint32_t alignment, uint32_t flags = 0) noexcept;

  Error calculateStackFrame() noexcept;
  Error adjustSlotOffsets(int32_t offset) noexcept;

  //! \}
};
158
159 //! \}
160 //! \endcond
161
162 ASMJIT_END_NAMESPACE
163
164 #endif // !ASMJIT_NO_COMPILER
165 #endif // _ASMJIT_CORE_RASTACK_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/string.h"
8 #include "../core/support.h"
9
10 ASMJIT_BEGIN_NAMESPACE
11
12 // ============================================================================
13 // [asmjit::String - Globals]
14 // ============================================================================
15
// Digit characters used by the number/hex formatters (supports up to base 36).
static const char String_baseN[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

// Minimum capacity of a heap-allocated string buffer.
constexpr size_t kMinAllocSize = 64;
// Maximum size accepted by `String::prepare()` - guards size arithmetic from overflow.
constexpr size_t kMaxAllocSize = std::numeric_limits<size_t>::max() - Globals::kGrowThreshold;
20
21 // ============================================================================
22 // [asmjit::String]
23 // ============================================================================
24
// Resets the string to its default-constructed (empty SSO) state.
//
// Only an owned heap buffer (`kTypeLarge` exactly) is freed - external,
// user-provided buffers are presumably covered by `isLarge()` too and must
// not be freed here, which is why `isLarge()` is not used.
Error String::reset() noexcept {
  if (_type == kTypeLarge)
    ::free(_large.data);

  _resetInternal();
  return kErrorOk;
}
32
33 Error String::clear() noexcept {
34 if (isLarge()) {
35 _large.size = 0;
36 _large.data[0] = '\0';
37 }
38 else {
39 _raw.uptr[0] = 0;
40 }
41
42 return kErrorOk;
43 }
44
// Prepares the string buffer for writing `size` characters and returns the
// position to write to: the start of the buffer for `kOpAssign`, the end of
// the current content otherwise (append). The string's size is updated and
// the final content is null-terminated before returning. Returns null on
// overflow or allocation failure.
char* String::prepare(uint32_t op, size_t size) noexcept {
  char* curData;
  size_t curSize;
  size_t curCapacity;

  if (isLarge()) {
    curData = this->_large.data;
    curSize = this->_large.size;
    curCapacity = this->_large.capacity;
  }
  else {
    // SSO representation - the `type` byte doubles as the string size.
    curData = this->_small.data;
    curSize = this->_small.type;
    curCapacity = kSSOCapacity;
  }

  if (op == kOpAssign) {
    if (size > curCapacity) {
      // Prevent arithmetic overflow.
      if (ASMJIT_UNLIKELY(size >= kMaxAllocSize))
        return nullptr;

      size_t newCapacity = Support::alignUp<size_t>(size + 1, kMinAllocSize);
      char* newData = static_cast<char*>(::malloc(newCapacity));

      if (ASMJIT_UNLIKELY(!newData))
        return nullptr;

      // Free the previous buffer only if owned (external buffers stay alive).
      if (_type == kTypeLarge)
        ::free(curData);

      // `capacity` excludes the null terminator - the allocation holds capacity + 1 bytes.
      _large.type = kTypeLarge;
      _large.size = size;
      _large.capacity = newCapacity - 1;
      _large.data = newData;

      newData[size] = '\0';
      return newData;
    }
    else {
      _setSize(size);
      curData[size] = '\0';
      return curData;
    }
  }
  else {
    // Append - prevent arithmetic overflow.
    if (ASMJIT_UNLIKELY(size >= kMaxAllocSize - curSize))
      return nullptr;

    size_t newSize = size + curSize;
    size_t newSizePlusOne = newSize + 1;

    if (newSizePlusOne > curCapacity) {
      size_t newCapacity = Support::max<size_t>(curCapacity + 1, kMinAllocSize);

      // Grow geometrically for small strings.
      if (newCapacity < newSizePlusOne && newCapacity < Globals::kGrowThreshold)
        newCapacity = Support::alignUpPowerOf2(newCapacity);

      // NOTE(review): if the power-of-2 growth above didn't reach the required
      // size, the capacity is aligned up to `kGrowThreshold` - verify that
      // `Globals::kGrowThreshold` is small enough for this not to over-allocate
      // dramatically for mid-sized strings.
      if (newCapacity < newSizePlusOne)
        newCapacity = Support::alignUp(newSizePlusOne, Globals::kGrowThreshold);

      if (ASMJIT_UNLIKELY(newCapacity < newSizePlusOne))
        return nullptr;

      char* newData = static_cast<char*>(::malloc(newCapacity));
      if (ASMJIT_UNLIKELY(!newData))
        return nullptr;

      memcpy(newData, curData, curSize);

      // Free the previous buffer only if owned (external buffers stay alive).
      if (_type == kTypeLarge)
        ::free(curData);

      _large.type = kTypeLarge;
      _large.size = newSize;
      _large.capacity = newCapacity - 1;
      _large.data = newData;

      newData[newSize] = '\0';
      return newData + curSize;
    }
    else {
      _setSize(newSize);
      curData[newSize] = '\0';
      return curData + curSize;
    }
  }
}
134
// Assigns `size` characters of `data` to the string, replacing its current
// content. A `size` of SIZE_MAX means `data` is null-terminated and its
// length is computed here. Handles all representation transitions
// (SSO <-> heap) and keeps external buffers when they are large enough.
Error String::assignString(const char* data, size_t size) noexcept {
  char* dst = nullptr;

  // Null terminated string without `size` specified.
  if (size == SIZE_MAX)
    size = data ? strlen(data) : size_t(0);

  if (isLarge()) {
    if (size <= _large.capacity) {
      // The current (heap or external) buffer is large enough - reuse it.
      dst = _large.data;
      _large.size = size;
    }
    else {
      size_t capacityPlusOne = Support::alignUp(size + 1, 32);
      if (ASMJIT_UNLIKELY(capacityPlusOne < size))
        return DebugUtils::errored(kErrorOutOfMemory);

      dst = static_cast<char*>(::malloc(capacityPlusOne));
      if (ASMJIT_UNLIKELY(!dst))
        return DebugUtils::errored(kErrorOutOfMemory);

      // Free the old buffer only when owned - never free an external buffer.
      if (!isExternal())
        ::free(_large.data);

      _large.type = kTypeLarge;
      _large.data = dst;
      _large.size = size;
      _large.capacity = capacityPlusOne - 1;
    }
  }
  else {
    if (size <= kSSOCapacity) {
      ASMJIT_ASSERT(size < 0xFFu);

      // SSO representation - the `type` byte doubles as the string size.
      dst = _small.data;
      _small.type = uint8_t(size);
    }
    else {
      dst = static_cast<char*>(::malloc(size + 1));
      if (ASMJIT_UNLIKELY(!dst))
        return DebugUtils::errored(kErrorOutOfMemory);

      _large.type = kTypeLarge;
      _large.data = dst;
      _large.size = size;
      _large.capacity = size;
    }
  }

  // Optionally copy data from `data` and null-terminate.
  if (data && size) {
    // NOTE: It's better to use `memmove()`. If, for any reason, somebody uses
    // this function to substring the same string it would work as expected.
    ::memmove(dst, data, size);
  }

  dst[size] = '\0';
  return kErrorOk;
}
194
195 // ============================================================================
196 // [asmjit::String - Operations]
197 // ============================================================================
198
199 Error String::_opString(uint32_t op, const char* str, size_t size) noexcept {
200 if (size == SIZE_MAX)
201 size = str ? strlen(str) : size_t(0);
202
203 if (!size)
204 return kErrorOk;
205
206 char* p = prepare(op, size);
207 if (!p) return DebugUtils::errored(kErrorOutOfMemory);
208
209 memcpy(p, str, size);
210 return kErrorOk;
211 }
212
213 Error String::_opChar(uint32_t op, char c) noexcept {
214 char* p = prepare(op, 1);
215 if (!p) return DebugUtils::errored(kErrorOutOfMemory);
216
217 *p = c;
218 return kErrorOk;
219 }
220
221 Error String::_opChars(uint32_t op, char c, size_t n) noexcept {
222 if (!n)
223 return kErrorOk;
224
225 char* p = prepare(op, n);
226 if (!p) return DebugUtils::errored(kErrorOutOfMemory);
227
228 memset(p, c, n);
229 return kErrorOk;
230 }
231
232 Error String::padEnd(size_t n, char c) noexcept {
233 size_t size = this->size();
234 return n > size ? appendChars(c, n - size) : kErrorOk;
235 }
236
// Formats the integer `i` in the given `base` (clamped to 2..36, defaulting
// to 10 when out of range), honoring sign/alternate-form `flags` and padding
// the digits with '0' up to `width` (capped at 256). Digits are produced
// backwards into a stack buffer, then prefix + zero padding + digits are
// copied into the string via `prepare()`.
Error String::_opNumber(uint32_t op, uint64_t i, uint32_t base, size_t width, uint32_t flags) noexcept {
  if (base < 2 || base > 36)
    base = 10;

  char buf[128];
  char* p = buf + ASMJIT_ARRAY_SIZE(buf);

  uint64_t orig = i;
  char sign = '\0';

  // --------------------------------------------------------------------------
  // [Sign]
  // --------------------------------------------------------------------------

  if ((flags & kFormatSigned) != 0 && int64_t(i) < 0) {
    i = uint64_t(-int64_t(i));
    sign = '-';
  }
  else if ((flags & kFormatShowSign) != 0) {
    sign = '+';
  }
  else if ((flags & kFormatShowSpace) != 0) {
    sign = ' ';
  }

  // --------------------------------------------------------------------------
  // [Number]
  // --------------------------------------------------------------------------

  // Emit digits backwards, least significant first.
  do {
    uint64_t d = i / base;
    uint64_t r = i % base;

    *--p = String_baseN[r];
    i = d;
  } while (i);

  size_t numberSize = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p);

  // --------------------------------------------------------------------------
  // [Alternate Form]
  // --------------------------------------------------------------------------

  if ((flags & kFormatAlternate) != 0) {
    if (base == 8) {
      // Octal alternate form: leading '0' (omitted for zero, like printf %#o).
      if (orig != 0)
        *--p = '0';
    }
    if (base == 16) {
      *--p = 'x';
      *--p = '0';
    }
  }

  // --------------------------------------------------------------------------
  // [Width]
  // --------------------------------------------------------------------------

  if (sign != 0)
    *--p = sign;

  if (width > 256)
    width = 256;

  if (width <= numberSize)
    width = 0;
  else
    width -= numberSize;

  // --------------------------------------------------------------------------
  // [Write]
  // --------------------------------------------------------------------------

  // Layout: prefix (sign + alternate form), then `width` zero padding, then digits.
  size_t prefixSize = (size_t)(buf + ASMJIT_ARRAY_SIZE(buf) - p) - numberSize;
  char* data = prepare(op, prefixSize + width + numberSize);

  if (!data)
    return DebugUtils::errored(kErrorOutOfMemory);

  memcpy(data, p, prefixSize);
  data += prefixSize;

  memset(data, '0', width);
  data += width;

  memcpy(data, p + prefixSize, numberSize);
  return kErrorOk;
}
325
// Formats `size` bytes of `data` as uppercase hexadecimal, optionally placing
// `separator` between consecutive bytes ('\0' disables separation). Guards
// the output-size arithmetic (3 or 2 characters per byte) against overflow.
Error String::_opHex(uint32_t op, const void* data, size_t size, char separator) noexcept {
  char* dst;
  const uint8_t* src = static_cast<const uint8_t*>(data);

  if (!size)
    return kErrorOk;

  if (separator) {
    // Output is "XX?XX?...XX" - 3 chars per byte minus the trailing separator.
    if (ASMJIT_UNLIKELY(size >= std::numeric_limits<size_t>::max() / 3))
      return DebugUtils::errored(kErrorOutOfMemory);

    dst = prepare(op, size * 3 - 1);
    if (ASMJIT_UNLIKELY(!dst))
      return DebugUtils::errored(kErrorOutOfMemory);

    size_t i = 0;
    for (;;) {
      dst[0] = String_baseN[(src[0] >> 4) & 0xF];
      dst[1] = String_baseN[(src[0] ) & 0xF];
      if (++i == size)
        break;
      // This makes sure that the separator is only put between two hexadecimal bytes.
      dst[2] = separator;
      dst += 3;
      src++;
    }
  }
  else {
    if (ASMJIT_UNLIKELY(size >= std::numeric_limits<size_t>::max() / 2))
      return DebugUtils::errored(kErrorOutOfMemory);

    dst = prepare(op, size * 2);
    if (ASMJIT_UNLIKELY(!dst))
      return DebugUtils::errored(kErrorOutOfMemory);

    for (size_t i = 0; i < size; i++, dst += 2, src++) {
      dst[0] = String_baseN[(src[0] >> 4) & 0xF];
      dst[1] = String_baseN[(src[0] ) & 0xF];
    }
  }

  return kErrorOk;
}
369
370 Error String::_opFormat(uint32_t op, const char* fmt, ...) noexcept {
371 Error err;
372 va_list ap;
373
374 va_start(ap, fmt);
375 err = _opVFormat(op, fmt, ap);
376 va_end(ap);
377
378 return err;
379 }
380
381 Error String::_opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept {
382 size_t startAt = (op == kOpAssign) ? size_t(0) : size();
383 size_t remainingCapacity = capacity() - startAt;
384
385 char buf[1024];
386 int fmtResult;
387 size_t outputSize;
388
389 if (remainingCapacity >= 128) {
390 fmtResult = vsnprintf(data() + startAt, remainingCapacity, fmt, ap);
391 outputSize = size_t(fmtResult);
392
393 if (ASMJIT_LIKELY(outputSize <= remainingCapacity)) {
394 _setSize(startAt + outputSize);
395 return kErrorOk;
396 }
397 }
398 else {
399 fmtResult = vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf), fmt, ap);
400 outputSize = size_t(fmtResult);
401
402 if (ASMJIT_LIKELY(outputSize < ASMJIT_ARRAY_SIZE(buf)))
403 return _opString(op, buf, outputSize);
404 }
405
406 if (ASMJIT_UNLIKELY(fmtResult < 0))
407 return DebugUtils::errored(kErrorInvalidState);
408
409 char* p = prepare(op, outputSize);
410 if (ASMJIT_UNLIKELY(!p))
411 return DebugUtils::errored(kErrorOutOfMemory);
412
413 fmtResult = vsnprintf(p, outputSize + 1, fmt, ap);
414 ASMJIT_ASSERT(size_t(fmtResult) == outputSize);
415
416 return kErrorOk;
417 }
418
419 Error String::truncate(size_t newSize) noexcept {
420 if (isLarge()) {
421 if (newSize < _large.size) {
422 _large.data[newSize] = '\0';
423 _large.size = newSize;
424 }
425 }
426 else {
427 if (newSize < _type) {
428 _small.data[newSize] = '\0';
429 _small.type = uint8_t(newSize);
430 }
431 }
432
433 return kErrorOk;
434 }
435
436 bool String::eq(const char* other, size_t size) const noexcept {
437 const char* aData = data();
438 const char* bData = other;
439
440 size_t aSize = this->size();
441 size_t bSize = size;
442
443 if (bSize == SIZE_MAX) {
444 size_t i;
445 for (i = 0; i < aSize; i++)
446 if (aData[i] != bData[i] || bData[i] == 0)
447 return false;
448 return bData[i] == 0;
449 }
450 else {
451 if (aSize != bSize)
452 return false;
453 return ::memcmp(aData, bData, aSize) == 0;
454 }
455 }
456
// ============================================================================
// [asmjit::String - Unit]
// ============================================================================
460
461 #if defined(ASMJIT_TEST)
// Unit test covering String's SSO behavior, assign/append operations,
// equality, growth to a large buffer, and StringTmp's external buffer.
UNIT(core_string) {
  String s;

  // A default-constructed string starts in SSO (embedded) mode.
  EXPECT(s.isLarge() == false);
  EXPECT(s.isExternal() == false);

  // Single-character assignment stays in SSO and is null-terminated.
  EXPECT(s.assignChar('a') == kErrorOk);
  EXPECT(s.size() == 1);
  EXPECT(s.capacity() == String::kSSOCapacity);
  EXPECT(s.data()[0] == 'a');
  EXPECT(s.data()[1] == '\0');
  EXPECT(s.eq("a") == true);
  EXPECT(s.eq("a", 1) == true);

  // Repeated-character assignment.
  EXPECT(s.assignChars('b', 4) == kErrorOk);
  EXPECT(s.size() == 4);
  EXPECT(s.capacity() == String::kSSOCapacity);
  EXPECT(s.data()[0] == 'b');
  EXPECT(s.data()[1] == 'b');
  EXPECT(s.data()[2] == 'b');
  EXPECT(s.data()[3] == 'b');
  EXPECT(s.data()[4] == '\0');
  EXPECT(s.eq("bbbb") == true);
  EXPECT(s.eq("bbbb", 4) == true);

  // Null-terminated string assignment.
  EXPECT(s.assignString("abc") == kErrorOk);
  EXPECT(s.size() == 3);
  EXPECT(s.capacity() == String::kSSOCapacity);
  EXPECT(s.data()[0] == 'a');
  EXPECT(s.data()[1] == 'b');
  EXPECT(s.data()[2] == 'c');
  EXPECT(s.data()[3] == '\0');
  EXPECT(s.eq("abc") == true);
  EXPECT(s.eq("abc", 3) == true);

  // Assigning content longer than kSSOCapacity must switch to a large buffer.
  const char* large = "Large string that will not fit into SSO buffer";
  EXPECT(s.assignString(large) == kErrorOk);
  EXPECT(s.isLarge() == true);
  EXPECT(s.size() == strlen(large));
  EXPECT(s.capacity() > String::kSSOCapacity);
  EXPECT(s.eq(large) == true);
  EXPECT(s.eq(large, strlen(large)) == true);

  // Appending to a large string keeps it large and extends the size.
  const char* additional = " (additional content)";
  EXPECT(s.isLarge() == true);
  EXPECT(s.appendString(additional) == kErrorOk);
  EXPECT(s.size() == strlen(large) + strlen(additional));

  EXPECT(s.clear() == kErrorOk);
  EXPECT(s.size() == 0);
  EXPECT(s.empty() == true);
  EXPECT(s.data()[0] == '\0');
  EXPECT(s.isLarge() == true); // Clear should never release the memory.

  // Number formatting via the append API.
  EXPECT(s.appendUInt(1234) == kErrorOk);
  EXPECT(s.eq("1234") == true);

  // StringTmp starts as an external (stack-backed) large string and must
  // switch to an owned heap buffer once its embedded capacity is exceeded.
  StringTmp<64> sTmp;
  EXPECT(sTmp.isLarge());
  EXPECT(sTmp.isExternal());
  EXPECT(sTmp.appendChars(' ', 1000) == kErrorOk);
  EXPECT(!sTmp.isExternal());
}
525 #endif
526
527 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_STRING_H
7 #define _ASMJIT_CORE_STRING_H
8
9 #include "../core/support.h"
10 #include "../core/zone.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \addtogroup asmjit_support
15 //! \{
16
17 // ============================================================================
18 // [asmjit::String]
19 // ============================================================================
20
21 //! A simple non-reference counted string that uses small string optimization (SSO).
22 //!
23 //! This string has 3 allocation possibilities:
24 //!
25 //! 1. Small - embedded buffer is used for up to `kSSOCapacity` characters.
26 //! This should handle most small strings and thus avoid dynamic
27 //! memory allocation for most use-cases.
28 //!
29 //! 2. Large - string that doesn't fit into an embedded buffer (or string
30 //! that was truncated from a larger buffer) and is owned by
31 //! AsmJit. When you destroy the string AsmJit would automatically
32 //! release the large buffer.
33 //!
34 //! 3. External - like Large (2), however, the large buffer is not owned by
35 //! AsmJit and won't be released when the string is destroyed
36 //! or reallocated. This is mostly useful for working with
37 //! larger temporary strings allocated on stack or with immutable
38 //! strings.
class String {
public:
  ASMJIT_NONCOPYABLE(String)

  //! String operation.
  enum Op : uint32_t {
    kOpAssign = 0, //!< Replace the current content.
    kOpAppend = 1 //!< Append to the current content.
  };

  //! String format flags.
  enum FormatFlags : uint32_t {
    kFormatShowSign = 0x00000001u,
    kFormatShowSpace = 0x00000002u,
    kFormatAlternate = 0x00000004u,
    kFormatSigned = 0x80000000u //!< OR'ed in by assignInt/appendInt to format the value as signed.
  };

  //! \cond INTERNAL
  // Constants describing the in-place layout of the string.
  enum : uint32_t {
    kLayoutSize = 32, //!< Size of the whole `String` layout, in bytes.
    kSSOCapacity = kLayoutSize - 2 //!< Maximum characters the embedded (SSO) buffer can hold.
  };

  //! String type.
  //!
  //! Any `_type` value lower than `kTypeLarge` means an SSO (small) string
  //! and directly encodes its current size (see `size()` and `isLarge()`).
  enum Type : uint8_t {
    kTypeLarge = 0x1Fu, //!< Large string (owned by String).
    kTypeExternal = 0x20u //!< External string (zone allocated or not owned by String).
  };

  //! Raw word-wise view of the layout, used for fast copy and reset.
  union Raw {
    uint8_t u8[kLayoutSize];
    uint64_t u64[kLayoutSize / sizeof(uint64_t)];
    uintptr_t uptr[kLayoutSize / sizeof(uintptr_t)];
  };

  //! SSO layout - `type` doubles as the current size (0..kSSOCapacity).
  struct Small {
    uint8_t type;
    char data[kSSOCapacity + 1u];
  };

  //! Large/external layout - explicit size, capacity, and buffer pointer.
  struct Large {
    uint8_t type;
    uint8_t reserved[sizeof(uintptr_t) - 1];
    size_t size;
    size_t capacity;
    char* data;
  };

  // All members alias the same `kLayoutSize` bytes; `_type` selects the
  // active representation.
  union {
    uint8_t _type;
    Raw _raw;
    Small _small;
    Large _large;
  };
  //! \endcond

  //! \name Construction & Destruction
  //! \{

  //! Creates an empty SSO string (zero-initializes the whole layout).
  inline String() noexcept
    : _small {} {}

  //! Move constructor - copies `other`'s raw layout and resets `other`.
  inline String(String&& other) noexcept {
    for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_raw.uptr); i++)
      _raw.uptr[i] = other._raw.uptr[i];
    other._resetInternal();
  }

  inline ~String() noexcept {
    reset();
  }

  //! Reset the string into a construction state.
  ASMJIT_API Error reset() noexcept;

  //! \}

  //! \name Overloaded Operators
  //! \{

  inline bool operator==(const char* other) const noexcept { return eq(other); }
  inline bool operator!=(const char* other) const noexcept { return !eq(other); }

  inline bool operator==(const String& other) const noexcept { return eq(other); }
  inline bool operator!=(const String& other) const noexcept { return !eq(other); }

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the string uses a large or external buffer (not SSO).
  inline bool isLarge() const noexcept { return _type >= kTypeLarge; }
  //! Tests whether the string uses an external (not owned) buffer.
  inline bool isExternal() const noexcept { return _type == kTypeExternal; }

  //! Tests whether the string is empty.
  inline bool empty() const noexcept { return size() == 0; }
  //! Returns the size of the string (SSO strings encode it in `_type`).
  inline size_t size() const noexcept { return isLarge() ? size_t(_large.size) : size_t(_type); }
  //! Returns the capacity in characters, excluding the null terminator.
  inline size_t capacity() const noexcept { return isLarge() ? _large.capacity : size_t(kSSOCapacity); }

  inline char* data() noexcept { return isLarge() ? _large.data : _small.data; }
  inline const char* data() const noexcept { return isLarge() ? _large.data : _small.data; }

  //! Returns a pointer to one past the last character.
  inline char* end() noexcept { return data() + size(); }
  inline const char* end() const noexcept { return data() + size(); }

  //! \}

  //! \name String Operations
  //! \{

  //! Clear the content of the string.
  ASMJIT_API Error clear() noexcept;

  //! Makes room for `size` characters (replacing on kOpAssign, extending on
  //! kOpAppend) and returns the destination to write them to, or nullptr on
  //! allocation failure. The buffer provides one extra byte for the null
  //! terminator (see `_opVFormat()` usage in string.cpp).
  ASMJIT_API char* prepare(uint32_t op, size_t size) noexcept;

  // Backends of the assign/append helpers below; `op` is kOpAssign/kOpAppend.
  ASMJIT_API Error _opString(uint32_t op, const char* str, size_t size = SIZE_MAX) noexcept;
  ASMJIT_API Error _opFormat(uint32_t op, const char* fmt, ...) noexcept;
  ASMJIT_API Error _opVFormat(uint32_t op, const char* fmt, va_list ap) noexcept;
  ASMJIT_API Error _opChar(uint32_t op, char c) noexcept;
  ASMJIT_API Error _opChars(uint32_t op, char c, size_t n) noexcept;
  ASMJIT_API Error _opNumber(uint32_t op, uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept;
  ASMJIT_API Error _opHex(uint32_t op, const void* data, size_t size, char separator = '\0') noexcept;

  //! Replace the string content to a string specified by `data` and `size`. If
  //! `size` is `SIZE_MAX` then it's considered null-terminated and its length
  //! will be obtained through `strlen()`.
  ASMJIT_API Error assignString(const char* data, size_t size = SIZE_MAX) noexcept;

  //! Replace the current content by a formatted string `fmt`.
  template<typename... Args>
  inline Error assignFormat(const char* fmt, Args&&... args) noexcept {
    return _opFormat(kOpAssign, fmt, std::forward<Args>(args)...);
  }

  //! Replace the current content by a formatted string `fmt` (va_list version).
  inline Error assignVFormat(const char* fmt, va_list ap) noexcept {
    return _opVFormat(kOpAssign, fmt, ap);
  }

  //! Replace the current content by a single `c` character.
  inline Error assignChar(char c) noexcept {
    return _opChar(kOpAssign, c);
  }

  //! Replace the current content by `c` character `n` times.
  inline Error assignChars(char c, size_t n) noexcept {
    return _opChars(kOpAssign, c, n);
  }

  //! Replace the current content by a formatted integer `i` (signed).
  inline Error assignInt(int64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept {
    return _opNumber(kOpAssign, uint64_t(i), base, width, flags | kFormatSigned);
  }

  //! Replace the current content by a formatted integer `i` (unsigned).
  inline Error assignUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept {
    return _opNumber(kOpAssign, i, base, width, flags);
  }

  //! Replace the current content by the given `data` converted to a HEX string.
  inline Error assignHex(const void* data, size_t size, char separator = '\0') noexcept {
    return _opHex(kOpAssign, data, size, separator);
  }

  //! Append string `str` of size `size` (or possibly null terminated).
  inline Error appendString(const char* str, size_t size = SIZE_MAX) noexcept {
    return _opString(kOpAppend, str, size);
  }

  //! Append a formatted string `fmt`.
  template<typename... Args>
  inline Error appendFormat(const char* fmt, Args&&... args) noexcept {
    return _opFormat(kOpAppend, fmt, std::forward<Args>(args)...);
  }

  //! Append a formatted string `fmt` (va_list version).
  inline Error appendVFormat(const char* fmt, va_list ap) noexcept {
    return _opVFormat(kOpAppend, fmt, ap);
  }

  //! Append a single `c` character.
  inline Error appendChar(char c) noexcept {
    return _opChar(kOpAppend, c);
  }

  //! Append `c` character `n` times.
  inline Error appendChars(char c, size_t n) noexcept {
    return _opChars(kOpAppend, c, n);
  }

  //! Pads the end of the string with `c` up to size `n`.
  //! NOTE(review): presumably a no-op when already `n` or longer - confirm
  //! against the implementation in string.cpp (not visible here).
  ASMJIT_API Error padEnd(size_t n, char c = ' ') noexcept;

  //! Append `i` formatted as a signed integer.
  inline Error appendInt(int64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept {
    return _opNumber(kOpAppend, uint64_t(i), base, width, flags | kFormatSigned);
  }

  //! Append `i` formatted as an unsigned integer.
  inline Error appendUInt(uint64_t i, uint32_t base = 0, size_t width = 0, uint32_t flags = 0) noexcept {
    return _opNumber(kOpAppend, i, base, width, flags);
  }

  //! Append the given `data` converted to a HEX string.
  inline Error appendHex(const void* data, size_t size, char separator = '\0') noexcept {
    return _opHex(kOpAppend, data, size, separator);
  }

  //! Truncate the string length into `newSize`.
  ASMJIT_API Error truncate(size_t newSize) noexcept;

  //! Tests for equality with `other`; `size == SIZE_MAX` means `other` is
  //! null-terminated.
  ASMJIT_API bool eq(const char* other, size_t size = SIZE_MAX) const noexcept;
  inline bool eq(const String& other) const noexcept { return eq(other.data(), other.size()); }

  //! \}

  //! \name Internal Functions
  //! \{

  //! Resets string to embedded and makes it empty (zero length, zero first char)
  //!
  //! \note This is always called internally after an external buffer was released
  //! as it zeroes all bytes used by String's embedded storage.
  inline void _resetInternal() noexcept {
    for (size_t i = 0; i < ASMJIT_ARRAY_SIZE(_raw.uptr); i++)
      _raw.uptr[i] = 0;
  }

  //! Stores `newSize` in the representation-specific size field; does not
  //! modify the character data.
  inline void _setSize(size_t newSize) noexcept {
    if (isLarge())
      _large.size = newSize;
    else
      _small.type = uint8_t(newSize);
  }

  //! \}
};
274
275 // ============================================================================
276 // [asmjit::StringTmp]
277 // ============================================================================
278
279 //! Temporary string builder, has statically allocated `N` bytes.
template<size_t N>
class StringTmp : public String {
public:
  ASMJIT_NONCOPYABLE(StringTmp<N>)

  //! Embedded data, aligned up so the usable capacity is at least `N`.
  char _embeddedData[Support::alignUp(N + 1, sizeof(size_t))];

  //! \name Construction & Destruction
  //! \{

  //! Creates an empty string backed by the embedded buffer.
  inline StringTmp() noexcept {
    _resetToTemporary();
  }

  //! Points the string at the embedded buffer as an external (not owned)
  //! large string; `capacity` excludes the byte reserved for the terminator.
  //!
  //! NOTE(review): `_large.size` is not written here - this relies on the
  //! base `String()` constructor zero-initializing the layout first; confirm
  //! for any other call sites of this function.
  inline void _resetToTemporary() noexcept {
    _large.type = kTypeExternal;
    _large.capacity = ASMJIT_ARRAY_SIZE(_embeddedData) - 1;
    _large.data = _embeddedData;
    _embeddedData[0] = '\0';
  }

  //! \}
};
304
305 // ============================================================================
306 // [asmjit::FixedString]
307 // ============================================================================
308
309 //! A fixed string - only useful for strings that would never exceed `N - 1`
310 //! characters; always null-terminated.
template<size_t N>
union FixedString {
  // Number of uint32_t words needed to cover at least `N` characters.
  enum : uint32_t {
    kNumU32 = uint32_t((N + sizeof(uint32_t) - 1) / sizeof(uint32_t))
  };

  char str[kNumU32 * sizeof(uint32_t)]; //!< Characters (null-terminated).
  uint32_t u32[kNumU32]; //!< Word view of the same storage.

  //! \name Utilities
  //! \{

  //! Tests whether the stored string equals null-terminated `other`.
  inline bool eq(const char* other) const noexcept {
    return strcmp(str, other) == 0;
  }

  //! \}
};
329
330 //! \}
331
332 ASMJIT_END_NAMESPACE
333
334 #endif // _ASMJIT_CORE_STRING_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/support.h"
8
9 ASMJIT_BEGIN_NAMESPACE
10
11 // ============================================================================
12 // [asmjit::Support - Unit]
13 // ============================================================================
14
15 #if defined(ASMJIT_TEST)
// Compares two arrays of equal length element by element, reporting the
// first mismatching index through EXPECT.
template<typename T>
static void testArrays(const T* a, const T* b, size_t size) noexcept {
  size_t i = 0;
  while (i < size) {
    EXPECT(a[i] == b[i], "Mismatch at %u", unsigned(i));
    i++;
  }
}
21
// Unit tests for the alignment helpers: isAligned, alignUp, alignUpDiff,
// and alignUpPowerOf2.
static void testAlignment() noexcept {
  INFO("Support::isAligned()");
  EXPECT(Support::isAligned<size_t>(0xFFFF, 4) == false);
  EXPECT(Support::isAligned<size_t>(0xFFF4, 4) == true);
  EXPECT(Support::isAligned<size_t>(0xFFF8, 8) == true);
  EXPECT(Support::isAligned<size_t>(0xFFF0, 16) == true);

  INFO("Support::alignUp()");
  // Values already aligned must be returned unchanged.
  EXPECT(Support::alignUp<size_t>(0xFFFF, 4) == 0x10000);
  EXPECT(Support::alignUp<size_t>(0xFFF4, 4) == 0x0FFF4);
  EXPECT(Support::alignUp<size_t>(0xFFF8, 8) == 0x0FFF8);
  EXPECT(Support::alignUp<size_t>(0xFFF0, 16) == 0x0FFF0);
  EXPECT(Support::alignUp<size_t>(0xFFF0, 32) == 0x10000);

  INFO("Support::alignUpDiff()");
  // Distance from the value to the next aligned value (0 when aligned).
  EXPECT(Support::alignUpDiff<size_t>(0xFFFF, 4) == 1);
  EXPECT(Support::alignUpDiff<size_t>(0xFFF4, 4) == 0);
  EXPECT(Support::alignUpDiff<size_t>(0xFFF8, 8) == 0);
  EXPECT(Support::alignUpDiff<size_t>(0xFFF0, 16) == 0);
  EXPECT(Support::alignUpDiff<size_t>(0xFFF0, 32) == 16);

  INFO("Support::alignUpPowerOf2()");
  // Rounds up to the nearest power of two (power-of-two inputs unchanged).
  EXPECT(Support::alignUpPowerOf2<size_t>(0x0000) == 0x00000);
  EXPECT(Support::alignUpPowerOf2<size_t>(0xFFFF) == 0x10000);
  EXPECT(Support::alignUpPowerOf2<size_t>(0xF123) == 0x10000);
  EXPECT(Support::alignUpPowerOf2<size_t>(0x0F00) == 0x01000);
  EXPECT(Support::alignUpPowerOf2<size_t>(0x0100) == 0x00100);
  EXPECT(Support::alignUpPowerOf2<size_t>(0x1001) == 0x02000);
}
51
// Unit tests for bit manipulation helpers: shifts, lowest-set-bit isolation,
// trailing-zero count, mask construction, bit testing, popcount, and the
// power-of-two predicate.
static void testBitUtils() noexcept {
  uint32_t i;

  INFO("Support::shl() / shr()");
  // shr() is a logical shift even for signed inputs; sar() is arithmetic.
  EXPECT(Support::shl(int32_t(0x00001111), 16) == int32_t(0x11110000u));
  EXPECT(Support::shl(uint32_t(0x00001111), 16) == uint32_t(0x11110000u));
  EXPECT(Support::shr(int32_t(0x11110000u), 16) == int32_t(0x00001111u));
  EXPECT(Support::shr(uint32_t(0x11110000u), 16) == uint32_t(0x00001111u));
  EXPECT(Support::sar(int32_t(0xFFFF0000u), 16) == int32_t(0xFFFFFFFFu));
  EXPECT(Support::sar(uint32_t(0xFFFF0000u), 16) == uint32_t(0xFFFFFFFFu));

  INFO("Support::blsi()");
  // blsi() isolates the lowest set bit (like the x86 BLSI instruction).
  for (i = 0; i < 32; i++) EXPECT(Support::blsi(uint32_t(1) << i) == uint32_t(1) << i);
  for (i = 0; i < 31; i++) EXPECT(Support::blsi(uint32_t(3) << i) == uint32_t(1) << i);
  for (i = 0; i < 64; i++) EXPECT(Support::blsi(uint64_t(1) << i) == uint64_t(1) << i);
  for (i = 0; i < 63; i++) EXPECT(Support::blsi(uint64_t(3) << i) == uint64_t(1) << i);

  INFO("Support::ctz()");
  // Count of trailing zeros; constCtz is the constexpr-friendly variant.
  for (i = 0; i < 32; i++) EXPECT(Support::ctz(uint32_t(1) << i) == i);
  for (i = 0; i < 64; i++) EXPECT(Support::ctz(uint64_t(1) << i) == i);
  for (i = 0; i < 32; i++) EXPECT(Support::constCtz(uint32_t(1) << i) == i);
  for (i = 0; i < 64; i++) EXPECT(Support::constCtz(uint64_t(1) << i) == i);

  INFO("Support::bitMask()");
  // bitMask(a, b, ...) builds a mask with the given bit indexes set.
  EXPECT(Support::bitMask(0, 1, 7) == 0x83u);
  for (i = 0; i < 32; i++)
    EXPECT(Support::bitMask(i) == (1u << i));

  INFO("Support::bitTest()");
  for (i = 0; i < 32; i++) {
    EXPECT(Support::bitTest((1 << i), i) == true, "Support::bitTest(%X, %u) should return true", (1 << i), i);
  }

  INFO("Support::lsbMask()");
  // lsbMask(n) sets the lowest `n` bits.
  for (i = 0; i < 32; i++) {
    uint32_t expectedBits = 0;
    for (uint32_t b = 0; b < i; b++)
      expectedBits |= uint32_t(1) << b;
    EXPECT(Support::lsbMask<uint32_t>(i) == expectedBits);
  }

  INFO("Support::popcnt()");
  for (i = 0; i < 32; i++) EXPECT(Support::popcnt((uint32_t(1) << i)) == 1);
  for (i = 0; i < 64; i++) EXPECT(Support::popcnt((uint64_t(1) << i)) == 1);
  EXPECT(Support::popcnt(0x000000F0) == 4);
  EXPECT(Support::popcnt(0x10101010) == 4);
  EXPECT(Support::popcnt(0xFF000000) == 8);
  EXPECT(Support::popcnt(0xFFFFFFF7) == 31);
  EXPECT(Support::popcnt(0x7FFFFFFF) == 31);

  INFO("Support::isPowerOf2()");
  for (i = 0; i < 64; i++) {
    EXPECT(Support::isPowerOf2(uint64_t(1) << i) == true);
    EXPECT(Support::isPowerOf2((uint64_t(1) << i) ^ 0x001101) == false);
  }
}
108
// Unit tests for integer utilities: byte swapping, byte packing, inclusive
// range checks, and the value-fits-in-N-bits predicates.
static void testIntUtils() noexcept {
  INFO("Support::byteswap()");
  EXPECT(Support::byteswap32(int32_t(0x01020304)) == int32_t(0x04030201));
  EXPECT(Support::byteswap32(uint32_t(0x01020304)) == uint32_t(0x04030201));

  INFO("Support::bytepack()");
  // Verified through a union: the first argument lands at the lowest address.
  union BytePackData {
    uint8_t bytes[4];
    uint32_t u32;
  } bpdata;

  bpdata.u32 = Support::bytepack32_4x8(0x00, 0x11, 0x22, 0x33);
  EXPECT(bpdata.bytes[0] == 0x00);
  EXPECT(bpdata.bytes[1] == 0x11);
  EXPECT(bpdata.bytes[2] == 0x22);
  EXPECT(bpdata.bytes[3] == 0x33);

  INFO("Support::isBetween()");
  // Both bounds are inclusive.
  EXPECT(Support::isBetween<int>(10 , 10, 20) == true);
  EXPECT(Support::isBetween<int>(11 , 10, 20) == true);
  EXPECT(Support::isBetween<int>(20 , 10, 20) == true);
  EXPECT(Support::isBetween<int>(9 , 10, 20) == false);
  EXPECT(Support::isBetween<int>(21 , 10, 20) == false);
  EXPECT(Support::isBetween<int>(101, 10, 20) == false);

  INFO("Support::isInt8()");
  EXPECT(Support::isInt8(-128) == true);
  EXPECT(Support::isInt8( 127) == true);
  EXPECT(Support::isInt8(-129) == false);
  EXPECT(Support::isInt8( 128) == false);

  INFO("Support::isInt16()");
  EXPECT(Support::isInt16(-32768) == true);
  EXPECT(Support::isInt16( 32767) == true);
  EXPECT(Support::isInt16(-32769) == false);
  EXPECT(Support::isInt16( 32768) == false);

  INFO("Support::isInt32()");
  EXPECT(Support::isInt32( 2147483647 ) == true);
  EXPECT(Support::isInt32(-2147483647 - 1) == true);
  EXPECT(Support::isInt32(uint64_t(2147483648u)) == false);
  EXPECT(Support::isInt32(uint64_t(0xFFFFFFFFu)) == false);
  EXPECT(Support::isInt32(uint64_t(0xFFFFFFFFu) + 1) == false);

  INFO("Support::isUInt8()");
  EXPECT(Support::isUInt8(0) == true);
  EXPECT(Support::isUInt8(255) == true);
  EXPECT(Support::isUInt8(256) == false);
  EXPECT(Support::isUInt8(-1) == false);

  INFO("Support::isUInt12()");
  EXPECT(Support::isUInt12(0) == true);
  EXPECT(Support::isUInt12(4095) == true);
  EXPECT(Support::isUInt12(4096) == false);
  EXPECT(Support::isUInt12(-1) == false);

  INFO("Support::isUInt16()");
  EXPECT(Support::isUInt16(0) == true);
  EXPECT(Support::isUInt16(65535) == true);
  EXPECT(Support::isUInt16(65536) == false);
  EXPECT(Support::isUInt16(-1) == false);

  INFO("Support::isUInt32()");
  EXPECT(Support::isUInt32(uint64_t(0xFFFFFFFF)) == true);
  EXPECT(Support::isUInt32(uint64_t(0xFFFFFFFF) + 1) == false);
  EXPECT(Support::isUInt32(-1) == false);
}
176
// Unit tests for endian-aware memory accessors. The `u` suffix in the names
// presumably means unaligned access (reads/writes at odd offsets below
// exercise exactly that) - confirm against the declarations in support.h.
static void testReadWrite() noexcept {
  INFO("Support::readX() / writeX()");

  uint8_t arr[32] = { 0 };

  // Two big-endian 16-bit writes compose into one 32-bit value.
  Support::writeU16uBE(arr + 1, 0x0102u);
  Support::writeU16uBE(arr + 3, 0x0304u);
  EXPECT(Support::readU32uBE(arr + 1) == 0x01020304u);
  EXPECT(Support::readU32uLE(arr + 1) == 0x04030201u);
  EXPECT(Support::readU32uBE(arr + 2) == 0x02030400u);
  EXPECT(Support::readU32uLE(arr + 2) == 0x00040302u);

  Support::writeU32uLE(arr + 5, 0x05060708u);
  EXPECT(Support::readU64uBE(arr + 1) == 0x0102030408070605u);
  EXPECT(Support::readU64uLE(arr + 1) == 0x0506070804030201u);

  Support::writeU64uLE(arr + 7, 0x1122334455667788u);
  EXPECT(Support::readU32uBE(arr + 8) == 0x77665544u);
}
196
197 static void testBitVector() noexcept {
198 INFO("Support::bitVectorOp");
199 {
200 uint32_t vec[3] = { 0 };
201 Support::bitVectorFill(vec, 1, 64);
202 EXPECT(vec[0] == 0xFFFFFFFEu);
203 EXPECT(vec[1] == 0xFFFFFFFFu);
204 EXPECT(vec[2] == 0x00000001u);
205
206 Support::bitVectorClear(vec, 1, 1);
207 EXPECT(vec[0] == 0xFFFFFFFCu);
208 EXPECT(vec[1] == 0xFFFFFFFFu);
209 EXPECT(vec[2] == 0x00000001u);
210
211 Support::bitVectorFill(vec, 0, 32);
212 EXPECT(vec[0] == 0xFFFFFFFFu);
213 EXPECT(vec[1] == 0xFFFFFFFFu);
214 EXPECT(vec[2] == 0x00000001u);
215
216 Support::bitVectorClear(vec, 0, 32);
217 EXPECT(vec[0] == 0x00000000u);
218 EXPECT(vec[1] == 0xFFFFFFFFu);
219 EXPECT(vec[2] == 0x00000001u);
220
221 Support::bitVectorFill(vec, 1, 30);
222 EXPECT(vec[0] == 0x7FFFFFFEu);
223 EXPECT(vec[1] == 0xFFFFFFFFu);
224 EXPECT(vec[2] == 0x00000001u);
225
226 Support::bitVectorClear(vec, 1, 95);
227 EXPECT(vec[0] == 0x00000000u);
228 EXPECT(vec[1] == 0x00000000u);
229 EXPECT(vec[2] == 0x00000000u);
230
231 Support::bitVectorFill(vec, 32, 64);
232 EXPECT(vec[0] == 0x00000000u);
233 EXPECT(vec[1] == 0xFFFFFFFFu);
234 EXPECT(vec[2] == 0xFFFFFFFFu);
235
236 Support::bitVectorSetBit(vec, 1, true);
237 EXPECT(vec[0] == 0x00000002u);
238 EXPECT(vec[1] == 0xFFFFFFFFu);
239 EXPECT(vec[2] == 0xFFFFFFFFu);
240
241 Support::bitVectorSetBit(vec, 95, false);
242 EXPECT(vec[0] == 0x00000002u);
243 EXPECT(vec[1] == 0xFFFFFFFFu);
244 EXPECT(vec[2] == 0x7FFFFFFFu);
245
246 Support::bitVectorClear(vec, 33, 32);
247 EXPECT(vec[0] == 0x00000002u);
248 EXPECT(vec[1] == 0x00000001u);
249 EXPECT(vec[2] == 0x7FFFFFFEu);
250 }
251
252 INFO("Support::bitVectorIndexOf");
253 {
254 uint32_t vec1[1] = { 0x80000000 };
255 EXPECT(Support::bitVectorIndexOf(vec1, 0, true) == 31);
256 EXPECT(Support::bitVectorIndexOf(vec1, 1, true) == 31);
257 EXPECT(Support::bitVectorIndexOf(vec1, 31, true) == 31);
258
259 uint32_t vec2[2] = { 0x00000000, 0x80000000 };
260 EXPECT(Support::bitVectorIndexOf(vec2, 0, true) == 63);
261 EXPECT(Support::bitVectorIndexOf(vec2, 1, true) == 63);
262 EXPECT(Support::bitVectorIndexOf(vec2, 31, true) == 63);
263 EXPECT(Support::bitVectorIndexOf(vec2, 32, true) == 63);
264 EXPECT(Support::bitVectorIndexOf(vec2, 33, true) == 63);
265 EXPECT(Support::bitVectorIndexOf(vec2, 63, true) == 63);
266
267 uint32_t vec3[3] = { 0x00000001, 0x00000000, 0x80000000 };
268 EXPECT(Support::bitVectorIndexOf(vec3, 0, true) == 0);
269 EXPECT(Support::bitVectorIndexOf(vec3, 1, true) == 95);
270 EXPECT(Support::bitVectorIndexOf(vec3, 2, true) == 95);
271 EXPECT(Support::bitVectorIndexOf(vec3, 31, true) == 95);
272 EXPECT(Support::bitVectorIndexOf(vec3, 32, true) == 95);
273 EXPECT(Support::bitVectorIndexOf(vec3, 63, true) == 95);
274 EXPECT(Support::bitVectorIndexOf(vec3, 64, true) == 95);
275 EXPECT(Support::bitVectorIndexOf(vec3, 95, true) == 95);
276
277 uint32_t vec4[3] = { ~vec3[0], ~vec3[1], ~vec3[2] };
278 EXPECT(Support::bitVectorIndexOf(vec4, 0, false) == 0);
279 EXPECT(Support::bitVectorIndexOf(vec4, 1, false) == 95);
280 EXPECT(Support::bitVectorIndexOf(vec4, 2, false) == 95);
281 EXPECT(Support::bitVectorIndexOf(vec4, 31, false) == 95);
282 EXPECT(Support::bitVectorIndexOf(vec4, 32, false) == 95);
283 EXPECT(Support::bitVectorIndexOf(vec4, 63, false) == 95);
284 EXPECT(Support::bitVectorIndexOf(vec4, 64, false) == 95);
285 EXPECT(Support::bitVectorIndexOf(vec4, 95, false) == 95);
286 }
287
288 INFO("Support::BitWordIterator<uint32_t>");
289 {
290 Support::BitWordIterator<uint32_t> it(0x80000F01u);
291 EXPECT(it.hasNext());
292 EXPECT(it.next() == 0);
293 EXPECT(it.hasNext());
294 EXPECT(it.next() == 8);
295 EXPECT(it.hasNext());
296 EXPECT(it.next() == 9);
297 EXPECT(it.hasNext());
298 EXPECT(it.next() == 10);
299 EXPECT(it.hasNext());
300 EXPECT(it.next() == 11);
301 EXPECT(it.hasNext());
302 EXPECT(it.next() == 31);
303 EXPECT(!it.hasNext());
304
305 // No bits set.
306 it.init(0x00000000u);
307 ASMJIT_ASSERT(!it.hasNext());
308
309 // Only first bit set.
310 it.init(0x00000001u);
311 EXPECT(it.hasNext());
312 EXPECT(it.next() == 0);
313 ASMJIT_ASSERT(!it.hasNext());
314
315 // Only last bit set (special case).
316 it.init(0x80000000u);
317 ASMJIT_ASSERT(it.hasNext());
318 ASMJIT_ASSERT(it.next() == 31);
319 ASMJIT_ASSERT(!it.hasNext());
320 }
321
322 INFO("Support::BitWordIterator<uint64_t>");
323 {
324 Support::BitWordIterator<uint64_t> it(uint64_t(1) << 63);
325 ASMJIT_ASSERT(it.hasNext());
326 ASMJIT_ASSERT(it.next() == 63);
327 ASMJIT_ASSERT(!it.hasNext());
328 }
329
330 INFO("Support::BitVectorIterator<uint32_t>");
331 {
332 // Border cases.
333 static const uint32_t bitsNone[] = { 0xFFFFFFFFu };
334 Support::BitVectorIterator<uint32_t> it(bitsNone, 0);
335
336 EXPECT(!it.hasNext());
337 it.init(bitsNone, 0, 1);
338 EXPECT(!it.hasNext());
339 it.init(bitsNone, 0, 128);
340 EXPECT(!it.hasNext());
341
342 static const uint32_t bits1[] = { 0x80000008u, 0x80000001u, 0x00000000u, 0x80000000u, 0x00000000u, 0x00000000u, 0x00003000u };
343 it.init(bits1, ASMJIT_ARRAY_SIZE(bits1));
344
345 EXPECT(it.hasNext());
346 EXPECT(it.next() == 3);
347 EXPECT(it.hasNext());
348 EXPECT(it.next() == 31);
349 EXPECT(it.hasNext());
350 EXPECT(it.next() == 32);
351 EXPECT(it.hasNext());
352 EXPECT(it.next() == 63);
353 EXPECT(it.hasNext());
354 EXPECT(it.next() == 127);
355 EXPECT(it.hasNext());
356 EXPECT(it.next() == 204);
357 EXPECT(it.hasNext());
358 EXPECT(it.next() == 205);
359 EXPECT(!it.hasNext());
360
361 it.init(bits1, ASMJIT_ARRAY_SIZE(bits1), 4);
362 EXPECT(it.hasNext());
363 EXPECT(it.next() == 31);
364
365 it.init(bits1, ASMJIT_ARRAY_SIZE(bits1), 64);
366 EXPECT(it.hasNext());
367 EXPECT(it.next() == 127);
368
369 it.init(bits1, ASMJIT_ARRAY_SIZE(bits1), 127);
370 EXPECT(it.hasNext());
371 EXPECT(it.next() == 127);
372
373 static const uint32_t bits2[] = { 0x80000000u, 0x80000000u, 0x00000000u, 0x80000000u };
374 it.init(bits2, ASMJIT_ARRAY_SIZE(bits2));
375
376 EXPECT(it.hasNext());
377 EXPECT(it.next() == 31);
378 EXPECT(it.hasNext());
379 EXPECT(it.next() == 63);
380 EXPECT(it.hasNext());
381 EXPECT(it.next() == 127);
382 EXPECT(!it.hasNext());
383
384 static const uint32_t bits3[] = { 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u };
385 it.init(bits3, ASMJIT_ARRAY_SIZE(bits3));
386 EXPECT(!it.hasNext());
387
388 static const uint32_t bits4[] = { 0x00000000u, 0x00000000u, 0x00000000u, 0x80000000u };
389 it.init(bits4, ASMJIT_ARRAY_SIZE(bits4));
390 EXPECT(it.hasNext());
391 EXPECT(it.next() == 127);
392 EXPECT(!it.hasNext());
393 }
394
395 INFO("Support::BitVectorIterator<uint64_t>");
396 {
397 static const uint64_t bits1[] = { 0x80000000u, 0x80000000u, 0x00000000u, 0x80000000u };
398 Support::BitVectorIterator<uint64_t> it(bits1, ASMJIT_ARRAY_SIZE(bits1));
399
400 EXPECT(it.hasNext());
401 EXPECT(it.next() == 31);
402 EXPECT(it.hasNext());
403 EXPECT(it.next() == 95);
404 EXPECT(it.hasNext());
405 EXPECT(it.next() == 223);
406 EXPECT(!it.hasNext());
407
408 static const uint64_t bits2[] = { 0x8000000000000000u, 0, 0, 0 };
409 it.init(bits2, ASMJIT_ARRAY_SIZE(bits2));
410
411 EXPECT(it.hasNext());
412 EXPECT(it.next() == 63);
413 EXPECT(!it.hasNext());
414 }
415 }
416
// Unit tests for iSort (insertion sort) and qSort (quick sort) - both must
// produce identical, fully sorted output for the same input.
static void testSorting() noexcept {
  INFO("Support::qSort() - Testing qsort and isort of predefined arrays");
  {
    constexpr size_t kArraySize = 11;

    int ref_[kArraySize] = { -4, -2, -1, 0, 1, 9, 12, 13, 14, 19, 22 };
    int arr1[kArraySize] = { 0, 1, -1, 19, 22, 14, -4, 9, 12, 13, -2 };
    int arr2[kArraySize];

    memcpy(arr2, arr1, kArraySize * sizeof(int));

    Support::iSort(arr1, kArraySize);
    Support::qSort(arr2, kArraySize);
    testArrays(arr1, ref_, kArraySize);
    testArrays(arr2, ref_, kArraySize);
  }

  INFO("Support::qSort() - Testing qsort and isort of artificial arrays");
  {
    constexpr size_t kArraySize = 200;

    int arr1[kArraySize];
    int arr2[kArraySize];
    int ref_[kArraySize];

    // Reverse-ordered inputs of every length from 2 to kArraySize-1.
    for (size_t size = 2; size < kArraySize; size++) {
      for (size_t i = 0; i < size; i++) {
        arr1[i] = int(size - 1 - i);
        arr2[i] = int(size - 1 - i);
        ref_[i] = int(i);
      }

      Support::iSort(arr1, size);
      Support::qSort(arr2, size);
      testArrays(arr1, ref_, size);
      testArrays(arr2, ref_, size);
    }
  }

  INFO("Support::qSort() - Testing qsort and isort with an unstable compare function");
  {
    constexpr size_t kArraySize = 5;

    float arr1[kArraySize] = { 1.0f, 0.0f, 3.0f, -1.0f, std::numeric_limits<float>::quiet_NaN() };
    float arr2[kArraySize] = { };

    memcpy(arr2, arr1, kArraySize * sizeof(float));

    // We don't test as it's undefined where the NaN would be.
    // This only verifies that sorting with NaN doesn't crash or hang.
    Support::iSort(arr1, kArraySize);
    Support::qSort(arr2, kArraySize);
  }
}
470
// Entry point of the `support` unit test - runs all sub-test groups.
UNIT(support) {
  testAlignment();
  testBitUtils();
  testIntUtils();
  testReadWrite();
  testBitVector();
  testSorting();
}
479 #endif
480
481 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_SUPPORT_H
7 #define _ASMJIT_CORE_SUPPORT_H
8
9 #include "../core/globals.h"
10
11 #if defined(_MSC_VER)
12 #include <intrin.h>
13 #endif
14
15 ASMJIT_BEGIN_NAMESPACE
16
17 //! \addtogroup asmjit_support
18 //! \{
19
20 //! Contains support classes and functions that may be used by AsmJit source
21 //! and header files. Anything defined here is considered internal and should
22 //! not be used outside of AsmJit and related projects like AsmTK.
23 namespace Support {
24
25 // ============================================================================
26 // [asmjit::Support - Architecture Features & Constraints]
27 // ============================================================================
28
29 //! \cond INTERNAL
30 static constexpr bool kUnalignedAccess16 = ASMJIT_ARCH_X86 != 0;
31 static constexpr bool kUnalignedAccess32 = ASMJIT_ARCH_X86 != 0;
32 static constexpr bool kUnalignedAccess64 = ASMJIT_ARCH_X86 != 0;
33 //! \endcond
34
35 // ============================================================================
36 // [asmjit::Support - Internal]
37 // ============================================================================
38
39 //! \cond INTERNAL
40 namespace Internal {
41 template<typename T, size_t Alignment>
42 struct AlignedInt {};
43
44 template<> struct AlignedInt<uint16_t, 1> { typedef uint16_t ASMJIT_ALIGN_TYPE(T, 1); };
45 template<> struct AlignedInt<uint16_t, 2> { typedef uint16_t T; };
46 template<> struct AlignedInt<uint32_t, 1> { typedef uint32_t ASMJIT_ALIGN_TYPE(T, 1); };
47 template<> struct AlignedInt<uint32_t, 2> { typedef uint32_t ASMJIT_ALIGN_TYPE(T, 2); };
48 template<> struct AlignedInt<uint32_t, 4> { typedef uint32_t T; };
49 template<> struct AlignedInt<uint64_t, 1> { typedef uint64_t ASMJIT_ALIGN_TYPE(T, 1); };
50 template<> struct AlignedInt<uint64_t, 2> { typedef uint64_t ASMJIT_ALIGN_TYPE(T, 2); };
51 template<> struct AlignedInt<uint64_t, 4> { typedef uint64_t ASMJIT_ALIGN_TYPE(T, 4); };
52 template<> struct AlignedInt<uint64_t, 8> { typedef uint64_t T; };
53
54 // IntBySize - Make an int-type by size (signed or unsigned) that is the
55 // same as types defined by <stdint.h>.
56 // Int32Or64 - Make an int-type that has at least 32 bits: [u]int[32|64]_t.
57
58 template<size_t SIZE, int IS_SIGNED>
59 struct IntBySize {}; // Fail if not specialized.
60
61 template<> struct IntBySize<1, 0> { typedef uint8_t Type; };
62 template<> struct IntBySize<1, 1> { typedef int8_t Type; };
63 template<> struct IntBySize<2, 0> { typedef uint16_t Type; };
64 template<> struct IntBySize<2, 1> { typedef int16_t Type; };
65 template<> struct IntBySize<4, 0> { typedef uint32_t Type; };
66 template<> struct IntBySize<4, 1> { typedef int32_t Type; };
67 template<> struct IntBySize<8, 0> { typedef uint64_t Type; };
68 template<> struct IntBySize<8, 1> { typedef int64_t Type; };
69
70 template<typename T, int IS_SIGNED = std::is_signed<T>::value>
71 struct Int32Or64 : public IntBySize<sizeof(T) <= 4 ? size_t(4) : sizeof(T), IS_SIGNED> {};
72 }
73 //! \endcond
74
75 // ============================================================================
76 // [asmjit::Support - FastUInt8]
77 // ============================================================================
78
#if ASMJIT_ARCH_X86
// X86 handles sub-word operands natively, so a true 8-bit type is fine.
typedef uint8_t FastUInt8;
#else
// On other architectures a full register width is typically cheaper to
// manipulate than a sub-word type, so FastUInt8 is intentionally wider
// than 8 bits (it only promises to hold values 0..255 efficiently).
typedef unsigned int FastUInt8;
#endif
84
85 // ============================================================================
86 // [asmjit::Support - IntBySize / Int32Or64]
87 // ============================================================================
88
89 //! Casts an integer `x` to either `int32_t` or `int64_t` depending on `T`.
90 template<typename T>
91 static constexpr typename Internal::Int32Or64<T, 1>::Type asInt(T x) noexcept { return (typename Internal::Int32Or64<T, 1>::Type)x; }
92
93 //! Casts an integer `x` to either `uint32_t` or `uint64_t` depending on `T`.
94 template<typename T>
95 static constexpr typename Internal::Int32Or64<T, 0>::Type asUInt(T x) noexcept { return (typename Internal::Int32Or64<T, 0>::Type)x; }
96
97 //! Casts an integer `x` to either `int32_t`, uint32_t`, `int64_t`, or `uint64_t` depending on `T`.
98 template<typename T>
99 static constexpr typename Internal::Int32Or64<T>::Type asNormalized(T x) noexcept { return (typename Internal::Int32Or64<T>::Type)x; }
100
101 // ============================================================================
102 // [asmjit::Support - BitCast]
103 // ============================================================================
104
//! \cond
namespace Internal {
// Union-based bit-cast helper: write `src`, read `dst`. Reading a union
// member other than the last one written is type-punning; this is the
// common pre-C++20 idiom (there is no `std::bit_cast` here) that the
// supported compilers handle as expected.
template<typename DstT, typename SrcT>
union BitCastUnion {
  ASMJIT_INLINE BitCastUnion(SrcT src) noexcept : src(src) {}
  SrcT src;
  DstT dst;
};
}
//! \endcond

//! Bit-casts from `Src` type to `Dst` type.
//!
//! Useful to bit-cast between integers and floating points. `Src` and `Dst`
//! should have the same size, otherwise the unread bytes are unspecified.
template<typename Dst, typename Src>
static inline Dst bitCast(const Src& x) noexcept { return Internal::BitCastUnion<Dst, Src>(x).dst; }
121
122 // ============================================================================
123 // [asmjit::Support - BitOps]
124 // ============================================================================
125
126 //! Storage used to store a pack of bits (should by compatible with a machine word).
127 typedef Internal::IntBySize<sizeof(uintptr_t), 0>::Type BitWord;
128
129 template<typename T>
130 static constexpr uint32_t bitSizeOf() noexcept { return uint32_t(sizeof(T) * 8u); }
131
132 //! Number of bits stored in a single `BitWord`.
133 static constexpr uint32_t kBitWordSizeInBits = bitSizeOf<BitWord>();
134
//! Negates `x` without risking signed-overflow UB: the two's complement
//! (`~x + 1`) is computed in the matching unsigned type and cast back.
//! Works for unsigned operands as well (modular negation).
template<typename T>
static constexpr T neg(const T& x) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return T(U(U(~U(x)) + U(1u)));
}
141
142 template<typename T>
143 static constexpr T allOnes() noexcept { return neg<T>(T(1)); }
144
//! Logical left shift: `x << y`. The shift is performed on the unsigned
//! counterpart of `X` so that shifting a signed value cannot overflow.
template<typename X, typename Y>
static constexpr X shl(const X& x, const Y& y) noexcept {
  using Unsigned = typename std::make_unsigned<X>::type;
  return X(Unsigned(x) << y);
}
151
//! Logical right shift: `x >> y` with zero-fill. The operand is first cast
//! to its unsigned counterpart so no sign extension can occur.
template<typename X, typename Y>
static constexpr X shr(const X& x, const Y& y) noexcept {
  using Unsigned = typename std::make_unsigned<X>::type;
  return X(Unsigned(x) >> y);
}
158
//! Returns `x >> y` (shift right arithmetic) by explicitly casting `x` to a signed type and back.
//!
//! \note Right-shifting a negative signed value is implementation-defined
//! prior to C++20. This helper relies on the arithmetic shift (sign-bit
//! replication) that mainstream compilers implement — presumably safe for
//! all toolchains AsmJit targets, but worth confirming on exotic ones.
template<typename X, typename Y>
static constexpr X sar(const X& x, const Y& y) noexcept {
  typedef typename std::make_signed<X>::type S;
  return X(S(x) >> y);
}
165
166 //! Returns `x | (x >> y)` - helper used by some bit manipulation helpers.
167 template<typename X, typename Y>
168 static constexpr X or_shr(const X& x, const Y& y) noexcept { return X(x | shr(x, y)); }
169
//! Isolates the lowest set bit of `x` - the classic `x & -x` trick (what the
//! BLSI instruction computes). The negation is done as `~x + 1` in the
//! unsigned domain to stay free of signed-overflow UB. Returns 0 for 0.
template<typename T>
static constexpr T blsi(T x) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return T(U(x) & U(U(~U(x)) + U(1u)));
}
176
//! Generate a trailing bit-mask that has `n` least significant (trailing) bits set.
//!
//! Valid for `0 <= n <= bitSizeOf<T>()`; the `n == bitSizeOf<T>()` case is
//! handled explicitly because `1 << bitSizeOf<T>()` would be UB.
template<typename T, typename CountT>
static constexpr T lsbMask(CountT n) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return (sizeof(U) < sizeof(uintptr_t))
    // Small types: the shift is widened to `uintptr_t`, so even
    // `n == bitSizeOf<T>()` cannot overshift the shifted type.
    ? T(U((uintptr_t(1) << n) - uintptr_t(1)))
    // Shifting more bits than the type provides is UNDEFINED BEHAVIOR.
    // In such case we trash the result by ORing it with a mask that has
    // all bits set and discards the UNDEFINED RESULT of the shift.
    : T(((U(1) << n) - U(1u)) | neg(U(n >= CountT(bitSizeOf<T>()))));
}
188
//! Tests whether bit `n` of `x` is set. The test shifts `x` (in its
//! unsigned form) down by `n` and inspects the least significant bit,
//! which is equivalent to masking with `1 << n`.
template<typename T, typename IndexT>
static constexpr bool bitTest(T x, IndexT n) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return ((U(x) >> n) & U(1)) != 0;
}
195
//! Returns a bit-mask that has `x` bit set.
//!
//! \note The result is always `uint32_t`; `x` must be less than 32,
//! otherwise `1u << x` is undefined behavior (no guard is performed here).
template<typename T>
static constexpr uint32_t bitMask(T x) noexcept { return (1u << x); }

//! Returns a bit-mask that has `x` bit set (multiple arguments).
//!
//! Recursively ORs the single-bit masks of all arguments together.
template<typename T, typename... Args>
static constexpr uint32_t bitMask(T x, Args... args) noexcept { return bitMask(x) | bitMask(args...); }
203
//! Converts a boolean value `b` to zero or full mask (all bits set).
//!
//! Implemented as arithmetic negation (`~b + 1` in the unsigned domain)
//! rather than a branch, so non-canonical inputs (`b > 1`) keep the exact
//! modular-negation semantics.
template<typename DstT, typename SrcT>
static constexpr DstT bitMaskFromBool(SrcT b) noexcept {
  typedef typename std::make_unsigned<DstT>::type U;
  return DstT(U(U(~U(b)) + U(1u)));
}
210
211 //! \cond
212 namespace Internal {
213 // Fills all trailing bits right from the first most significant bit set.
214 static constexpr uint8_t fillTrailingBitsImpl(uint8_t x) noexcept { return or_shr(or_shr(or_shr(x, 1), 2), 4); }
215 // Fills all trailing bits right from the first most significant bit set.
216 static constexpr uint16_t fillTrailingBitsImpl(uint16_t x) noexcept { return or_shr(or_shr(or_shr(or_shr(x, 1), 2), 4), 8); }
217 // Fills all trailing bits right from the first most significant bit set.
218 static constexpr uint32_t fillTrailingBitsImpl(uint32_t x) noexcept { return or_shr(or_shr(or_shr(or_shr(or_shr(x, 1), 2), 4), 8), 16); }
219 // Fills all trailing bits right from the first most significant bit set.
220 static constexpr uint64_t fillTrailingBitsImpl(uint64_t x) noexcept { return or_shr(or_shr(or_shr(or_shr(or_shr(or_shr(x, 1), 2), 4), 8), 16), 32); }
221 }
222 //! \endcond
223
224 // Fills all trailing bits right from the first most significant bit set.
225 template<typename T>
226 static constexpr T fillTrailingBits(const T& x) noexcept {
227 typedef typename std::make_unsigned<T>::type U;
228 return T(Internal::fillTrailingBitsImpl(U(x)));
229 }
230
231 // ============================================================================
232 // [asmjit::Support - CTZ]
233 // ============================================================================
234
//! \cond
namespace Internal {
// Constexpr fallback for 32-bit CTZ. The caller passes `x & -x` (the lowest
// set bit isolated); each mask test then subtracts its weight from 31 to
// arrive at the index of that bit. Input 0 yields 31 (callers require a
// non-zero input, see the public `ctz` note).
static constexpr uint32_t constCtzImpl(uint32_t xAndNegX) noexcept {
  return 31 - ((xAndNegX & 0x0000FFFFu) ? 16 : 0)
            - ((xAndNegX & 0x00FF00FFu) ? 8 : 0)
            - ((xAndNegX & 0x0F0F0F0Fu) ? 4 : 0)
            - ((xAndNegX & 0x33333333u) ? 2 : 0)
            - ((xAndNegX & 0x55555555u) ? 1 : 0);
}

// 64-bit variant of the constexpr CTZ fallback (same mask-weight scheme).
static constexpr uint32_t constCtzImpl(uint64_t xAndNegX) noexcept {
  return 63 - ((xAndNegX & 0x00000000FFFFFFFFu) ? 32 : 0)
            - ((xAndNegX & 0x0000FFFF0000FFFFu) ? 16 : 0)
            - ((xAndNegX & 0x00FF00FF00FF00FFu) ? 8 : 0)
            - ((xAndNegX & 0x0F0F0F0F0F0F0F0Fu) ? 4 : 0)
            - ((xAndNegX & 0x3333333333333333u) ? 2 : 0)
            - ((xAndNegX & 0x5555555555555555u) ? 1 : 0);
}

// Isolates the lowest set bit (`x & -x`) and dispatches to the sized impl.
template<typename T>
static constexpr uint32_t constCtz(T x) noexcept {
  return constCtzImpl(x & neg(x));
}

// Runtime CTZ (32-bit): prefer compiler intrinsics, fall back to the
// constexpr implementation. The result is undefined for x == 0 on the
// intrinsic paths (e.g. _BitScanForward leaves the output unset).
static ASMJIT_INLINE uint32_t ctz(uint32_t x) noexcept {
#if defined(__GNUC__)
  return uint32_t(__builtin_ctz(x));
#elif defined(_MSC_VER) && (ASMJIT_ARCH_X86 || ASMJIT_ARCH_ARM)
  unsigned long i;
  _BitScanForward(&i, x);
  return uint32_t(i);
#else
  return constCtz(x);
#endif
}

// Runtime CTZ (64-bit). _BitScanForward64 only exists on 64-bit MSVC
// targets, hence the `== 64` architecture checks.
static ASMJIT_INLINE uint32_t ctz(uint64_t x) noexcept {
#if defined(__GNUC__)
  return uint32_t(__builtin_ctzll(x));
#elif defined(_MSC_VER) && (ASMJIT_ARCH_X86 == 64 || ASMJIT_ARCH_ARM == 64)
  unsigned long i;
  _BitScanForward64(&i, x);
  return uint32_t(i);
#else
  return constCtz(x);
#endif
}
}
//! \endcond
284
285 //! Count trailing zeros in `x` (returns a position of a first bit set in `x`).
286 //!
287 //! \note The input MUST NOT be zero, otherwise the result is undefined.
288 template<typename T>
289 static inline uint32_t ctz(T x) noexcept { return Internal::ctz(asUInt(x)); }
290
291 //! Count trailing zeros in `x` (constant expression).
292 template<typename T>
293 static constexpr uint32_t constCtz(T x) noexcept { return Internal::constCtz(asUInt(x)); }
294
295 // ============================================================================
296 // [asmjit::Support - PopCnt]
297 // ============================================================================
298
299 // Based on the following resource:
300 // http://graphics.stanford.edu/~seander/bithacks.html
301 //
302 // Alternatively, for a very small number of bits in `x`:
303 // uint32_t n = 0;
304 // while (x) {
305 // x &= x - 1;
306 // n++;
307 // }
308 // return n;
309
//! \cond
namespace Internal {
// SWAR (parallel-sum) population count for 32-bit values. Not `constexpr`
// despite the name - it needs multiple statements, presumably kept this way
// for C++11 compatibility (TODO confirm); "const" here only means "no
// compiler intrinsics, stable result across toolchains".
static inline uint32_t constPopcntImpl(uint32_t x) noexcept {
  x = x - ((x >> 1) & 0x55555555u);                              // 2-bit sums
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);              // 4-bit sums
  return (((x + (x >> 4)) & 0x0F0F0F0Fu) * 0x01010101u) >> 24;   // byte fold
}

// 64-bit variant: native SWAR reduction on 64-bit hosts; on 32-bit hosts
// two 32-bit popcounts avoid the expensive 64-bit multiply.
static inline uint32_t constPopcntImpl(uint64_t x) noexcept {
  if (ASMJIT_ARCH_BITS >= 64) {
    x = x - ((x >> 1) & 0x5555555555555555u);
    x = (x & 0x3333333333333333u) + ((x >> 2) & 0x3333333333333333u);
    return uint32_t((((x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fu) * 0x0101010101010101u) >> 56);
  }
  else {
    return constPopcntImpl(uint32_t(x >> 32)) +
           constPopcntImpl(uint32_t(x & 0xFFFFFFFFu));
  }
}

// Runtime popcount (32-bit): GCC/Clang builtin when available, SWAR otherwise.
static inline uint32_t popcntImpl(uint32_t x) noexcept {
#if defined(__GNUC__)
  return uint32_t(__builtin_popcount(x));
#else
  return constPopcntImpl(asUInt(x));
#endif
}

// Runtime popcount (64-bit).
static inline uint32_t popcntImpl(uint64_t x) noexcept {
#if defined(__GNUC__)
  return uint32_t(__builtin_popcountll(x));
#else
  return constPopcntImpl(asUInt(x));
#endif
}
}
//! \endcond
347
//! Calculates count of bits in `x`.
template<typename T>
static inline uint32_t popcnt(T x) noexcept { return Internal::popcntImpl(asUInt(x)); }

//! Calculates count of bits in `x` without using compiler intrinsics.
//!
//! \note Despite the `const` prefix this function is NOT `constexpr` (it is
//! only `inline`), so it cannot appear in constant expressions; it merely
//! guarantees a toolchain-independent, intrinsic-free implementation.
template<typename T>
static inline uint32_t constPopcnt(T x) noexcept { return Internal::constPopcntImpl(asUInt(x)); }
355
356 // ============================================================================
357 // [asmjit::Support - Min/Max]
358 // ============================================================================
359
360 // NOTE: These are constexpr `min()` and `max()` implementations that are not
361 // exactly the same as `std::min()` and `std::max()`. The return value is not
362 // a reference to `a` or `b` but it's a new value instead.
363
364 template<typename T>
365 static constexpr T min(const T& a, const T& b) noexcept { return b < a ? b : a; }
366
367 template<typename T, typename... Args>
368 static constexpr T min(const T& a, const T& b, Args&&... args) noexcept { return min(min(a, b), std::forward<Args>(args)...); }
369
370 template<typename T>
371 static constexpr T max(const T& a, const T& b) noexcept { return a < b ? b : a; }
372
373 template<typename T, typename... Args>
374 static constexpr T max(const T& a, const T& b, Args&&... args) noexcept { return max(max(a, b), std::forward<Args>(args)...); }
375
376 // ============================================================================
377 // [asmjit::Support - Overflow Arithmetic]
378 // ============================================================================
379
//! \cond
namespace Internal {
// Adds `x + y` in the unsigned domain and accumulates an overflow flag into
// `*of` (sticky - the flag is only ever set, never cleared, so a sequence of
// operations can share one flag).
//
//   - Unsigned overflow: the sum wrapped, i.e. `result < x`.
//   - Signed overflow: operands have the same sign but the result's sign
//     differs; `(x ^ ~y) & (x ^ result)` has its sign bit set exactly then.
template<typename T>
static ASMJIT_INLINE T addOverflowImpl(T x, T y, FastUInt8* of) noexcept {
  typedef typename std::make_unsigned<T>::type U;

  U result = U(x) + U(y);
  *of = FastUInt8(*of | FastUInt8(std::is_unsigned<T>::value ? result < U(x) : T((U(x) ^ ~U(y)) & (U(x) ^ result)) < 0));
  return T(result);
}

// Subtracts `x - y` in the unsigned domain and accumulates a sticky
// overflow/borrow flag into `*of`.
//
//   - Unsigned borrow: `result > x`.
//   - Signed overflow: operands have different signs and the result's sign
//     differs from `x`; detected by `(x ^ y) & (x ^ result)`.
template<typename T>
static ASMJIT_INLINE T subOverflowImpl(T x, T y, FastUInt8* of) noexcept {
  typedef typename std::make_unsigned<T>::type U;

  U result = U(x) - U(y);
  *of = FastUInt8(*of | FastUInt8(std::is_unsigned<T>::value ? result > U(x) : T((U(x) ^ U(y)) & (U(x) ^ result)) < 0));
  return T(result);
}
}
//! \endcond
400 //! \endcond
401
402 template<typename T>
403 static ASMJIT_INLINE T addOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::addOverflowImpl(x, y, of)); }
404
405 template<typename T>
406 static ASMJIT_INLINE T subOverflow(const T& x, const T& y, FastUInt8* of) noexcept { return T(Internal::subOverflowImpl(x, y, of)); }
407
408 // ============================================================================
409 // [asmjit::Support - Alignment]
410 // ============================================================================
411
412 template<typename X, typename Y>
413 static constexpr bool isAligned(X base, Y alignment) noexcept {
414 typedef typename Internal::IntBySize<sizeof(X), 0>::Type U;
415 return ((U)base % (U)alignment) == 0;
416 }
417
//! Tests whether `x` is a power of two, i.e. exactly one bit is set.
//!
//! Uses the classic `x & (x - 1)` trick in the unsigned domain; zero is
//! explicitly rejected first.
template<typename T>
static constexpr bool isPowerOf2(T x) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return x != T(0) && (U(x) & U(U(x) - U(1u))) == U(0);
}
424
425 template<typename X, typename Y>
426 static constexpr X alignUp(X x, Y alignment) noexcept {
427 typedef typename Internal::IntBySize<sizeof(X), 0>::Type U;
428 return (X)( ((U)x + ((U)(alignment) - 1u)) & ~((U)(alignment) - 1u) );
429 }
430
431 template<typename T>
432 static constexpr T alignUpPowerOf2(T x) noexcept {
433 typedef typename Internal::IntBySize<sizeof(T), 0>::Type U;
434 return (T)(fillTrailingBits(U(x) - 1u) + 1u);
435 }
436
//! Returns either zero or the positive difference between `base` and `base`
//! aligned up to `alignment` - i.e. how much must be added to `base` to make
//! it aligned.
//!
//! \note `alignment` must be a power of 2, as required by `alignUp`.
template<typename X, typename Y>
static constexpr typename Internal::IntBySize<sizeof(X), 0>::Type alignUpDiff(X base, Y alignment) noexcept {
  typedef typename Internal::IntBySize<sizeof(X), 0>::Type U;
  return alignUp(U(base), alignment) - U(base);
}
444
445 template<typename X, typename Y>
446 static constexpr X alignDown(X x, Y alignment) noexcept {
447 typedef typename Internal::IntBySize<sizeof(X), 0>::Type U;
448 return (X)( (U)x & ~((U)(alignment) - 1u) );
449 }
450
451 // ============================================================================
452 // [asmjit::Support - NumGranularized]
453 // ============================================================================
454
455 //! Calculates the number of elements that would be required if `base` is
456 //! granularized by `granularity`. This function can be used to calculate
457 //! the number of BitWords to represent N bits, for example.
458 template<typename X, typename Y>
459 static constexpr X numGranularized(X base, Y granularity) noexcept {
460 typedef typename Internal::IntBySize<sizeof(X), 0>::Type U;
461 return X((U(base) + U(granularity) - 1) / U(granularity));
462 }
463
464 // ============================================================================
465 // [asmjit::Support - IsBetween]
466 // ============================================================================
467
//! Checks whether `x` lies within the closed interval `[a, b]`
//! (greater than or equal to `a` and less than or equal to `b`).
template<typename T>
static constexpr bool isBetween(const T& x, const T& a, const T& b) noexcept {
  return !(x < a) && !(b < x);
}
473
474 // ============================================================================
475 // [asmjit::Support - IsInt / IsUInt]
476 // ============================================================================
477
478 //! Checks whether the given integer `x` can be casted to a 4-bit signed integer.
479 template<typename T>
480 static constexpr bool isInt4(T x) noexcept {
481 typedef typename std::make_signed<T>::type S;
482 typedef typename std::make_unsigned<T>::type U;
483
484 return std::is_signed<T>::value ? isBetween<S>(S(x), -8, 7)
485 : U(x) <= U(7u);
486 }
487
488 //! Checks whether the given integer `x` can be casted to an 8-bit signed integer.
489 template<typename T>
490 static constexpr bool isInt8(T x) noexcept {
491 typedef typename std::make_signed<T>::type S;
492 typedef typename std::make_unsigned<T>::type U;
493
494 return std::is_signed<T>::value ? sizeof(T) <= 1 || isBetween<S>(S(x), -128, 127)
495 : U(x) <= U(127u);
496 }
497
498 //! Checks whether the given integer `x` can be casted to a 16-bit signed integer.
499 template<typename T>
500 static constexpr bool isInt16(T x) noexcept {
501 typedef typename std::make_signed<T>::type S;
502 typedef typename std::make_unsigned<T>::type U;
503
504 return std::is_signed<T>::value ? sizeof(T) <= 2 || isBetween<S>(S(x), -32768, 32767)
505 : sizeof(T) <= 1 || U(x) <= U(32767u);
506 }
507
508 //! Checks whether the given integer `x` can be casted to a 32-bit signed integer.
509 template<typename T>
510 static constexpr bool isInt32(T x) noexcept {
511 typedef typename std::make_signed<T>::type S;
512 typedef typename std::make_unsigned<T>::type U;
513
514 return std::is_signed<T>::value ? sizeof(T) <= 4 || isBetween<S>(S(x), -2147483647 - 1, 2147483647)
515 : sizeof(T) <= 2 || U(x) <= U(2147483647u);
516 }
517
518 //! Checks whether the given integer `x` can be casted to a 4-bit unsigned integer.
519 template<typename T>
520 static constexpr bool isUInt4(T x) noexcept {
521 typedef typename std::make_unsigned<T>::type U;
522
523 return std::is_signed<T>::value ? x >= T(0) && x <= T(15)
524 : U(x) <= U(15u);
525 }
526
527 //! Checks whether the given integer `x` can be casted to an 8-bit unsigned integer.
528 template<typename T>
529 static constexpr bool isUInt8(T x) noexcept {
530 typedef typename std::make_unsigned<T>::type U;
531
532 return std::is_signed<T>::value ? (sizeof(T) <= 1 || T(x) <= T(255)) && x >= T(0)
533 : (sizeof(T) <= 1 || U(x) <= U(255u));
534 }
535
536 //! Checks whether the given integer `x` can be casted to a 12-bit unsigned integer (ARM specific).
537 template<typename T>
538 static constexpr bool isUInt12(T x) noexcept {
539 typedef typename std::make_unsigned<T>::type U;
540
541 return std::is_signed<T>::value ? (sizeof(T) <= 1 || T(x) <= T(4095)) && x >= T(0)
542 : (sizeof(T) <= 1 || U(x) <= U(4095u));
543 }
544
545 //! Checks whether the given integer `x` can be casted to a 16-bit unsigned integer.
546 template<typename T>
547 static constexpr bool isUInt16(T x) noexcept {
548 typedef typename std::make_unsigned<T>::type U;
549
550 return std::is_signed<T>::value ? (sizeof(T) <= 2 || T(x) <= T(65535)) && x >= T(0)
551 : (sizeof(T) <= 2 || U(x) <= U(65535u));
552 }
553
554 //! Checks whether the given integer `x` can be casted to a 32-bit unsigned integer.
555 template<typename T>
556 static constexpr bool isUInt32(T x) noexcept {
557 typedef typename std::make_unsigned<T>::type U;
558
559 return std::is_signed<T>::value ? (sizeof(T) <= 4 || T(x) <= T(4294967295u)) && x >= T(0)
560 : (sizeof(T) <= 4 || U(x) <= U(4294967295u));
561 }
562
563 //! Checks whether the given integer `x` can be casted to a 32-bit unsigned integer.
564 template<typename T>
565 static constexpr bool isIntOrUInt32(T x) noexcept {
566 return sizeof(T) <= 4 ? true : (uint32_t(uint64_t(x) >> 32) + 1u) <= 1u;
567 }
568
569 // ============================================================================
570 // [asmjit::Support - ByteSwap]
571 // ============================================================================
572
//! Reverses the byte order of a 32-bit value (endianness swap).
static constexpr uint32_t byteswap32(uint32_t x) noexcept {
  return ((x & 0x000000FFu) << 24) |
         ((x & 0x0000FF00u) <<  8) |
         ((x & 0x00FF0000u) >>  8) |
         ((x & 0xFF000000u) >> 24);
}
576
577 // ============================================================================
578 // [asmjit::Support - BytePack / Unpack]
579 // ============================================================================
580
581 //! Pack four 8-bit integer into a 32-bit integer as it is an array of `{b0,b1,b2,b3}`.
582 static constexpr uint32_t bytepack32_4x8(uint32_t a, uint32_t b, uint32_t c, uint32_t d) noexcept {
583 return ASMJIT_ARCH_LE ? (a | (b << 8) | (c << 16) | (d << 24))
584 : (d | (c << 8) | (b << 16) | (a << 24));
585 }
586
587 template<typename T>
588 static constexpr uint32_t unpackU32At0(T x) noexcept { return ASMJIT_ARCH_LE ? uint32_t(uint64_t(x) & 0xFFFFFFFFu) : uint32_t(uint64_t(x) >> 32); }
589 template<typename T>
590 static constexpr uint32_t unpackU32At1(T x) noexcept { return ASMJIT_ARCH_BE ? uint32_t(uint64_t(x) & 0xFFFFFFFFu) : uint32_t(uint64_t(x) >> 32); }
591
592 // ============================================================================
593 // [asmjit::Support - Position of byte (in bit-shift)]
594 // ============================================================================
595
596 static inline uint32_t byteShiftOfDWordStruct(uint32_t index) noexcept {
597 return ASMJIT_ARCH_LE ? index * 8 : (uint32_t(sizeof(uint32_t)) - 1u - index) * 8;
598 }
599
600 // ============================================================================
601 // [asmjit::Support - String Utilities]
602 // ============================================================================
603
//! Converts an ASCII uppercase letter ('A'..'Z') to lowercase by setting the
//! 0x20 case bit; any other value is returned unchanged.
template<typename T>
static constexpr T asciiToLower(T c) noexcept { return (c >= T('A') && c <= T('Z')) ? T(c | T(0x20)) : c; }
606
//! Converts an ASCII lowercase letter ('a'..'z') to uppercase by flipping the
//! 0x20 case bit; any other value is returned unchanged.
template<typename T>
static constexpr T asciiToUpper(T c) noexcept { return (c >= T('a') && c <= T('z')) ? T(c ^ T(0x20)) : c; }
609
610 static ASMJIT_INLINE size_t strLen(const char* s, size_t maxSize) noexcept {
611 size_t i = 0;
612 while (i < maxSize && s[i] != '\0')
613 i++;
614 return i;
615 }
616
//! One multiplicative hash round (the classic sdbm-style 65599 multiplier).
static constexpr uint32_t hashRound(uint32_t hash, uint32_t c) noexcept { return hash * 65599 + c; }

// Gets a hash of the given string `data` of size `size`. Size must be valid
// as this function doesn't check for a null terminator and allows it in the
// middle of the string.
static inline uint32_t hashString(const char* data, size_t size) noexcept {
  uint32_t hashCode = 0;
  // The counter must be `size_t`: a `uint32_t` counter compared against a
  // `size_t` bound triggers sign/width-conversion warnings and would loop
  // forever for (theoretical) inputs longer than UINT32_MAX bytes.
  for (size_t i = 0; i < size; i++)
    hashCode = hashRound(hashCode, uint8_t(data[i]));
  return hashCode;
}
628
// Finds the `id`-th null-terminated string inside a buffer of consecutive
// null-terminated strings, where `p` points at string #0. The caller must
// guarantee `id` is in range - there is no bounds checking here.
static ASMJIT_INLINE const char* findPackedString(const char* p, uint32_t id) noexcept {
  uint32_t i = 0;
  while (i < id) {
    while (p[0])  // Advance to the terminator of the current string.
      p++;
    p++;          // Step over the terminator to the next string.
    i++;
  }
  return p;
}
639
//! Compares two instruction names.
//!
//! `a` is a null terminated instruction name from arch-specific `nameData[]`
//! table. `b` is a possibly non-null terminated instruction name passed to
//! `InstAPI::stringToInstId()`.
static ASMJIT_INLINE int cmpInstName(const char* a, const char* b, size_t size) noexcept {
  for (size_t i = 0; i < size; i++) {
    // Compare as unsigned bytes so the ordering stays consistent for
    // characters >= 0x80 regardless of `char` signedness.
    int c = int(uint8_t(a[i])) - int(uint8_t(b[i]));
    if (c != 0) return c;
  }
  // All `size` characters matched. `a[size]` is then 0 iff `a` ends exactly
  // here (equal), positive if `a` is longer (greater). Reading `a[size]` is
  // safe because `a` is null terminated.
  return int(uint8_t(a[size]));
}
652
653 // ============================================================================
654 // [asmjit::Support - Read / Write]
655 // ============================================================================
656
657 static inline uint32_t readU8(const void* p) noexcept { return uint32_t(static_cast<const uint8_t*>(p)[0]); }
658 static inline int32_t readI8(const void* p) noexcept { return int32_t(static_cast<const int8_t*>(p)[0]); }
659
660 template<uint32_t BO, size_t Alignment>
661 static inline uint32_t readU16x(const void* p) noexcept {
662 if (BO == ByteOrder::kNative && (kUnalignedAccess16 || Alignment >= 2)) {
663 typedef typename Internal::AlignedInt<uint16_t, Alignment>::T U16AlignedToN;
664 return uint32_t(static_cast<const U16AlignedToN*>(p)[0]);
665 }
666 else {
667 uint32_t hi = readU8(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 1 : 0));
668 uint32_t lo = readU8(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 0 : 1));
669 return shl(hi, 8) | lo;
670 }
671 }
672
673 template<uint32_t BO, size_t Alignment>
674 static inline int32_t readI16x(const void* p) noexcept {
675 if (BO == ByteOrder::kNative && (kUnalignedAccess16 || Alignment >= 2)) {
676 typedef typename Internal::AlignedInt<uint16_t, Alignment>::T U16AlignedToN;
677 return int32_t(int16_t(static_cast<const U16AlignedToN*>(p)[0]));
678 }
679 else {
680 int32_t hi = readI8(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 1 : 0));
681 uint32_t lo = readU8(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 0 : 1));
682 return shl(hi, 8) | int32_t(lo);
683 }
684 }
685
686 template<uint32_t BO = ByteOrder::kNative>
687 static inline uint32_t readU24u(const void* p) noexcept {
688 uint32_t b0 = readU8(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 2 : 0));
689 uint32_t b1 = readU8(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 1 : 1));
690 uint32_t b2 = readU8(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 0 : 2));
691 return shl(b0, 16) | shl(b1, 8) | b2;
692 }
693
694 template<uint32_t BO, size_t Alignment>
695 static inline uint32_t readU32x(const void* p) noexcept {
696 if (kUnalignedAccess32 || Alignment >= 4) {
697 typedef typename Internal::AlignedInt<uint32_t, Alignment>::T U32AlignedToN;
698 uint32_t x = static_cast<const U32AlignedToN*>(p)[0];
699 return BO == ByteOrder::kNative ? x : byteswap32(x);
700 }
701 else {
702 uint32_t hi = readU16x<BO, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 2 : 0));
703 uint32_t lo = readU16x<BO, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 0 : 2));
704 return shl(hi, 16) | lo;
705 }
706 }
707
708 template<uint32_t BO, size_t Alignment>
709 static inline uint64_t readU64x(const void* p) noexcept {
710 if (BO == ByteOrder::kNative && (kUnalignedAccess64 || Alignment >= 8)) {
711 typedef typename Internal::AlignedInt<uint64_t, Alignment>::T U64AlignedToN;
712 return static_cast<const U64AlignedToN*>(p)[0];
713 }
714 else {
715 uint32_t hi = readU32x<BO, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 4 : 0));
716 uint32_t lo = readU32x<BO, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<const uint8_t*>(p) + (BO == ByteOrder::kLE ? 0 : 4));
717 return shl(uint64_t(hi), 32) | lo;
718 }
719 }
720
721 template<uint32_t BO, size_t Alignment>
722 static inline int32_t readI32x(const void* p) noexcept { return int32_t(readU32x<BO, Alignment>(p)); }
723
724 template<uint32_t BO, size_t Alignment>
725 static inline int64_t readI64x(const void* p) noexcept { return int64_t(readU64x<BO, Alignment>(p)); }
726
727 template<size_t Alignment> static inline int32_t readI16xLE(const void* p) noexcept { return readI16x<ByteOrder::kLE, Alignment>(p); }
728 template<size_t Alignment> static inline int32_t readI16xBE(const void* p) noexcept { return readI16x<ByteOrder::kBE, Alignment>(p); }
729 template<size_t Alignment> static inline uint32_t readU16xLE(const void* p) noexcept { return readU16x<ByteOrder::kLE, Alignment>(p); }
730 template<size_t Alignment> static inline uint32_t readU16xBE(const void* p) noexcept { return readU16x<ByteOrder::kBE, Alignment>(p); }
731 template<size_t Alignment> static inline int32_t readI32xLE(const void* p) noexcept { return readI32x<ByteOrder::kLE, Alignment>(p); }
732 template<size_t Alignment> static inline int32_t readI32xBE(const void* p) noexcept { return readI32x<ByteOrder::kBE, Alignment>(p); }
733 template<size_t Alignment> static inline uint32_t readU32xLE(const void* p) noexcept { return readU32x<ByteOrder::kLE, Alignment>(p); }
734 template<size_t Alignment> static inline uint32_t readU32xBE(const void* p) noexcept { return readU32x<ByteOrder::kBE, Alignment>(p); }
735 template<size_t Alignment> static inline int64_t readI64xLE(const void* p) noexcept { return readI64x<ByteOrder::kLE, Alignment>(p); }
736 template<size_t Alignment> static inline int64_t readI64xBE(const void* p) noexcept { return readI64x<ByteOrder::kBE, Alignment>(p); }
737 template<size_t Alignment> static inline uint64_t readU64xLE(const void* p) noexcept { return readU64x<ByteOrder::kLE, Alignment>(p); }
738 template<size_t Alignment> static inline uint64_t readU64xBE(const void* p) noexcept { return readU64x<ByteOrder::kBE, Alignment>(p); }
739
// Convenience read wrappers - the `a` suffix means the pointer is assumed to
// be aligned to the value's natural alignment, the `u` suffix means no
// alignment is assumed (Alignment == 1). Without an `LE`/`BE` suffix the
// host's native byte-order is used.
static inline int32_t readI16a(const void* p) noexcept { return readI16x<ByteOrder::kNative, 2>(p); }
static inline int32_t readI16u(const void* p) noexcept { return readI16x<ByteOrder::kNative, 1>(p); }
static inline uint32_t readU16a(const void* p) noexcept { return readU16x<ByteOrder::kNative, 2>(p); }
static inline uint32_t readU16u(const void* p) noexcept { return readU16x<ByteOrder::kNative, 1>(p); }

static inline int32_t readI16aLE(const void* p) noexcept { return readI16xLE<2>(p); }
static inline int32_t readI16uLE(const void* p) noexcept { return readI16xLE<1>(p); }
static inline uint32_t readU16aLE(const void* p) noexcept { return readU16xLE<2>(p); }
static inline uint32_t readU16uLE(const void* p) noexcept { return readU16xLE<1>(p); }

static inline int32_t readI16aBE(const void* p) noexcept { return readI16xBE<2>(p); }
static inline int32_t readI16uBE(const void* p) noexcept { return readI16xBE<1>(p); }
static inline uint32_t readU16aBE(const void* p) noexcept { return readU16xBE<2>(p); }
static inline uint32_t readU16uBE(const void* p) noexcept { return readU16xBE<1>(p); }

// 24-bit reads are always byte-wise, thus only unaligned variants exist.
static inline uint32_t readU24uLE(const void* p) noexcept { return readU24u<ByteOrder::kLE>(p); }
static inline uint32_t readU24uBE(const void* p) noexcept { return readU24u<ByteOrder::kBE>(p); }

static inline int32_t readI32a(const void* p) noexcept { return readI32x<ByteOrder::kNative, 4>(p); }
static inline int32_t readI32u(const void* p) noexcept { return readI32x<ByteOrder::kNative, 1>(p); }
static inline uint32_t readU32a(const void* p) noexcept { return readU32x<ByteOrder::kNative, 4>(p); }
static inline uint32_t readU32u(const void* p) noexcept { return readU32x<ByteOrder::kNative, 1>(p); }

static inline int32_t readI32aLE(const void* p) noexcept { return readI32xLE<4>(p); }
static inline int32_t readI32uLE(const void* p) noexcept { return readI32xLE<1>(p); }
static inline uint32_t readU32aLE(const void* p) noexcept { return readU32xLE<4>(p); }
static inline uint32_t readU32uLE(const void* p) noexcept { return readU32xLE<1>(p); }

static inline int32_t readI32aBE(const void* p) noexcept { return readI32xBE<4>(p); }
static inline int32_t readI32uBE(const void* p) noexcept { return readI32xBE<1>(p); }
static inline uint32_t readU32aBE(const void* p) noexcept { return readU32xBE<4>(p); }
static inline uint32_t readU32uBE(const void* p) noexcept { return readU32xBE<1>(p); }

static inline int64_t readI64a(const void* p) noexcept { return readI64x<ByteOrder::kNative, 8>(p); }
static inline int64_t readI64u(const void* p) noexcept { return readI64x<ByteOrder::kNative, 1>(p); }
static inline uint64_t readU64a(const void* p) noexcept { return readU64x<ByteOrder::kNative, 8>(p); }
static inline uint64_t readU64u(const void* p) noexcept { return readU64x<ByteOrder::kNative, 1>(p); }

static inline int64_t readI64aLE(const void* p) noexcept { return readI64xLE<8>(p); }
static inline int64_t readI64uLE(const void* p) noexcept { return readI64xLE<1>(p); }
static inline uint64_t readU64aLE(const void* p) noexcept { return readU64xLE<8>(p); }
static inline uint64_t readU64uLE(const void* p) noexcept { return readU64xLE<1>(p); }

static inline int64_t readI64aBE(const void* p) noexcept { return readI64xBE<8>(p); }
static inline int64_t readI64uBE(const void* p) noexcept { return readI64xBE<1>(p); }
static inline uint64_t readU64aBE(const void* p) noexcept { return readU64xBE<8>(p); }
static inline uint64_t readU64uBE(const void* p) noexcept { return readU64xBE<1>(p); }
787
// Stores a single byte; the value is masked to its low 8 bits before the store.
static inline void writeU8(void* p, uint32_t x) noexcept { static_cast<uint8_t*>(p)[0] = uint8_t(x & 0xFFu); }
static inline void writeI8(void* p, int32_t x) noexcept { static_cast<uint8_t*>(p)[0] = uint8_t(x & 0xFF); }
790
//! Writes the low 16 bits of `x` to `p` in byte-order `BO`, assuming the
//! pointer has at least `Alignment` bytes of alignment. A single 16-bit store
//! is used only when `BO` is the host byte-order and the access is either
//! sufficiently aligned or the host supports unaligned 16-bit stores;
//! otherwise the value is written byte-by-byte.
template<uint32_t BO = ByteOrder::kNative, size_t Alignment = 1>
static inline void writeU16x(void* p, uint32_t x) noexcept {
  if (BO == ByteOrder::kNative && (kUnalignedAccess16 || Alignment >= 2)) {
    typedef typename Internal::AlignedInt<uint16_t, Alignment>::T U16AlignedToN;
    static_cast<U16AlignedToN*>(p)[0] = uint16_t(x & 0xFFFFu);
  }
  else {
    // Byte-wise fallback - also handles the non-native byte-order case.
    static_cast<uint8_t*>(p)[0] = uint8_t((x >> (BO == ByteOrder::kLE ? 0 : 8)) & 0xFFu);
    static_cast<uint8_t*>(p)[1] = uint8_t((x >> (BO == ByteOrder::kLE ? 8 : 0)) & 0xFFu);
  }
}
802
//! Writes the low 24 bits of `v` to `p` byte-by-byte (no alignment
//! requirement) in byte-order `BO`.
template<uint32_t BO = ByteOrder::kNative>
static inline void writeU24u(void* p, uint32_t v) noexcept {
  static_cast<uint8_t*>(p)[0] = uint8_t((v >> (BO == ByteOrder::kLE ? 0 : 16)) & 0xFFu);
  static_cast<uint8_t*>(p)[1] = uint8_t((v >> (BO == ByteOrder::kLE ? 8 : 8)) & 0xFFu); // Middle byte is at shift 8 in both byte-orders (intentional).
  static_cast<uint8_t*>(p)[2] = uint8_t((v >> (BO == ByteOrder::kLE ? 16 : 0)) & 0xFFu);
}
809
//! Writes a 32-bit value to `p` in byte-order `BO`, assuming `Alignment`
//! bytes of pointer alignment.
//!
//! NOTE: Unlike `writeU16x`/`writeU64x`, a non-native `BO` does not force the
//! byte-wise path here - a single store of the byte-swapped value is used
//! whenever the access itself is possible.
template<uint32_t BO = ByteOrder::kNative, size_t Alignment = 1>
static inline void writeU32x(void* p, uint32_t x) noexcept {
  if (kUnalignedAccess32 || Alignment >= 4) {
    typedef typename Internal::AlignedInt<uint32_t, Alignment>::T U32AlignedToN;
    static_cast<U32AlignedToN*>(p)[0] = (BO == ByteOrder::kNative) ? x : Support::byteswap32(x);
  }
  else {
    // Split into two 16-bit writes, high/low half order depending on `BO`.
    writeU16x<BO, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<uint8_t*>(p) + 0, x >> (BO == ByteOrder::kLE ? 0 : 16));
    writeU16x<BO, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<uint8_t*>(p) + 2, x >> (BO == ByteOrder::kLE ? 16 : 0));
  }
}
821
//! Writes a 64-bit value to `p` in byte-order `BO`, assuming `Alignment`
//! bytes of pointer alignment. A single 64-bit store is used only for the
//! native byte-order with a possible access; otherwise the value is split
//! into two 32-bit writes (which handle byte-swapping themselves).
template<uint32_t BO = ByteOrder::kNative, size_t Alignment = 1>
static inline void writeU64x(void* p, uint64_t x) noexcept {
  if (BO == ByteOrder::kNative && (kUnalignedAccess64 || Alignment >= 8)) {
    typedef typename Internal::AlignedInt<uint64_t, Alignment>::T U64AlignedToN;
    static_cast<U64AlignedToN*>(p)[0] = x;
  }
  else {
    writeU32x<BO, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<uint8_t*>(p) + 0, uint32_t((x >> (BO == ByteOrder::kLE ? 0 : 32)) & 0xFFFFFFFFu));
    writeU32x<BO, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<uint8_t*>(p) + 4, uint32_t((x >> (BO == ByteOrder::kLE ? 32 : 0)) & 0xFFFFFFFFu));
  }
}
833
// Signed wrappers - cast to the unsigned type and forward to `writeU<N>x`.
template<uint32_t BO = ByteOrder::kNative, size_t Alignment = 1> static inline void writeI16x(void* p, int32_t x) noexcept { writeU16x<BO, Alignment>(p, uint32_t(x)); }
template<uint32_t BO = ByteOrder::kNative, size_t Alignment = 1> static inline void writeI32x(void* p, int32_t x) noexcept { writeU32x<BO, Alignment>(p, uint32_t(x)); }
template<uint32_t BO = ByteOrder::kNative, size_t Alignment = 1> static inline void writeI64x(void* p, int64_t x) noexcept { writeU64x<BO, Alignment>(p, uint64_t(x)); }

// Byte-order-explicit write wrappers, the counterparts of `read<Type>xLE/BE`.
template<size_t Alignment = 1> static inline void writeI16xLE(void* p, int32_t x) noexcept { writeI16x<ByteOrder::kLE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeI16xBE(void* p, int32_t x) noexcept { writeI16x<ByteOrder::kBE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeU16xLE(void* p, uint32_t x) noexcept { writeU16x<ByteOrder::kLE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeU16xBE(void* p, uint32_t x) noexcept { writeU16x<ByteOrder::kBE, Alignment>(p, x); }

template<size_t Alignment = 1> static inline void writeI32xLE(void* p, int32_t x) noexcept { writeI32x<ByteOrder::kLE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeI32xBE(void* p, int32_t x) noexcept { writeI32x<ByteOrder::kBE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeU32xLE(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kLE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeU32xBE(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kBE, Alignment>(p, x); }

template<size_t Alignment = 1> static inline void writeI64xLE(void* p, int64_t x) noexcept { writeI64x<ByteOrder::kLE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeI64xBE(void* p, int64_t x) noexcept { writeI64x<ByteOrder::kBE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeU64xLE(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kLE, Alignment>(p, x); }
template<size_t Alignment = 1> static inline void writeU64xBE(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kBE, Alignment>(p, x); }
852
// Convenience write wrappers - the `a` suffix assumes natural alignment, the
// `u` suffix assumes no alignment (Alignment == 1). Without an `LE`/`BE`
// suffix the host's native byte-order is used.
static inline void writeI16a(void* p, int32_t x) noexcept { writeI16x<ByteOrder::kNative, 2>(p, x); }
static inline void writeI16u(void* p, int32_t x) noexcept { writeI16x<ByteOrder::kNative, 1>(p, x); }
static inline void writeU16a(void* p, uint32_t x) noexcept { writeU16x<ByteOrder::kNative, 2>(p, x); }
static inline void writeU16u(void* p, uint32_t x) noexcept { writeU16x<ByteOrder::kNative, 1>(p, x); }

static inline void writeI16aLE(void* p, int32_t x) noexcept { writeI16xLE<2>(p, x); }
static inline void writeI16uLE(void* p, int32_t x) noexcept { writeI16xLE<1>(p, x); }
static inline void writeU16aLE(void* p, uint32_t x) noexcept { writeU16xLE<2>(p, x); }
static inline void writeU16uLE(void* p, uint32_t x) noexcept { writeU16xLE<1>(p, x); }

static inline void writeI16aBE(void* p, int32_t x) noexcept { writeI16xBE<2>(p, x); }
static inline void writeI16uBE(void* p, int32_t x) noexcept { writeI16xBE<1>(p, x); }
static inline void writeU16aBE(void* p, uint32_t x) noexcept { writeU16xBE<2>(p, x); }
static inline void writeU16uBE(void* p, uint32_t x) noexcept { writeU16xBE<1>(p, x); }

// 24-bit writes are always byte-wise, thus only unaligned variants exist.
static inline void writeU24uLE(void* p, uint32_t v) noexcept { writeU24u<ByteOrder::kLE>(p, v); }
static inline void writeU24uBE(void* p, uint32_t v) noexcept { writeU24u<ByteOrder::kBE>(p, v); }

static inline void writeI32a(void* p, int32_t x) noexcept { writeI32x<ByteOrder::kNative, 4>(p, x); }
static inline void writeI32u(void* p, int32_t x) noexcept { writeI32x<ByteOrder::kNative, 1>(p, x); }
static inline void writeU32a(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kNative, 4>(p, x); }
static inline void writeU32u(void* p, uint32_t x) noexcept { writeU32x<ByteOrder::kNative, 1>(p, x); }

static inline void writeI32aLE(void* p, int32_t x) noexcept { writeI32xLE<4>(p, x); }
static inline void writeI32uLE(void* p, int32_t x) noexcept { writeI32xLE<1>(p, x); }
static inline void writeU32aLE(void* p, uint32_t x) noexcept { writeU32xLE<4>(p, x); }
static inline void writeU32uLE(void* p, uint32_t x) noexcept { writeU32xLE<1>(p, x); }

static inline void writeI32aBE(void* p, int32_t x) noexcept { writeI32xBE<4>(p, x); }
static inline void writeI32uBE(void* p, int32_t x) noexcept { writeI32xBE<1>(p, x); }
static inline void writeU32aBE(void* p, uint32_t x) noexcept { writeU32xBE<4>(p, x); }
static inline void writeU32uBE(void* p, uint32_t x) noexcept { writeU32xBE<1>(p, x); }

static inline void writeI64a(void* p, int64_t x) noexcept { writeI64x<ByteOrder::kNative, 8>(p, x); }
static inline void writeI64u(void* p, int64_t x) noexcept { writeI64x<ByteOrder::kNative, 1>(p, x); }
static inline void writeU64a(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kNative, 8>(p, x); }
static inline void writeU64u(void* p, uint64_t x) noexcept { writeU64x<ByteOrder::kNative, 1>(p, x); }

static inline void writeI64aLE(void* p, int64_t x) noexcept { writeI64xLE<8>(p, x); }
static inline void writeI64uLE(void* p, int64_t x) noexcept { writeI64xLE<1>(p, x); }
static inline void writeU64aLE(void* p, uint64_t x) noexcept { writeU64xLE<8>(p, x); }
static inline void writeU64uLE(void* p, uint64_t x) noexcept { writeU64xLE<1>(p, x); }

static inline void writeI64aBE(void* p, int64_t x) noexcept { writeI64xBE<8>(p, x); }
static inline void writeI64uBE(void* p, int64_t x) noexcept { writeI64xBE<1>(p, x); }
static inline void writeU64aBE(void* p, uint64_t x) noexcept { writeU64xBE<8>(p, x); }
static inline void writeU64uBE(void* p, uint64_t x) noexcept { writeU64xBE<1>(p, x); }
900
901 // ============================================================================
902 // [asmjit::Support - Operators]
903 // ============================================================================
904
905 struct Set { template<typename T> static inline T op(T x, T y) noexcept { ASMJIT_UNUSED(x); return y; } };
906 struct SetNot { template<typename T> static inline T op(T x, T y) noexcept { ASMJIT_UNUSED(x); return ~y; } };
907 struct And { template<typename T> static inline T op(T x, T y) noexcept { return x & y; } };
908 struct AndNot { template<typename T> static inline T op(T x, T y) noexcept { return x & ~y; } };
909 struct NotAnd { template<typename T> static inline T op(T x, T y) noexcept { return ~x & y; } };
910 struct Or { template<typename T> static inline T op(T x, T y) noexcept { return x | y; } };
911 struct Xor { template<typename T> static inline T op(T x, T y) noexcept { return x ^ y; } };
912 struct Add { template<typename T> static inline T op(T x, T y) noexcept { return x + y; } };
913 struct Sub { template<typename T> static inline T op(T x, T y) noexcept { return x - y; } };
914 struct Min { template<typename T> static inline T op(T x, T y) noexcept { return min<T>(x, y); } };
915 struct Max { template<typename T> static inline T op(T x, T y) noexcept { return max<T>(x, y); } };
916
917 // ============================================================================
918 // [asmjit::Support - BitWordIterator]
919 // ============================================================================
920
//! Iterates over each bit that is set to 1 in a single bit-word.
//!
//! Example of use:
//!
//! ```
//! uint32_t bitsToIterate = 0x110F;
//! Support::BitWordIterator<uint32_t> it(bitsToIterate);
//!
//! while (it.hasNext()) {
//!   uint32_t bitIndex = it.next();
//!   std::printf("Bit at %u is set\n", unsigned(bitIndex));
//! }
//! ```
template<typename T>
class BitWordIterator {
public:
  inline explicit BitWordIterator(T bitWord) noexcept
    : _bitWord(bitWord) {}

  //! Restarts the iteration with a fresh bit-word.
  inline void init(T bitWord) noexcept { _bitWord = bitWord; }
  //! Tests whether at least one set bit remains to be visited.
  inline bool hasNext() const noexcept { return _bitWord != 0; }

  //! Returns the index of the lowest remaining set bit and clears it.
  inline uint32_t next() noexcept {
    ASMJIT_ASSERT(_bitWord != 0);
    uint32_t bitIndex = ctz(_bitWord);
    _bitWord &= ~(T(1u) << bitIndex);
    return bitIndex;
  }

  //! Remaining (not yet visited) bits.
  T _bitWord;
};
952
953 // ============================================================================
954 // [asmjit::Support - BitVectorOps]
955 // ============================================================================
956
//! \cond
namespace Internal {
  //! Applies a bitwise operator to `count` bits of bit-vector `buf` starting
  //! at bit-index `index`.
  //!
  //! `OperatorT` is used for the partially-affected first and last bit-words
  //! (combined with a mask of only the affected bits), while `FullWordOpT` is
  //! used for bit-words that lie entirely inside the range. Used with
  //! `<Or, Set>` to fill and `<AndNot, SetNot>` to clear (see
  //! `bitVectorFill` / `bitVectorClear`).
  template<typename T, class OperatorT, class FullWordOpT>
  static inline void bitVectorOp(T* buf, size_t index, size_t count) noexcept {
    if (count == 0)
      return;

    const size_t kTSizeInBits = bitSizeOf<T>();
    size_t vecIndex = index / kTSizeInBits; // T[]
    size_t bitIndex = index % kTSizeInBits; // T[][]

    buf += vecIndex;

    // The first BitWord requires special handling to preserve bits outside the fill region.
    const T kFillMask = allOnes<T>();
    size_t firstNBits = min<size_t>(kTSizeInBits - bitIndex, count);

    buf[0] = OperatorT::op(buf[0], (kFillMask >> (kTSizeInBits - firstNBits)) << bitIndex);
    buf++;
    count -= firstNBits;

    // All bits between the first and last affected BitWords can be just filled.
    while (count >= kTSizeInBits) {
      buf[0] = FullWordOpT::op(buf[0], kFillMask);
      buf++;
      count -= kTSizeInBits;
    }

    // The last BitWord requires special handling as well
    if (count)
      buf[0] = OperatorT::op(buf[0], kFillMask >> (kTSizeInBits - count));
  }
}
//! \endcond
991
//! Returns the value of a bit in a bit-vector `buf` at `index`.
//!
//! (The previous comment said "Sets bit", but this function only reads.)
template<typename T>
static inline bool bitVectorGetBit(T* buf, size_t index) noexcept {
  const size_t kTSizeInBits = bitSizeOf<T>();

  size_t vecIndex = index / kTSizeInBits; // Which bit-word.
  size_t bitIndex = index % kTSizeInBits; // Which bit inside that word.

  return bool((buf[vecIndex] >> bitIndex) & 0x1u);
}
1002
//! Sets bit in a bit-vector `buf` at `index` to `value`.
template<typename T>
static inline void bitVectorSetBit(T* buf, size_t index, bool value) noexcept {
  const size_t kTSizeInBits = bitSizeOf<T>();

  size_t vecIndex = index / kTSizeInBits; // Which bit-word.
  size_t bitIndex = index % kTSizeInBits; // Which bit inside that word.

  T bitMask = T(1u) << bitIndex;
  if (value)
    buf[vecIndex] |= bitMask;
  else
    buf[vecIndex] &= ~bitMask;
}
1017
//! Flips (inverts) a bit in a bit-vector `buf` at `index`.
//!
//! (The previous comment was copy-pasted from `bitVectorSetBit` and mentioned
//! a `value` parameter that doesn't exist - this function XORs the bit.)
template<typename T>
static inline void bitVectorFlipBit(T* buf, size_t index) noexcept {
  const size_t kTSizeInBits = bitSizeOf<T>();

  size_t vecIndex = index / kTSizeInBits; // Which bit-word.
  size_t bitIndex = index % kTSizeInBits; // Which bit inside that word.

  T bitMask = T(1u) << bitIndex;
  buf[vecIndex] ^= bitMask;
}
1029
//! Fills `count` bits in bit-vector `buf` starting at bit-index `index`.
template<typename T>
static inline void bitVectorFill(T* buf, size_t index, size_t count) noexcept { Internal::bitVectorOp<T, Or, Set>(buf, index, count); }

//! Clears `count` bits in bit-vector `buf` starting at bit-index `index`.
template<typename T>
static inline void bitVectorClear(T* buf, size_t index, size_t count) noexcept { Internal::bitVectorOp<T, AndNot, SetNot>(buf, index, count); }
1037
//! Returns the index of the first bit equal to `value` in bit-vector `buf`,
//! starting the search at bit-index `start`.
//!
//! NOTE: The search loop has no upper bound - the caller must guarantee that
//! a matching bit exists, otherwise the loop reads past the end of `buf`.
template<typename T>
static inline size_t bitVectorIndexOf(T* buf, size_t start, bool value) noexcept {
  const size_t kTSizeInBits = bitSizeOf<T>();
  size_t vecIndex = start / kTSizeInBits; // T[]
  size_t bitIndex = start % kTSizeInBits; // T[][]

  T* p = buf + vecIndex;

  // We always look for zeros, if value is `true` we have to flip all bits before the search.
  const T kFillMask = allOnes<T>();
  const T kFlipMask = value ? T(0) : kFillMask;

  // The first BitWord requires special handling as there are some bits we want to ignore.
  T bits = (*p ^ kFlipMask) & (kFillMask << bitIndex);
  for (;;) {
    if (bits)
      return (size_t)(p - buf) * kTSizeInBits + ctz(bits);
    bits = *++p ^ kFlipMask;
  }
}
1058
1059 // ============================================================================
1060 // [asmjit::Support - BitVectorIterator]
1061 // ============================================================================
1062
1063 template<typename T>
1064 class BitVectorIterator {
1065 public:
1066 ASMJIT_INLINE BitVectorIterator(const T* data, size_t numBitWords, size_t start = 0) noexcept {
1067 init(data, numBitWords, start);
1068 }
1069
1070 ASMJIT_INLINE void init(const T* data, size_t numBitWords, size_t start = 0) noexcept {
1071 const T* ptr = data + (start / bitSizeOf<T>());
1072 size_t idx = alignDown(start, bitSizeOf<T>());
1073 size_t end = numBitWords * bitSizeOf<T>();
1074
1075 T bitWord = T(0);
1076 if (idx < end) {
1077 bitWord = *ptr++ & (allOnes<T>() << (start % bitSizeOf<T>()));
1078 while (!bitWord && (idx += bitSizeOf<T>()) < end)
1079 bitWord = *ptr++;
1080 }
1081
1082 _ptr = ptr;
1083 _idx = idx;
1084 _end = end;
1085 _current = bitWord;
1086 }
1087
1088 ASMJIT_INLINE bool hasNext() const noexcept {
1089 return _current != T(0);
1090 }
1091
1092 ASMJIT_INLINE size_t next() noexcept {
1093 T bitWord = _current;
1094 ASMJIT_ASSERT(bitWord != T(0));
1095
1096 uint32_t bit = ctz(bitWord);
1097 bitWord ^= T(1u) << bit;
1098
1099 size_t n = _idx + bit;
1100 while (!bitWord && (_idx += bitSizeOf<T>()) < _end)
1101 bitWord = *_ptr++;
1102
1103 _current = bitWord;
1104 return n;
1105 }
1106
1107 ASMJIT_INLINE size_t peekNext() const noexcept {
1108 ASMJIT_ASSERT(_current != T(0));
1109 return _idx + ctz(_current);
1110 }
1111
1112 const T* _ptr;
1113 size_t _idx;
1114 size_t _end;
1115 T _current;
1116 };
1117
1118 // ============================================================================
1119 // [asmjit::Support - BitVectorOpIterator]
1120 // ============================================================================
1121
1122 template<typename T, class OperatorT>
1123 class BitVectorOpIterator {
1124 public:
1125 static constexpr uint32_t kTSizeInBits = bitSizeOf<T>();
1126
1127 ASMJIT_INLINE BitVectorOpIterator(const T* aData, const T* bData, size_t numBitWords, size_t start = 0) noexcept {
1128 init(aData, bData, numBitWords, start);
1129 }
1130
1131 ASMJIT_INLINE void init(const T* aData, const T* bData, size_t numBitWords, size_t start = 0) noexcept {
1132 const T* aPtr = aData + (start / bitSizeOf<T>());
1133 const T* bPtr = bData + (start / bitSizeOf<T>());
1134 size_t idx = alignDown(start, bitSizeOf<T>());
1135 size_t end = numBitWords * bitSizeOf<T>();
1136
1137 T bitWord = T(0);
1138 if (idx < end) {
1139 bitWord = OperatorT::op(*aPtr++, *bPtr++) & (allOnes<T>() << (start % bitSizeOf<T>()));
1140 while (!bitWord && (idx += kTSizeInBits) < end)
1141 bitWord = OperatorT::op(*aPtr++, *bPtr++);
1142 }
1143
1144 _aPtr = aPtr;
1145 _bPtr = bPtr;
1146 _idx = idx;
1147 _end = end;
1148 _current = bitWord;
1149 }
1150
1151 ASMJIT_INLINE bool hasNext() noexcept {
1152 return _current != T(0);
1153 }
1154
1155 ASMJIT_INLINE size_t next() noexcept {
1156 T bitWord = _current;
1157 ASMJIT_ASSERT(bitWord != T(0));
1158
1159 uint32_t bit = ctz(bitWord);
1160 bitWord ^= T(1u) << bit;
1161
1162 size_t n = _idx + bit;
1163 while (!bitWord && (_idx += kTSizeInBits) < _end)
1164 bitWord = OperatorT::op(*_aPtr++, *_bPtr++);
1165
1166 _current = bitWord;
1167 return n;
1168 }
1169
1170 const T* _aPtr;
1171 const T* _bPtr;
1172 size_t _idx;
1173 size_t _end;
1174 T _current;
1175 };
1176
1177 // ============================================================================
1178 // [asmjit::Support - Sorting]
1179 // ============================================================================
1180
1181 //! Sort order.
1182 enum SortOrder : uint32_t {
1183 kSortAscending = 0, //!< Ascending.
1184 kSortDescending = 1 //!< Descending.
1185 };
1186
1187 //! A helper class that provides comparison of any user-defined type that
1188 //! implements `<` and `>` operators (primitive types are supported as well).
1189 template<uint32_t Order = kSortAscending>
1190 struct Compare {
1191 template<typename A, typename B>
1192 inline int operator()(const A& a, const B& b) const noexcept {
1193 return Order == kSortAscending ? int(a > b) - int(a < b)
1194 : int(a < b) - int(a > b);
1195 }
1196 };
1197
1198 //! Insertion sort.
1199 template<typename T, typename CompareT = Compare<kSortAscending>>
1200 static inline void iSort(T* base, size_t size, const CompareT& cmp = CompareT()) noexcept {
1201 for (T* pm = base + 1; pm < base + size; pm++)
1202 for (T* pl = pm; pl > base && cmp(pl[-1], pl[0]) > 0; pl--)
1203 std::swap(pl[-1], pl[0]);
1204 }
1205
//! \cond
namespace Internal {
  //! Quick-sort implementation (iterative, median-of-three pivot, explicit
  //! stack of deferred partitions - no recursion, no heap allocation).
  template<typename T, class CompareT>
  struct QSortImpl {
    //! Two pointers (base, end) are pushed per deferred partition.
    static constexpr size_t kStackSize = 64 * 2;
    //! Partitions of this size or smaller are finished with insertion sort.
    static constexpr size_t kISortThreshold = 7;

    // Based on "PDCLib - Public Domain C Library" and rewritten to C++.
    static void sort(T* base, size_t size, const CompareT& cmp) noexcept {
      T* end = base + size;
      T* stack[kStackSize];
      T** stackptr = stack;

      for (;;) {
        if ((size_t)(end - base) > kISortThreshold) {
          // We work from second to last - first will be pivot element.
          T* pi = base + 1;
          T* pj = end - 1;
          std::swap(base[(size_t)(end - base) / 2], base[0]);

          // Median-of-three: order *pi, *base, *pj so that *base is the median.
          if (cmp(*pi , *pj ) > 0) std::swap(*pi , *pj );
          if (cmp(*base, *pj ) > 0) std::swap(*base, *pj );
          if (cmp(*pi , *base) > 0) std::swap(*pi , *base);

          // Now we have the median for pivot element, entering main loop.
          for (;;) {
            while (pi < pj && cmp(*++pi, *base) < 0) continue; // Move `i` right until `*i >= pivot`.
            while (pj > base && cmp(*--pj, *base) > 0) continue; // Move `j` left until `*j <= pivot`.

            if (pi > pj) break;
            std::swap(*pi, *pj);
          }

          // Move pivot into correct place.
          std::swap(*base, *pj);

          // Larger subfile base / end to stack, sort smaller.
          if (pj - base > end - pi) {
            // Left is larger.
            *stackptr++ = base;
            *stackptr++ = pj;
            base = pi;
          }
          else {
            // Right is larger.
            *stackptr++ = pi;
            *stackptr++ = end;
            end = pj;
          }
          ASMJIT_ASSERT(stackptr <= stack + kStackSize);
        }
        else {
          // Small partition - insertion-sort it, then pop the next deferred one.
          iSort(base, (size_t)(end - base), cmp);
          if (stackptr == stack)
            break;
          end = *--stackptr;
          base = *--stackptr;
        }
      }
    }
  };
}
//! \endcond
1270

//! Quick sort implementation.
//!
//! The main reason to provide a custom qsort implementation is that we needed
//! something that will never throw `bad_alloc` exception. This implementation
//! doesn't use dynamic memory allocation.
template<typename T, class CompareT = Compare<kSortAscending>>
static inline void qSort(T* base, size_t size, const CompareT& cmp = CompareT()) noexcept {
  Internal::QSortImpl<T, CompareT>::sort(base, size, cmp);
}
1281
1282 // ============================================================================
1283 // [asmjit::Support - Iterators]
1284 // ============================================================================
1285
1286 template<typename T>
1287 class Iterator {
1288 public:
1289 constexpr Iterator(T* p) noexcept : _p(p) {}
1290 constexpr Iterator(const Iterator& other) noexcept = default;
1291
1292 inline Iterator& operator=(const Iterator& other) noexcept = default;
1293
1294 inline Iterator operator+(size_t n) const noexcept { return Iterator(_p + n); }
1295 inline Iterator operator-(size_t n) const noexcept { return Iterator(_p - n); }
1296
1297 inline Iterator& operator+=(size_t n) noexcept { _p += n; return *this; }
1298 inline Iterator& operator-=(size_t n) noexcept { _p -= n; return *this; }
1299
1300 inline Iterator& operator++() noexcept { return operator+=(1); }
1301 inline Iterator& operator--() noexcept { return operator-=(1); }
1302
1303 inline Iterator operator++(int) noexcept { T* prev = _p; operator+=(1); return Iterator(prev); }
1304 inline Iterator operator--(int) noexcept { T* prev = _p; operator-=(1); return Iterator(prev); }
1305
1306 inline bool operator==(const Iterator& other) noexcept { return _p == other._p; }
1307 inline bool operator!=(const Iterator& other) noexcept { return _p != other._p; }
1308
1309 inline T& operator*() const noexcept { return _p[0]; }
1310
1311 T* _p;
1312 };
1313
//! A minimal reverse iterator over a contiguous array of `T`.
//!
//! Constructed from a one-past-the-end pointer; dereferencing yields the
//! element BEFORE the stored pointer (`_p[-1]`), and `++` moves backwards.
template<typename T>
class ReverseIterator {
public:
  constexpr ReverseIterator(T* p) noexcept : _p(p) {}
  constexpr ReverseIterator(const ReverseIterator& other) noexcept = default;

  inline ReverseIterator& operator=(const ReverseIterator& other) noexcept = default;

  inline ReverseIterator operator+(size_t n) const noexcept { return ReverseIterator(_p + n); }
  inline ReverseIterator operator-(size_t n) const noexcept { return ReverseIterator(_p - n); }

  // Note: advancing a reverse iterator moves the raw pointer backwards.
  inline ReverseIterator& operator+=(size_t n) noexcept { _p -= n; return *this; }
  inline ReverseIterator& operator-=(size_t n) noexcept { _p += n; return *this; }

  inline ReverseIterator& operator++() noexcept { return operator+=(1); }
  inline ReverseIterator& operator--() noexcept { return operator-=(1); }

  inline ReverseIterator operator++(int) noexcept { T* prev = _p; operator+=(1); return ReverseIterator(prev); }
  inline ReverseIterator operator--(int) noexcept { T* prev = _p; operator-=(1); return ReverseIterator(prev); }

  // FIX: comparison operators were not const-qualified, so two `const
  // ReverseIterator`s could not be compared. Adding `const` is backward-compatible.
  inline bool operator==(const ReverseIterator& other) const noexcept { return _p == other._p; }
  inline bool operator!=(const ReverseIterator& other) const noexcept { return _p != other._p; }

  inline T& operator*() const noexcept { return _p[-1]; }

  T* _p;
};
1341
1342 // ============================================================================
1343 // [asmjit::Support::Temporary]
1344 // ============================================================================
1345
1346 //! Used to pass a temporary buffer to:
1347 //!
1348 //! - Containers that use user-passed buffer as an initial storage (still can grow).
1349 //! - Zone allocator that would use the temporary buffer as a first block.
1350 struct Temporary {
1351 void* _data;
1352 size_t _size;
1353
1354 //! \name Construction & Destruction
1355 //! \{
1356
1357 constexpr Temporary(const Temporary& other) noexcept = default;
1358 constexpr Temporary(void* data, size_t size) noexcept
1359 : _data(data),
1360 _size(size) {}
1361
1362 //! \}
1363
1364 //! \name Overloaded Operators
1365 //! \{
1366
1367 inline Temporary& operator=(const Temporary& other) noexcept = default;
1368
1369 //! \}
1370
1371 //! \name Accessors
1372 //! \{
1373
1374 //! Returns the data storage.
1375 template<typename T = void>
1376 constexpr T* data() const noexcept { return static_cast<T*>(_data); }
1377 //! Returns the data storage size in bytes.
1378 constexpr size_t size() const noexcept { return _size; }
1379
1380 //! \}
1381 };
1382
1383 } // {Support}
1384
1385 //! \}
1386
1387 ASMJIT_END_NAMESPACE
1388
1389 #endif // _ASMJIT_CORE_SUPPORT_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/target.h"
8
9 ASMJIT_BEGIN_NAMESPACE
10
11 // ============================================================================
12 // [asmjit::Target - Construction / Destruction]
13 // ============================================================================
14
15 Target::Target() noexcept
16 : _targetType(kTargetNone),
17 _codeInfo() {}
18 Target::~Target() noexcept {}
19
20 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_TARGET_H
7 #define _ASMJIT_CORE_TARGET_H
8
9 #include "../core/arch.h"
10 #include "../core/func.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \addtogroup asmjit_core
15 //! \{
16
17 // ============================================================================
18 // [asmjit::CodeInfo]
19 // ============================================================================
20
21 //! Basic information about a code (or target). It describes its architecture,
22 //! code generation mode (or optimization level), and base address.
23 class CodeInfo {
24 public:
  //! Architecture information.
26 ArchInfo _archInfo;
27 //! Natural stack alignment (ARCH+OS).
28 uint8_t _stackAlignment;
29 //! Default CDECL calling convention.
30 uint8_t _cdeclCallConv;
31 //! Default STDCALL calling convention.
32 uint8_t _stdCallConv;
33 //! Default FASTCALL calling convention.
34 uint8_t _fastCallConv;
35 //! Base address.
36 uint64_t _baseAddress;
37
38 //! \name Construction & Destruction
39 //! \{
40
41 inline CodeInfo() noexcept
42 : _archInfo(),
43 _stackAlignment(0),
44 _cdeclCallConv(CallConv::kIdNone),
45 _stdCallConv(CallConv::kIdNone),
46 _fastCallConv(CallConv::kIdNone),
47 _baseAddress(Globals::kNoBaseAddress) {}
48
49 inline explicit CodeInfo(uint32_t archId, uint32_t archMode = 0, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept
50 : _archInfo(archId, archMode),
51 _stackAlignment(0),
52 _cdeclCallConv(CallConv::kIdNone),
53 _stdCallConv(CallConv::kIdNone),
54 _fastCallConv(CallConv::kIdNone),
55 _baseAddress(baseAddress) {}
56
57 inline CodeInfo(const CodeInfo& other) noexcept { init(other); }
58
59 inline bool isInitialized() const noexcept {
60 return _archInfo.archId() != ArchInfo::kIdNone;
61 }
62
63 inline void init(const CodeInfo& other) noexcept {
64 *this = other;
65 }
66
67 inline void init(uint32_t archId, uint32_t archMode = 0, uint64_t baseAddress = Globals::kNoBaseAddress) noexcept {
68 _archInfo.init(archId, archMode);
69 _stackAlignment = 0;
70 _cdeclCallConv = CallConv::kIdNone;
71 _stdCallConv = CallConv::kIdNone;
72 _fastCallConv = CallConv::kIdNone;
73 _baseAddress = baseAddress;
74 }
75
76 inline void reset() noexcept {
77 _archInfo.reset();
78 _stackAlignment = 0;
79 _cdeclCallConv = CallConv::kIdNone;
80 _stdCallConv = CallConv::kIdNone;
81 _fastCallConv = CallConv::kIdNone;
82 _baseAddress = Globals::kNoBaseAddress;
83 }
84
85 //! \}
86
87 //! \name Overloaded Operators
88 //! \{
89
90 inline CodeInfo& operator=(const CodeInfo& other) noexcept = default;
91
92 inline bool operator==(const CodeInfo& other) const noexcept { return ::memcmp(this, &other, sizeof(*this)) == 0; }
93 inline bool operator!=(const CodeInfo& other) const noexcept { return ::memcmp(this, &other, sizeof(*this)) != 0; }
94
95 //! \}
96
97 //! \name Accessors
98 //! \{
99
100 //! Returns the target architecture information, see `ArchInfo`.
101 inline const ArchInfo& archInfo() const noexcept { return _archInfo; }
102
103 //! Returns the target architecture id, see `ArchInfo::Id`.
104 inline uint32_t archId() const noexcept { return _archInfo.archId(); }
105 //! Returns the target architecture sub-type, see `ArchInfo::SubId`.
106 inline uint32_t archSubId() const noexcept { return _archInfo.archSubId(); }
107 //! Returns the native size of the target's architecture GP register.
108 inline uint32_t gpSize() const noexcept { return _archInfo.gpSize(); }
109 //! Returns the number of GP registers of the target's architecture.
110 inline uint32_t gpCount() const noexcept { return _archInfo.gpCount(); }
111
112 //! Returns a natural stack alignment that must be honored (or 0 if not known).
113 inline uint32_t stackAlignment() const noexcept { return _stackAlignment; }
114 //! Sets a natural stack alignment that must be honored.
115 inline void setStackAlignment(uint32_t sa) noexcept { _stackAlignment = uint8_t(sa); }
116
117 inline uint32_t cdeclCallConv() const noexcept { return _cdeclCallConv; }
118 inline void setCdeclCallConv(uint32_t cc) noexcept { _cdeclCallConv = uint8_t(cc); }
119
120 inline uint32_t stdCallConv() const noexcept { return _stdCallConv; }
121 inline void setStdCallConv(uint32_t cc) noexcept { _stdCallConv = uint8_t(cc); }
122
123 inline uint32_t fastCallConv() const noexcept { return _fastCallConv; }
124 inline void setFastCallConv(uint32_t cc) noexcept { _fastCallConv = uint8_t(cc); }
125
126 inline bool hasBaseAddress() const noexcept { return _baseAddress != Globals::kNoBaseAddress; }
127 inline uint64_t baseAddress() const noexcept { return _baseAddress; }
128 inline void setBaseAddress(uint64_t p) noexcept { _baseAddress = p; }
129 inline void resetBaseAddress() noexcept { _baseAddress = Globals::kNoBaseAddress; }
130
131 //! \}
132 };
133
134 // ============================================================================
135 // [asmjit::Target]
136 // ============================================================================
137
138 //! Target is an abstract class that describes a machine code target.
class ASMJIT_VIRTAPI Target {
public:
  ASMJIT_BASE_CLASS(Target)
  ASMJIT_NONCOPYABLE(Target)

  //! Target type, see `TargetType`.
  uint8_t _targetType;
  //! Reserved for future use.
  uint8_t _reserved[7];
  //! Basic information about the Runtime's code.
  CodeInfo _codeInfo;

  //! Type of the target.
  enum TargetType : uint32_t {
    //! Uninitialized target or unknown target type.
    kTargetNone = 0,
    //! JIT target type, see `JitRuntime`.
    kTargetJit = 1
  };

  //! \name Construction & Destruction
  //! \{

  //! Creates a `Target` instance.
  ASMJIT_API Target() noexcept;
  //! Destroys the `Target` instance.
  ASMJIT_API virtual ~Target() noexcept;

  //! \}

  //! \name Accessors
  //! \{

  //! Returns CodeInfo of this target.
  //!
  //! CodeInfo can be used to setup a CodeHolder in case you plan to generate a
  //! code compatible and executable by this Runtime.
  inline const CodeInfo& codeInfo() const noexcept { return _codeInfo; }

  //! Returns the target architecture id, see `ArchInfo::Id`.
  inline uint32_t archId() const noexcept { return _codeInfo.archId(); }
  //! Returns the target architecture sub-id, see `ArchInfo::SubId`.
  inline uint32_t archSubId() const noexcept { return _codeInfo.archSubId(); }

  //! Returns the target type, see `TargetType`.
  inline uint32_t targetType() const noexcept { return _targetType; }

  //! \}
};
187
188 //! \}
189
190 ASMJIT_END_NAMESPACE
191
192 #endif // _ASMJIT_CORE_TARGET_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/misc_p.h"
8 #include "../core/type.h"
9
10 ASMJIT_BEGIN_NAMESPACE
11
12 // ============================================================================
13 // [asmjit::Type]
14 // ============================================================================
15
// Builds the 256-entry `baseOf` and `sizeOf` lookup tables by instantiating
// `Type::BaseOfTypeId` / `Type::SizeOfTypeId` for every possible type-id.
// Ids that don't match any category evaluate to 0 in both tables.
const Type::TypeData Type::_typeData = {
  #define VALUE(X) Type::BaseOfTypeId<X>::kTypeId
  { ASMJIT_LOOKUP_TABLE_256(VALUE, 0) },
  #undef VALUE

  #define VALUE(X) Type::SizeOfTypeId<X>::kTypeSize
  { ASMJIT_LOOKUP_TABLE_256(VALUE, 0) }
  #undef VALUE
};
25
26 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_TYPE_H
7 #define _ASMJIT_CORE_TYPE_H
8
9 #include "../core/globals.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_core
14 //! \{
15
16 // ============================================================================
17 // [asmjit::Type]
18 // ============================================================================
19
20 //! Provides minimum type-system that is used by \ref asmjit_func and \ref asmjit_compiler.
21 namespace Type {
22
23 //! TypeId.
24 //!
25 //! This is an additional information that can be used to describe a value-type
26 //! of physical or virtual register. it's used mostly by BaseCompiler to describe
27 //! register representation (the group of data stored in the register and the
28 //! width used) and it's also used by APIs that allow to describe and work with
29 //! function signatures.
enum Id : uint32_t {
  //! No type (invalid / void).
  kIdVoid = 0,

  // Range covering all base (scalar) type-ids.
  _kIdBaseStart = 32,
  _kIdBaseEnd = 44,

  // Range covering all integer type-ids (includes the abstract ptr-sized ids).
  _kIdIntStart = 32,
  _kIdIntEnd = 41,

  //! Abstract signed integer of pointer width (see `deabstract()`).
  kIdIntPtr = 32,
  //! Abstract unsigned integer of pointer width (see `deabstract()`).
  kIdUIntPtr = 33,

  kIdI8 = 34,
  kIdU8 = 35,
  kIdI16 = 36,
  kIdU16 = 37,
  kIdI32 = 38,
  kIdU32 = 39,
  kIdI64 = 40,
  kIdU64 = 41,

  // Range covering all floating point type-ids.
  _kIdFloatStart = 42,
  _kIdFloatEnd = 44,

  kIdF32 = 42,
  kIdF64 = 43,
  kIdF80 = 44,

  // Range covering all mask register type-ids.
  _kIdMaskStart = 45,
  _kIdMaskEnd = 48,

  kIdMask8 = 45,
  kIdMask16 = 46,
  kIdMask32 = 47,
  kIdMask64 = 48,

  // Range covering all MMX type-ids.
  _kIdMmxStart = 49,
  _kIdMmxEnd = 50,

  kIdMmx32 = 49,
  kIdMmx64 = 50,

  // Vector groups. Each group is laid out so that `groupStart + n` has the
  // element type `kIdI8 + n` - `BaseOfTypeId` relies on this, which is why
  // some ids inside a group are intentionally unassigned (e.g. 57, 58, 60
  // in the 32-bit group have no valid element type).
  _kIdVec32Start = 51,
  _kIdVec32End = 60,

  kIdI8x4 = 51,
  kIdU8x4 = 52,
  kIdI16x2 = 53,
  kIdU16x2 = 54,
  kIdI32x1 = 55,
  kIdU32x1 = 56,
  kIdF32x1 = 59,

  _kIdVec64Start = 61,
  _kIdVec64End = 70,

  kIdI8x8 = 61,
  kIdU8x8 = 62,
  kIdI16x4 = 63,
  kIdU16x4 = 64,
  kIdI32x2 = 65,
  kIdU32x2 = 66,
  kIdI64x1 = 67,
  kIdU64x1 = 68,
  kIdF32x2 = 69,
  kIdF64x1 = 70,

  _kIdVec128Start = 71,
  _kIdVec128End = 80,

  kIdI8x16 = 71,
  kIdU8x16 = 72,
  kIdI16x8 = 73,
  kIdU16x8 = 74,
  kIdI32x4 = 75,
  kIdU32x4 = 76,
  kIdI64x2 = 77,
  kIdU64x2 = 78,
  kIdF32x4 = 79,
  kIdF64x2 = 80,

  _kIdVec256Start = 81,
  _kIdVec256End = 90,

  kIdI8x32 = 81,
  kIdU8x32 = 82,
  kIdI16x16 = 83,
  kIdU16x16 = 84,
  kIdI32x8 = 85,
  kIdU32x8 = 86,
  kIdI64x4 = 87,
  kIdU64x4 = 88,
  kIdF32x8 = 89,
  kIdF64x4 = 90,

  _kIdVec512Start = 91,
  _kIdVec512End = 100,

  kIdI8x64 = 91,
  kIdU8x64 = 92,
  kIdI16x32 = 93,
  kIdU16x32 = 94,
  kIdI32x16 = 95,
  kIdU32x16 = 96,
  kIdI64x8 = 97,
  kIdU64x8 = 98,
  kIdF32x16 = 99,
  kIdF64x8 = 100,

  //! Number of assigned type-ids (highest assigned id + 1).
  kIdCount = 101,
  //! Maximum representable type-id (lookup tables are sized `kIdMax + 1`).
  kIdMax = 255
};
142
//! Lookup tables indexed by type-id - `baseOf` maps a type-id to its scalar
//! base type-id, `sizeOf` maps it to its size in bytes (0 if unknown/abstract).
struct TypeData {
  uint8_t baseOf[kIdMax + 1];
  uint8_t sizeOf[kIdMax + 1];
};
//! Global instance of `TypeData` (defined in type.cpp).
ASMJIT_VARAPI const TypeData _typeData;
148
// Type-id classification helpers. Each predicate tests whether `typeId` is a
// single id or falls inside one of the `_kId...Start/.End` ranges declared in
// `Id` above. All are constexpr so they fold at compile time when the id is
// a constant (e.g. inside `BaseOfTypeId` / `SizeOfTypeId`).

static constexpr bool isVoid(uint32_t typeId) noexcept { return typeId == 0; }
static constexpr bool isValid(uint32_t typeId) noexcept { return typeId >= _kIdIntStart && typeId <= _kIdVec512End; }
static constexpr bool isBase(uint32_t typeId) noexcept { return typeId >= _kIdBaseStart && typeId <= _kIdBaseEnd; }
// Abstract ids (IntPtr/UIntPtr) must be translated to a concrete type by
// `deabstract()` before they can be used.
static constexpr bool isAbstract(uint32_t typeId) noexcept { return typeId >= kIdIntPtr && typeId <= kIdUIntPtr; }

static constexpr bool isInt(uint32_t typeId) noexcept { return typeId >= _kIdIntStart && typeId <= _kIdIntEnd; }
static constexpr bool isInt8(uint32_t typeId) noexcept { return typeId == kIdI8; }
static constexpr bool isUInt8(uint32_t typeId) noexcept { return typeId == kIdU8; }
static constexpr bool isInt16(uint32_t typeId) noexcept { return typeId == kIdI16; }
static constexpr bool isUInt16(uint32_t typeId) noexcept { return typeId == kIdU16; }
static constexpr bool isInt32(uint32_t typeId) noexcept { return typeId == kIdI32; }
static constexpr bool isUInt32(uint32_t typeId) noexcept { return typeId == kIdU32; }
static constexpr bool isInt64(uint32_t typeId) noexcept { return typeId == kIdI64; }
static constexpr bool isUInt64(uint32_t typeId) noexcept { return typeId == kIdU64; }

// GP predicates accept both the signed and unsigned id of the given width.
static constexpr bool isGp8(uint32_t typeId) noexcept { return typeId >= kIdI8 && typeId <= kIdU8; }
static constexpr bool isGp16(uint32_t typeId) noexcept { return typeId >= kIdI16 && typeId <= kIdU16; }
static constexpr bool isGp32(uint32_t typeId) noexcept { return typeId >= kIdI32 && typeId <= kIdU32; }
static constexpr bool isGp64(uint32_t typeId) noexcept { return typeId >= kIdI64 && typeId <= kIdU64; }

static constexpr bool isFloat(uint32_t typeId) noexcept { return typeId >= _kIdFloatStart && typeId <= _kIdFloatEnd; }
static constexpr bool isFloat32(uint32_t typeId) noexcept { return typeId == kIdF32; }
static constexpr bool isFloat64(uint32_t typeId) noexcept { return typeId == kIdF64; }
static constexpr bool isFloat80(uint32_t typeId) noexcept { return typeId == kIdF80; }

static constexpr bool isMask(uint32_t typeId) noexcept { return typeId >= _kIdMaskStart && typeId <= _kIdMaskEnd; }
static constexpr bool isMask8(uint32_t typeId) noexcept { return typeId == kIdMask8; }
static constexpr bool isMask16(uint32_t typeId) noexcept { return typeId == kIdMask16; }
static constexpr bool isMask32(uint32_t typeId) noexcept { return typeId == kIdMask32; }
static constexpr bool isMask64(uint32_t typeId) noexcept { return typeId == kIdMask64; }

static constexpr bool isMmx(uint32_t typeId) noexcept { return typeId >= _kIdMmxStart && typeId <= _kIdMmxEnd; }
static constexpr bool isMmx32(uint32_t typeId) noexcept { return typeId == kIdMmx32; }
static constexpr bool isMmx64(uint32_t typeId) noexcept { return typeId == kIdMmx64; }

static constexpr bool isVec(uint32_t typeId) noexcept { return typeId >= _kIdVec32Start && typeId <= _kIdVec512End; }
static constexpr bool isVec32(uint32_t typeId) noexcept { return typeId >= _kIdVec32Start && typeId <= _kIdVec32End; }
static constexpr bool isVec64(uint32_t typeId) noexcept { return typeId >= _kIdVec64Start && typeId <= _kIdVec64End; }
static constexpr bool isVec128(uint32_t typeId) noexcept { return typeId >= _kIdVec128Start && typeId <= _kIdVec128End; }
static constexpr bool isVec256(uint32_t typeId) noexcept { return typeId >= _kIdVec256Start && typeId <= _kIdVec256End; }
static constexpr bool isVec512(uint32_t typeId) noexcept { return typeId >= _kIdVec512Start && typeId <= _kIdVec512End; }
190
//! IdOfT<> template allows to get a TypeId of a C++ `T` type.
//!
//! The primary template is intentionally empty - using `IdOfT<T>::kTypeId`
//! with a type that has no specialization is a compile-time error.
template<typename T> struct IdOfT { /* Fail if not specialized. */ };

//! \cond
// Any pointer type maps to the abstract pointer-sized unsigned integer.
template<typename T> struct IdOfT<T*> {
  enum : uint32_t { kTypeId = kIdUIntPtr };
};

// References are passed like pointers, so they map the same way.
template<typename T> struct IdOfT<T&> {
  enum : uint32_t { kTypeId = kIdUIntPtr };
};
202
// Maps a C++ integral type `T` to a TypeId by its size and signedness,
// yielding `kIdVoid` for unsupported sizes.
template<typename T>
struct IdOfIntT {
  static constexpr uint32_t kTypeId =
    sizeof(T) == 1 ? (std::is_signed<T>::value ? kIdI8 : kIdU8 ) :
    sizeof(T) == 2 ? (std::is_signed<T>::value ? kIdI16 : kIdU16) :
    sizeof(T) == 4 ? (std::is_signed<T>::value ? kIdI32 : kIdU32) :
    sizeof(T) == 8 ? (std::is_signed<T>::value ? kIdI64 : kIdU64) : kIdVoid;
};
211
// Computes the scalar base TypeId of `TYPE_ID` at compile time (0 if none):
// base ids map to themselves, masks/MMX map to a fixed integer id, and vector
// ids map to their element id via offset arithmetic - each vector group start
// corresponds to `kIdI8`, which is why the enum reserves some unused slots.
template<uint32_t TYPE_ID>
struct BaseOfTypeId {
  static constexpr uint32_t kTypeId =
    isBase (TYPE_ID) ? TYPE_ID :
    isMask8 (TYPE_ID) ? kIdU8 :
    isMask16(TYPE_ID) ? kIdU16 :
    isMask32(TYPE_ID) ? kIdU32 :
    isMask64(TYPE_ID) ? kIdU64 :
    isMmx32 (TYPE_ID) ? kIdI32 :
    isMmx64 (TYPE_ID) ? kIdI64 :
    isVec32 (TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec32Start :
    isVec64 (TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec64Start :
    isVec128(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec128Start :
    isVec256(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec256Start :
    isVec512(TYPE_ID) ? TYPE_ID + kIdI8 - _kIdVec512Start : 0;
};
228
// Computes the size in bytes of `TYPE_ID` at compile time. Ids not covered
// by any predicate (void, the abstract IntPtr/UIntPtr ids, and unassigned
// slots) fall through to 0.
template<uint32_t TYPE_ID>
struct SizeOfTypeId {
  static constexpr uint32_t kTypeSize =
    isInt8   (TYPE_ID) ?  1 :
    isUInt8  (TYPE_ID) ?  1 :
    isInt16  (TYPE_ID) ?  2 :
    isUInt16 (TYPE_ID) ?  2 :
    isInt32  (TYPE_ID) ?  4 :
    isUInt32 (TYPE_ID) ?  4 :
    isInt64  (TYPE_ID) ?  8 :
    isUInt64 (TYPE_ID) ?  8 :
    isFloat32(TYPE_ID) ?  4 :
    isFloat64(TYPE_ID) ?  8 :
    isFloat80(TYPE_ID) ? 10 :
    isMask8  (TYPE_ID) ?  1 :
    isMask16 (TYPE_ID) ?  2 :
    isMask32 (TYPE_ID) ?  4 :
    isMask64 (TYPE_ID) ?  8 :
    isMmx32  (TYPE_ID) ?  4 :
    isMmx64  (TYPE_ID) ?  8 :
    isVec32  (TYPE_ID) ?  4 :
    isVec64  (TYPE_ID) ?  8 :
    isVec128 (TYPE_ID) ? 16 :
    isVec256 (TYPE_ID) ? 32 :
    isVec512 (TYPE_ID) ? 64 : 0;
};
255 //! \endcond
256
//! Returns the scalar base type-id of `typeId` (table lookup, 0 if none).
static inline uint32_t baseOf(uint32_t typeId) noexcept {
  ASMJIT_ASSERT(typeId <= kIdMax);
  return _typeData.baseOf[typeId];
}

//! Returns the size of `typeId` in bytes (table lookup, 0 if unknown/abstract).
static inline uint32_t sizeOf(uint32_t typeId) noexcept {
  ASMJIT_ASSERT(typeId <= kIdMax);
  return _typeData.sizeOf[typeId];
}
266
267 //! Returns offset needed to convert a `kIntPtr` and `kUIntPtr` TypeId
268 //! into a type that matches `gpSize` (general-purpose register size).
269 //! If you find such TypeId it's then only about adding the offset to it.
270 //!
271 //! For example:
272 //!
273 //! ```
274 //! uint32_t gpSize = '4' or '8';
275 //! uint32_t deabstractDelta = Type::deabstractDeltaOfSize(gpSize);
276 //!
277 //! uint32_t typeId = 'some type-id';
278 //!
279 //! // Normalize some typeId into a non-abstract typeId.
280 //! if (Type::isAbstract(typeId)) typeId += deabstractDelta;
281 //!
282 //! // The same, but by using Type::deabstract() function.
283 //! typeId = Type::deabstract(typeId, deabstractDelta);
284 //! ```
//! Returns the delta that turns `kIdIntPtr`/`kIdUIntPtr` into the concrete
//! 64-bit ids when `gpSize >= 8`, or the 32-bit ids otherwise.
static constexpr uint32_t deabstractDeltaOfSize(uint32_t gpSize) noexcept {
  return gpSize >= 8 ? kIdI64 - kIdIntPtr : kIdI32 - kIdIntPtr;
}

//! Applies `deabstractDelta` to `typeId` only when it's abstract; concrete
//! type-ids are returned unchanged.
static constexpr uint32_t deabstract(uint32_t typeId, uint32_t deabstractDelta) noexcept {
  return isAbstract(typeId) ? typeId + deabstractDelta : typeId;
}
292
// Empty marker types that stand in for fundamental C++ types - each one is
// mapped to a fixed TypeId by ASMJIT_DEFINE_TYPE_ID below.

//! bool as C++ type-name.
struct Bool {};
//! int8_t as C++ type-name.
struct I8 {};
//! uint8_t as C++ type-name.
struct U8 {};
//! int16_t as C++ type-name.
struct I16 {};
//! uint16_t as C++ type-name.
struct U16 {};
//! int32_t as C++ type-name.
struct I32 {};
//! uint32_t as C++ type-name.
struct U32 {};
//! int64_t as C++ type-name.
struct I64 {};
//! uint64_t as C++ type-name.
struct U64 {};
//! intptr_t as C++ type-name.
struct IPtr {};
//! uintptr_t as C++ type-name.
struct UPtr {};
//! float as C++ type-name.
struct F32 {};
//! double as C++ type-name.
struct F64 {};
319
320 } // {Type}
321
322 // ============================================================================
323 // [ASMJIT_DEFINE_TYPE_ID]
324 // ============================================================================
325
326 //! \cond
// Specializes `Type::IdOfT<T>` so that the C++ type `T` maps to `TYPE_ID`.
// Must be expanded at namespace scope (it reopens namespace `Type`).
#define ASMJIT_DEFINE_TYPE_ID(T, TYPE_ID) \
namespace Type { \
  template<> \
  struct IdOfT<T> { \
    enum : uint32_t { kTypeId = TYPE_ID }; \
  }; \
}

// Fundamental integral types map by their size/signedness via `IdOfIntT`.
ASMJIT_DEFINE_TYPE_ID(bool              , IdOfIntT<bool              >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(char              , IdOfIntT<char              >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(signed char       , IdOfIntT<signed char       >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(unsigned char     , IdOfIntT<unsigned char     >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(short             , IdOfIntT<short             >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(unsigned short    , IdOfIntT<unsigned short    >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(int               , IdOfIntT<int               >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(unsigned int      , IdOfIntT<unsigned int      >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(long              , IdOfIntT<long              >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(unsigned long     , IdOfIntT<unsigned long     >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(long long         , IdOfIntT<long long         >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(unsigned long long, IdOfIntT<unsigned long long>::kTypeId);

#if ASMJIT_CXX_HAS_NATIVE_WCHAR_T
ASMJIT_DEFINE_TYPE_ID(wchar_t           , IdOfIntT<wchar_t           >::kTypeId);
#endif

#if ASMJIT_CXX_HAS_UNICODE_LITERALS
ASMJIT_DEFINE_TYPE_ID(char16_t          , IdOfIntT<char16_t          >::kTypeId);
ASMJIT_DEFINE_TYPE_ID(char32_t          , IdOfIntT<char32_t          >::kTypeId);
#endif

ASMJIT_DEFINE_TYPE_ID(void              , kIdVoid);
ASMJIT_DEFINE_TYPE_ID(float             , kIdF32);
ASMJIT_DEFINE_TYPE_ID(double            , kIdF64);

// The marker structs declared in namespace `Type` map to fixed type-ids.
ASMJIT_DEFINE_TYPE_ID(Bool              , kIdU8);
ASMJIT_DEFINE_TYPE_ID(I8                , kIdI8);
ASMJIT_DEFINE_TYPE_ID(U8                , kIdU8);
ASMJIT_DEFINE_TYPE_ID(I16               , kIdI16);
ASMJIT_DEFINE_TYPE_ID(U16               , kIdU16);
ASMJIT_DEFINE_TYPE_ID(I32               , kIdI32);
ASMJIT_DEFINE_TYPE_ID(U32               , kIdU32);
ASMJIT_DEFINE_TYPE_ID(I64               , kIdI64);
ASMJIT_DEFINE_TYPE_ID(U64               , kIdU64);
ASMJIT_DEFINE_TYPE_ID(IPtr              , kIdIntPtr);
ASMJIT_DEFINE_TYPE_ID(UPtr              , kIdUIntPtr);
ASMJIT_DEFINE_TYPE_ID(F32               , kIdF32);
ASMJIT_DEFINE_TYPE_ID(F64               , kIdF64);
375
376 //! \}
377
378 ASMJIT_END_NAMESPACE
379
380 #endif // _ASMJIT_CORE_TYPE_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_JIT
8
9 #include "../core/osutils.h"
10 #include "../core/string.h"
11 #include "../core/support.h"
12 #include "../core/virtmem.h"
13
14 #if !defined(_WIN32)
15 #include <errno.h>
16 #include <fcntl.h>
17 #include <sys/mman.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 #include <unistd.h>
21
22 // Linux has a `memfd_create` syscall that we would like to use, if available.
23 #if defined(__linux__)
24 #include <sys/syscall.h>
25 #endif
26
27 // Apple recently introduced MAP_JIT flag, which we want to use.
28 #if defined(__APPLE__)
29 #include <TargetConditionals.h>
30 #if TARGET_OS_OSX
31 #include <sys/utsname.h>
32 #endif
33 // Older SDK doesn't define `MAP_JIT`.
34 #ifndef MAP_JIT
35 #define MAP_JIT 0x800
36 #endif
37 #endif
38
39 // BSD/OSX: `MAP_ANONYMOUS` is not defined, `MAP_ANON` is.
40 #if !defined(MAP_ANONYMOUS)
41 #define MAP_ANONYMOUS MAP_ANON
42 #endif
43 #endif
44
45 #include <atomic>
46
47 #if defined(__APPLE__)
48 #define ASMJIT_VM_SHM_DETECT 0
49 #else
50 #define ASMJIT_VM_SHM_DETECT 1
51 #endif
52
53 ASMJIT_BEGIN_NAMESPACE
54
55 // ============================================================================
56 // [asmjit::VirtMem - Utilities]
57 // ============================================================================
58
// Access flags stripped from each of the two views of a dual mapping:
// index 0 (the `ro` view) loses WRITE, index 1 (the `rw` view) loses EXECUTE.
static const uint32_t VirtMem_dualMappingFilter[2] = {
  VirtMem::kAccessWrite,
  VirtMem::kAccessExecute
};
63
64 // ============================================================================
65 // [asmjit::VirtMem - Virtual Memory [Windows]]
66 // ============================================================================
67
68 #if defined(_WIN32)
// RAII wrapper that closes a Windows HANDLE on destruction
// (nullptr means "no handle", so default construction is safe).
struct ScopedHandle {
  inline ScopedHandle() noexcept
    : value(nullptr) {}

  inline ~ScopedHandle() noexcept {
    if (value != nullptr)
      ::CloseHandle(value);
  }

  HANDLE value;
};
80
// Fills `vmInfo` from `GetSystemInfo()` - page size (rounded up to a power
// of two) and allocation granularity.
static void VirtMem_getInfo(VirtMem::Info& vmInfo) noexcept {
  SYSTEM_INFO systemInfo;

  ::GetSystemInfo(&systemInfo);
  vmInfo.pageSize = Support::alignUpPowerOf2<uint32_t>(systemInfo.dwPageSize);
  vmInfo.pageGranularity = systemInfo.dwAllocationGranularity;
}
88
89 // Windows specific implementation that uses `VirtualAlloc` and `VirtualFree`.
90 static DWORD VirtMem_accessToWinProtectFlags(uint32_t flags) noexcept {
91 DWORD protectFlags;
92
93 // READ|WRITE|EXECUTE.
94 if (flags & VirtMem::kAccessExecute)
95 protectFlags = (flags & VirtMem::kAccessWrite) ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
96 else if (flags & VirtMem::kAccessReadWrite)
97 protectFlags = (flags & VirtMem::kAccessWrite) ? PAGE_READWRITE : PAGE_READONLY;
98 else
99 protectFlags = PAGE_NOACCESS;
100
101 // Any other flags to consider?
102 return protectFlags;
103 }
104
105 static DWORD VirtMem_accessToWinDesiredAccess(uint32_t flags) noexcept {
106 DWORD access = (flags & VirtMem::kAccessWrite) ? FILE_MAP_WRITE : FILE_MAP_READ;
107 if (flags & VirtMem::kAccessExecute)
108 access |= FILE_MAP_EXECUTE;
109 return access;
110 }
111
// Allocates `size` bytes of virtual memory with the given access `flags`,
// storing the result in `*p` (nullptr on failure).
Error VirtMem::alloc(void** p, size_t size, uint32_t flags) noexcept {
  *p = nullptr;
  // A zero-sized allocation is treated as an error, not a no-op.
  if (size == 0)
    return DebugUtils::errored(kErrorInvalidArgument);

  DWORD protectFlags = VirtMem_accessToWinProtectFlags(flags);
  // Reserve and commit in a single call.
  void* result = ::VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, protectFlags);

  if (!result)
    return DebugUtils::errored(kErrorOutOfMemory);

  *p = result;
  return kErrorOk;
}
126
// Releases memory previously allocated by `VirtMem::alloc()`. The `size`
// argument is unused - `VirtualFree()` with MEM_RELEASE requires a size of
// zero and frees the whole reservation.
Error VirtMem::release(void* p, size_t size) noexcept {
  ASMJIT_UNUSED(size);
  if (ASMJIT_UNLIKELY(!::VirtualFree(p, 0, MEM_RELEASE)))
    return DebugUtils::errored(kErrorInvalidArgument);
  return kErrorOk;
}
133
134 Error VirtMem::protect(void* p, size_t size, uint32_t flags) noexcept {
135 DWORD protectFlags = VirtMem_accessToWinProtectFlags(flags);
136 DWORD oldFlags;
137
138 if (::VirtualProtect(p, size, protectFlags, &oldFlags))
139 return kErrorOk;
140
141 return DebugUtils::errored(kErrorInvalidArgument);
142 }
143
144 Error VirtMem::allocDualMapping(DualMapping* dm, size_t size, uint32_t flags) noexcept {
145 dm->ro = nullptr;
146 dm->rw = nullptr;
147
148 if (size == 0)
149 return DebugUtils::errored(kErrorInvalidArgument);
150
151 ScopedHandle handle;
152 handle.value = ::CreateFileMappingW(
153 INVALID_HANDLE_VALUE,
154 nullptr,
155 PAGE_EXECUTE_READWRITE,
156 (DWORD)(uint64_t(size) >> 32),
157 (DWORD)(size & 0xFFFFFFFFu),
158 nullptr);
159
160 if (ASMJIT_UNLIKELY(!handle.value))
161 return DebugUtils::errored(kErrorOutOfMemory);
162
163 void* ptr[2];
164 for (uint32_t i = 0; i < 2; i++) {
165 DWORD desiredAccess = VirtMem_accessToWinDesiredAccess(flags & ~VirtMem_dualMappingFilter[i]);
166 ptr[i] = ::MapViewOfFile(handle.value, desiredAccess, 0, 0, size);
167
168 if (ptr[i] == nullptr) {
169 if (i == 0)
170 ::UnmapViewOfFile(ptr[0]);
171 return DebugUtils::errored(kErrorOutOfMemory);
172 }
173 }
174
175 dm->ro = ptr[0];
176 dm->rw = ptr[1];
177 return kErrorOk;
178 }
179
180 Error VirtMem::releaseDualMapping(DualMapping* dm, size_t size) noexcept {
181 ASMJIT_UNUSED(size);
182 bool failed = false;
183
184 if (!::UnmapViewOfFile(dm->ro))
185 failed = true;
186
187 if (dm->ro != dm->rw && !UnmapViewOfFile(dm->rw))
188 failed = true;
189
190 if (failed)
191 return DebugUtils::errored(kErrorInvalidArgument);
192
193 dm->ro = nullptr;
194 dm->rw = nullptr;
195 return kErrorOk;
196 }
197 #endif
198
199 // ============================================================================
200 // [asmjit::VirtMem - Virtual Memory [Posix]]
201 // ============================================================================
202
203 #if !defined(_WIN32)
// RAII wrapper that closes a POSIX file descriptor on destruction
// (-1 means "no descriptor", so default construction is safe).
struct ScopedFD {
  inline ScopedFD() noexcept
    : value(-1) {}

  inline ~ScopedFD() noexcept {
    if (value != -1)
      close(value);
  }

  int value;
};
215
// Fills `vmInfo` from `getpagesize()`; the allocation granularity is clamped
// to at least 64kB.
static void VirtMem_getInfo(VirtMem::Info& vmInfo) noexcept {
  uint32_t pageSize = uint32_t(::getpagesize());

  vmInfo.pageSize = pageSize;
  vmInfo.pageGranularity = Support::max<uint32_t>(pageSize, 65536);
}
222
// Some operating systems don't allow /dev/shm to be executable. On Linux this
// happens when /dev/shm is mounted with 'noexec', which is enforced by systemd.
// Other operating systems like OSX also restrict executable permissions regarding
// /dev/shm, so we use a runtime detection before trying to allocate the requested
// memory by the user. Sometimes we don't need the detection as we know it would
// always result in 'kShmStrategyTmpDir'.
enum ShmStrategy : uint32_t {
  //! Not detected yet.
  kShmStrategyUnknown = 0,
  //! Use `shm_open()` (POSIX shared memory).
  kShmStrategyDevShm = 1,
  //! Create (and immediately unlink) a file in a temporary directory.
  kShmStrategyTmpDir = 2
};
234
235 // Posix specific implementation that uses `mmap()` and `munmap()`.
236 static int VirtMem_accessToPosixProtection(uint32_t flags) noexcept {
237 int protection = 0;
238 if (flags & VirtMem::kAccessRead ) protection |= PROT_READ;
239 if (flags & VirtMem::kAccessWrite ) protection |= PROT_READ | PROT_WRITE;
240 if (flags & VirtMem::kAccessExecute) protection |= PROT_READ | PROT_EXEC;
241 return protection;
242 }
243
244 // Translates libc errors specific to VirtualMemory mapping to `asmjit::Error`.
245 static Error VirtMem_makeErrorFromErrno(int e) noexcept {
246 switch (e) {
247 case EACCES:
248 case EAGAIN:
249 case ENODEV:
250 case EPERM:
251 return kErrorInvalidState;
252
253 case EFBIG:
254 case ENOMEM:
255 case EOVERFLOW:
256 return kErrorOutOfMemory;
257
258 case EMFILE:
259 case ENFILE:
260 return kErrorTooManyHandles;
261
262 default:
263 return kErrorInvalidArgument;
264 }
265 }
266
#if defined(__APPLE__)
// Detects whether the current process is hardened, which means that pages that
// have WRITE and EXECUTABLE flags cannot be allocated without MAP_JIT flag.
static ASMJIT_INLINE bool VirtMem_isHardened() noexcept {
  // Cached result of the probe; the benign race on this volatile is fine as
  // concurrent probes compute the same value.
  static volatile uint32_t globalHardenedFlag;

  enum HardenedFlag : uint32_t {
    kHardenedFlagUnknown  = 0,
    kHardenedFlagDisabled = 1,
    kHardenedFlagEnabled  = 2
  };

  uint32_t flag = globalHardenedFlag;
  if (flag == kHardenedFlagUnknown) {
    VirtMem::Info memInfo;
    VirtMem_getInfo(memInfo);

    // Probe: try to map one page W+X without MAP_JIT - if that fails the
    // process is hardened.
    void* ptr = mmap(nullptr, memInfo.pageSize, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
      flag = kHardenedFlagEnabled;
    }
    else {
      flag = kHardenedFlagDisabled;
      munmap(ptr, memInfo.pageSize);
    }
    globalHardenedFlag = flag;
  }

  return flag == kHardenedFlagEnabled;
}

// MAP_JIT flag required to run unsigned JIT code is only supported by kernel
// version 10.14+ (Mojave) and IOS.
static ASMJIT_INLINE bool VirtMem_hasMapJitSupport() noexcept {
#if TARGET_OS_OSX
  // Cached Darwin major version (0 = not queried yet); same benign race as above.
  static volatile uint32_t globalVersion;

  uint32_t ver = globalVersion;
  if (!ver) {
    struct utsname osname;
    uname(&osname);
    ver = atoi(osname.release);
    globalVersion = ver;
  }
  // Darwin 18 corresponds to macOS 10.14 (Mojave).
  return ver >= 18;
#else
  // Assume it's available.
  return true;
#endif
}

static ASMJIT_INLINE uint32_t VirtMem_appleSpecificMMapFlags(uint32_t flags) {
  // Always use MAP_JIT flag if user asked for it (could be used for testing
  // on non-hardened processes) and detect whether it must be used when the
  // process is actually hardened (in that case it doesn't make sense to rely
  // on user `flags`).
  bool useMapJit = ((flags & VirtMem::kMMapEnableMapJit) != 0) || VirtMem_isHardened();
  if (useMapJit)
    return VirtMem_hasMapJitSupport() ? MAP_JIT : 0u;
  else
    return 0;
}
#else
// Non-Apple platforms never add extra mmap flags.
static ASMJIT_INLINE uint32_t VirtMem_appleSpecificMMapFlags(uint32_t flags) {
  ASMJIT_UNUSED(flags);
  return 0;
}
#endif
335
// Returns the temporary directory - the value of `TMPDIR` when set,
// otherwise "/tmp".
static const char* VirtMem_getTmpDir() noexcept {
  const char* dir = getenv("TMPDIR");
  if (dir)
    return dir;
  return "/tmp";
}
340
// Opens an anonymous, unlinked file descriptor suitable for creating a
// shared (dual) mapping. Tries, in order of availability: Linux
// `memfd_create`, BSD `SHM_ANON`, and finally a named `open()`/`shm_open()`
// with a generated unique name that is unlinked immediately after creation.
static Error VirtMem_openAnonymousMemory(int* fd, bool preferTmpOverDevShm) noexcept {
#if defined(SYS_memfd_create)
  // Linux specific 'memfd_create' - if the syscall returns `ENOSYS` it means
  // it's not available and we will never call it again (would be pointless).

  // Zero initialized, if ever changed to '1' that would mean the syscall is not
  // available and we must use `shm_open()` and `shm_unlink()`.
  static volatile uint32_t memfd_create_not_supported;

  if (!memfd_create_not_supported) {
    *fd = (int)syscall(SYS_memfd_create, "vmem", 0);
    if (ASMJIT_LIKELY(*fd >= 0))
      return kErrorOk;

    int e = errno;
    if (e == ENOSYS)
      memfd_create_not_supported = 1;
    else
      return DebugUtils::errored(VirtMem_makeErrorFromErrno(e));
  }
#endif

#if defined(SHM_ANON)
  // Originally FreeBSD extension, apparently works in other BSDs too.
  ASMJIT_UNUSED(preferTmpOverDevShm);
  *fd = shm_open(SHM_ANON, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);

  if (ASMJIT_LIKELY(*fd >= 0))
    return kErrorOk;
  else
    return DebugUtils::errored(VirtMem_makeErrorFromErrno(errno));
#else
  // POSIX API. We have to generate somehow a unique name. This is nothing
  // cryptographic, just using a bit from the stack address to always have
  // a different base for different threads (as threads have their own stack)
  // and retries for avoiding collisions. We use `shm_open()` with flags that
  // require creation of the file so we never open an existing shared memory.
  static std::atomic<uint32_t> internalCounter;

  StringTmp<128> uniqueName;
  const char* kShmFormat = "/shm-id-%08llX";

  uint32_t kRetryCount = 100;
  uint64_t bits = ((uintptr_t)(void*)&uniqueName) & 0x55555555u;

  for (uint32_t i = 0; i < kRetryCount; i++) {
    // Mix in the tick count and a global counter so each retry (and each
    // concurrent caller) produces a different candidate name.
    bits -= uint64_t(OSUtils::getTickCount()) * 773703683;
    bits = ((bits >> 14) ^ (bits << 6)) + uint64_t(++internalCounter) * 10619863;

    if (!ASMJIT_VM_SHM_DETECT || preferTmpOverDevShm) {
      uniqueName.assignString(VirtMem_getTmpDir());
      uniqueName.appendFormat(kShmFormat, (unsigned long long)bits);
      // NOTE(review): mode 0 means the created file has no permissions for
      // any other open - verify this is intentional (the fd itself remains
      // usable and the name is unlinked right away).
      *fd = open(uniqueName.data(), O_RDWR | O_CREAT | O_EXCL, 0);
      if (ASMJIT_LIKELY(*fd >= 0)) {
        // Unlink immediately - the memory lives as long as the descriptor.
        unlink(uniqueName.data());
        return kErrorOk;
      }
    }
    else {
      uniqueName.assignFormat(kShmFormat, (unsigned long long)bits);
      *fd = shm_open(uniqueName.data(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
      if (ASMJIT_LIKELY(*fd >= 0)) {
        shm_unlink(uniqueName.data());
        return kErrorOk;
      }
    }

    int e = errno;
    if (e == EEXIST)
      continue;
    else
      return DebugUtils::errored(VirtMem_makeErrorFromErrno(e));
  }
  // NOTE(review): reaching here means all `kRetryCount` attempts failed with
  // EEXIST, yet `kErrorOk` is returned while `*fd` holds a failed (negative)
  // descriptor - callers would then operate on an invalid fd. Looks like a
  // bug; confirm and consider returning an error here.
  return kErrorOk;
#endif
}
417
#if ASMJIT_VM_SHM_DETECT
// Probes whether POSIX shared memory (typically backed by /dev/shm) can be
// mapped with PROT_EXEC; if mmap fails with EINVAL the tmp-dir fallback must
// be used instead.
static Error VirtMem_detectShmStrategy(uint32_t* strategyOut) noexcept {
  ScopedFD fd;
  VirtMem::Info vmInfo = VirtMem::info();

  ASMJIT_PROPAGATE(VirtMem_openAnonymousMemory(&fd.value, false));
  // The file must have a non-zero size before it can be mapped.
  if (ftruncate(fd.value, off_t(vmInfo.pageSize)) != 0)
    return DebugUtils::errored(VirtMem_makeErrorFromErrno(errno));

  void* ptr = mmap(nullptr, vmInfo.pageSize, PROT_READ | PROT_EXEC, MAP_SHARED, fd.value, 0);
  if (ptr == MAP_FAILED) {
    int e = errno;
    if (e == EINVAL) {
      *strategyOut = kShmStrategyTmpDir;
      return kErrorOk;
    }
    return DebugUtils::errored(VirtMem_makeErrorFromErrno(e));
  }
  else {
    munmap(ptr, vmInfo.pageSize);
    *strategyOut = kShmStrategyDevShm;
    return kErrorOk;
  }
}
#endif

#if ASMJIT_VM_SHM_DETECT
// Returns the detected shm strategy, caching the detection result in a
// global (the race on the volatile is benign - detection is deterministic).
static Error VirtMem_getShmStrategy(uint32_t* strategyOut) noexcept {
  // Initially don't assume anything. It has to be tested whether
  // '/dev/shm' was mounted with 'noexec' flag or not.
  static volatile uint32_t globalShmStrategy = kShmStrategyUnknown;

  uint32_t strategy = globalShmStrategy;
  if (strategy == kShmStrategyUnknown) {
    ASMJIT_PROPAGATE(VirtMem_detectShmStrategy(&strategy));
    globalShmStrategy = strategy;
  }

  *strategyOut = strategy;
  return kErrorOk;
}
#else
// Detection disabled (e.g. Apple) - the tmp-dir strategy is always used.
static Error VirtMem_getShmStrategy(uint32_t* strategyOut) noexcept {
  *strategyOut = kShmStrategyTmpDir;
  return kErrorOk;
}
#endif
465
466 Error VirtMem::alloc(void** p, size_t size, uint32_t flags) noexcept {
467 *p = nullptr;
468
469 if (size == 0)
470 return DebugUtils::errored(kErrorInvalidArgument);
471
472 int protection = VirtMem_accessToPosixProtection(flags);
473 int mmFlags = MAP_PRIVATE | MAP_ANONYMOUS | VirtMem_appleSpecificMMapFlags(flags);
474 void* ptr = mmap(nullptr, size, protection, mmFlags, -1, 0);
475
476 if (ptr == MAP_FAILED)
477 return DebugUtils::errored(kErrorOutOfMemory);
478
479 *p = ptr;
480 return kErrorOk;
481 }
482
483 Error VirtMem::release(void* p, size_t size) noexcept {
484 if (ASMJIT_UNLIKELY(munmap(p, size) != 0))
485 return DebugUtils::errored(kErrorInvalidArgument);
486
487 return kErrorOk;
488 }
489
490
491 Error VirtMem::protect(void* p, size_t size, uint32_t flags) noexcept {
492 int protection = VirtMem_accessToPosixProtection(flags);
493 if (mprotect(p, size, protection) == 0)
494 return kErrorOk;
495
496 return DebugUtils::errored(kErrorInvalidArgument);
497 }
498
// Creates two views of the same anonymous shared memory: `dm->ro` and
// `dm->rw`. The protection of each view is derived from `flags` with bits
// removed by `VirtMem_dualMappingFilter[i]` - presumably write access is
// filtered from view [0] and execute from view [1], matching the `ro`/`rw`
// assignment below (filter table not visible here - confirm at definition).
Error VirtMem::allocDualMapping(DualMapping* dm, size_t size, uint32_t flags) noexcept {
  dm->ro = nullptr;
  dm->rw = nullptr;

  // `size` must be positive and representable as `off_t`, because it's
  // passed to `ftruncate()` below.
  if (off_t(size) <= 0)
    return DebugUtils::errored(size == 0 ? kErrorInvalidArgument : kErrorTooLarge);

  // Decide whether the backing file lives in a temporary directory or in
  // POSIX shared memory, unless the caller forced the tmp-dir strategy.
  bool preferTmpOverDevShm = (flags & kMappingPreferTmp) != 0;
  if (!preferTmpOverDevShm) {
    uint32_t strategy;
    ASMJIT_PROPAGATE(VirtMem_getShmStrategy(&strategy));
    preferTmpOverDevShm = (strategy == kShmStrategyTmpDir);
  }

  // ScopedFD will automatically close the file descriptor in its destructor.
  ScopedFD fd;
  ASMJIT_PROPAGATE(VirtMem_openAnonymousMemory(&fd.value, preferTmpOverDevShm));
  if (ftruncate(fd.value, off_t(size)) != 0)
    return DebugUtils::errored(VirtMem_makeErrorFromErrno(errno));

  // Map the same file descriptor twice, once per view.
  void* ptr[2];
  for (uint32_t i = 0; i < 2; i++) {
    ptr[i] = mmap(nullptr, size, VirtMem_accessToPosixProtection(flags & ~VirtMem_dualMappingFilter[i]), MAP_SHARED, fd.value, 0);
    if (ptr[i] == MAP_FAILED) {
      // Get the error now before `munmap` has a chance to clobber it.
      int e = errno;
      if (i == 1)
        munmap(ptr[0], size);
      return DebugUtils::errored(VirtMem_makeErrorFromErrno(e));
    }
  }

  dm->ro = ptr[0];
  dm->rw = ptr[1];
  return kErrorOk;
}
535
536 Error VirtMem::releaseDualMapping(DualMapping* dm, size_t size) noexcept {
537 Error err = release(dm->ro, size);
538 if (dm->ro != dm->rw)
539 err |= release(dm->rw, size);
540
541 if (err)
542 return DebugUtils::errored(kErrorInvalidArgument);
543
544 dm->ro = nullptr;
545 dm->rw = nullptr;
546 return kErrorOk;
547 }
548 #endif
549
550 // ============================================================================
551 // [asmjit::VirtMem - Virtual Memory [Memory Info]]
552 // ============================================================================
553
// Returns cached virtual memory information, querying the OS on first call.
VirtMem::Info VirtMem::info() noexcept {
  // Cached copy and its initialization flag; computed once on first call.
  static VirtMem::Info vmInfo;
  static std::atomic<uint32_t> vmInfoInitialized;

  if (!vmInfoInitialized.load()) {
    // NOTE(review): if two threads race here both query the OS and store the
    // same values into `vmInfo` - assumed harmless since the data is
    // identical, but the struct assignment itself is not atomic.
    VirtMem::Info localMemInfo;
    VirtMem_getInfo(localMemInfo);

    vmInfo = localMemInfo;
    vmInfoInitialized.store(1u);
  }

  return vmInfo;
}
568
569 ASMJIT_END_NAMESPACE
570
571 #endif
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_VIRTMEM_H
7 #define _ASMJIT_CORE_VIRTMEM_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_JIT
11
12 #include "../core/globals.h"
13
14 ASMJIT_BEGIN_NAMESPACE
15
16 //! \addtogroup asmjit_jit
17 //! \{
18
19 // ============================================================================
20 // [asmjit::VirtMem]
21 // ============================================================================
22
//! Virtual memory management.
namespace VirtMem {

//! Virtual memory and memory mapping flags.
enum Flags : uint32_t {
  //! No access flags.
  kAccessNone = 0x00000000u,
  //! Memory is readable.
  kAccessRead = 0x00000001u,
  //! Memory is writable (implies read access).
  kAccessWrite = 0x00000002u,
  //! Memory is executable (implies read access).
  kAccessExecute = 0x00000004u,

  //! A combination of `kAccessRead | kAccessWrite`.
  kAccessReadWrite = 0x00000003u,

  //! Use a `MAP_JIT` flag available on Apple platforms (OSX Mojave+), which
  //! allows JIT code to be executed in OSX bundles. This flag is not turned
  //! on by default, because when a process uses `fork()` the child process
  //! has no access to the pages mapped with `MAP_JIT`, which could break code
  //! that doesn't expect this behavior.
  kMMapEnableMapJit = 0x00000010u,

  //! Not an access flag, only used by `allocDualMapping()` to override the
  //! default allocation strategy to always use a 'tmp' directory instead of
  //! "/dev/shm" (on POSIX platforms). Please note that this flag will be
  //! ignored if the operating system allows allocating executable memory
  //! by a different API than `open()` or `shm_open()`. For example on Linux
  //! `memfd_create()` is preferred and on BSDs `shm_open(SHM_ANON, ...)` is
  //! used if SHM_ANON is defined.
  kMappingPreferTmp = 0x80000000u
};

//! Virtual memory information.
struct Info {
  //! Virtual memory page size.
  uint32_t pageSize;
  //! Virtual memory page granularity.
  uint32_t pageGranularity;
};

//! Dual memory mapping used to map an anonymous memory into two memory regions
//! where one region is read-only, but executable, and the second region is
//! read+write, but not executable. Please see \ref VirtMem::allocDualMapping()
//! for more details.
struct DualMapping {
  //! Pointer to data with 'Read' or 'Read+Execute' access.
  void* ro;
  //! Pointer to data with 'Read-Write' access, but never 'Write+Execute'.
  void* rw;
};

//! Returns virtual memory information, see `VirtMem::Info` for more details.
ASMJIT_API Info info() noexcept;

//! Allocates virtual memory by either using `VirtualAlloc()` (Windows)
//! or `mmap()` (POSIX).
//!
//! \note `size` should be aligned to a page size, use \ref VirtMem::info()
//! to obtain it. Invalid size will not be corrected by the implementation
//! and the allocation would not succeed in such case.
ASMJIT_API Error alloc(void** p, size_t size, uint32_t flags) noexcept;

//! Releases virtual memory previously allocated by \ref VirtMem::alloc() or
//! \ref VirtMem::allocDualMapping().
//!
//! \note The size must be the same as used by \ref VirtMem::alloc(). If the
//! size is not the same value the call will fail on any POSIX system, but
//! pass on Windows, because of the difference of the implementation.
ASMJIT_API Error release(void* p, size_t size) noexcept;

//! A cross-platform wrapper around `mprotect()` (POSIX) and `VirtualProtect`
//! (Windows).
ASMJIT_API Error protect(void* p, size_t size, uint32_t flags) noexcept;

//! Allocates virtual memory and creates two views of it where the first view
//! has no write access. This is an addition to the API that should be used
//! in cases in which the operating system either enforces W^X security policy
//! or the application wants to use this policy by default to improve security
//! and prevent accidental (or purposeful) self-modifying code.
//!
//! The two pointers returned in `dm` are independent mappings of the same
//! shared memory region. You must use \ref VirtMem::releaseDualMapping() to
//! release it when it's no longer needed. Never use `VirtMem::release()` to
//! release the memory returned by `allocDualMapping()` as that would fail on
//! Windows.
//!
//! \remarks Both pointers in `dm` would be set to `nullptr` if the function fails.
ASMJIT_API Error allocDualMapping(DualMapping* dm, size_t size, uint32_t flags) noexcept;

//! Releases the virtual memory mapping previously allocated by
//! \ref VirtMem::allocDualMapping().
//!
//! \remarks Both pointers in `dm` would be set to `nullptr` if the function succeeds.
ASMJIT_API Error releaseDualMapping(DualMapping* dm, size_t size) noexcept;

} // VirtMem
121
122 //! \}
123
124 ASMJIT_END_NAMESPACE
125
126 #endif
127 #endif // _ASMJIT_CORE_VIRTMEM_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/support.h"
8 #include "../core/zone.h"
9
10 ASMJIT_BEGIN_NAMESPACE
11
12 // ============================================================================
13 // [asmjit::Zone - Statics]
14 // ============================================================================
15
// Zero size block used by `Zone` that doesn't have any memory allocated.
// Should be allocated in read-only memory and should never be modified.
// `_assignZeroBlock()` points `_ptr` and `_end` at this block's (empty)
// payload, so the allocation fast paths never need a null check.
const Zone::Block Zone::_zeroBlock = { nullptr, nullptr, 0 };
19
20 // ============================================================================
21 // [asmjit::Zone - Init / Reset]
22 // ============================================================================
23
24 void Zone::_init(size_t blockSize, size_t blockAlignment, const Support::Temporary* temporary) noexcept {
25 ASMJIT_ASSERT(blockSize >= kMinBlockSize);
26 ASMJIT_ASSERT(blockSize <= kMaxBlockSize);
27 ASMJIT_ASSERT(blockAlignment <= 64);
28
29 // Just to make the compiler happy...
30 constexpr size_t kBlockSizeMask = (Support::allOnes<size_t>() >> 4);
31 constexpr size_t kBlockAlignmentShiftMask = 0x7u;
32
33 _assignZeroBlock();
34 _blockSize = blockSize & kBlockSizeMask;
35 _isTemporary = temporary != nullptr;
36 _blockAlignmentShift = Support::ctz(blockAlignment) & kBlockAlignmentShiftMask;
37
38 // Setup the first [temporary] block, if necessary.
39 if (temporary) {
40 Block* block = temporary->data<Block>();
41 block->prev = nullptr;
42 block->next = nullptr;
43
44 ASMJIT_ASSERT(temporary->size() >= kBlockSize);
45 block->size = temporary->size() - kBlockSize;
46
47 _assignBlock(block);
48 }
49 }
50
// Resets the zone. A soft reset (`kResetSoft`) rewinds to the first block
// keeping all memory allocated; a hard reset (`kResetHard`) frees every
// heap-allocated block (the embedded block of a temporary zone is kept).
void Zone::reset(uint32_t resetPolicy) noexcept {
  Block* cur = _block;

  // Can't be altered.
  if (cur == &_zeroBlock)
    return;

  if (resetPolicy == Globals::kResetHard) {
    // Hard reset - detach from all blocks first, then free them.
    Block* initial = const_cast<Zone::Block*>(&_zeroBlock);
    _ptr = initial->data();
    _end = initial->data();
    _block = initial;

    // Since cur can be in the middle of the double-linked list, we have to
    // traverse both directions (`prev` and `next`) separately to visit all.
    Block* next = cur->next;
    do {
      Block* prev = cur->prev;

      // If this is the first block and this ZoneTmp is temporary then the
      // first block is statically allocated. We cannot free it and it makes
      // sense to keep it even when this is hard reset.
      if (prev == nullptr && _isTemporary) {
        cur->prev = nullptr;
        cur->next = nullptr;
        _assignBlock(cur);
        break;
      }

      ::free(cur);
      cur = prev;
    } while (cur);

    // Now free the forward part of the list (blocks after the current one).
    cur = next;
    while (cur) {
      next = cur->next;
      ::free(cur);
      cur = next;
    }
  }
  else {
    // Soft reset - keep all blocks, rewind to the head of the list.
    while (cur->prev)
      cur = cur->prev;
    _assignBlock(cur);
  }
}
97
98 // ============================================================================
99 // [asmjit::Zone - Alloc]
100 // ============================================================================
101
102 void* Zone::_alloc(size_t size, size_t alignment) noexcept {
103 Block* curBlock = _block;
104 Block* next = curBlock->next;
105
106 size_t rawBlockAlignment = blockAlignment();
107 size_t minimumAlignment = Support::max<size_t>(alignment, rawBlockAlignment);
108
109 // If the `Zone` has been cleared the current block doesn't have to be the
110 // last one. Check if there is a block that can be used instead of allocating
111 // a new one. If there is a `next` block it's completely unused, we don't have
112 // to check for remaining bytes in that case.
113 if (next) {
114 uint8_t* ptr = Support::alignUp(next->data(), minimumAlignment);
115 uint8_t* end = Support::alignDown(next->data() + next->size, rawBlockAlignment);
116
117 if (size <= (size_t)(end - ptr)) {
118 _block = next;
119 _ptr = ptr + size;
120 _end = Support::alignDown(next->data() + next->size, rawBlockAlignment);
121 return static_cast<void*>(ptr);
122 }
123 }
124
125 size_t blockAlignmentOverhead = alignment - Support::min<size_t>(alignment, Globals::kAllocAlignment);
126 size_t newSize = Support::max(blockSize(), size);
127
128 // Prevent arithmetic overflow.
129 if (ASMJIT_UNLIKELY(newSize > std::numeric_limits<size_t>::max() - kBlockSize - blockAlignmentOverhead))
130 return nullptr;
131
132 // Allocate new block - we add alignment overhead to `newSize`, which becomes the
133 // new block size, and we also add `kBlockOverhead` to the allocator as it includes
134 // members of `Zone::Block` structure.
135 newSize += blockAlignmentOverhead;
136 Block* newBlock = static_cast<Block*>(::malloc(newSize + kBlockSize));
137
138 if (ASMJIT_UNLIKELY(!newBlock))
139 return nullptr;
140
141 // Align the pointer to `minimumAlignment` and adjust the size of this block
142 // accordingly. It's the same as using `minimumAlignment - Support::alignUpDiff()`,
143 // just written differently.
144 {
145 newBlock->prev = nullptr;
146 newBlock->next = nullptr;
147 newBlock->size = newSize;
148
149 if (curBlock != &_zeroBlock) {
150 newBlock->prev = curBlock;
151 curBlock->next = newBlock;
152
153 // Does only happen if there is a next block, but the requested memory
154 // can't fit into it. In this case a new buffer is allocated and inserted
155 // between the current block and the next one.
156 if (next) {
157 newBlock->next = next;
158 next->prev = newBlock;
159 }
160 }
161
162 uint8_t* ptr = Support::alignUp(newBlock->data(), minimumAlignment);
163 uint8_t* end = Support::alignDown(newBlock->data() + newSize, rawBlockAlignment);
164
165 _ptr = ptr + size;
166 _end = end;
167 _block = newBlock;
168
169 ASMJIT_ASSERT(_ptr <= _end);
170 return static_cast<void*>(ptr);
171 }
172 }
173
174 void* Zone::allocZeroed(size_t size, size_t alignment) noexcept {
175 void* p = alloc(size, alignment);
176 if (ASMJIT_UNLIKELY(!p))
177 return p;
178 return memset(p, 0, size);
179 }
180
181 void* Zone::dup(const void* data, size_t size, bool nullTerminate) noexcept {
182 if (ASMJIT_UNLIKELY(!data || !size))
183 return nullptr;
184
185 ASMJIT_ASSERT(size != std::numeric_limits<size_t>::max());
186 uint8_t* m = allocT<uint8_t>(size + nullTerminate);
187 if (ASMJIT_UNLIKELY(!m)) return nullptr;
188
189 memcpy(m, data, size);
190 if (nullTerminate) m[size] = '\0';
191
192 return static_cast<void*>(m);
193 }
194
195 char* Zone::sformat(const char* fmt, ...) noexcept {
196 if (ASMJIT_UNLIKELY(!fmt))
197 return nullptr;
198
199 char buf[512];
200 size_t size;
201 va_list ap;
202
203 va_start(ap, fmt);
204 size = unsigned(vsnprintf(buf, ASMJIT_ARRAY_SIZE(buf) - 1, fmt, ap));
205 va_end(ap);
206
207 buf[size++] = 0;
208 return static_cast<char*>(dup(buf, size));
209 }
210
211 // ============================================================================
212 // [asmjit::ZoneAllocator - Helpers]
213 // ============================================================================
214
#if defined(ASMJIT_BUILD_DEBUG)
// Debug-only sanity check: returns true if `block` is currently linked in
// the allocator's list of dynamic blocks (used by `_releaseDynamic()`).
static bool ZoneAllocator_hasDynamicBlock(ZoneAllocator* self, ZoneAllocator::DynamicBlock* block) noexcept {
  for (ZoneAllocator::DynamicBlock* node = self->_dynamicBlocks; node; node = node->next)
    if (node == block)
      return true;
  return false;
}
#endif
226
227 // ============================================================================
228 // [asmjit::ZoneAllocator - Init / Reset]
229 // ============================================================================
230
231 void ZoneAllocator::reset(Zone* zone) noexcept {
232 // Free dynamic blocks.
233 DynamicBlock* block = _dynamicBlocks;
234 while (block) {
235 DynamicBlock* next = block->next;
236 ::free(block);
237 block = next;
238 }
239
240 // Zero the entire class and initialize to the given `zone`.
241 memset(this, 0, sizeof(*this));
242 _zone = zone;
243 }
244
245 // ============================================================================
246 // [asmjit::ZoneAllocator - Alloc / Release]
247 // ============================================================================
248
// Allocates `size` bytes and reports the actually-reserved size through
// `allocatedSize`. Small requests are served from per-size-class free lists
// ("slots") backed by the zone; large requests fall back to `::malloc()`.
void* ZoneAllocator::_alloc(size_t size, size_t& allocatedSize) noexcept {
  ASMJIT_ASSERT(isInitialized());

  // Use the memory pool only if the requested block has a reasonable size.
  uint32_t slot;
  if (_getSlotIndex(size, slot, allocatedSize)) {
    // Slot reuse.
    uint8_t* p = reinterpret_cast<uint8_t*>(_slots[slot]);
    size = allocatedSize;

    if (p) {
      // Pop the first chunk from this slot's free list.
      _slots[slot] = reinterpret_cast<Slot*>(p)->next;
      return p;
    }

    // Free list empty - carve the chunk directly from the zone.
    _zone->align(kBlockAlignment);
    p = _zone->ptr();
    size_t remain = (size_t)(_zone->end() - p);

    if (ASMJIT_LIKELY(remain >= size)) {
      _zone->setPtr(p + size);
      return p;
    }
    else {
      // Distribute the remaining memory to suitable slots, if possible.
      if (remain >= kLoGranularity) {
        do {
          size_t distSize = Support::min<size_t>(remain, kLoMaxSize);
          uint32_t distSlot = uint32_t((distSize - kLoGranularity) / kLoGranularity);
          ASMJIT_ASSERT(distSlot < kLoCount);

          reinterpret_cast<Slot*>(p)->next = _slots[distSlot];
          _slots[distSlot] = reinterpret_cast<Slot*>(p);

          p += distSize;
          remain -= distSize;
        } while (remain >= kLoGranularity);
        _zone->setPtr(p);
      }

      // Ask the zone for a fresh block that can hold the chunk.
      p = static_cast<uint8_t*>(_zone->_alloc(size, kBlockAlignment));
      if (ASMJIT_UNLIKELY(!p)) {
        allocatedSize = 0;
        return nullptr;
      }

      return p;
    }
  }
  else {
    // Allocate a dynamic block.
    size_t kBlockOverhead = sizeof(DynamicBlock) + sizeof(DynamicBlock*) + kBlockAlignment;

    // Handle a possible overflow.
    if (ASMJIT_UNLIKELY(kBlockOverhead >= std::numeric_limits<size_t>::max() - size))
      return nullptr;

    void* p = ::malloc(size + kBlockOverhead);
    if (ASMJIT_UNLIKELY(!p)) {
      allocatedSize = 0;
      return nullptr;
    }

    // Link as first in `_dynamicBlocks` double-linked list.
    DynamicBlock* block = static_cast<DynamicBlock*>(p);
    DynamicBlock* next = _dynamicBlocks;

    if (next)
      next->prev = block;

    block->prev = nullptr;
    block->next = next;
    _dynamicBlocks = block;

    // Align the pointer to the guaranteed alignment and store `DynamicBlock`
    // at the beginning of the memory block, so `_releaseDynamic()` can find it.
    p = Support::alignUp(static_cast<uint8_t*>(p) + sizeof(DynamicBlock) + sizeof(DynamicBlock*), kBlockAlignment);
    reinterpret_cast<DynamicBlock**>(p)[-1] = block;

    allocatedSize = size;
    return p;
  }
}
332
333 void* ZoneAllocator::_allocZeroed(size_t size, size_t& allocatedSize) noexcept {
334 ASMJIT_ASSERT(isInitialized());
335
336 void* p = _alloc(size, allocatedSize);
337 if (ASMJIT_UNLIKELY(!p)) return p;
338 return memset(p, 0, allocatedSize);
339 }
340
341 void ZoneAllocator::_releaseDynamic(void* p, size_t size) noexcept {
342 ASMJIT_UNUSED(size);
343 ASMJIT_ASSERT(isInitialized());
344
345 // Pointer to `DynamicBlock` is stored at [-1].
346 DynamicBlock* block = reinterpret_cast<DynamicBlock**>(p)[-1];
347 ASMJIT_ASSERT(ZoneAllocator_hasDynamicBlock(this, block));
348
349 // Unlink and free.
350 DynamicBlock* prev = block->prev;
351 DynamicBlock* next = block->next;
352
353 if (prev)
354 prev->next = next;
355 else
356 _dynamicBlocks = next;
357
358 if (next)
359 next->prev = prev;
360
361 ::free(block);
362 }
363
364 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ZONE_H
7 #define _ASMJIT_CORE_ZONE_H
8
9 #include "../core/support.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_zone
14 //! \{
15
16 // ============================================================================
17 // [asmjit::Zone]
18 // ============================================================================
19
20 //! Zone memory.
21 //!
22 //! Zone is an incremental memory allocator that allocates memory by simply
23 //! incrementing a pointer. It allocates blocks of memory by using C's `malloc()`,
24 //! but divides these blocks into smaller segments requested by calling
25 //! `Zone::alloc()` and friends.
26 //!
27 //! Zone has no function to release the allocated memory. It has to be released
28 //! all at once by calling `reset()`. If you need a more friendly allocator that
29 //! also supports `release()`, consider using `Zone` with `ZoneAllocator`.
30 class Zone {
31 public:
32 ASMJIT_NONCOPYABLE(Zone)
33
34 //! \cond INTERNAL
35
36 //! A single block of memory managed by `Zone`.
37 struct Block {
38 inline uint8_t* data() const noexcept {
39 return const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(this) + sizeof(*this));
40 }
41
42 //! Link to the previous block.
43 Block* prev;
44 //! Link to the next block.
45 Block* next;
46 //! Size of the block.
47 size_t size;
48 };
49
50 enum Limits : size_t {
51 kBlockSize = sizeof(Block),
52 kBlockOverhead = Globals::kAllocOverhead + kBlockSize,
53
54 kMinBlockSize = 64, // The number is ridiculously small, but still possible.
55 kMaxBlockSize = size_t(1) << (sizeof(size_t) * 8 - 4 - 1),
56 kMinAlignment = 1,
57 kMaxAlignment = 64
58 };
59
60 //! Pointer in the current block.
61 uint8_t* _ptr;
62 //! End of the current block.
63 uint8_t* _end;
64 //! Current block.
65 Block* _block;
66
67 union {
68 struct {
69 //! Default block size.
70 size_t _blockSize : Support::bitSizeOf<size_t>() - 4;
71 //! First block is temporary (ZoneTmp).
72 size_t _isTemporary : 1;
73 //! Block alignment (1 << alignment).
74 size_t _blockAlignmentShift : 3;
75 };
76 size_t _packedData;
77 };
78
79 static ASMJIT_API const Block _zeroBlock;
80
81 //! \endcond
82
83 //! \name Construction & Destruction
84 //! \{
85
86 //! Creates a new Zone.
87 //!
88 //! The `blockSize` parameter describes the default size of the block. If the
89 //! `size` parameter passed to `alloc()` is greater than the default size
90 //! `Zone` will allocate and use a larger block, but it will not change the
91 //! default `blockSize`.
92 //!
93 //! It's not required, but it's good practice to set `blockSize` to a
94 //! reasonable value that depends on the usage of `Zone`. Greater block sizes
95 //! are generally safer and perform better than unreasonably low block sizes.
96 ASMJIT_INLINE explicit Zone(size_t blockSize, size_t blockAlignment = 1) noexcept {
97 _init(blockSize, blockAlignment, nullptr);
98 }
99
100 ASMJIT_INLINE Zone(size_t blockSize, size_t blockAlignment, const Support::Temporary& temporary) noexcept {
101 _init(blockSize, blockAlignment, &temporary);
102 }
103
104 //! Moves an existing `Zone`.
105 //!
106 //! \note You cannot move an existing `ZoneTmp` as it uses embedded storage.
107 //! Attempting to move `ZoneTmp` would result in assertion failure in debug
108 //! mode and undefined behavior in release mode.
109 ASMJIT_INLINE Zone(Zone&& other) noexcept
110 : _ptr(other._ptr),
111 _end(other._end),
112 _block(other._block),
113 _packedData(other._packedData) {
114 ASMJIT_ASSERT(!other.isTemporary());
115 other._block = const_cast<Block*>(&_zeroBlock);
116 other._ptr = other._block->data();
117 other._end = other._block->data();
118 }
119
120 //! Destroys the `Zone` instance.
121 //!
122 //! This will destroy the `Zone` instance and release all blocks of memory
123 //! allocated by it. It performs implicit `reset(Globals::kResetHard)`.
124 ASMJIT_INLINE ~Zone() noexcept { reset(Globals::kResetHard); }
125
126 ASMJIT_API void _init(size_t blockSize, size_t blockAlignment, const Support::Temporary* temporary) noexcept;
127
128 //! Resets the `Zone` invalidating all blocks allocated.
129 //!
130 //! See `Globals::ResetPolicy` for more details.
131 ASMJIT_API void reset(uint32_t resetPolicy = Globals::kResetSoft) noexcept;
132
133 //! \}
134
135 //! \name Accessors
136 //! \{
137
138 //! Tests whether this `Zone` is actually a `ZoneTmp` that uses temporary memory.
139 ASMJIT_INLINE bool isTemporary() const noexcept { return _isTemporary != 0; }
140
141 //! Returns the default block size.
142 ASMJIT_INLINE size_t blockSize() const noexcept { return _blockSize; }
143 //! Returns the default block alignment.
144 ASMJIT_INLINE size_t blockAlignment() const noexcept { return size_t(1) << _blockAlignmentShift; }
145 //! Returns remaining size of the current block.
146 ASMJIT_INLINE size_t remainingSize() const noexcept { return (size_t)(_end - _ptr); }
147
148 //! Returns the current zone cursor (dangerous).
149 //!
150 //! This is a function that can be used to get exclusive access to the current
151 //! block's memory buffer.
152 template<typename T = uint8_t>
153 ASMJIT_INLINE T* ptr() noexcept { return reinterpret_cast<T*>(_ptr); }
154
155 //! Returns the end of the current zone block, only useful if you use `ptr()`.
156 template<typename T = uint8_t>
157 ASMJIT_INLINE T* end() noexcept { return reinterpret_cast<T*>(_end); }
158
159 //! Sets the current zone pointer to `ptr` (must be within the current block).
160 template<typename T>
161 ASMJIT_INLINE void setPtr(T* ptr) noexcept {
162 uint8_t* p = reinterpret_cast<uint8_t*>(ptr);
163 ASMJIT_ASSERT(p >= _ptr && p <= _end);
164 _ptr = p;
165 }
166
167 //! Sets the end zone pointer to `end` (must be within the current block).
168 template<typename T>
169 ASMJIT_INLINE void setEnd(T* end) noexcept {
170 uint8_t* p = reinterpret_cast<uint8_t*>(end);
171 ASMJIT_ASSERT(p >= _ptr && p <= _end);
172 _end = p;
173 }
174
175 //! \}
176
177 //! \name Utilities
178 //! \{
179
180 ASMJIT_INLINE void swap(Zone& other) noexcept {
181 // This could lead to a disaster.
182 ASMJIT_ASSERT(!this->isTemporary());
183 ASMJIT_ASSERT(!other.isTemporary());
184
185 std::swap(_ptr, other._ptr);
186 std::swap(_end, other._end);
187 std::swap(_block, other._block);
188 std::swap(_packedData, other._packedData);
189 }
190
191 //! Aligns the current pointer to `alignment`.
192 ASMJIT_INLINE void align(size_t alignment) noexcept {
193 _ptr = Support::min(Support::alignUp(_ptr, alignment), _end);
194 }
195
196 //! Ensures the remaining size is at least equal or greater than `size`.
197 //!
198 //! \note This function doesn't respect any alignment. If you need to ensure
199 //! there is enough room for an aligned allocation you need to call `align()`
200 //! before calling `ensure()`.
201 ASMJIT_INLINE Error ensure(size_t size) noexcept {
202 if (size <= remainingSize())
203 return kErrorOk;
204 else
205 return _alloc(0, 1) ? kErrorOk : DebugUtils::errored(kErrorOutOfMemory);
206 }
207
208 ASMJIT_INLINE void _assignBlock(Block* block) noexcept {
209 size_t alignment = blockAlignment();
210 _ptr = Support::alignUp(block->data(), alignment);
211 _end = Support::alignDown(block->data() + block->size, alignment);
212 _block = block;
213 }
214
215 ASMJIT_INLINE void _assignZeroBlock() noexcept {
216 Block* block = const_cast<Block*>(&_zeroBlock);
217 _ptr = block->data();
218 _end = block->data();
219 _block = block;
220 }
221
222 //! \}
223
224 //! \name Allocation
225 //! \{
226
227 //! Allocates the requested memory specified by `size`.
228 //!
229 //! Pointer returned is valid until the `Zone` instance is destroyed or reset
230 //! by calling `reset()`. If you plan to make an instance of C++ from the
231 //! given pointer use placement `new` and `delete` operators:
232 //!
233 //! ```
234 //! using namespace asmjit;
235 //!
236 //! class Object { ... };
237 //!
238 //! // Create Zone with default block size of approximately 65536 bytes.
239 //! Zone zone(65536 - Zone::kBlockOverhead);
240 //!
241 //! // Create your objects using zone object allocating, for example:
242 //! Object* obj = static_cast<Object*>( zone.alloc(sizeof(Object)) );
243 //!
244 //! if (!obj) {
245 //! // Handle out of memory error.
246 //! }
247 //!
248 //! // Placement `new` and `delete` operators can be used to instantiate it.
249 //! new(obj) Object();
250 //!
251 //! // ... lifetime of your objects ...
252 //!
253 //! // To destroy the instance (if required).
254 //! obj->~Object();
255 //!
256 //! // Reset or destroy `Zone`.
257 //! zone.reset();
258 //! ```
259 ASMJIT_INLINE void* alloc(size_t size) noexcept {
260 if (ASMJIT_UNLIKELY(size > remainingSize()))
261 return _alloc(size, 1);
262
263 uint8_t* ptr = _ptr;
264 _ptr += size;
265 return static_cast<void*>(ptr);
266 }
267
268 //! Allocates the requested memory specified by `size` and `alignment`.
269 ASMJIT_INLINE void* alloc(size_t size, size_t alignment) noexcept {
270 ASMJIT_ASSERT(Support::isPowerOf2(alignment));
271 uint8_t* ptr = Support::alignUp(_ptr, alignment);
272
273 if (ptr >= _end || size > (size_t)(_end - ptr))
274 return _alloc(size, alignment);
275
276 _ptr = ptr + size;
277 return static_cast<void*>(ptr);
278 }
279
280 //! Allocates the requested memory specified by `size` without doing any checks.
281 //!
282 //! Can only be called if `remainingSize()` returns size at least equal to `size`.
283 ASMJIT_INLINE void* allocNoCheck(size_t size) noexcept {
284 ASMJIT_ASSERT(remainingSize() >= size);
285
286 uint8_t* ptr = _ptr;
287 _ptr += size;
288 return static_cast<void*>(ptr);
289 }
290
291 //! Allocates the requested memory specified by `size` and `alignment` without doing any checks.
292 //!
293 //! Performs the same operation as `Zone::allocNoCheck(size)` with `alignment` applied.
294 ASMJIT_INLINE void* allocNoCheck(size_t size, size_t alignment) noexcept {
295 ASMJIT_ASSERT(Support::isPowerOf2(alignment));
296
297 uint8_t* ptr = Support::alignUp(_ptr, alignment);
298 ASMJIT_ASSERT(size <= (size_t)(_end - ptr));
299
300 _ptr = ptr + size;
301 return static_cast<void*>(ptr);
302 }
303
304 //! Allocates `size` bytes of zeroed memory. See `alloc()` for more details.
305 ASMJIT_API void* allocZeroed(size_t size, size_t alignment = 1) noexcept;
306
307 //! Like `alloc()`, but the return pointer is casted to `T*`.
308 template<typename T>
309 ASMJIT_INLINE T* allocT(size_t size = sizeof(T), size_t alignment = alignof(T)) noexcept {
310 return static_cast<T*>(alloc(size, alignment));
311 }
312
313 //! Like `allocNoCheck()`, but the return pointer is casted to `T*`.
314 template<typename T>
315 ASMJIT_INLINE T* allocNoCheckT(size_t size = sizeof(T), size_t alignment = alignof(T)) noexcept {
316 return static_cast<T*>(allocNoCheck(size, alignment));
317 }
318
319 //! Like `allocZeroed()`, but the return pointer is casted to `T*`.
320 template<typename T>
321 ASMJIT_INLINE T* allocZeroedT(size_t size = sizeof(T), size_t alignment = alignof(T)) noexcept {
322 return static_cast<T*>(allocZeroed(size, alignment));
323 }
324
325 //! Like `new(std::nothrow) T(...)`, but allocated by `Zone`.
326 template<typename T>
327 ASMJIT_INLINE T* newT() noexcept {
328 void* p = alloc(sizeof(T), alignof(T));
329 if (ASMJIT_UNLIKELY(!p))
330 return nullptr;
331 return new(p) T();
332 }
333
334 //! Like `new(std::nothrow) T(...)`, but allocated by `Zone`.
335 template<typename T, typename... Args>
336 ASMJIT_INLINE T* newT(Args&&... args) noexcept {
337 void* p = alloc(sizeof(T), alignof(T));
338 if (ASMJIT_UNLIKELY(!p))
339 return nullptr;
340 return new(p) T(std::forward<Args>(args)...);
341 }
342
  //! \cond INTERNAL
  //!
  //! Internal alloc function used by other inlines.
  ASMJIT_API void* _alloc(size_t size, size_t alignment) noexcept;
  //! \endcond

  //! Helper that duplicates `size` bytes of `data` into zone-allocated memory,
  //! optionally null-terminating the copy when `nullTerminate` is true.
  ASMJIT_API void* dup(const void* data, size_t size, bool nullTerminate = false) noexcept;
351
  //! Helper to duplicate data into memory aligned to `alignment`.
  //!
  //! Aligns the zone cursor first, then forwards to `dup()`.
  ASMJIT_INLINE void* dupAligned(const void* data, size_t size, size_t alignment, bool nullTerminate = false) noexcept {
    align(alignment);
    return dup(data, size, nullTerminate);
  }
357
  //! Helper to duplicate a formatted (printf-style) string; the formatted
  //! output is limited to 256 bytes.
  ASMJIT_API char* sformat(const char* str, ...) noexcept;
360
361 //! \}
362 };
363
364 // ============================================================================
// [asmjit::ZoneTmp]
366 // ============================================================================
367
//! A `Zone` with `N` bytes of inline storage embedded directly in the object.
//!
//! The embedded buffer is handed to `Zone` as a temporary (initial) buffer
//! via `Support::Temporary`, so small workloads can avoid heap allocation
//! entirely - presumably the zone falls back to regular blocks once the
//! embedded storage is exhausted (behavior lives in `Zone`; confirm there).
template<size_t N>
class ZoneTmp : public Zone {
public:
  ASMJIT_NONCOPYABLE(ZoneTmp<N>)

  //! Embedded storage passed to the `Zone` constructor.
  struct Storage {
    char data[N];
  } _storage;

  //! Creates a temporary zone with `blockSize` / `blockAlignment` forwarded
  //! to `Zone`, backed initially by the embedded `_storage`.
  ASMJIT_INLINE explicit ZoneTmp(size_t blockSize, size_t blockAlignment = 1) noexcept
    : Zone(blockSize, blockAlignment, Support::Temporary(_storage.data, N)) {}
};
380
381 // ============================================================================
382 // [asmjit::ZoneAllocator]
383 // ============================================================================
384
//! Zone-based memory allocator that uses an existing `Zone` and provides a
//! `release()` functionality on top of it. It uses `Zone` only for chunks
//! that can be pooled, and uses libc `malloc()` for chunks that are large.
//!
//! The advantage of ZoneAllocator is that it can allocate small chunks of memory
//! really fast, and these chunks, when released, will be reused by consecutive
//! calls to `alloc()`. Also, since ZoneAllocator uses `Zone`, you can turn any
//! `Zone` into a `ZoneAllocator`, and use it in your `Pass` when necessary.
//!
//! ZoneAllocator is used by AsmJit containers to make containers having only
//! few elements fast (and lightweight) and to allow them to grow and use
//! dynamic blocks when they require more storage.
class ZoneAllocator {
public:
  ASMJIT_NONCOPYABLE(ZoneAllocator)

  //! \cond INTERNAL
  enum {
    // In short, we pool chunks of these sizes:
    // [32, 64, 96, 128, 192, 256, 320, 384, 448, 512]

    //! How many bytes per a low granularity pool (has to be at least 16).
    kLoGranularity = 32,
    //! Number of slots of a low granularity pool.
    kLoCount = 4,
    //! Maximum size of a block that can be allocated in a low granularity pool.
    kLoMaxSize = kLoGranularity * kLoCount,

    //! How many bytes per a high granularity pool.
    kHiGranularity = 64,
    //! Number of slots of a high granularity pool.
    kHiCount = 6,
    //! Maximum size of a block that can be allocated in a high granularity pool.
    kHiMaxSize = kLoMaxSize + kHiGranularity * kHiCount,

    //! Alignment of every pointer returned by `alloc()`.
    kBlockAlignment = kLoGranularity
  };

  //! Single-linked list used to store unused chunks.
  struct Slot {
    //! Link to a next slot in a single-linked list.
    Slot* next;
  };

  //! A block of memory that has been allocated dynamically and is not part of
  //! block-list used by the allocator. This is used to keep track of all these
  //! blocks so they can be freed by `reset()` if not freed explicitly.
  struct DynamicBlock {
    DynamicBlock* prev;
    DynamicBlock* next;
  };

  //! \endcond

  //! Zone used to allocate memory that fits into slots.
  Zone* _zone;
  //! Indexed slots containing released memory.
  Slot* _slots[kLoCount + kHiCount];
  //! Dynamic blocks for larger allocations (no slots).
  DynamicBlock* _dynamicBlocks;

  //! \name Construction & Destruction
  //! \{

  //! Creates a new `ZoneAllocator`.
  //!
  //! \note To use it, you must first `init()` it.
  inline ZoneAllocator() noexcept {
    // Zeroes `_zone`, all `_slots[]`, and `_dynamicBlocks` at once.
    memset(this, 0, sizeof(*this));
  }

  //! Creates a new `ZoneAllocator` initialized to use `zone`.
  inline explicit ZoneAllocator(Zone* zone) noexcept {
    memset(this, 0, sizeof(*this));
    _zone = zone;
  }

  //! Destroys the `ZoneAllocator`.
  inline ~ZoneAllocator() noexcept { reset(); }

  //! Tests whether the `ZoneAllocator` is initialized (i.e. has `Zone`).
  inline bool isInitialized() const noexcept { return _zone != nullptr; }

  //! Convenience function to initialize the `ZoneAllocator` with `zone`.
  //!
  //! It's the same as calling `reset(zone)`.
  inline void init(Zone* zone) noexcept { reset(zone); }

  //! Resets this `ZoneAllocator` and also forget about the current `Zone` which
  //! is attached (if any). Reset optionally attaches a new `zone` passed, or
  //! keeps the `ZoneAllocator` in an uninitialized state, if `zone` is null.
  ASMJIT_API void reset(Zone* zone = nullptr) noexcept;

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the assigned `Zone` of this allocator or null if this `ZoneAllocator`
  //! is not initialized.
  inline Zone* zone() const noexcept { return _zone; }

  //! \}

  //! \cond
  //! \name Internals
  //! \{

  //! Returns the slot index to be used for `size`. Returns `true` if a valid
  //! slot has been written to `slot`, or `false` when `size` is too large to
  //! be pooled (such allocations are handled as dynamic blocks).
  static ASMJIT_INLINE bool _getSlotIndex(size_t size, uint32_t& slot) noexcept {
    ASMJIT_ASSERT(size > 0);
    if (size > kHiMaxSize)
      return false;

    if (size <= kLoMaxSize)
      slot = uint32_t((size - 1) / kLoGranularity);
    else
      slot = uint32_t((size - kLoMaxSize - 1) / kHiGranularity) + kLoCount;

    return true;
  }

  //! \overload
  //!
  //! Additionally fills `allocatedSize` with the exact size of the chosen slot
  //! (`allocatedSize` can be equal or slightly greater than `size`).
  static ASMJIT_INLINE bool _getSlotIndex(size_t size, uint32_t& slot, size_t& allocatedSize) noexcept {
    ASMJIT_ASSERT(size > 0);
    if (size > kHiMaxSize)
      return false;

    if (size <= kLoMaxSize) {
      slot = uint32_t((size - 1) / kLoGranularity);
      allocatedSize = Support::alignUp(size, kLoGranularity);
    }
    else {
      slot = uint32_t((size - kLoMaxSize - 1) / kHiGranularity) + kLoCount;
      allocatedSize = Support::alignUp(size, kHiGranularity);
    }

    return true;
  }

  //! \}
  //! \endcond

  //! \name Allocation
  //! \{

  //! \cond INTERNAL
  ASMJIT_API void* _alloc(size_t size, size_t& allocatedSize) noexcept;
  ASMJIT_API void* _allocZeroed(size_t size, size_t& allocatedSize) noexcept;
  ASMJIT_API void _releaseDynamic(void* p, size_t size) noexcept;
  //! \endcond

  //! Allocates `size` bytes of memory, ideally from an available pool.
  //!
  //! \note `size` can't be zero, it will assert in debug mode in such case.
  inline void* alloc(size_t size) noexcept {
    ASMJIT_ASSERT(isInitialized());
    size_t allocatedSize;
    return _alloc(size, allocatedSize);
  }

  //! Like `alloc(size)`, but provides a second argument `allocatedSize` that
  //! provides a way to know how big the block returned actually is. This is
  //! useful for containers to prevent growing too early.
  inline void* alloc(size_t size, size_t& allocatedSize) noexcept {
    ASMJIT_ASSERT(isInitialized());
    return _alloc(size, allocatedSize);
  }

  //! Like `alloc()`, but the return pointer is casted to `T*`.
  template<typename T>
  inline T* allocT(size_t size = sizeof(T)) noexcept {
    return static_cast<T*>(alloc(size));
  }

  //! Like `alloc(size)`, but returns zeroed memory.
  inline void* allocZeroed(size_t size) noexcept {
    ASMJIT_ASSERT(isInitialized());
    size_t allocatedSize;
    return _allocZeroed(size, allocatedSize);
  }

  //! Like `alloc(size, allocatedSize)`, but returns zeroed memory.
  inline void* allocZeroed(size_t size, size_t& allocatedSize) noexcept {
    ASMJIT_ASSERT(isInitialized());
    return _allocZeroed(size, allocatedSize);
  }

  //! Like `allocZeroed()`, but the return pointer is casted to `T*`.
  template<typename T>
  inline T* allocZeroedT(size_t size = sizeof(T)) noexcept {
    return static_cast<T*>(allocZeroed(size));
  }

  //! Like `new(std::nothrow) T(...)`, but allocated by `Zone`.
  template<typename T>
  inline T* newT() noexcept {
    void* p = allocT<T>();
    if (ASMJIT_UNLIKELY(!p))
      return nullptr;
    return new(p) T();
  }
  //! Like `new(std::nothrow) T(...)`, but allocated by `Zone`.
  template<typename T, typename... Args>
  inline T* newT(Args&&... args) noexcept {
    void* p = allocT<T>();
    if (ASMJIT_UNLIKELY(!p))
      return nullptr;
    return new(p) T(std::forward<Args>(args)...);
  }

  //! Releases the memory previously allocated by `alloc()`. The `size` argument
  //! has to be the same as used to call `alloc()` or `allocatedSize` returned
  //! by `alloc()`.
  inline void release(void* p, size_t size) noexcept {
    ASMJIT_ASSERT(isInitialized());
    ASMJIT_ASSERT(p != nullptr);
    ASMJIT_ASSERT(size != 0);

    uint32_t slot;
    if (_getSlotIndex(size, slot)) {
      // Pooled size - prepend the chunk to its slot's free-list for reuse.
      static_cast<Slot*>(p)->next = static_cast<Slot*>(_slots[slot]);
      _slots[slot] = static_cast<Slot*>(p);
    }
    else {
      // Too large to pool - handled by the dynamic-block list.
      _releaseDynamic(p, size);
    }
  }

  //! \}
};
619
620 //! \}
621
622 ASMJIT_END_NAMESPACE
623
624 #endif // _ASMJIT_CORE_ZONE_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/support.h"
8 #include "../core/zone.h"
9 #include "../core/zonehash.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 // ============================================================================
14 // [asmjit::ZoneHashBase - Helpers]
15 // ============================================================================
16
// Prime table used to size hash-table buckets. Each entry carries a prime, a
// precomputed reciprocal, and a shift amount so that a modulo by the prime can
// be computed with a multiply and a shift instead of an integer division (see
// `ZoneHashBase::_calcMod()`). Generated data - do not edit by hand.
#define ASMJIT_POPULATE_PRIMES(ENTRY) \
  ENTRY(2         , 0x80000000, 32), /* [N * 0x80000000 >> 32] (rcp=2147483648) */ \
  ENTRY(11        , 0xBA2E8BA3, 35), /* [N * 0xBA2E8BA3 >> 35] (rcp=3123612579) */ \
  ENTRY(29        , 0x8D3DCB09, 36), /* [N * 0x8D3DCB09 >> 36] (rcp=2369637129) */ \
  ENTRY(41        , 0xC7CE0C7D, 37), /* [N * 0xC7CE0C7D >> 37] (rcp=3352169597) */ \
  ENTRY(59        , 0x8AD8F2FC, 37), /* [N * 0x8AD8F2FC >> 37] (rcp=2329473788) */ \
  ENTRY(83        , 0xC565C87C, 38), /* [N * 0xC565C87C >> 38] (rcp=3311782012) */ \
  ENTRY(131       , 0xFA232CF3, 39), /* [N * 0xFA232CF3 >> 39] (rcp=4196609267) */ \
  ENTRY(191       , 0xAB8F69E3, 39), /* [N * 0xAB8F69E3 >> 39] (rcp=2878302691) */ \
  ENTRY(269       , 0xF3A0D52D, 40), /* [N * 0xF3A0D52D >> 40] (rcp=4087403821) */ \
  ENTRY(383       , 0xAB1CBDD4, 40), /* [N * 0xAB1CBDD4 >> 40] (rcp=2870787540) */ \
  ENTRY(541       , 0xF246FACC, 41), /* [N * 0xF246FACC >> 41] (rcp=4064737996) */ \
  ENTRY(757       , 0xAD2589A4, 41), /* [N * 0xAD2589A4 >> 41] (rcp=2904918436) */ \
  ENTRY(1061      , 0xF7129426, 42), /* [N * 0xF7129426 >> 42] (rcp=4145189926) */ \
  ENTRY(1499      , 0xAEE116B7, 42), /* [N * 0xAEE116B7 >> 42] (rcp=2933986999) */ \
  ENTRY(2099      , 0xF9C7A737, 43), /* [N * 0xF9C7A737 >> 43] (rcp=4190611255) */ \
  ENTRY(2939      , 0xB263D25C, 43), /* [N * 0xB263D25C >> 43] (rcp=2992886364) */ \
  ENTRY(4111      , 0xFF10E02E, 44), /* [N * 0xFF10E02E >> 44] (rcp=4279296046) */ \
  ENTRY(5779      , 0xB5722823, 44), /* [N * 0xB5722823 >> 44] (rcp=3044157475) */ \
  ENTRY(8087      , 0x81A97405, 44), /* [N * 0x81A97405 >> 44] (rcp=2175366149) */ \
  ENTRY(11321     , 0xB93E91DB, 45), /* [N * 0xB93E91DB >> 45] (rcp=3107885531) */ \
  ENTRY(15859     , 0x843CC26B, 45), /* [N * 0x843CC26B >> 45] (rcp=2218574443) */ \
  ENTRY(22189     , 0xBD06B9EA, 46), /* [N * 0xBD06B9EA >> 46] (rcp=3171334634) */ \
  ENTRY(31051     , 0x8713F186, 46), /* [N * 0x8713F186 >> 46] (rcp=2266231174) */ \
  ENTRY(43451     , 0xC10F1CB9, 47), /* [N * 0xC10F1CB9 >> 47] (rcp=3238993081) */ \
  ENTRY(60869     , 0x89D06A86, 47), /* [N * 0x89D06A86 >> 47] (rcp=2312137350) */ \
  ENTRY(85159     , 0xC502AF3B, 48), /* [N * 0xC502AF3B >> 48] (rcp=3305287483) */ \
  ENTRY(102107    , 0xA44F65AE, 48), /* [N * 0xA44F65AE >> 48] (rcp=2756666798) */ \
  ENTRY(122449    , 0x89038F77, 48), /* [N * 0x89038F77 >> 48] (rcp=2298711927) */ \
  ENTRY(146819    , 0xE48AF7E9, 49), /* [N * 0xE48AF7E9 >> 49] (rcp=3834312681) */ \
  ENTRY(176041    , 0xBE9B145B, 49), /* [N * 0xBE9B145B >> 49] (rcp=3197834331) */ \
  ENTRY(211073    , 0x9EF882BA, 49), /* [N * 0x9EF882BA >> 49] (rcp=2667086522) */ \
  ENTRY(253081    , 0x849571AB, 49), /* [N * 0x849571AB >> 49] (rcp=2224386475) */ \
  ENTRY(303469    , 0xDD239C97, 50), /* [N * 0xDD239C97 >> 50] (rcp=3710098583) */ \
  ENTRY(363887    , 0xB86C196D, 50), /* [N * 0xB86C196D >> 50] (rcp=3094092141) */ \
  ENTRY(436307    , 0x99CFA4E9, 50), /* [N * 0x99CFA4E9 >> 50] (rcp=2580522217) */ \
  ENTRY(523177    , 0x804595C0, 50), /* [N * 0x804595C0 >> 50] (rcp=2152043968) */ \
  ENTRY(627293    , 0xD5F69FCF, 51), /* [N * 0xD5F69FCF >> 51] (rcp=3589709775) */ \
  ENTRY(752177    , 0xB27063BA, 51), /* [N * 0xB27063BA >> 51] (rcp=2993710010) */ \
  ENTRY(901891    , 0x94D170AC, 51), /* [N * 0x94D170AC >> 51] (rcp=2496753836) */ \
  ENTRY(1081369   , 0xF83C9767, 52), /* [N * 0xF83C9767 >> 52] (rcp=4164720487) */ \
  ENTRY(1296563   , 0xCF09435D, 52), /* [N * 0xCF09435D >> 52] (rcp=3473490781) */ \
  ENTRY(1554583   , 0xACAC7198, 52), /* [N * 0xACAC7198 >> 52] (rcp=2896982424) */ \
  ENTRY(1863971   , 0x90033EE3, 52), /* [N * 0x90033EE3 >> 52] (rcp=2416131811) */ \
  ENTRY(2234923   , 0xF0380EBD, 53), /* [N * 0xF0380EBD >> 53] (rcp=4030205629) */ \
  ENTRY(2679673   , 0xC859731E, 53), /* [N * 0xC859731E >> 53] (rcp=3361305374) */ \
  ENTRY(3212927   , 0xA718DE27, 53), /* [N * 0xA718DE27 >> 53] (rcp=2803424807) */ \
  ENTRY(3852301   , 0x8B5D1B4B, 53), /* [N * 0x8B5D1B4B >> 53] (rcp=2338134859) */ \
  ENTRY(4618921   , 0xE8774804, 54), /* [N * 0xE8774804 >> 54] (rcp=3900131332) */ \
  ENTRY(5076199   , 0xD386574E, 54), /* [N * 0xD386574E >> 54] (rcp=3548796750) */ \
  ENTRY(5578757   , 0xC0783FE1, 54), /* [N * 0xC0783FE1 >> 54] (rcp=3229106145) */ \
  ENTRY(6131057   , 0xAF21B08F, 54), /* [N * 0xAF21B08F >> 54] (rcp=2938220687) */ \
  ENTRY(6738031   , 0x9F5AFD6E, 54), /* [N * 0x9F5AFD6E >> 54] (rcp=2673540462) */ \
  ENTRY(7405163   , 0x90FFC3B9, 54), /* [N * 0x90FFC3B9 >> 54] (rcp=2432680889) */ \
  ENTRY(8138279   , 0x83EFECFC, 54), /* [N * 0x83EFECFC >> 54] (rcp=2213539068) */ \
  ENTRY(8943971   , 0xF01AA2EF, 55), /* [N * 0xF01AA2EF >> 55] (rcp=4028277487) */ \
  ENTRY(9829447   , 0xDA7979B2, 55), /* [N * 0xDA7979B2 >> 55] (rcp=3665394098) */ \
  ENTRY(10802581  , 0xC6CB2771, 55), /* [N * 0xC6CB2771 >> 55] (rcp=3335202673) */ \
  ENTRY(11872037  , 0xB4E2C7DD, 55), /* [N * 0xB4E2C7DD >> 55] (rcp=3034761181) */ \
  ENTRY(13047407  , 0xA4974124, 55), /* [N * 0xA4974124 >> 55] (rcp=2761376036) */ \
  ENTRY(14339107  , 0x95C39CF1, 55), /* [N * 0x95C39CF1 >> 55] (rcp=2512624881) */ \
  ENTRY(15758737  , 0x8845C763, 55), /* [N * 0x8845C763 >> 55] (rcp=2286274403) */ \
  ENTRY(17318867  , 0xF7FE593F, 56), /* [N * 0xF7FE593F >> 56] (rcp=4160641343) */ \
  ENTRY(19033439  , 0xE1A75D93, 56), /* [N * 0xE1A75D93 >> 56] (rcp=3785842067) */ \
  ENTRY(20917763  , 0xCD5389B3, 56), /* [N * 0xCD5389B3 >> 56] (rcp=3444804019) */ \
  ENTRY(22988621  , 0xBAD4841A, 56), /* [N * 0xBAD4841A >> 56] (rcp=3134489626) */ \
  ENTRY(25264543  , 0xA9FFF2FF, 56), /* [N * 0xA9FFF2FF >> 56] (rcp=2852123391) */ \
  ENTRY(27765763  , 0x9AAF8BF3, 56), /* [N * 0x9AAF8BF3 >> 56] (rcp=2595195891) */ \
  ENTRY(30514607  , 0x8CC04E18, 56), /* [N * 0x8CC04E18 >> 56] (rcp=2361413144) */ \
  ENTRY(33535561  , 0x80127068, 56), /* [N * 0x80127068 >> 56] (rcp=2148692072) */ \
  ENTRY(36855587  , 0xE911F0BB, 57), /* [N * 0xE911F0BB >> 57] (rcp=3910267067) */ \
  ENTRY(38661533  , 0xDE2ED7BE, 57), /* [N * 0xDE2ED7BE >> 57] (rcp=3727611838) */ \
  ENTRY(40555961  , 0xD3CDF2FD, 57), /* [N * 0xD3CDF2FD >> 57] (rcp=3553489661) */ \
  ENTRY(42543269  , 0xC9E9196C, 57), /* [N * 0xC9E9196C >> 57] (rcp=3387496812) */ \
  ENTRY(44627909  , 0xC07A9EB6, 57), /* [N * 0xC07A9EB6 >> 57] (rcp=3229261494) */ \
  ENTRY(46814687  , 0xB77CEF65, 57), /* [N * 0xB77CEF65 >> 57] (rcp=3078418277) */ \
  ENTRY(49108607  , 0xAEEAC65C, 57), /* [N * 0xAEEAC65C >> 57] (rcp=2934621788) */ \
  ENTRY(51514987  , 0xA6BF0EF0, 57), /* [N * 0xA6BF0EF0 >> 57] (rcp=2797539056) */ \
  ENTRY(54039263  , 0x9EF510B5, 57), /* [N * 0x9EF510B5 >> 57] (rcp=2666860725) */ \
  ENTRY(56687207  , 0x97883B42, 57), /* [N * 0x97883B42 >> 57] (rcp=2542287682) */ \
  ENTRY(59464897  , 0x907430ED, 57), /* [N * 0x907430ED >> 57] (rcp=2423533805) */ \
  ENTRY(62378699  , 0x89B4CA91, 57), /* [N * 0x89B4CA91 >> 57] (rcp=2310326929) */ \
  ENTRY(65435273  , 0x83461568, 57), /* [N * 0x83461568 >> 57] (rcp=2202408296) */ \
  ENTRY(68641607  , 0xFA489AA8, 58), /* [N * 0xFA489AA8 >> 58] (rcp=4199062184) */ \
  ENTRY(72005051  , 0xEE97B1C5, 58), /* [N * 0xEE97B1C5 >> 58] (rcp=4002918853) */ \
  ENTRY(75533323  , 0xE3729293, 58), /* [N * 0xE3729293 >> 58] (rcp=3815936659) */ \
  ENTRY(79234469  , 0xD8D2BBA3, 58), /* [N * 0xD8D2BBA3 >> 58] (rcp=3637689251) */ \
  ENTRY(83116967  , 0xCEB1F196, 58), /* [N * 0xCEB1F196 >> 58] (rcp=3467768214) */ \
  ENTRY(87189709  , 0xC50A4426, 58), /* [N * 0xC50A4426 >> 58] (rcp=3305784358) */ \
  ENTRY(91462061  , 0xBBD6052B, 58), /* [N * 0xBBD6052B >> 58] (rcp=3151365419) */ \
  ENTRY(95943737  , 0xB30FD999, 58), /* [N * 0xB30FD999 >> 58] (rcp=3004160409) */ \
  ENTRY(100644991 , 0xAAB29CED, 58), /* [N * 0xAAB29CED >> 58] (rcp=2863832301) */ \
  ENTRY(105576619 , 0xA2B96421, 58), /* [N * 0xA2B96421 >> 58] (rcp=2730058785) */ \
  ENTRY(110749901 , 0x9B1F8434, 58), /* [N * 0x9B1F8434 >> 58] (rcp=2602533940) */ \
  ENTRY(116176651 , 0x93E08B4A, 58), /* [N * 0x93E08B4A >> 58] (rcp=2480966474) */ \
  ENTRY(121869317 , 0x8CF837E0, 58), /* [N * 0x8CF837E0 >> 58] (rcp=2365077472) */ \
  ENTRY(127840913 , 0x86627F01, 58), /* [N * 0x86627F01 >> 58] (rcp=2254601985) */ \
  ENTRY(134105159 , 0x801B8178, 58), /* [N * 0x801B8178 >> 58] (rcp=2149286264) */ \
  ENTRY(140676353 , 0xF43F294F, 59), /* [N * 0xF43F294F >> 59] (rcp=4097780047) */ \
  ENTRY(147569509 , 0xE8D67089, 59), /* [N * 0xE8D67089 >> 59] (rcp=3906367625) */ \
  ENTRY(154800449 , 0xDDF6243C, 59), /* [N * 0xDDF6243C >> 59] (rcp=3723895868) */ \
  ENTRY(162385709 , 0xD397E6AE, 59), /* [N * 0xD397E6AE >> 59] (rcp=3549947566) */ \
  ENTRY(170342629 , 0xC9B5A65A, 59), /* [N * 0xC9B5A65A >> 59] (rcp=3384125018) */ \
  ENTRY(178689419 , 0xC0499865, 59), /* [N * 0xC0499865 >> 59] (rcp=3226048613) */ \
  ENTRY(187445201 , 0xB74E35FA, 59), /* [N * 0xB74E35FA >> 59] (rcp=3075356154) */ \
  ENTRY(196630033 , 0xAEBE3AC1, 59), /* [N * 0xAEBE3AC1 >> 59] (rcp=2931702465) */ \
  ENTRY(206264921 , 0xA694A37F, 59), /* [N * 0xA694A37F >> 59] (rcp=2794759039) */ \
  ENTRY(216371963 , 0x9ECCA59F, 59), /* [N * 0x9ECCA59F >> 59] (rcp=2664211871) */ \
  ENTRY(226974197 , 0x9761B6AE, 59), /* [N * 0x9761B6AE >> 59] (rcp=2539763374) */ \
  ENTRY(238095983 , 0x904F79A1, 59), /* [N * 0x904F79A1 >> 59] (rcp=2421127585) */ \
  ENTRY(249762697 , 0x8991CD1F, 59), /* [N * 0x8991CD1F >> 59] (rcp=2308033823) */ \
  ENTRY(262001071 , 0x8324BCA5, 59), /* [N * 0x8324BCA5 >> 59] (rcp=2200222885) */ \
  ENTRY(274839137 , 0xFA090732, 60), /* [N * 0xFA090732 >> 60] (rcp=4194895666) */ \
  ENTRY(288306269 , 0xEE5B16ED, 60), /* [N * 0xEE5B16ED >> 60] (rcp=3998947053) */ \
  ENTRY(302433337 , 0xE338CE49, 60), /* [N * 0xE338CE49 >> 60] (rcp=3812150857) */ \
  ENTRY(317252587 , 0xD89BABC0, 60), /* [N * 0xD89BABC0 >> 60] (rcp=3634080704) */ \
  ENTRY(374358107 , 0xB790EF43, 60), /* [N * 0xB790EF43 >> 60] (rcp=3079728963) */ \
  ENTRY(441742621 , 0x9B908414, 60), /* [N * 0x9B908414 >> 60] (rcp=2609939476) */ \
  ENTRY(521256293 , 0x83D596FA, 60), /* [N * 0x83D596FA >> 60] (rcp=2211813114) */ \
  ENTRY(615082441 , 0xDF72B16E, 61), /* [N * 0xDF72B16E >> 61] (rcp=3748835694) */ \
  ENTRY(725797313 , 0xBD5CDB3B, 61), /* [N * 0xBD5CDB3B >> 61] (rcp=3176979259) */ \
  ENTRY(856440829 , 0xA07A14E9, 61), /* [N * 0xA07A14E9 >> 61] (rcp=2692355305) */ \
  ENTRY(1010600209, 0x87FF5289, 61), /* [N * 0x87FF5289 >> 61] (rcp=2281656969) */ \
  ENTRY(1192508257, 0xE6810540, 62), /* [N * 0xE6810540 >> 62] (rcp=3867215168) */ \
  ENTRY(1407159797, 0xC357A480, 62), /* [N * 0xC357A480 >> 62] (rcp=3277300864) */ \
  ENTRY(1660448617, 0xA58B5B4F, 62), /* [N * 0xA58B5B4F >> 62] (rcp=2777373519) */ \
  ENTRY(1959329399, 0x8C4AB55F, 62), /* [N * 0x8C4AB55F >> 62] (rcp=2353706335) */ \
  ENTRY(2312008693, 0xEDC86320, 63), /* [N * 0xEDC86320 >> 63] (rcp=3989332768) */ \
  ENTRY(2728170257, 0xC982C4D2, 63), /* [N * 0xC982C4D2 >> 63] (rcp=3380790482) */ \
  ENTRY(3219240923, 0xAAC599B6, 63)  /* [N * 0xAAC599B6 >> 63] (rcp=2865076662) */
147
148
//! A prime bucket count paired with a precomputed reciprocal, used to replace
//! `hash % prime` by a multiply and shift (see `ZoneHashBase::_calcMod()`).
struct HashPrime {
  //! Prime number (hash-table bucket count).
  uint32_t prime;
  //! Reciprocal to turn division into multiplication.
  uint32_t rcp;
};
155
//! Primes and their reciprocals expanded from `ASMJIT_POPULATE_PRIMES`.
static const HashPrime ZoneHash_primeArray[] = {
  #define E(PRIME, RCP, SHIFT) { PRIME, RCP }
  ASMJIT_POPULATE_PRIMES(E)
  #undef E
};
161
//! Shift amounts matching `ZoneHash_primeArray` (same expansion order, so the
//! same index addresses a prime, its reciprocal, and its shift).
static const uint8_t ZoneHash_primeShift[] = {
  #define E(PRIME, RCP, SHIFT) uint8_t(SHIFT)
  ASMJIT_POPULATE_PRIMES(E)
  #undef E
};
167
168 // ============================================================================
169 // [asmjit::ZoneHashBase - Rehash]
170 // ============================================================================
171
172 void ZoneHashBase::_rehash(ZoneAllocator* allocator, uint32_t primeIndex) noexcept {
173 ASMJIT_ASSERT(primeIndex < ASMJIT_ARRAY_SIZE(ZoneHash_primeArray));
174 uint32_t newCount = ZoneHash_primeArray[primeIndex].prime;
175
176 ZoneHashNode** oldData = _data;
177 ZoneHashNode** newData = reinterpret_cast<ZoneHashNode**>(
178 allocator->allocZeroed(size_t(newCount) * sizeof(ZoneHashNode*)));
179
180 // We can still store nodes into the table, but it will degrade.
181 if (ASMJIT_UNLIKELY(newData == nullptr))
182 return;
183
184 uint32_t i;
185 uint32_t oldCount = _bucketsCount;
186
187 _data = newData;
188 _bucketsCount = newCount;
189 _bucketsGrow = uint32_t(newCount * 0.9);
190 _rcpValue = ZoneHash_primeArray[primeIndex].rcp;
191 _rcpShift = ZoneHash_primeShift[primeIndex];
192 _primeIndex = uint8_t(primeIndex);
193
194 for (i = 0; i < oldCount; i++) {
195 ZoneHashNode* node = oldData[i];
196 while (node) {
197 ZoneHashNode* next = node->_hashNext;
198 uint32_t hashMod = _calcMod(node->_hashCode);
199
200 node->_hashNext = newData[hashMod];
201 newData[hashMod] = node;
202 node = next;
203 }
204 }
205
206 if (oldData != _embedded)
207 allocator->release(oldData, oldCount * sizeof(ZoneHashNode*));
208 }
209
210 // ============================================================================
211 // [asmjit::ZoneHashBase - Ops]
212 // ============================================================================
213
214 ZoneHashNode* ZoneHashBase::_insert(ZoneAllocator* allocator, ZoneHashNode* node) noexcept {
215 uint32_t hashMod = _calcMod(node->_hashCode);
216 ZoneHashNode* next = _data[hashMod];
217
218 node->_hashNext = next;
219 _data[hashMod] = node;
220
221 if (++_size > _bucketsGrow) {
222 uint32_t primeIndex = Support::min<uint32_t>(_primeIndex + 2, ASMJIT_ARRAY_SIZE(ZoneHash_primeArray) - 1);
223 if (primeIndex > _primeIndex)
224 _rehash(allocator, primeIndex);
225 }
226
227 return node;
228 }
229
230 ZoneHashNode* ZoneHashBase::_remove(ZoneAllocator* allocator, ZoneHashNode* node) noexcept {
231 ASMJIT_UNUSED(allocator);
232 uint32_t hashMod = _calcMod(node->_hashCode);
233
234 ZoneHashNode** pPrev = &_data[hashMod];
235 ZoneHashNode* p = *pPrev;
236
237 while (p) {
238 if (p == node) {
239 *pPrev = p->_hashNext;
240 _size--;
241 return node;
242 }
243
244 pPrev = &p->_hashNext;
245 p = *pPrev;
246 }
247
248 return nullptr;
249 }
250
251 // ============================================================================
252 // [asmjit::ZoneHash - Unit]
253 // ============================================================================
254
255 #if defined(ASMJIT_TEST)
//! Test node - uses the key itself as the hash code.
struct MyHashNode : public ZoneHashNode {
  inline MyHashNode(uint32_t key) noexcept
    : ZoneHashNode(key),
      _key(key) {}

  //! Key of the node (equal to its hash code in this test).
  uint32_t _key;
};
263
//! Key/matcher object consumed by `ZoneHash::get()` - provides `hashCode()`
//! and `matches()`.
struct MyKeyMatcher {
  inline MyKeyMatcher(uint32_t key) noexcept
    : _key(key) {}

  //! Returns the hash code of the key (the key itself in this test).
  inline uint32_t hashCode() const noexcept { return _key; }
  //! Tests whether `node` holds this key.
  inline bool matches(const MyHashNode* node) const noexcept { return node->_key == _key; }

  uint32_t _key;
};
273
// Inserts `kCount` nodes, then removes them from the highest key down,
// verifying that every remaining key is still retrievable after each removal.
UNIT(zone_hash) {
  uint32_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 10000;

  Zone zone(4096);
  ZoneAllocator allocator(&zone);

  ZoneHash<MyHashNode> hashTable;

  uint32_t key;
  INFO("Inserting %u elements to HashTable", unsigned(kCount));
  for (key = 0; key < kCount; key++) {
    hashTable.insert(&allocator, zone.newT<MyHashNode>(key));
  }

  uint32_t count = kCount;
  INFO("Removing %u elements from HashTable and validating each operation", unsigned(kCount));
  do {
    MyHashNode* node;

    // All keys below `count` must still be present and must map to the
    // node that stores them.
    for (key = 0; key < count; key++) {
      node = hashTable.get(MyKeyMatcher(key));
      EXPECT(node != nullptr);
      EXPECT(node->_key == key);
    }

    // Remove the highest remaining key and verify it's gone.
    {
      count--;
      node = hashTable.get(MyKeyMatcher(count));
      hashTable.remove(&allocator, node);

      node = hashTable.get(MyKeyMatcher(count));
      EXPECT(node == nullptr);
    }
  } while (count);

  EXPECT(hashTable.empty());
}
311 #endif
312
313 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ZONEHASH_H
7 #define _ASMJIT_CORE_ZONEHASH_H
8
9 #include "../core/zone.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_zone
14 //! \{
15
16 // ============================================================================
17 // [asmjit::ZoneHashNode]
18 // ============================================================================
19
//! Node used by `ZoneHash<>` template.
//!
//! To make `ZoneHash::get()` work, the key object passed to it must provide
//! `uint32_t hashCode() const` and `bool matches(const NodeT* node) const`
//! (that is what `ZoneHash::get()` actually calls on the key).
class ZoneHashNode {
public:
  ASMJIT_NONCOPYABLE(ZoneHashNode)

  inline ZoneHashNode(uint32_t hashCode = 0) noexcept
    : _hashNext(nullptr),
      _hashCode(hashCode),
      _customData(0) {}

  //! Next node in the chain, null if it terminates the chain.
  ZoneHashNode* _hashNext;
  //! Precalculated hash-code of key.
  uint32_t _hashCode;
  //! Padding, can be reused by any Node that inherits `ZoneHashNode`.
  uint32_t _customData;
};
40
41 // ============================================================================
42 // [asmjit::ZoneHashBase]
43 // ============================================================================
44
45 class ZoneHashBase {
46 public:
47 ASMJIT_NONCOPYABLE(ZoneHashBase)
48
49 //! Buckets data.
50 ZoneHashNode** _data;
51 //! Count of records inserted into the hash table.
52 size_t _size;
53 //! Count of hash buckets.
54 uint32_t _bucketsCount;
55 //! When buckets array should grow (only checked after insertion).
56 uint32_t _bucketsGrow;
57 //! Reciprocal value of `_bucketsCount`.
58 uint32_t _rcpValue;
59 //! How many bits to shift right when hash is multiplied with `_rcpValue`.
60 uint8_t _rcpShift;
61 //! Prime value index in internal prime array.
62 uint8_t _primeIndex;
63
64 //! Embedded data, used by empty hash tables.
65 ZoneHashNode* _embedded[1];
66
67 //! \name Construction & Destruction
68 //! \{
69
70 inline ZoneHashBase() noexcept {
71 reset();
72 }
73
74 inline ZoneHashBase(ZoneHashBase&& other) noexcept {
75 _data = other._data;
76 _size = other._size;
77 _bucketsCount = other._bucketsCount;
78 _bucketsGrow = other._bucketsGrow;
79 _rcpValue = other._rcpValue;
80 _rcpShift = other._rcpShift;
81 _primeIndex = other._primeIndex;
82 _embedded[0] = other._embedded[0];
83
84 if (_data == other._embedded) _data = _embedded;
85 }
86
87 inline void reset() noexcept {
88 _data = _embedded;
89 _size = 0;
90 _bucketsCount = 1;
91 _bucketsGrow = 1;
92 _rcpValue = 1;
93 _rcpShift = 0;
94 _primeIndex = 0;
95 _embedded[0] = nullptr;
96 }
97
98 inline void release(ZoneAllocator* allocator) noexcept {
99 ZoneHashNode** oldData = _data;
100 if (oldData != _embedded)
101 allocator->release(oldData, _bucketsCount * sizeof(ZoneHashNode*));
102 reset();
103 }
104
105 //! \}
106
107 //! \name Accessors
108 //! \{
109
110 inline bool empty() const noexcept { return _size == 0; }
111 inline size_t size() const noexcept { return _size; }
112
113 //! \}
114
115 //! \name Utilities
116 //! \{
117
118 inline void _swap(ZoneHashBase& other) noexcept {
119 std::swap(_data, other._data);
120 std::swap(_size, other._size);
121 std::swap(_bucketsCount, other._bucketsCount);
122 std::swap(_bucketsGrow, other._bucketsGrow);
123 std::swap(_rcpValue, other._rcpValue);
124 std::swap(_rcpShift, other._rcpShift);
125 std::swap(_primeIndex, other._primeIndex);
126 std::swap(_embedded[0], other._embedded[0]);
127
128 if (_data == other._embedded) _data = _embedded;
129 if (other._data == _embedded) other._data = other._embedded;
130 }
131
132 //! \cond INTERNAL
133 inline uint32_t _calcMod(uint32_t hash) const noexcept {
134 uint32_t x = uint32_t((uint64_t(hash) * _rcpValue) >> _rcpShift);
135 return hash - x * _bucketsCount;
136 }
137
138 ASMJIT_API void _rehash(ZoneAllocator* allocator, uint32_t newCount) noexcept;
139 ASMJIT_API ZoneHashNode* _insert(ZoneAllocator* allocator, ZoneHashNode* node) noexcept;
140 ASMJIT_API ZoneHashNode* _remove(ZoneAllocator* allocator, ZoneHashNode* node) noexcept;
141 //! \endcond
142
143 //! \}
144 };
145
146 // ============================================================================
147 // [asmjit::ZoneHash]
148 // ============================================================================
149
150 //! Low-level hash table specialized for storing string keys and POD values.
151 //!
152 //! This hash table allows duplicates to be inserted (the API is so low
153 //! level that it's up to you if you allow it or not, as you should first
154 //! `get()` the node and then modify it or insert a new node by using `insert()`,
155 //! depending on the intention).
156 template<typename NodeT>
157 class ZoneHash : public ZoneHashBase {
158 public:
159 ASMJIT_NONCOPYABLE(ZoneHash<NodeT>)
160
161 typedef NodeT Node;
162
163 //! \name Construction & Destruction
164 //! \{
165
166 inline ZoneHash() noexcept
167 : ZoneHashBase() {}
168
169 inline ZoneHash(ZoneHash&& other) noexcept
170 : ZoneHash(other) {}
171
172 //! \}
173
174 //! \name Utilities
175 //! \{
176
177 inline void swap(ZoneHash& other) noexcept { ZoneHashBase::_swap(other); }
178
179 template<typename KeyT>
180 inline NodeT* get(const KeyT& key) const noexcept {
181 uint32_t hashMod = _calcMod(key.hashCode());
182 NodeT* node = static_cast<NodeT*>(_data[hashMod]);
183
184 while (node && !key.matches(node))
185 node = static_cast<NodeT*>(node->_hashNext);
186 return node;
187 }
188
189 inline NodeT* insert(ZoneAllocator* allocator, NodeT* node) noexcept { return static_cast<NodeT*>(_insert(allocator, node)); }
190 inline NodeT* remove(ZoneAllocator* allocator, NodeT* node) noexcept { return static_cast<NodeT*>(_remove(allocator, node)); }
191
192 //! \}
193 };
194
195 //! \}
196
197 ASMJIT_END_NAMESPACE
198
199 #endif // _ASMJIT_CORE_ZONEHASH_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/zone.h"
8 #include "../core/zonelist.h"
9
10 ASMJIT_BEGIN_NAMESPACE
11
12 // ============================================================================
13 // [asmjit::ZoneList - Unit]
14 // ============================================================================
15
16 #if defined(ASMJIT_TEST)
// Minimal intrusive node type used to exercise ZoneList in the unit test below.
class MyListNode : public ZoneListNode<MyListNode> {};
18
// Exercises every ZoneList operation. The bracketed comments such as "[A, B]"
// describe the expected list contents after the preceding mutation.
UNIT(zone_list) {
  Zone zone(4096);
  ZoneList<MyListNode> list;

  MyListNode* a = zone.newT<MyListNode>();
  MyListNode* b = zone.newT<MyListNode>();
  MyListNode* c = zone.newT<MyListNode>();
  MyListNode* d = zone.newT<MyListNode>();

  INFO("Append / Unlink");

  // []
  EXPECT(list.empty() == true);

  // [A]
  list.append(a);
  EXPECT(list.empty() == false);
  EXPECT(list.first() == a);
  EXPECT(list.last() == a);
  EXPECT(a->prev() == nullptr);
  EXPECT(a->next() == nullptr);

  // [A, B]
  list.append(b);
  EXPECT(list.first() == a);
  EXPECT(list.last() == b);
  EXPECT(a->prev() == nullptr);
  EXPECT(a->next() == b);
  EXPECT(b->prev() == a);
  EXPECT(b->next() == nullptr);

  // [A, B, C]
  list.append(c);
  EXPECT(list.first() == a);
  EXPECT(list.last() == c);
  EXPECT(a->prev() == nullptr);
  EXPECT(a->next() == b);
  EXPECT(b->prev() == a);
  EXPECT(b->next() == c);
  EXPECT(c->prev() == b);
  EXPECT(c->next() == nullptr);

  // [B, C]
  list.unlink(a);
  EXPECT(list.first() == b);
  EXPECT(list.last() == c);
  EXPECT(a->prev() == nullptr);
  EXPECT(a->next() == nullptr);
  EXPECT(b->prev() == nullptr);
  EXPECT(b->next() == c);
  EXPECT(c->prev() == b);
  EXPECT(c->next() == nullptr);

  // [B]
  list.unlink(c);
  EXPECT(list.first() == b);
  EXPECT(list.last() == b);
  EXPECT(b->prev() == nullptr);
  EXPECT(b->next() == nullptr);
  EXPECT(c->prev() == nullptr);
  EXPECT(c->next() == nullptr);

  // []
  list.unlink(b);
  EXPECT(list.empty() == true);
  EXPECT(list.first() == nullptr);
  EXPECT(list.last() == nullptr);
  EXPECT(b->prev() == nullptr);
  EXPECT(b->next() == nullptr);

  INFO("Prepend / Unlink");

  // [A]
  list.prepend(a);
  EXPECT(list.empty() == false);
  EXPECT(list.first() == a);
  EXPECT(list.last() == a);
  EXPECT(a->prev() == nullptr);
  EXPECT(a->next() == nullptr);

  // [B, A]
  list.prepend(b);
  EXPECT(list.first() == b);
  EXPECT(list.last() == a);
  EXPECT(b->prev() == nullptr);
  EXPECT(b->next() == a);
  EXPECT(a->prev() == b);
  EXPECT(a->next() == nullptr);

  INFO("InsertAfter / InsertBefore");

  // [B, A, C]
  list.insertAfter(a, c);
  EXPECT(list.first() == b);
  EXPECT(list.last() == c);
  EXPECT(b->prev() == nullptr);
  EXPECT(b->next() == a);
  EXPECT(a->prev() == b);
  EXPECT(a->next() == c);
  EXPECT(c->prev() == a);
  EXPECT(c->next() == nullptr);

  // [B, D, A, C]
  list.insertBefore(a, d);
  EXPECT(list.first() == b);
  EXPECT(list.last() == c);
  EXPECT(b->prev() == nullptr);
  EXPECT(b->next() == d);
  EXPECT(d->prev() == b);
  EXPECT(d->next() == a);
  EXPECT(a->prev() == d);
  EXPECT(a->next() == c);
  EXPECT(c->prev() == a);
  EXPECT(c->next() == nullptr);

  INFO("PopFirst / Pop");

  // [D, A, C]
  EXPECT(list.popFirst() == b);
  EXPECT(b->prev() == nullptr);
  EXPECT(b->next() == nullptr);

  EXPECT(list.first() == d);
  EXPECT(list.last() == c);
  EXPECT(d->prev() == nullptr);
  EXPECT(d->next() == a);
  EXPECT(a->prev() == d);
  EXPECT(a->next() == c);
  EXPECT(c->prev() == a);
  EXPECT(c->next() == nullptr);

  // [D, A]
  EXPECT(list.pop() == c);
  EXPECT(c->prev() == nullptr);
  EXPECT(c->next() == nullptr);

  EXPECT(list.first() == d);
  EXPECT(list.last() == a);
  EXPECT(d->prev() == nullptr);
  EXPECT(d->next() == a);
  EXPECT(a->prev() == d);
  EXPECT(a->next() == nullptr);
}
162 #endif
163
164 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ZONELIST_H
7 #define _ASMJIT_CORE_ZONELIST_H
8
9 #include "../core/support.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_zone
14 //! \{
15
16 // ============================================================================
17 // [asmjit::ZoneListNode]
18 // ============================================================================
19
//! Intrusive link storage for `ZoneList<NodeT>` — types stored in a ZoneList
//! derive from this to embed their own prev/next pointers.
template<typename NodeT>
class ZoneListNode {
public:
  ASMJIT_NONCOPYABLE(ZoneListNode)

  //! Previous (kLinkPrev) and next (kLinkNext) node links.
  NodeT* _listNodes[Globals::kLinkCount];

  //! \name Construction & Destruction
  //! \{

  inline ZoneListNode() noexcept
    : _listNodes { nullptr, nullptr } {}

  // NOTE(review): move construction copies the links but does not clear them
  // in `other` — both nodes then reference the same neighbors; confirm this
  // shallow-move is intended by callers.
  inline ZoneListNode(ZoneListNode&& other) noexcept
    : _listNodes { other._listNodes[0], other._listNodes[1] } {}

  //! \}

  //! \name Accessors
  //! \{

  inline bool hasPrev() const noexcept { return _listNodes[Globals::kLinkPrev] != nullptr; }
  inline bool hasNext() const noexcept { return _listNodes[Globals::kLinkNext] != nullptr; }

  inline NodeT* prev() const noexcept { return _listNodes[Globals::kLinkPrev]; }
  inline NodeT* next() const noexcept { return _listNodes[Globals::kLinkNext]; }

  //! \}
};
49
50 // ============================================================================
51 // [asmjit::ZoneList<T>]
52 // ============================================================================
53
54 template <typename NodeT>
55 class ZoneList {
56 public:
57 ASMJIT_NONCOPYABLE(ZoneList)
58
59 NodeT* _bounds[Globals::kLinkCount];
60
61 //! \name Construction & Destruction
62 //! \{
63
64 inline ZoneList() noexcept
65 : _bounds { nullptr, nullptr } {}
66
67 inline ZoneList(ZoneList&& other) noexcept
68 : _bounds { other._bounds[0], other._bounds[1] } {}
69
70 inline void reset() noexcept {
71 _bounds[0] = nullptr;
72 _bounds[1] = nullptr;
73 }
74
75 //! \}
76
77 //! \name Accessors
78 //! \{
79
80 inline bool empty() const noexcept { return _bounds[0] == nullptr; }
81 inline NodeT* first() const noexcept { return _bounds[Globals::kLinkFirst]; }
82 inline NodeT* last() const noexcept { return _bounds[Globals::kLinkLast]; }
83
84 //! \}
85
86 //! \name Utilities
87 //! \{
88
89 inline void swap(ZoneList& other) noexcept {
90 std::swap(_bounds[0], other._bounds[0]);
91 std::swap(_bounds[1], other._bounds[1]);
92 }
93
94 // Can be used to both prepend and append.
95 inline void _addNode(NodeT* node, size_t dir) noexcept {
96 NodeT* prev = _bounds[dir];
97
98 node->_listNodes[!dir] = prev;
99 _bounds[dir] = node;
100 if (prev)
101 prev->_listNodes[dir] = node;
102 else
103 _bounds[!dir] = node;
104 }
105
106 // Can be used to both prepend and append.
107 inline void _insertNode(NodeT* ref, NodeT* node, size_t dir) noexcept {
108 ASMJIT_ASSERT(ref != nullptr);
109
110 NodeT* prev = ref;
111 NodeT* next = ref->_listNodes[dir];
112
113 prev->_listNodes[dir] = node;
114 if (next)
115 next->_listNodes[!dir] = node;
116 else
117 _bounds[dir] = node;
118
119 node->_listNodes[!dir] = prev;
120 node->_listNodes[ dir] = next;
121 }
122
123 inline void append(NodeT* node) noexcept { _addNode(node, Globals::kLinkLast); }
124 inline void prepend(NodeT* node) noexcept { _addNode(node, Globals::kLinkFirst); }
125
126 inline void insertAfter(NodeT* ref, NodeT* node) noexcept { _insertNode(ref, node, Globals::kLinkNext); }
127 inline void insertBefore(NodeT* ref, NodeT* node) noexcept { _insertNode(ref, node, Globals::kLinkPrev); }
128
129 inline NodeT* unlink(NodeT* node) noexcept {
130 NodeT* prev = node->prev();
131 NodeT* next = node->next();
132
133 if (prev) { prev->_listNodes[1] = next; node->_listNodes[0] = nullptr; } else { _bounds[0] = next; }
134 if (next) { next->_listNodes[0] = prev; node->_listNodes[1] = nullptr; } else { _bounds[1] = prev; }
135
136 node->_listNodes[0] = nullptr;
137 node->_listNodes[1] = nullptr;
138
139 return node;
140 }
141
142 inline NodeT* popFirst() noexcept {
143 NodeT* node = _bounds[0];
144 ASMJIT_ASSERT(node != nullptr);
145
146 NodeT* next = node->next();
147 _bounds[0] = next;
148
149 if (next) {
150 next->_listNodes[0] = nullptr;
151 node->_listNodes[1] = nullptr;
152 }
153 else {
154 _bounds[1] = nullptr;
155 }
156
157 return node;
158 }
159
160 inline NodeT* pop() noexcept {
161 NodeT* node = _bounds[1];
162 ASMJIT_ASSERT(node != nullptr);
163
164 NodeT* prev = node->prev();
165 _bounds[1] = prev;
166
167 if (prev) {
168 prev->_listNodes[1] = nullptr;
169 node->_listNodes[0] = nullptr;
170 }
171 else {
172 _bounds[0] = nullptr;
173 }
174
175 return node;
176 }
177
178 //! \}
179 };
180
181 //! \}
182
183 ASMJIT_END_NAMESPACE
184
185 #endif // _ASMJIT_CORE_ZONELIST_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/zone.h"
8 #include "../core/zonestack.h"
9
10 ASMJIT_BEGIN_NAMESPACE
11
12 // ============================================================================
13 // [asmjit::ZoneStackBase - Init / Reset]
14 // ============================================================================
15
// Initializes (allocator != nullptr) or resets (allocator == nullptr) the
// stack. Any blocks held from a previous initialization are released to the
// old allocator first. `middleIndex` is the byte offset at which the first
// block's `_start`/`_end` are centered, so the stack can grow both ways.
Error ZoneStackBase::_init(ZoneAllocator* allocator, size_t middleIndex) noexcept {
  ZoneAllocator* oldAllocator = _allocator;

  if (oldAllocator) {
    // Walk the chain from the first block and release every block.
    Block* block = _block[Globals::kLinkFirst];
    while (block) {
      Block* next = block->next();
      oldAllocator->release(block, kBlockSize);
      block = next;
    }

    _allocator = nullptr;
    _block[Globals::kLinkLeft] = nullptr;
    _block[Globals::kLinkRight] = nullptr;
  }

  if (allocator) {
    // Allocate the initial block; an initialized stack always has one block.
    Block* block = static_cast<Block*>(allocator->alloc(kBlockSize));
    if (ASMJIT_UNLIKELY(!block))
      return DebugUtils::errored(kErrorOutOfMemory);

    block->_link[Globals::kLinkLeft] = nullptr;
    block->_link[Globals::kLinkRight] = nullptr;
    // Empty block: start == end, centered at `middleIndex`.
    block->_start = (uint8_t*)block + middleIndex;
    block->_end = (uint8_t*)block + middleIndex;

    _allocator = allocator;
    _block[Globals::kLinkLeft] = block;
    _block[Globals::kLinkRight] = block;
  }

  return kErrorOk;
}
49
50 // ============================================================================
51 // [asmjit::ZoneStackBase - Ops]
52 // ============================================================================
53
// Allocates a new block and links it at the given `side` of the block chain.
// `initialIndex` is the byte offset where both `_start` and `_end` of the new
// (empty) block are placed - callers pass an offset near the block's end when
// prepending and near its beginning when appending (see ZoneStack<T>).
Error ZoneStackBase::_prepareBlock(uint32_t side, size_t initialIndex) noexcept {
  ASMJIT_ASSERT(isInitialized());

  Block* prev = _block[side];
  ASMJIT_ASSERT(!prev->empty());

  Block* block = _allocator->allocT<Block>(kBlockSize);
  if (ASMJIT_UNLIKELY(!block))
    return DebugUtils::errored(kErrorOutOfMemory);

  // New block becomes the outermost block on `side`; it points back at `prev`.
  block->_link[ side] = nullptr;
  block->_link[!side] = prev;
  block->_start = (uint8_t*)block + initialIndex;
  block->_end = (uint8_t*)block + initialIndex;

  prev->_link[side] = block;
  _block[side] = block;

  return kErrorOk;
}
74
// Called when the block at `side` becomes empty. If another block exists the
// empty one is released; if it is the only remaining block, its `_start` and
// `_end` are re-centered at `middleIndex` so it can grow in both directions.
void ZoneStackBase::_cleanupBlock(uint32_t side, size_t middleIndex) noexcept {
  Block* block = _block[side];
  ASMJIT_ASSERT(block->empty());

  Block* prev = block->_link[!side];
  if (prev) {
    ASMJIT_ASSERT(prev->_link[side] == block);
    _allocator->release(block, kBlockSize);

    prev->_link[side] = nullptr;
    _block[side] = prev;
  }
  else if (_block[!side] == block) {
    // If the container becomes empty center both pointers in the remaining block.
    block->_start = (uint8_t*)block + middleIndex;
    block->_end = (uint8_t*)block + middleIndex;
  }
}
93
94 // ============================================================================
95 // [asmjit::ZoneStack - Unit]
96 // ============================================================================
97
98 #if defined(ASMJIT_TEST)
// Generic ZoneStack<T> stress test: verifies single-item append/prepend with
// pop/popFirst, then large sequential workloads that force multiple blocks to
// be allocated and cleaned up on both ends.
template<typename T>
static void test_zone_stack(ZoneAllocator* allocator, const char* typeName) {
  ZoneStack<T> stack;

  INFO("Testing ZoneStack<%s>", typeName);
  INFO(" (%d items per one Block)", ZoneStack<T>::kNumBlockItems);

  EXPECT(stack.init(allocator) == kErrorOk);
  EXPECT(stack.empty(), "Stack must be empty after `init()`");

  EXPECT(stack.append(42) == kErrorOk);
  EXPECT(!stack.empty() , "Stack must not be empty after an item has been appended");
  EXPECT(stack.pop() == 42 , "Stack.pop() must return the item that has been appended last");
  EXPECT(stack.empty() , "Stack must be empty after the last item has been removed");

  EXPECT(stack.prepend(43) == kErrorOk);
  EXPECT(!stack.empty() , "Stack must not be empty after an item has been prepended");
  EXPECT(stack.popFirst() == 43, "Stack.popFirst() must return the item that has been prepended last");
  EXPECT(stack.empty() , "Stack must be empty after the last item has been removed");

  int i;
  int iMin =-100000;
  int iMax = 100000;

  INFO("Validating prepend() & popFirst()");
  for (i = iMax; i >= 0; i--) stack.prepend(T(i));
  for (i = 0; i <= iMax; i++) {
    T item = stack.popFirst();
    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
    if (!stack.empty()) {
      // Peek at the next item (pop + push back) to verify ordering.
      item = stack.popFirst();
      EXPECT(i + 1 == item, "Item '%d' didn't match the item '%lld' popped", i + 1, (long long)item);
      stack.prepend(item);
    }
  }
  EXPECT(stack.empty());

  INFO("Validating append() & pop()");
  for (i = 0; i <= iMax; i++) stack.append(T(i));
  for (i = iMax; i >= 0; i--) {
    T item = stack.pop();
    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
    if (!stack.empty()) {
      // Peek at the next item (pop + push back) to verify ordering.
      item = stack.pop();
      EXPECT(i - 1 == item, "Item '%d' didn't match the item '%lld' popped", i - 1, (long long)item);
      stack.append(item);
    }
  }
  EXPECT(stack.empty());

  INFO("Validating append()/prepend() & popFirst()");
  for (i = 1; i <= iMax; i++) stack.append(T(i));
  for (i = 0; i >= iMin; i--) stack.prepend(T(i));

  for (i = iMin; i <= iMax; i++) {
    T item = stack.popFirst();
    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
  }
  EXPECT(stack.empty());

  INFO("Validating append()/prepend() & pop()");
  for (i = 0; i >= iMin; i--) stack.prepend(T(i));
  for (i = 1; i <= iMax; i++) stack.append(T(i));

  for (i = iMax; i >= iMin; i--) {
    T item = stack.pop();
    EXPECT(i == item, "Item '%d' didn't match the item '%lld' popped", i, (long long)item);
  }
  EXPECT(stack.empty());
}
169
// Runs the ZoneStack test for two element types so different per-block item
// counts and layouts are exercised.
UNIT(zone_stack) {
  Zone zone(8096 - Zone::kBlockOverhead);
  ZoneAllocator allocator(&zone);

  test_zone_stack<int>(&allocator, "int");
  test_zone_stack<int64_t>(&allocator, "int64_t");
}
177 #endif
178
179 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ZONESTACK_H
7 #define _ASMJIT_CORE_ZONESTACK_H
8
9 #include "../core/zone.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_zone
14 //! \{
15
16 // ============================================================================
17 // [asmjit::ZoneStackBase]
18 // ============================================================================
19
20 //! Base class used by `ZoneStack<T>`.
21 class ZoneStackBase {
22 public:
23 ASMJIT_NONCOPYABLE(ZoneStackBase)
24
25 static constexpr uint32_t kBlockSize = ZoneAllocator::kHiMaxSize;
26
27 struct Block {
28 inline bool empty() const noexcept { return _start == _end; }
29 inline Block* prev() const noexcept { return _link[Globals::kLinkLeft]; }
30 inline Block* next() const noexcept { return _link[Globals::kLinkRight]; }
31
32 inline void setPrev(Block* block) noexcept { _link[Globals::kLinkLeft] = block; }
33 inline void setNext(Block* block) noexcept { _link[Globals::kLinkRight] = block; }
34
35 template<typename T>
36 inline T* start() const noexcept { return static_cast<T*>(_start); }
37 template<typename T>
38 inline void setStart(T* start) noexcept { _start = static_cast<void*>(start); }
39
40 template<typename T>
41 inline T* end() const noexcept { return (T*)_end; }
42 template<typename T>
43 inline void setEnd(T* end) noexcept { _end = (void*)end; }
44
45 template<typename T>
46 inline T* data() const noexcept { return (T*)((uint8_t*)(this) + sizeof(Block)); }
47
48 template<typename T>
49 inline bool canPrepend() const noexcept { return _start > data<void>(); }
50
51 template<typename T>
52 inline bool canAppend() const noexcept {
53 size_t kNumBlockItems = (kBlockSize - sizeof(Block)) / sizeof(T);
54 size_t kStartBlockIndex = sizeof(Block);
55 size_t kEndBlockIndex = kStartBlockIndex + kNumBlockItems * sizeof(T);
56
57 return (uintptr_t)_end <= ((uintptr_t)this + kEndBlockIndex - sizeof(T));
58 }
59
60 Block* _link[Globals::kLinkCount]; //!< Next and previous blocks.
61 void* _start; //!< Pointer to the start of the array.
62 void* _end; //!< Pointer to the end of the array.
63 };
64
65 //! Allocator used to allocate data.
66 ZoneAllocator* _allocator;
67 //! First and last blocks.
68 Block* _block[Globals::kLinkCount];
69
70 //! \name Construction / Destruction
71 //! \{
72
73 inline ZoneStackBase() noexcept {
74 _allocator = nullptr;
75 _block[0] = nullptr;
76 _block[1] = nullptr;
77 }
78 inline ~ZoneStackBase() noexcept { reset(); }
79
80 inline bool isInitialized() const noexcept { return _allocator != nullptr; }
81 ASMJIT_API Error _init(ZoneAllocator* allocator, size_t middleIndex) noexcept;
82 inline Error reset() noexcept { return _init(nullptr, 0); }
83
84 //! \}
85
86 //! \name Accessors
87 //! \{
88
89 //! Returns `ZoneAllocator` attached to this container.
90 inline ZoneAllocator* allocator() const noexcept { return _allocator; }
91
92 inline bool empty() const noexcept {
93 ASMJIT_ASSERT(isInitialized());
94 return _block[0]->start<void>() == _block[1]->end<void>();
95 }
96
97 //! \}
98
99 //! \cond INTERNAL
100 //! \name Internal
101 //! \{
102
103 ASMJIT_API Error _prepareBlock(uint32_t side, size_t initialIndex) noexcept;
104 ASMJIT_API void _cleanupBlock(uint32_t side, size_t middleIndex) noexcept;
105
106 //! \}
107 //! \endcond
108 };
109
110 // ============================================================================
111 // [asmjit::ZoneStack<T>]
112 // ============================================================================
113
//! Zone allocated stack container.
//!
//! Supports O(1) amortized push/pop at both ends (deque-like); storage is a
//! doubly-linked chain of fixed-size blocks managed by `ZoneStackBase`.
template<typename T>
class ZoneStack : public ZoneStackBase {
public:
  ASMJIT_NONCOPYABLE(ZoneStack<T>)

  enum : uint32_t {
    //! Number of items that fit into one block after its header.
    kNumBlockItems = uint32_t((kBlockSize - sizeof(Block)) / sizeof(T)),
    //! Byte offset of the item array (right after the Block header).
    kStartBlockIndex = uint32_t(sizeof(Block)),
    //! Byte offset of the middle of the item array (initial centering).
    kMidBlockIndex = uint32_t(kStartBlockIndex + (kNumBlockItems / 2) * sizeof(T)),
    //! Byte offset one past the last item slot.
    kEndBlockIndex = uint32_t(kStartBlockIndex + (kNumBlockItems ) * sizeof(T))
  };

  //! \name Construction / Destruction
  //! \{

  inline ZoneStack() noexcept {}
  inline ~ZoneStack() noexcept {}

  inline Error init(ZoneAllocator* allocator) noexcept { return _init(allocator, kMidBlockIndex); }

  //! \}

  //! \name Utilities
  //! \{

  //! Pushes `item` at the front; may allocate a new first block (whose
  //! pointers start at the end so it can grow downwards).
  ASMJIT_INLINE Error prepend(T item) noexcept {
    ASMJIT_ASSERT(isInitialized());
    Block* block = _block[Globals::kLinkFirst];

    if (!block->canPrepend<T>()) {
      ASMJIT_PROPAGATE(_prepareBlock(Globals::kLinkFirst, kEndBlockIndex));
      block = _block[Globals::kLinkFirst];
    }

    T* ptr = block->start<T>() - 1;
    ASMJIT_ASSERT(ptr >= block->data<T>() && ptr <= block->data<T>() + (kNumBlockItems - 1));
    *ptr = item;
    block->setStart<T>(ptr);
    return kErrorOk;
  }

  //! Pushes `item` at the back; may allocate a new last block (whose
  //! pointers start at the beginning so it can grow upwards).
  ASMJIT_INLINE Error append(T item) noexcept {
    ASMJIT_ASSERT(isInitialized());
    Block* block = _block[Globals::kLinkLast];

    if (!block->canAppend<T>()) {
      ASMJIT_PROPAGATE(_prepareBlock(Globals::kLinkLast, kStartBlockIndex));
      block = _block[Globals::kLinkLast];
    }

    T* ptr = block->end<T>();
    ASMJIT_ASSERT(ptr >= block->data<T>() && ptr <= block->data<T>() + (kNumBlockItems - 1));

    *ptr++ = item;
    block->setEnd(ptr);
    return kErrorOk;
  }

  //! Removes and returns the front item; the stack must not be empty.
  ASMJIT_INLINE T popFirst() noexcept {
    ASMJIT_ASSERT(isInitialized());
    ASMJIT_ASSERT(!empty());

    Block* block = _block[Globals::kLinkFirst];
    ASMJIT_ASSERT(!block->empty());

    T* ptr = block->start<T>();
    T item = *ptr++;

    block->setStart(ptr);
    if (block->empty())
      _cleanupBlock(Globals::kLinkFirst, kMidBlockIndex);

    return item;
  }

  //! Removes and returns the back item; the stack must not be empty.
  ASMJIT_INLINE T pop() noexcept {
    ASMJIT_ASSERT(isInitialized());
    ASMJIT_ASSERT(!empty());

    Block* block = _block[Globals::kLinkLast];
    ASMJIT_ASSERT(!block->empty());

    T* ptr = block->end<T>();
    T item = *--ptr;
    ASMJIT_ASSERT(ptr >= block->data<T>());
    ASMJIT_ASSERT(ptr >= block->start<T>());

    block->setEnd(ptr);
    if (block->empty())
      _cleanupBlock(Globals::kLinkLast, kMidBlockIndex);

    return item;
  }

  //! \}
};
211
212 //! \}
213
214 ASMJIT_END_NAMESPACE
215
216 #endif // _ASMJIT_CORE_ZONESTACK_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_SMALLSTRING_H
7 #define _ASMJIT_CORE_SMALLSTRING_H
8
9 #include "../core/globals.h"
10 #include "../core/zone.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \addtogroup asmjit_zone
15 //! \{
16
17 // ============================================================================
18 // [asmjit::ZoneStringBase]
19 // ============================================================================
20
//! Storage shared by all `ZoneString<N>` instantiations - a small-string
//! union: strings short enough for the embedded buffer are stored inline,
//! longer ones live in zone-allocated memory referenced by `_external`.
struct ZoneStringBase {
  union {
    struct {
      uint32_t _size;
      char _embedded[sizeof(void*) * 2 - 4];
    };
    struct {
      void* _dummy;
      char* _external;
    };
  };

  //! Resets to an empty string. Writing `_dummy` also zeroes `_size`, which
  //! overlaps the first pointer-sized member through the union.
  inline void reset() noexcept {
    _dummy = nullptr;
    _external = nullptr;
  }

  //! Copies `str` (`size` bytes, or `strlen(str)` when `size == SIZE_MAX`)
  //! either into the embedded buffer or into memory allocated from `zone`.
  //!
  //! NOTE(review): in the embedded case the terminator is written at index
  //! `size`, so the caller's actual buffer must hold `maxEmbeddedSize + 1`
  //! bytes - `ZoneString<N>` sizes `kMaxEmbeddedSize` accordingly; confirm
  //! for any other caller.
  Error setData(Zone* zone, uint32_t maxEmbeddedSize, const char* str, size_t size) noexcept {
    if (size == SIZE_MAX)
      size = strlen(str);

    if (size <= maxEmbeddedSize) {
      memcpy(_embedded, str, size);
      _embedded[size] = '\0';
    }
    else {
      char* external = static_cast<char*>(zone->dup(str, size, true));
      if (ASMJIT_UNLIKELY(!external))
        return DebugUtils::errored(kErrorOutOfMemory);
      _external = external;
    }

    _size = uint32_t(size);
    return kErrorOk;
  }
};
57
58 // ============================================================================
59 // [asmjit::ZoneString<N>]
60 // ============================================================================
61
62 //! Small string is a template that helps to create strings that can be either
63 //! statically allocated if they are small, or externally allocated in case
64 //! their size exceeds the limit. The `N` represents the size of the whole
65 //! `ZoneString` structure, based on that size the maximum size of the internal
66 //! buffer is determined.
template<size_t N>
class ZoneString {
public:
  //! Total object size - at least `sizeof(ZoneStringBase)`.
  static constexpr uint32_t kWholeSize =
    (N > sizeof(ZoneStringBase)) ? uint32_t(N) : uint32_t(sizeof(ZoneStringBase));
  //! Longest string stored inline: whole size minus the 4-byte `_size`
  //! field and one byte for the NUL terminator.
  static constexpr uint32_t kMaxEmbeddedSize = kWholeSize - 5;

  union {
    ZoneStringBase _base;
    char _wholeData[kWholeSize];
  };

  //! \name Construction & Destruction
  //! \{

  inline ZoneString() noexcept { reset(); }
  inline void reset() noexcept { _base.reset(); }

  //! \}

  //! \name Accessors
  //! \{

  //! Returns the string data - embedded buffer or external pointer,
  //! selected by the stored size (same test as `isEmbedded()`).
  inline const char* data() const noexcept { return _base._size <= kMaxEmbeddedSize ? _base._embedded : _base._external; }
  inline bool empty() const noexcept { return _base._size == 0; }
  inline uint32_t size() const noexcept { return _base._size; }

  inline bool isEmbedded() const noexcept { return _base._size <= kMaxEmbeddedSize; }

  inline Error setData(Zone* zone, const char* data, size_t size) noexcept {
    return _base.setData(zone, kMaxEmbeddedSize, data, size);
  }

  //! \}
};
102
103 //! \}
104
105 ASMJIT_END_NAMESPACE
106
107 #endif // _ASMJIT_CORE_SMALLSTRING_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/support.h"
8 #include "../core/zone.h"
9 #include "../core/zonetree.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 // ============================================================================
14 // [asmjit::ZoneTree - Unit]
15 // ============================================================================
16
17 #if defined(ASMJIT_TEST)
// Helper that validates a ZoneTree's red-black invariants from the unit test.
template<typename NodeT>
struct ZoneRBUnit {
  typedef ZoneTree<NodeT> Tree;

  static void verifyTree(Tree& tree) noexcept {
    EXPECT(checkHeight(static_cast<NodeT*>(tree._root)) > 0);
  }

  // Check whether the Red-Black tree is valid: BST ordering, no red node
  // with a red child, and equal black-height on both sides. Returns the
  // black-height of the subtree (EXPECT aborts the test on violation).
  static int checkHeight(NodeT* node) noexcept {
    if (!node) return 1;

    NodeT* ln = node->left();
    NodeT* rn = node->right();

    // Invalid tree.
    EXPECT(ln == nullptr || *ln < *node);
    EXPECT(rn == nullptr || *rn > *node);

    // Red violation.
    EXPECT(!node->isRed() ||
          (!ZoneTreeNode::_isValidRed(ln) && !ZoneTreeNode::_isValidRed(rn)));

    // Black violation.
    int lh = checkHeight(ln);
    int rh = checkHeight(rn);
    EXPECT(!lh || !rh || lh == rh);

    // Only count black links.
    return (lh && rh) ? lh + !node->isRed() : 0;
  }
};
50
// Test node keyed by an integer; the comparison operators below are what
// ZoneTree's default compare (insert/remove) and keyed `get()` rely on.
class MyRBNode : public ZoneTreeNodeT<MyRBNode> {
public:
  ASMJIT_NONCOPYABLE(MyRBNode)

  inline explicit MyRBNode(uint32_t key) noexcept
    : _key(key) {}

  inline bool operator<(const MyRBNode& other) const noexcept { return _key < other._key; }
  inline bool operator>(const MyRBNode& other) const noexcept { return _key > other._key; }

  inline bool operator<(uint32_t queryKey) const noexcept { return _key < queryKey; }
  inline bool operator>(uint32_t queryKey) const noexcept { return _key > queryKey; }

  uint32_t _key;
};
66
// Inserts sequential keys (validating RB invariants after every insert), then
// removes keys from highest to lowest, re-checking every remaining lookup and
// the tree invariants after each removal.
UNIT(zone_rbtree) {
  uint32_t kCount = BrokenAPI::hasArg("--quick") ? 1000 : 10000;

  Zone zone(4096);
  ZoneTree<MyRBNode> rbTree;

  uint32_t key;
  INFO("Inserting %u elements to RBTree and validating each operation", unsigned(kCount));
  for (key = 0; key < kCount; key++) {
    rbTree.insert(zone.newT<MyRBNode>(key));
    ZoneRBUnit<MyRBNode>::verifyTree(rbTree);
  }

  uint32_t count = kCount;
  INFO("Removing %u elements from RBTree and validating each operation", unsigned(kCount));
  do {
    MyRBNode* node;

    // All keys still present must remain reachable.
    for (key = 0; key < count; key++) {
      node = rbTree.get(key);
      EXPECT(node != nullptr);
      EXPECT(node->_key == key);
    }

    node = rbTree.get(--count);
    rbTree.remove(node);
    ZoneRBUnit<MyRBNode>::verifyTree(rbTree);
  } while (count);

  EXPECT(rbTree.empty());
}
98 #endif
99
100 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ZONETREE_H
7 #define _ASMJIT_CORE_ZONETREE_H
8
9 #include "../core/support.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 //! \addtogroup asmjit_zone
14 //! \{
15
16 // ============================================================================
17 // [asmjit::ZoneTreeNode]
18 // ============================================================================
19
20 //! RB-Tree node.
21 //!
22 //! The color is stored in a least significant bit of the `left` node.
23 //!
24 //! WARNING: Always use accessors to access left and right children.
class ZoneTreeNode {
public:
  ASMJIT_NONCOPYABLE(ZoneTreeNode)

  enum : uintptr_t {
    kRedMask = 0x1,
    kPtrMask = ~kRedMask
  };

  //! Left ([0]) and right ([1]) child pointers; bit 0 of the left slot
  //! additionally stores this node's RED flag (see `kRedMask`).
  uintptr_t _rbNodeData[Globals::kLinkCount];

  //! \name Construction & Destruction
  //! \{

  inline ZoneTreeNode() noexcept
    : _rbNodeData { 0, 0 } {}

  //! \}

  //! \name Accessors
  //! \{

  inline bool isRed() const noexcept { return static_cast<bool>(_rbNodeData[0] & kRedMask); }

  // `> kRedMask` treats both a null pointer and a lone red bit as "no child".
  inline bool hasChild(size_t i) const noexcept { return _rbNodeData[i] > kRedMask; }
  inline bool hasLeft() const noexcept { return _rbNodeData[0] > kRedMask; }
  // The right slot never carries the red bit, so a plain null-check suffices.
  inline bool hasRight() const noexcept { return _rbNodeData[1] != 0; }

  template<typename T = ZoneTreeNode>
  inline T* child(size_t i) const noexcept { return static_cast<T*>(_getChild(i)); }
  template<typename T = ZoneTreeNode>
  inline T* left() const noexcept { return static_cast<T*>(_getLeft()); }
  template<typename T = ZoneTreeNode>
  inline T* right() const noexcept { return static_cast<T*>(_getRight()); }

  //! \}

  //! \cond INTERNAL
  //! \name Internal
  //! \{

  inline ZoneTreeNode* _getChild(size_t i) const noexcept { return (ZoneTreeNode*)(_rbNodeData[i] & kPtrMask); }
  inline ZoneTreeNode* _getLeft() const noexcept { return (ZoneTreeNode*)(_rbNodeData[0] & kPtrMask); }
  inline ZoneTreeNode* _getRight() const noexcept { return (ZoneTreeNode*)(_rbNodeData[1]); }

  // Setters preserve the red bit stored in slot 0.
  inline void _setChild(size_t i, ZoneTreeNode* node) noexcept { _rbNodeData[i] = (_rbNodeData[i] & kRedMask) | (uintptr_t)node; }
  inline void _setLeft(ZoneTreeNode* node) noexcept { _rbNodeData[0] = (_rbNodeData[0] & kRedMask) | (uintptr_t)node; }
  inline void _setRight(ZoneTreeNode* node) noexcept { _rbNodeData[1] = (uintptr_t)node; }

  inline void _makeRed() noexcept { _rbNodeData[0] |= kRedMask; }
  inline void _makeBlack() noexcept { _rbNodeData[0] &= kPtrMask; }

  //! Tests whether the node is RED (RED node must be non-null and must have RED flag set).
  static inline bool _isValidRed(ZoneTreeNode* node) noexcept { return node && node->isRed(); }

  //! \}
  //! \endcond
};
83
84 //! RB-Tree typed to `NodeT`.
//! Convenience subclass that re-exposes the child accessors typed to `NodeT`,
//! avoiding casts at call sites.
template<typename NodeT>
class ZoneTreeNodeT : public ZoneTreeNode {
public:
  ASMJIT_NONCOPYABLE(ZoneTreeNodeT)

  //! \name Construction & Destruction
  //! \{

  inline ZoneTreeNodeT() noexcept
    : ZoneTreeNode() {}

  //! \}

  //! \name Accessors
  //! \{

  inline NodeT* child(size_t i) const noexcept { return static_cast<NodeT*>(_getChild(i)); }
  inline NodeT* left() const noexcept { return static_cast<NodeT*>(_getLeft()); }
  inline NodeT* right() const noexcept { return static_cast<NodeT*>(_getRight()); }

  //! \}
};
107
108 // ============================================================================
109 // [asmjit::ZoneTree]
110 // ============================================================================
111
112 //! RB-Tree.
113 template<typename NodeT>
114 class ZoneTree {
115 public:
116 ASMJIT_NONCOPYABLE(ZoneTree)
117
118 typedef NodeT Node;
119 NodeT* _root;
120
121 //! \name Construction & Destruction
122 //! \{
123
124 inline ZoneTree() noexcept
125 : _root(nullptr) {}
126
127 inline ZoneTree(ZoneTree&& other) noexcept
128 : _root(other._root) {}
129
130 inline void reset() noexcept { _root = nullptr; }
131
132 //! \}
133
134 //! \name Accessors
135 //! \{
136
137 inline bool empty() const noexcept { return _root == nullptr; }
138 inline NodeT* root() const noexcept { return static_cast<NodeT*>(_root); }
139
140 //! \}
141
142 //! \name Utilities
143 //! \{
144
145 inline void swap(ZoneTree& other) noexcept {
146 std::swap(_root, other._root);
147 }
148
  //! Inserts `node` into the tree using single-pass top-down red-black
  //! insertion: color flips are applied on the way down and red violations
  //! are fixed with rotations as they appear. `node` must be detached and
  //! black (asserted below).
  template<typename CompareT = Support::Compare<Support::kSortAscending>>
  void insert(NodeT* node, const CompareT& cmp = CompareT()) noexcept {
    // Node to insert must not contain garbage.
    ASMJIT_ASSERT(!node->hasLeft());
    ASMJIT_ASSERT(!node->hasRight());
    ASMJIT_ASSERT(!node->isRed());

    if (!_root) {
      _root = node;
      return;
    }

    ZoneTreeNode head; // False root node,
    head._setRight(_root); // having root on the right.

    ZoneTreeNode* g = nullptr; // Grandparent.
    ZoneTreeNode* p = nullptr; // Parent.
    ZoneTreeNode* t = &head; // Iterator.
    ZoneTreeNode* q = _root; // Query.

    size_t dir = 0; // Direction for accessing child nodes.
    size_t last = 0; // Not needed to initialize, but makes some tools happy.

    node->_makeRed(); // New nodes are always red and violations fixed appropriately.

    // Search down the tree.
    for (;;) {
      if (!q) {
        // Insert new node at the bottom.
        q = node;
        p->_setChild(dir, node);
      }
      else if (_isValidRed(q->_getLeft()) && _isValidRed(q->_getRight())) {
        // Color flip.
        q->_makeRed();
        q->_getLeft()->_makeBlack();
        q->_getRight()->_makeBlack();
      }

      // Fix red violation.
      if (_isValidRed(q) && _isValidRed(p))
        t->_setChild(t->_getRight() == g,
                     q == p->_getChild(last) ? _singleRotate(g, !last) : _doubleRotate(g, !last));

      // Stop if found.
      if (q == node)
        break;

      last = dir;
      dir = cmp(*static_cast<NodeT*>(q), *static_cast<NodeT*>(node)) < 0;

      // Update helpers.
      if (g) t = g;

      g = p;
      p = q;
      q = q->_getChild(dir);
    }

    // Update root and make it black.
    _root = static_cast<NodeT*>(head._getRight());
    _root->_makeBlack();
  }
212
213 //! Remove node from RBTree.
214 template<typename CompareT = Support::Compare<Support::kSortAscending>>
215 void remove(ZoneTreeNode* node, const CompareT& cmp = CompareT()) noexcept {
216 ZoneTreeNode head; // False root node,
217 head._setRight(_root); // having root on the right.
218
219 ZoneTreeNode* g = nullptr; // Grandparent.
220 ZoneTreeNode* p = nullptr; // Parent.
221 ZoneTreeNode* q = &head; // Query.
222
223 ZoneTreeNode* f = nullptr; // Found item.
224 ZoneTreeNode* gf = nullptr; // Found grandparent.
225 size_t dir = 1; // Direction (0 or 1).
226
227 // Search and push a red down.
228 while (q->hasChild(dir)) {
229 size_t last = dir;
230
231 // Update helpers.
232 g = p;
233 p = q;
234 q = q->_getChild(dir);
235 dir = cmp(*static_cast<NodeT*>(q), *static_cast<NodeT*>(node)) < 0;
236
237 // Save found node.
238 if (q == node) {
239 f = q;
240 gf = g;
241 }
242
243 // Push the red node down.
244 if (!_isValidRed(q) && !_isValidRed(q->_getChild(dir))) {
245 if (_isValidRed(q->_getChild(!dir))) {
246 ZoneTreeNode* child = _singleRotate(q, dir);
247 p->_setChild(last, child);
248 p = child;
249 }
250 else if (!_isValidRed(q->_getChild(!dir)) && p->_getChild(!last)) {
251 ZoneTreeNode* s = p->_getChild(!last);
252 if (!_isValidRed(s->_getChild(!last)) && !_isValidRed(s->_getChild(last))) {
253 // Color flip.
254 p->_makeBlack();
255 s->_makeRed();
256 q->_makeRed();
257 }
258 else {
259 size_t dir2 = g->_getRight() == p;
260 ZoneTreeNode* child = g->_getChild(dir2);
261
262 if (_isValidRed(s->_getChild(last))) {
263 child = _doubleRotate(p, last);
264 g->_setChild(dir2, child);
265 }
266 else if (_isValidRed(s->_getChild(!last))) {
267 child = _singleRotate(p, last);
268 g->_setChild(dir2, child);
269 }
270
271 // Ensure correct coloring.
272 q->_makeRed();
273 child->_makeRed();
274 child->_getLeft()->_makeBlack();
275 child->_getRight()->_makeBlack();
276 }
277 }
278 }
279 }
280
281 // Replace and remove.
282 ASMJIT_ASSERT(f != nullptr);
283 ASMJIT_ASSERT(f != &head);
284 ASMJIT_ASSERT(q != &head);
285
286 p->_setChild(p->_getRight() == q,
287 q->_getChild(q->_getLeft() == nullptr));
288
289 // NOTE: The original algorithm used a trick to just copy 'key/value' to
290 // `f` and mark `q` for deletion. But this is unacceptable here as we
291 // really want to destroy the passed `node`. So, we have to make sure that
292 // we have really removed `f` and not `q`.
293 if (f != q) {
294 ASMJIT_ASSERT(f != &head);
295 ASMJIT_ASSERT(f != gf);
296
297 ZoneTreeNode* n = gf ? gf : &head;
298 dir = (n == &head) ? 1 : cmp(*static_cast<NodeT*>(n), *static_cast<NodeT*>(node)) < 0;
299
300 for (;;) {
301 if (n->_getChild(dir) == f) {
302 n->_setChild(dir, q);
303 // RAW copy, including the color.
304 q->_rbNodeData[0] = f->_rbNodeData[0];
305 q->_rbNodeData[1] = f->_rbNodeData[1];
306 break;
307 }
308
309 n = n->_getChild(dir);
310
311 // Cannot be true as we know that it must reach `f` in few iterations.
312 ASMJIT_ASSERT(n != nullptr);
313 dir = cmp(*static_cast<NodeT*>(n), *static_cast<NodeT*>(node)) < 0;
314 }
315 }
316
317 // Update root and make it black.
318 _root = static_cast<NodeT*>(head._getRight());
319 if (_root) _root->_makeBlack();
320 }
321
322 template<typename KeyT, typename CompareT = Support::Compare<Support::kSortAscending>>
323 ASMJIT_INLINE NodeT* get(const KeyT& key, const CompareT& cmp = CompareT()) const noexcept {
324 ZoneTreeNode* node = _root;
325 while (node) {
326 auto result = cmp(*static_cast<const NodeT*>(node), key);
327 if (result == 0) break;
328
329 // Go left or right depending on the `result`.
330 node = node->_getChild(result < 0);
331 }
332 return static_cast<NodeT*>(node);
333 }
334
335 //! \}
336
337 //! \cond INTERNAL
338 //! \name Internal
339 //! \{
340
341 static inline bool _isValidRed(ZoneTreeNode* node) noexcept { return ZoneTreeNode::_isValidRed(node); }
342
343 //! Single rotation.
344 static ASMJIT_INLINE ZoneTreeNode* _singleRotate(ZoneTreeNode* root, size_t dir) noexcept {
345 ZoneTreeNode* save = root->_getChild(!dir);
346 root->_setChild(!dir, save->_getChild(dir));
347 save->_setChild( dir, root);
348 root->_makeRed();
349 save->_makeBlack();
350 return save;
351 }
352
353 //! Double rotation.
354 static ASMJIT_INLINE ZoneTreeNode* _doubleRotate(ZoneTreeNode* root, size_t dir) noexcept {
355 root->_setChild(!dir, _singleRotate(root->_getChild(!dir), !dir));
356 return _singleRotate(root, dir);
357 }
358
359 //! \}
360 //! \endcond
361 };
362
363 //! \}
364
365 ASMJIT_END_NAMESPACE
366
367 #endif // _ASMJIT_CORE_ZONETREE_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #include "../core/support.h"
8 #include "../core/zone.h"
9 #include "../core/zonevector.h"
10
11 ASMJIT_BEGIN_NAMESPACE
12
13 // ============================================================================
14 // [asmjit::ZoneVectorBase - Helpers]
15 // ============================================================================
16
17 Error ZoneVectorBase::_grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept {
18 uint32_t threshold = Globals::kGrowThreshold / sizeOfT;
19 uint32_t capacity = _capacity;
20 uint32_t after = _size;
21
22 if (ASMJIT_UNLIKELY(std::numeric_limits<uint32_t>::max() - n < after))
23 return DebugUtils::errored(kErrorOutOfMemory);
24
25 after += n;
26 if (capacity >= after)
27 return kErrorOk;
28
29 // ZoneVector is used as an array to hold short-lived data structures used
30 // during code generation. The growing strategy is simple - use small capacity
31 // at the beginning (very good for ZoneAllocator) and then grow quicker to
32 // prevent successive reallocations.
33 if (capacity < 4)
34 capacity = 4;
35 else if (capacity < 8)
36 capacity = 8;
37 else if (capacity < 16)
38 capacity = 16;
39 else if (capacity < 64)
40 capacity = 64;
41 else if (capacity < 256)
42 capacity = 256;
43
44 while (capacity < after) {
45 if (capacity < threshold)
46 capacity *= 2;
47 else
48 capacity += threshold;
49 }
50
51 return _reserve(allocator, sizeOfT, capacity);
52 }
53
54 Error ZoneVectorBase::_reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept {
55 uint32_t oldCapacity = _capacity;
56 if (oldCapacity >= n) return kErrorOk;
57
58 uint32_t nBytes = n * sizeOfT;
59 if (ASMJIT_UNLIKELY(nBytes < n))
60 return DebugUtils::errored(kErrorOutOfMemory);
61
62 size_t allocatedBytes;
63 uint8_t* newData = static_cast<uint8_t*>(allocator->alloc(nBytes, allocatedBytes));
64
65 if (ASMJIT_UNLIKELY(!newData))
66 return DebugUtils::errored(kErrorOutOfMemory);
67
68 void* oldData = _data;
69 if (_size)
70 memcpy(newData, oldData, size_t(_size) * sizeOfT);
71
72 if (oldData)
73 allocator->release(oldData, size_t(oldCapacity) * sizeOfT);
74
75 _capacity = uint32_t(allocatedBytes / sizeOfT);
76 ASMJIT_ASSERT(_capacity >= n);
77
78 _data = newData;
79 return kErrorOk;
80 }
81
82 Error ZoneVectorBase::_resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept {
83 uint32_t size = _size;
84
85 if (_capacity < n) {
86 ASMJIT_PROPAGATE(_grow(allocator, sizeOfT, n - size));
87 ASMJIT_ASSERT(_capacity >= n);
88 }
89
90 if (size < n)
91 memset(static_cast<uint8_t*>(_data) + size_t(size) * sizeOfT, 0, size_t(n - size) * sizeOfT);
92
93 _size = n;
94 return kErrorOk;
95 }
96
97 // ============================================================================
98 // [asmjit::ZoneBitVector - Ops]
99 // ============================================================================
100
// Copies the content of `other` into this bit-vector, reallocating through
// `allocator` when the current capacity is insufficient. Returns `kErrorOk`
// or `kErrorOutOfMemory`.
Error ZoneBitVector::copyFrom(ZoneAllocator* allocator, const ZoneBitVector& other) noexcept {
  BitWord* data = _data;
  uint32_t newSize = other.size();

  if (!newSize) {
    // Nothing to copy - become empty, but keep the current buffer.
    _size = 0;
    return kErrorOk;
  }

  if (newSize > _capacity) {
    // Realloc needed... Calculate the minimum capacity (in bytes) required.
    uint32_t minimumCapacityInBits = Support::alignUp<uint32_t>(newSize, kBitWordSizeInBits);
    if (ASMJIT_UNLIKELY(minimumCapacityInBits < newSize))
      return DebugUtils::errored(kErrorOutOfMemory);

    // Normalize to bytes.
    uint32_t minimumCapacity = minimumCapacityInBits / 8;
    size_t allocatedCapacity;

    BitWord* newData = static_cast<BitWord*>(allocator->alloc(minimumCapacity, allocatedCapacity));
    if (ASMJIT_UNLIKELY(!newData))
      return DebugUtils::errored(kErrorOutOfMemory);

    // `allocatedCapacity` now contains number in bytes, we need bits.
    size_t allocatedCapacityInBits = allocatedCapacity * 8;

    // Arithmetic overflow should normally not happen. If it happens we just
    // change the `allocatedCapacityInBits` to the `minimumCapacityInBits` as
    // this value is still safe to be used to call `_allocator->release(...)`.
    if (ASMJIT_UNLIKELY(allocatedCapacityInBits < allocatedCapacity))
      allocatedCapacityInBits = minimumCapacityInBits;

    // The old content doesn't have to be copied here - it's fully
    // overwritten by `_copyBits()` below.
    if (data)
      allocator->release(data, _capacity / 8);
    data = newData;

    _data = data;
    _capacity = uint32_t(allocatedCapacityInBits);
  }

  _size = newSize;
  _copyBits(data, other.data(), _wordsPerBits(newSize));

  return kErrorOk;
}
146
147 Error ZoneBitVector::_resize(ZoneAllocator* allocator, uint32_t newSize, uint32_t idealCapacity, bool newBitsValue) noexcept {
148 ASMJIT_ASSERT(idealCapacity >= newSize);
149
150 if (newSize <= _size) {
151 // The size after the resize is lesser than or equal to the current size.
152 uint32_t idx = newSize / kBitWordSizeInBits;
153 uint32_t bit = newSize % kBitWordSizeInBits;
154
155 // Just set all bits outside of the new size in the last word to zero.
156 // There is a case that there are not bits to set if `bit` is zero. This
157 // happens when `newSize` is a multiply of `kBitWordSizeInBits` like 64, 128,
158 // and so on. In that case don't change anything as that would mean settings
159 // bits outside of the `_size`.
160 if (bit)
161 _data[idx] &= (BitWord(1) << bit) - 1u;
162
163 _size = newSize;
164 return kErrorOk;
165 }
166
167 uint32_t oldSize = _size;
168 BitWord* data = _data;
169
170 if (newSize > _capacity) {
171 // Realloc needed, calculate the minimum capacity (in bytes) requied.
172 uint32_t minimumCapacityInBits = Support::alignUp<uint32_t>(idealCapacity, kBitWordSizeInBits);
173
174 if (ASMJIT_UNLIKELY(minimumCapacityInBits < newSize))
175 return DebugUtils::errored(kErrorOutOfMemory);
176
177 // Normalize to bytes.
178 uint32_t minimumCapacity = minimumCapacityInBits / 8;
179 size_t allocatedCapacity;
180
181 BitWord* newData = static_cast<BitWord*>(allocator->alloc(minimumCapacity, allocatedCapacity));
182 if (ASMJIT_UNLIKELY(!newData))
183 return DebugUtils::errored(kErrorOutOfMemory);
184
185 // `allocatedCapacity` now contains number in bytes, we need bits.
186 size_t allocatedCapacityInBits = allocatedCapacity * 8;
187
188 // Arithmetic overflow should normally not happen. If it happens we just
189 // change the `allocatedCapacityInBits` to the `minimumCapacityInBits` as
190 // this value is still safe to be used to call `_allocator->release(...)`.
191 if (ASMJIT_UNLIKELY(allocatedCapacityInBits < allocatedCapacity))
192 allocatedCapacityInBits = minimumCapacityInBits;
193
194 _copyBits(newData, data, _wordsPerBits(oldSize));
195
196 if (data)
197 allocator->release(data, _capacity / 8);
198 data = newData;
199
200 _data = data;
201 _capacity = uint32_t(allocatedCapacityInBits);
202 }
203
204 // Start (of the old size) and end (of the new size) bits
205 uint32_t idx = oldSize / kBitWordSizeInBits;
206 uint32_t startBit = oldSize % kBitWordSizeInBits;
207 uint32_t endBit = newSize % kBitWordSizeInBits;
208
209 // Set new bits to either 0 or 1. The `pattern` is used to set multiple
210 // bits per bit-word and contains either all zeros or all ones.
211 BitWord pattern = Support::bitMaskFromBool<BitWord>(newBitsValue);
212
213 // First initialize the last bit-word of the old size.
214 if (startBit) {
215 uint32_t nBits = 0;
216
217 if (idx == (newSize / kBitWordSizeInBits)) {
218 // The number of bit-words is the same after the resize. In that case
219 // we need to set only bits necessary in the current last bit-word.
220 ASMJIT_ASSERT(startBit < endBit);
221 nBits = endBit - startBit;
222 }
223 else {
224 // There is be more bit-words after the resize. In that case we don't
225 // have to be extra careful about the last bit-word of the old size.
226 nBits = kBitWordSizeInBits - startBit;
227 }
228
229 data[idx++] |= pattern << nBits;
230 }
231
232 // Initialize all bit-words after the last bit-word of the old size.
233 uint32_t endIdx = _wordsPerBits(newSize);
234 while (idx < endIdx) data[idx++] = pattern;
235
236 // Clear unused bits of the last bit-word.
237 if (endBit)
238 data[endIdx - 1] = pattern & ((BitWord(1) << endBit) - 1);
239
240 _size = newSize;
241 return kErrorOk;
242 }
243
244 Error ZoneBitVector::_append(ZoneAllocator* allocator, bool value) noexcept {
245 uint32_t kThreshold = Globals::kGrowThreshold * 8;
246 uint32_t newSize = _size + 1;
247 uint32_t idealCapacity = _capacity;
248
249 if (idealCapacity < 128)
250 idealCapacity = 128;
251 else if (idealCapacity <= kThreshold)
252 idealCapacity *= 2;
253 else
254 idealCapacity += kThreshold;
255
256 if (ASMJIT_UNLIKELY(idealCapacity < _capacity)) {
257 if (ASMJIT_UNLIKELY(_size == std::numeric_limits<uint32_t>::max()))
258 return DebugUtils::errored(kErrorOutOfMemory);
259 idealCapacity = newSize;
260 }
261
262 return _resize(allocator, newSize, idealCapacity, value);
263 }
264
265 // ============================================================================
266 // [asmjit::ZoneVector / ZoneBitVector - Unit]
267 // ============================================================================
268
269 #if defined(ASMJIT_TEST)
// Exercises the basic `ZoneVector<T>` API (append, clear, indexOf, release)
// for element type `T`; `typeName` is only used to label the test output.
template<typename T>
static void test_zone_vector(ZoneAllocator* allocator, const char* typeName) {
  int i;
  int kMax = 100000;

  ZoneVector<T> vec;

  INFO("ZoneVector<%s> basic tests", typeName);
  EXPECT(vec.append(allocator, 0) == kErrorOk);
  EXPECT(vec.empty() == false);
  EXPECT(vec.size() == 1);
  EXPECT(vec.capacity() >= 1);
  EXPECT(vec.indexOf(0) == 0);
  EXPECT(vec.indexOf(-11) == Globals::kNotFound);

  vec.clear();
  EXPECT(vec.empty());
  EXPECT(vec.size() == 0);
  EXPECT(vec.indexOf(0) == Globals::kNotFound);

  // Append enough items to force several reallocations.
  for (i = 0; i < kMax; i++) {
    EXPECT(vec.append(allocator, T(i)) == kErrorOk);
  }
  EXPECT(vec.empty() == false);
  EXPECT(vec.size() == uint32_t(kMax));
  EXPECT(vec.indexOf(T(kMax - 1)) == uint32_t(kMax - 1));

  vec.release(allocator);
}
299
300 static void test_zone_bitvector(ZoneAllocator* allocator) {
301 Zone zone(8096 - Zone::kBlockOverhead);
302
303 uint32_t i, count;
304 uint32_t kMaxCount = 100;
305
306 ZoneBitVector vec;
307 EXPECT(vec.empty());
308 EXPECT(vec.size() == 0);
309
310 INFO("ZoneBitVector::resize()");
311 for (count = 1; count < kMaxCount; count++) {
312 vec.clear();
313 EXPECT(vec.resize(allocator, count, false) == kErrorOk);
314 EXPECT(vec.size() == count);
315
316 for (i = 0; i < count; i++)
317 EXPECT(vec.bitAt(i) == false);
318
319 vec.clear();
320 EXPECT(vec.resize(allocator, count, true) == kErrorOk);
321 EXPECT(vec.size() == count);
322
323 for (i = 0; i < count; i++)
324 EXPECT(vec.bitAt(i) == true);
325 }
326
327 INFO("ZoneBitVector::fillBits() / clearBits()");
328 for (count = 1; count < kMaxCount; count += 2) {
329 vec.clear();
330 EXPECT(vec.resize(allocator, count) == kErrorOk);
331 EXPECT(vec.size() == count);
332
333 for (i = 0; i < (count + 1) / 2; i++) {
334 bool value = bool(i & 1);
335 if (value)
336 vec.fillBits(i, count - i * 2);
337 else
338 vec.clearBits(i, count - i * 2);
339 }
340
341 for (i = 0; i < count; i++) {
342 EXPECT(vec.bitAt(i) == bool(i & 1));
343 }
344 }
345 }
346
// Entry point of the `zone_vector` unit test (compiled under ASMJIT_TEST).
UNIT(zone_vector) {
  // One zone-backed allocator shared by all sub-tests.
  Zone zone(8096 - Zone::kBlockOverhead);
  ZoneAllocator allocator(&zone);

  test_zone_vector<int>(&allocator, "int");
  test_zone_vector<int64_t>(&allocator, "int64_t");
  test_zone_bitvector(&allocator);
}
355 #endif
356
357 ASMJIT_END_NAMESPACE
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_ZONEVECTOR_H
7 #define _ASMJIT_CORE_ZONEVECTOR_H
8
9 #include "../core/support.h"
10 #include "../core/zone.h"
11
12 ASMJIT_BEGIN_NAMESPACE
13
14 //! \addtogroup asmjit_zone
15 //! \{
16
17 // ============================================================================
18 // [asmjit::ZoneVectorBase]
19 // ============================================================================
20
21 //! \cond INTERNAL
22
23 //! Base class implementing core `ZoneVector<>` functionality.
class ZoneVectorBase {
public:
  ASMJIT_NONCOPYABLE(ZoneVectorBase)

  // STL compatibility.
  typedef uint32_t size_type;
  typedef ptrdiff_t difference_type;

  //! Vector data (untyped).
  void* _data;
  //! Size of the vector.
  size_type _size;
  //! Capacity of the vector.
  size_type _capacity;

protected:
  //! \name Construction & Destruction
  //! \{

  //! Creates a new instance of `ZoneVectorBase`.
  inline ZoneVectorBase() noexcept
    : _data(nullptr),
      _size(0),
      _capacity(0) {}

  //! Moves an existing `ZoneVectorBase`.
  //!
  //! NOTE(review): `other` is not reset, so both instances reference the
  //! same buffer after the move - confirm the moved-from vector is never
  //! used afterwards (the buffer is zone-allocated, so nothing double-frees).
  inline ZoneVectorBase(ZoneVectorBase&& other) noexcept
    : _data(other._data),
      _size(other._size),
      _capacity(other._capacity) {}

  //! \}

  //! \cond INTERNAL
  //! \name Internal
  //! \{

  //! Releases the backing buffer to `allocator` and resets the vector.
  inline void _release(ZoneAllocator* allocator, uint32_t sizeOfT) noexcept {
    if (_data != nullptr) {
      allocator->release(_data, _capacity * sizeOfT);
      reset();
    }
  }

  //! Grows the vector so at least `n` more items of `sizeOfT` bytes fit.
  ASMJIT_API Error _grow(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept;
  //! Resizes the vector to exactly `n` items, zeroing appended items.
  ASMJIT_API Error _resize(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept;
  //! Reallocates the buffer to hold at least `n` items, keeping the content.
  ASMJIT_API Error _reserve(ZoneAllocator* allocator, uint32_t sizeOfT, uint32_t n) noexcept;

  //! Swaps all members with `other`.
  inline void _swap(ZoneVectorBase& other) noexcept {
    std::swap(_data, other._data);
    std::swap(_size, other._size);
    std::swap(_capacity, other._capacity);
  }

  //! \}
  //! \endcond

public:
  //! \name Accessors
  //! \{

  //! Tests whether the vector is empty.
  inline bool empty() const noexcept { return _size == 0; }
  //! Returns the vector size.
  inline size_type size() const noexcept { return _size; }
  //! Returns the vector capacity.
  inline size_type capacity() const noexcept { return _capacity; }

  //! \}

  //! \name Utilities
  //! \{

  //! Makes the vector empty (won't change the capacity or data pointer).
  inline void clear() noexcept { _size = 0; }
  //! Resets the vector data and sets its `size` to zero.
  inline void reset() noexcept {
    _data = nullptr;
    _size = 0;
    _capacity = 0;
  }

  //! Truncates the vector to at most `n` items.
  inline void truncate(size_type n) noexcept {
    _size = Support::min(_size, n);
  }

  //! Sets size of the vector to `n`. Used internally by some algorithms.
  inline void _setSize(size_type n) noexcept {
    ASMJIT_ASSERT(n <= _capacity);
    _size = n;
  }

  //! \}
};
117
118 //! \endcond
119
120 // ============================================================================
121 // [asmjit::ZoneVector<T>]
122 // ============================================================================
123
124 //! Template used to store and manage array of Zone allocated data.
125 //!
126 //! This template has these advantages over other std::vector<>:
127 //! - Always non-copyable (designed to be non-copyable, we want it).
128 //! - Optimized for working only with POD types.
129 //! - Uses ZoneAllocator, thus small vectors are almost for free.
130 //! - Explicit allocation, ZoneAllocator is not part of the data.
131 template <typename T>
132 class ZoneVector : public ZoneVectorBase {
133 public:
134 ASMJIT_NONCOPYABLE(ZoneVector<T>)
135
136 // STL compatibility;
137 typedef T value_type;
138 typedef T* pointer;
139 typedef const T* const_pointer;
140 typedef T& reference;
141 typedef const T& const_reference;
142
143 typedef Support::Iterator<T> iterator;
144 typedef Support::Iterator<const T> const_iterator;
145 typedef Support::ReverseIterator<T> reverse_iterator;
146 typedef Support::ReverseIterator<const T> const_reverse_iterator;
147
148 //! \name Construction & Destruction
149 //! \{
150
151 inline ZoneVector() noexcept : ZoneVectorBase() {}
152 inline ZoneVector(ZoneVector&& other) noexcept : ZoneVector(other) {}
153
154 //! \}
155
156 //! \name Accessors
157 //! \{
158
159 //! Returns vector data.
160 inline T* data() noexcept { return static_cast<T*>(_data); }
161 //! Returns vector data (const)
162 inline const T* data() const noexcept { return static_cast<const T*>(_data); }
163
164 //! Returns item at the given index `i` (const).
165 inline const T& at(uint32_t i) const noexcept {
166 ASMJIT_ASSERT(i < _size);
167 return data()[i];
168 }
169
170 inline void _setEndPtr(T* p) noexcept {
171 ASMJIT_ASSERT(p >= data() && p <= data() + _capacity);
172 _setSize(uint32_t((uintptr_t)(p - data())));
173 }
174
175 //! \}
176
177 //! \name STL Compatibility (Iterators)
178 //! \{
179
180 inline iterator begin() noexcept { return iterator(data()); };
181 inline const_iterator begin() const noexcept { return const_iterator(data()); };
182
183 inline iterator end() noexcept { return iterator(data() + _size); };
184 inline const_iterator end() const noexcept { return const_iterator(data() + _size); };
185
186 inline reverse_iterator rbegin() noexcept { return reverse_iterator(data()); };
187 inline const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(data()); };
188
189 inline reverse_iterator rend() noexcept { return reverse_iterator(data() + _size); };
190 inline const_reverse_iterator rend() const noexcept { return const_reverse_iterator(data() + _size); };
191
192 inline const_iterator cbegin() const noexcept { return const_iterator(data()); };
193 inline const_iterator cend() const noexcept { return const_iterator(data() + _size); };
194
195 inline const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(data()); };
196 inline const_reverse_iterator crend() const noexcept { return const_reverse_iterator(data() + _size); };
197
198 //! \}
199
200 //! \name Utilities
201 //! \{
202
203 //! Swaps this vector with `other`.
204 inline void swap(ZoneVector<T>& other) noexcept { _swap(other); }
205
206 //! Prepends `item` to the vector.
207 inline Error prepend(ZoneAllocator* allocator, const T& item) noexcept {
208 if (ASMJIT_UNLIKELY(_size == _capacity))
209 ASMJIT_PROPAGATE(grow(allocator, 1));
210
211 ::memmove(static_cast<T*>(_data) + 1, _data, size_t(_size) * sizeof(T));
212 memcpy(_data, &item, sizeof(T));
213
214 _size++;
215 return kErrorOk;
216 }
217
218 //! Inserts an `item` at the specified `index`.
219 inline Error insert(ZoneAllocator* allocator, uint32_t index, const T& item) noexcept {
220 ASMJIT_ASSERT(index <= _size);
221
222 if (ASMJIT_UNLIKELY(_size == _capacity))
223 ASMJIT_PROPAGATE(grow(allocator, 1));
224
225 T* dst = static_cast<T*>(_data) + index;
226 ::memmove(dst + 1, dst, size_t(_size - index) * sizeof(T));
227 memcpy(dst, &item, sizeof(T));
228 _size++;
229
230 return kErrorOk;
231 }
232
233 //! Appends `item` to the vector.
234 inline Error append(ZoneAllocator* allocator, const T& item) noexcept {
235 if (ASMJIT_UNLIKELY(_size == _capacity))
236 ASMJIT_PROPAGATE(grow(allocator, 1));
237
238 memcpy(static_cast<T*>(_data) + _size, &item, sizeof(T));
239 _size++;
240
241 return kErrorOk;
242 }
243
244 inline Error concat(ZoneAllocator* allocator, const ZoneVector<T>& other) noexcept {
245 uint32_t size = other._size;
246 if (_capacity - _size < size)
247 ASMJIT_PROPAGATE(grow(allocator, size));
248
249 if (size) {
250 memcpy(static_cast<T*>(_data) + _size, other._data, size_t(size) * sizeof(T));
251 _size += size;
252 }
253
254 return kErrorOk;
255 }
256
257 //! Prepends `item` to the vector (unsafe case).
258 //!
259 //! Can only be used together with `willGrow()`. If `willGrow(N)` returns
260 //! `kErrorOk` then N elements can be added to the vector without checking
261 //! if there is a place for them. Used mostly internally.
262 inline void prependUnsafe(const T& item) noexcept {
263 ASMJIT_ASSERT(_size < _capacity);
264 T* data = static_cast<T*>(_data);
265
266 if (_size)
267 ::memmove(data + 1, data, size_t(_size) * sizeof(T));
268
269 memcpy(data, &item, sizeof(T));
270 _size++;
271 }
272
273 //! Append s`item` to the vector (unsafe case).
274 //!
275 //! Can only be used together with `willGrow()`. If `willGrow(N)` returns
276 //! `kErrorOk` then N elements can be added to the vector without checking
277 //! if there is a place for them. Used mostly internally.
278 inline void appendUnsafe(const T& item) noexcept {
279 ASMJIT_ASSERT(_size < _capacity);
280
281 memcpy(static_cast<T*>(_data) + _size, &item, sizeof(T));
282 _size++;
283 }
284
285 //! Concatenates all items of `other` at the end of the vector.
286 inline void concatUnsafe(const ZoneVector<T>& other) noexcept {
287 uint32_t size = other._size;
288 ASMJIT_ASSERT(_capacity - _size >= size);
289
290 if (size) {
291 memcpy(static_cast<T*>(_data) + _size, other._data, size_t(size) * sizeof(T));
292 _size += size;
293 }
294 }
295
296 //! Returns index of the given `val` or `Globals::kNotFound` if it doesn't exist.
297 inline uint32_t indexOf(const T& val) const noexcept {
298 const T* data = static_cast<const T*>(_data);
299 uint32_t size = _size;
300
301 for (uint32_t i = 0; i < size; i++)
302 if (data[i] == val)
303 return i;
304 return Globals::kNotFound;
305 }
306
307 //! Tests whether the vector contains `val`.
308 inline bool contains(const T& val) const noexcept {
309 return indexOf(val) != Globals::kNotFound;
310 }
311
312 //! Removes item at index `i`.
313 inline void removeAt(uint32_t i) noexcept {
314 ASMJIT_ASSERT(i < _size);
315
316 T* data = static_cast<T*>(_data) + i;
317 uint32_t size = --_size - i;
318
319 if (size)
320 ::memmove(data, data + 1, size_t(size) * sizeof(T));
321 }
322
323 inline T pop() noexcept {
324 ASMJIT_ASSERT(_size > 0);
325
326 uint32_t index = --_size;
327 return data()[index];
328 }
329
330 template<typename CompareT = Support::Compare<Support::kSortAscending>>
331 inline void sort(const CompareT& cmp = CompareT()) noexcept {
332 Support::qSort<T, CompareT>(data(), size(), cmp);
333 }
334
335 //! Returns item at index `i`.
336 inline T& operator[](uint32_t i) noexcept {
337 ASMJIT_ASSERT(i < _size);
338 return data()[i];
339 }
340
341 //! Returns item at index `i`.
342 inline const T& operator[](uint32_t i) const noexcept {
343 ASMJIT_ASSERT(i < _size);
344 return data()[i];
345 }
346
347 inline T& first() noexcept { return operator[](0); }
348 inline const T& first() const noexcept { return operator[](0); }
349
350 inline T& last() noexcept { return operator[](_size - 1); }
351 inline const T& last() const noexcept { return operator[](_size - 1); }
352
353 //! \}
354
355 //! \name Memory Management
356 //! \{
357
358 //! Releases the memory held by `ZoneVector<T>` back to the `allocator`.
359 inline void release(ZoneAllocator* allocator) noexcept {
360 _release(allocator, sizeof(T));
361 }
362
363 //! Called to grow the buffer to fit at least `n` elements more.
364 inline Error grow(ZoneAllocator* allocator, uint32_t n) noexcept {
365 return ZoneVectorBase::_grow(allocator, sizeof(T), n);
366 }
367
368 //! Resizes the vector to hold `n` elements.
369 //!
370 //! If `n` is greater than the current size then the additional elements'
371 //! content will be initialized to zero. If `n` is less than the current
372 //! size then the vector will be truncated to exactly `n` elements.
373 inline Error resize(ZoneAllocator* allocator, uint32_t n) noexcept {
374 return ZoneVectorBase::_resize(allocator, sizeof(T), n);
375 }
376
377 //! Reallocates the internal array to fit at least `n` items.
378 inline Error reserve(ZoneAllocator* allocator, uint32_t n) noexcept {
379 return n > _capacity ? ZoneVectorBase::_reserve(allocator, sizeof(T), n) : Error(kErrorOk);
380 }
381
382 inline Error willGrow(ZoneAllocator* allocator, uint32_t n = 1) noexcept {
383 return _capacity - _size < n ? grow(allocator, n) : Error(kErrorOk);
384 }
385
386 //! \}
387 };
388
389 // ============================================================================
390 // [asmjit::ZoneBitVector]
391 // ============================================================================
392
393 class ZoneBitVector {
394 public:
395 typedef Support::BitWord BitWord;
396 static constexpr uint32_t kBitWordSizeInBits = Support::kBitWordSizeInBits;
397
398 //! Bits.
399 BitWord* _data;
400 //! Size of the bit-vector (in bits).
401 uint32_t _size;
402 //! Capacity of the bit-vector (in bits).
403 uint32_t _capacity;
404
405 ASMJIT_NONCOPYABLE(ZoneBitVector)
406
407 //! \cond INTERNAL
408 //! \name Internal
409 //! \{
410
  //! Returns the number of `BitWord`s required to store `nBits` bits (rounds up).
  static inline uint32_t _wordsPerBits(uint32_t nBits) noexcept {
    return ((nBits + kBitWordSizeInBits - 1) / kBitWordSizeInBits);
  }

  //! Zeroes `nBitWords` words of `dst`.
  static inline void _zeroBits(BitWord* dst, uint32_t nBitWords) noexcept {
    for (uint32_t i = 0; i < nBitWords; i++)
      dst[i] = 0;
  }

  //! Sets all bits in `nBitWords` words of `dst`.
  static inline void _fillBits(BitWord* dst, uint32_t nBitWords) noexcept {
    for (uint32_t i = 0; i < nBitWords; i++)
      dst[i] = ~BitWord(0);
  }

  //! Copies `nBitWords` words from `src` to `dst`.
  static inline void _copyBits(BitWord* dst, const BitWord* src, uint32_t nBitWords) noexcept {
    for (uint32_t i = 0; i < nBitWords; i++)
      dst[i] = src[i];
  }
429
430 //! \}
431 //! \endcond
432
433 //! \name Construction & Destruction
434 //! \{
435
  //! Creates an empty bit-vector.
  inline ZoneBitVector() noexcept
    : _data(nullptr),
      _size(0),
      _capacity(0) {}

  //! Moves an existing bit-vector.
  //!
  //! NOTE(review): `other` is not reset, so both instances reference the same
  //! buffer after the move - confirm the moved-from vector is never used
  //! afterwards (the buffer is zone-allocated, so nothing double-frees).
  inline ZoneBitVector(ZoneBitVector&& other) noexcept
    : _data(other._data),
      _size(other._size),
      _capacity(other._capacity) {}
445
446 //! \}
447
  //! \name Overloaded Operators
  //! \{

  //! Tests equality with `other` (same size and identical bits), see `eq()`.
  inline bool operator==(const ZoneBitVector& other) const noexcept { return eq(other); }
  //! Tests inequality with `other`.
  inline bool operator!=(const ZoneBitVector& other) const noexcept { return !eq(other); }

  //! \}

  //! \name Accessors
  //! \{

  //! Tests whether the bit-vector is empty (has no bits).
  inline bool empty() const noexcept { return _size == 0; }
  //! Returns the size of this bit-vector (in bits).
  inline uint32_t size() const noexcept { return _size; }
  //! Returns the capacity of this bit-vector (in bits).
  inline uint32_t capacity() const noexcept { return _capacity; }

  //! Returns the size of the `BitWord[]` array in `BitWord` units.
  inline uint32_t sizeInBitWords() const noexcept { return _wordsPerBits(_size); }
  //! Returns the capacity of the `BitWord[]` array in `BitWord` units.
  inline uint32_t capacityInBitWords() const noexcept { return _wordsPerBits(_capacity); }

  //! Returns bit-vector data as `BitWord[]`.
  inline BitWord* data() noexcept { return _data; }
  //! \overload
  inline const BitWord* data() const noexcept { return _data; }

  //! \}
477
  //! \name Utilities
  //! \{

  //! Swaps the content (data pointer, size, and capacity) with `other`.
  inline void swap(ZoneBitVector& other) noexcept {
    std::swap(_data, other._data);
    std::swap(_size, other._size);
    std::swap(_capacity, other._capacity);
  }

  //! Sets the size to zero; the allocated buffer is kept (capacity unchanged).
  inline void clear() noexcept {
    _size = 0;
  }

  //! Resets the vector to a default-constructed state.
  //!
  //! \note The data pointer is simply dropped, not freed - returning the
  //! buffer to the allocator is `release()`'s job.
  inline void reset() noexcept {
    _data = nullptr;
    _size = 0;
    _capacity = 0;
  }

  //! Shrinks the size to `newSize` (no-op if already smaller) and clears
  //! bits in the last word that fell outside the new size.
  inline void truncate(uint32_t newSize) noexcept {
    _size = Support::min(_size, newSize);
    _clearUnusedBits();
  }

  //! Returns the bit at `index` (must be less than `size()`).
  inline bool bitAt(uint32_t index) const noexcept {
    ASMJIT_ASSERT(index < _size);
    return Support::bitVectorGetBit(_data, index);
  }

  //! Sets the bit at `index` to `value` (must be less than `size()`).
  inline void setBit(uint32_t index, bool value) noexcept {
    ASMJIT_ASSERT(index < _size);
    Support::bitVectorSetBit(_data, index, value);
  }

  //! Inverts the bit at `index` (must be less than `size()`).
  inline void flipBit(uint32_t index) noexcept {
    ASMJIT_ASSERT(index < _size);
    Support::bitVectorFlipBit(_data, index);
  }
516
  //! Appends a single bit to the vector, reallocating via `_append()` when
  //! the capacity is exhausted.
  ASMJIT_INLINE Error append(ZoneAllocator* allocator, bool value) noexcept {
    uint32_t index = _size;
    // Slow path - out of capacity, let `_append()` grow the buffer and retry.
    if (ASMJIT_UNLIKELY(index >= _capacity))
      return _append(allocator, value);

    uint32_t idx = index / kBitWordSizeInBits;
    uint32_t bit = index % kBitWordSizeInBits;

    // First bit of a word assigns (overwriting whatever stale content the
    // freshly used word may hold); subsequent bits OR into the word that was
    // already initialized by a previous append.
    if (bit == 0)
      _data[idx] = BitWord(value) << bit;
    else
      _data[idx] |= BitWord(value) << bit;

    _size++;
    return kErrorOk;
  }

  //! Replaces the content of this vector by a copy of `other` (out-of-line;
  //! may allocate from `allocator`).
  ASMJIT_API Error copyFrom(ZoneAllocator* allocator, const ZoneBitVector& other) noexcept;

  //! Clears all bits (size is unchanged).
  inline void clearAll() noexcept {
    _zeroBits(_data, _wordsPerBits(_size));
  }

  //! Sets all bits (size is unchanged); trailing bits past `_size` in the
  //! last word are cleared again to keep the unused-bits-are-zero invariant.
  inline void fillAll() noexcept {
    _fillBits(_data, _wordsPerBits(_size));
    _clearUnusedBits();
  }
544
  //! Clears `count` bits starting at `start` (the range must lie within the
  //! vector's size).
  inline void clearBits(uint32_t start, uint32_t count) noexcept {
    ASMJIT_ASSERT(start <= _size);
    ASMJIT_ASSERT(_size - start >= count);

    Support::bitVectorClear(_data, start, count);
  }

  //! Sets `count` bits starting at `start` (the range must lie within the
  //! vector's size).
  inline void fillBits(uint32_t start, uint32_t count) noexcept {
    ASMJIT_ASSERT(start <= _size);
    ASMJIT_ASSERT(_size - start >= count);

    Support::bitVectorFill(_data, start, count);
  }
558
559 //! Performs a logical bitwise AND between bits specified in this array and bits
560 //! in `other`. If `other` has less bits than `this` then all remaining bits are
561 //! set to zero.
562 //!
563 //! \note The size of the BitVector is unaffected by this operation.
564 inline void and_(const ZoneBitVector& other) noexcept {
565 BitWord* dst = _data;
566 const BitWord* src = other._data;
567
568 uint32_t thisBitWordCount = sizeInBitWords();
569 uint32_t otherBitWordCount = other.sizeInBitWords();
570 uint32_t commonBitWordCount = Support::min(thisBitWordCount, otherBitWordCount);
571
572 uint32_t i = 0;
573 while (i < commonBitWordCount) {
574 dst[i] = dst[i] & src[i];
575 i++;
576 }
577
578 while (i < thisBitWordCount) {
579 dst[i] = 0;
580 i++;
581 }
582 }
583
584 //! Performs a logical bitwise AND between bits specified in this array and
585 //! negated bits in `other`. If `other` has less bits than `this` then all
586 //! remaining bits are kept intact.
587 //!
588 //! \note The size of the BitVector is unaffected by this operation.
589 inline void andNot(const ZoneBitVector& other) noexcept {
590 BitWord* dst = _data;
591 const BitWord* src = other._data;
592
593 uint32_t commonBitWordCount = _wordsPerBits(Support::min(_size, other._size));
594 for (uint32_t i = 0; i < commonBitWordCount; i++)
595 dst[i] = dst[i] & ~src[i];
596 }
597
  //! Performs a logical bitwise OR between bits specified in this array and bits
  //! in `other`. If `other` has less bits than `this` then all remaining bits
  //! are kept intact.
  //!
  //! \note The size of the BitVector is unaffected by this operation.
  inline void or_(const ZoneBitVector& other) noexcept {
    BitWord* dst = _data;
    const BitWord* src = other._data;

    uint32_t commonBitWordCount = _wordsPerBits(Support::min(_size, other._size));
    for (uint32_t i = 0; i < commonBitWordCount; i++)
      dst[i] = dst[i] | src[i];
    // When this vector is shorter than `other`, ORing the last shared word
    // may have set bits past `_size` - clear them to restore the invariant.
    _clearUnusedBits();
  }
612
  //! Clears bits in the last `BitWord` that lie past `_size`, maintaining the
  //! invariant that trailing unused bits are always zero (relied upon by the
  //! word-wise comparison in `eq()`).
  inline void _clearUnusedBits() noexcept {
    uint32_t idx = _size / kBitWordSizeInBits;
    uint32_t bit = _size % kBitWordSizeInBits;

    // Size aligned to a word boundary - no partial word to mask.
    if (!bit) return;
    _data[idx] &= (BitWord(1) << bit) - 1u;
  }
620
621 inline bool eq(const ZoneBitVector& other) const noexcept {
622 if (_size != other._size)
623 return false;
624
625 const BitWord* aData = _data;
626 const BitWord* bData = other._data;
627 uint32_t numBitWords = _wordsPerBits(_size);
628
629 for (uint32_t i = 0; i < numBitWords; i++)
630 if (aData[i] != bData[i])
631 return false;
632 return true;
633 }
634
  //! \}

  //! \name Memory Management
  //! \{

  //! Returns the bit buffer to `allocator` and resets the vector to empty.
  inline void release(ZoneAllocator* allocator) noexcept {
    if (!_data) return;
    // `_capacity` is in bits; the allocator expects a size in bytes.
    allocator->release(_data, _capacity / 8);
    reset();
  }

  //! Resizes the vector to `newSize` bits; newly added bits are initialized
  //! to `newBitsValue`.
  inline Error resize(ZoneAllocator* allocator, uint32_t newSize, bool newBitsValue = false) noexcept {
    return _resize(allocator, newSize, newSize, newBitsValue);
  }

  // Out-of-line implementations of resize/append (defined in zonevector.cpp).
  ASMJIT_API Error _resize(ZoneAllocator* allocator, uint32_t newSize, uint32_t idealCapacity, bool newBitsValue) noexcept;
  ASMJIT_API Error _append(ZoneAllocator* allocator, bool value) noexcept;

  //! \}
654
  //! \name Iterators
  //! \{

  //! Iterates over all set bits of a single `ZoneBitVector`.
  class ForEachBitSet : public Support::BitVectorIterator<BitWord> {
  public:
    ASMJIT_INLINE explicit ForEachBitSet(const ZoneBitVector& bitVector) noexcept
      : Support::BitVectorIterator<BitWord>(bitVector.data(), bitVector.sizeInBitWords()) {}
  };

  //! Iterates over set bits of the word-wise combination (by `Operator`) of
  //! two bit-vectors, which must have equal sizes.
  template<class Operator>
  class ForEachBitOp : public Support::BitVectorOpIterator<BitWord, Operator> {
  public:
    ASMJIT_INLINE ForEachBitOp(const ZoneBitVector& a, const ZoneBitVector& b) noexcept
      : Support::BitVectorOpIterator<BitWord, Operator>(a.data(), b.data(), a.sizeInBitWords()) {
      ASMJIT_ASSERT(a.size() == b.size());
    }
  };

  //! \}
672
673 //! \}
674
675 };
676
677 //! \}
678
679 ASMJIT_END_NAMESPACE
680
681 #endif // _ASMJIT_CORE_ZONEVECTOR_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_CORE_H
7 #define _ASMJIT_CORE_H
8
9 //! \defgroup asmjit_core Core
10 //! \brief Core API.
11 //!
12 //! API that provides classes and functions not specific to any architecture.
13
14 //! \defgroup asmjit_builder Builder
15 //! \brief Builder API.
16 //!
17 //! Both Builder and Compiler are emitters that emit everything to a representation
18 //! that allows further processing. The code stored in such representation is
19 //! completely safe to be patched, simplified, reordered, obfuscated, removed,
20 //! injected, analyzed, or processed some other way. Each instruction, label,
21 //! directive, or other building block is stored as \ref BaseNode (or derived
22 //! class like \ref InstNode or \ref LabelNode) and contains all the information
23 //! necessary to pass that node later to the Assembler.
24
25 //! \defgroup asmjit_compiler Compiler
26 //! \brief Compiler API.
27 //!
28 //! Compiler tool is built on top of a \ref asmjit_builder API and adds register
29 //! allocation and support for defining and calling functions into it. At the
30 //! moment it's the easiest way to generate some code as most architecture and
31 //! OS specific stuff is properly abstracted, however, abstractions also mean
32 //! that not everything is possible with the Compiler.
33
34 //! \defgroup asmjit_func Function
35 //! \brief Function API.
36
37 //! \defgroup asmjit_jit JIT
38 //! \brief JIT API and Virtual Memory Management.
39
40 //! \defgroup asmjit_zone Zone
41 //! \brief Zone allocator and zone allocated containers.
42
43 //! \defgroup asmjit_support Support
44 //! \brief Support API.
45
46 //! \cond INTERNAL
47 //! \defgroup asmjit_ra RA
48 //! \brief Register allocator internals.
49 //! \endcond
50
51 #include "./core/globals.h"
52
53 #include "./core/arch.h"
54 #include "./core/assembler.h"
55 #include "./core/builder.h"
56 #include "./core/callconv.h"
57 #include "./core/codeholder.h"
58 #include "./core/compiler.h"
59 #include "./core/constpool.h"
60 #include "./core/cpuinfo.h"
61 #include "./core/datatypes.h"
62 #include "./core/emitter.h"
63 #include "./core/features.h"
64 #include "./core/func.h"
65 #include "./core/inst.h"
66 #include "./core/jitallocator.h"
67 #include "./core/jitruntime.h"
68 #include "./core/logging.h"
69 #include "./core/operand.h"
70 #include "./core/osutils.h"
71 #include "./core/string.h"
72 #include "./core/support.h"
73 #include "./core/target.h"
74 #include "./core/type.h"
75 #include "./core/virtmem.h"
76 #include "./core/zone.h"
77 #include "./core/zonehash.h"
78 #include "./core/zonelist.h"
79 #include "./core/zonetree.h"
80 #include "./core/zonestack.h"
81 #include "./core/zonestring.h"
82 #include "./core/zonevector.h"
83
84 #endif // _ASMJIT_CORE_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifdef ASMJIT_BUILD_X86
8
9 #include "../core/codebufferwriter_p.h"
10 #include "../core/cpuinfo.h"
11 #include "../core/logging.h"
12 #include "../core/misc_p.h"
13 #include "../core/support.h"
14 #include "../x86/x86assembler.h"
15 #include "../x86/x86instdb_p.h"
16 #include "../x86/x86logging_p.h"
17 #include "../x86/x86opcode_p.h"
18
19 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
20
// ============================================================================
// [TypeDefs]
// ============================================================================

// Shorthand for `Support::FastUInt8` (see support.h).
typedef Support::FastUInt8 FastUInt8;

// ============================================================================
// [Constants]
// ============================================================================

//! X86 bytes used to encode important prefixes.
enum X86Byte : uint32_t {
  //! 1-byte REX prefix mask.
  kX86ByteRex = 0x40,

  //! 1-byte REX.W component.
  kX86ByteRexW = 0x08,

  //! Marker bit used to flag a REX prefix that must not be emitted
  //! (see `x86IsRexInvalid()` below).
  kX86ByteInvalidRex = 0x80,

  //! 2-byte VEX prefix:
  //!   - `[0]` - `0xC5`.
  //!   - `[1]` - `RvvvvLpp`.
  kX86ByteVex2 = 0xC5,

  //! 3-byte VEX prefix:
  //!   - `[0]` - `0xC4`.
  //!   - `[1]` - `RXBmmmmm`.
  //!   - `[2]` - `WvvvvLpp`.
  kX86ByteVex3 = 0xC4,

  //! 3-byte XOP prefix:
  //!   - `[0]` - `0x8F`.
  //!   - `[1]` - `RXBmmmmm`.
  //!   - `[2]` - `WvvvvLpp`.
  kX86ByteXop3 = 0x8F,

  //! 4-byte EVEX prefix:
  //!   - `[0]` - `0x62`.
  //!   - `[1]` - Payload0 or `P[ 7: 0]` - `[R X B R' 0 0 m m]`.
  //!   - `[2]` - Payload1 or `P[15: 8]` - `[W v v v v 1 p p]`.
  //!   - `[3]` - Payload2 or `P[23:16]` - `[z L' L b V' a a a]`.
  //!
  //! Payload:
  //!   - `P[ 1: 0]` - OPCODE: EVEX.mmmmm, only lowest 2 bits [1:0] used.
  //!   - `P[ 3: 2]` - ______: Must be 0.
  //!   - `P[ 4]` - REG-ID: EVEX.R' - 5th bit of 'RRRRR'.
  //!   - `P[ 5]` - REG-ID: EVEX.B - 4th bit of 'BBBBB'.
  //!   - `P[ 6]` - REG-ID: EVEX.X - 5th bit of 'BBBBB' or 4th bit of 'XXXX' (with SIB).
  //!   - `P[ 7]` - REG-ID: EVEX.R - 4th bit of 'RRRRR'.
  //!   - `P[ 9: 8]` - OPCODE: EVEX.pp.
  //!   - `P[ 10]` - ______: Must be 1.
  //!   - `P[14:11]` - REG-ID: 4 bits of 'VVVV'.
  //!   - `P[ 15]` - OPCODE: EVEX.W.
  //!   - `P[18:16]` - REG-ID: K register k0...k7 (Merging/Zeroing Vector Ops).
  //!   - `P[ 19]` - REG-ID: 5th bit of 'VVVVV'.
  //!   - `P[ 20]` - OPCODE: Broadcast/Rounding Control/SAE bit.
  //!   - `P[22:21]` - OPCODE: Vector Length (L' and L) / Rounding Control.
  //!   - `P[ 23]` - OPCODE: Zeroing/Merging.
  kX86ByteEvex = 0x62
};

// AsmJit specific (used to encode VVVVV field in XOP/VEX/EVEX).
enum VexVVVVV : uint32_t {
  kVexVVVVVShift = 7,
  kVexVVVVVMask = 0x1F << kVexVVVVVShift
};
88
//! Instruction 2-byte/3-byte opcode prefix definition.
struct X86OpcodeMM {
  uint8_t size;    // Number of valid bytes in `data` (0..2 used below).
  uint8_t data[3]; // The escape bytes themselves (e.g. 0F, 0F 38, 0F 3A).
};

//! Mandatory prefixes used to encode legacy [66, F3, F2] or [9B] byte.
//! Indexed by the opcode's PP field (see `X86BufferWriter::emitPP()`).
static const uint8_t x86OpcodePP[8] = { 0x00, 0x66, 0xF3, 0xF2, 0x00, 0x00, 0x00, 0x9B };

//! Instruction 2-byte/3-byte opcode prefix data, indexed by the opcode's
//! MM field (see `X86BufferWriter::emitMMAndOpcode()`).
static const X86OpcodeMM x86OpcodeMM[] = {
  { 0, { 0x00, 0x00, 0 } }, // #00 (0b0000).
  { 1, { 0x0F, 0x00, 0 } }, // #01 (0b0001).
  { 2, { 0x0F, 0x38, 0 } }, // #02 (0b0010).
  { 2, { 0x0F, 0x3A, 0 } }, // #03 (0b0011).
  { 2, { 0x0F, 0x01, 0 } }, // #04 (0b0100).
  { 0, { 0x00, 0x00, 0 } }, // #05 (0b0101).
  { 0, { 0x00, 0x00, 0 } }, // #06 (0b0110).
  { 0, { 0x00, 0x00, 0 } }, // #07 (0b0111).
  { 0, { 0x00, 0x00, 0 } }, // #08 (0b1000).
  { 0, { 0x00, 0x00, 0 } }, // #09 (0b1001).
  { 0, { 0x00, 0x00, 0 } }, // #0A (0b1010).
  { 0, { 0x00, 0x00, 0 } }, // #0B (0b1011).
  { 0, { 0x00, 0x00, 0 } }, // #0C (0b1100).
  { 0, { 0x00, 0x00, 0 } }, // #0D (0b1101).
  { 0, { 0x00, 0x00, 0 } }, // #0E (0b1110).
  { 0, { 0x00, 0x00, 0 } }  // #0F (0b1111).
};

// Segment-override prefix bytes indexed by segment id. Only 7 initializers
// are provided for the 8 elements - index 7 is value-initialized to zero by
// aggregate-initialization rules.
static const uint8_t x86SegmentPrefix[8] = {
  0x00, // None.
  0x26, // ES.
  0x2E, // CS.
  0x36, // SS.
  0x3E, // DS.
  0x64, // FS.
  0x65  // GS.
};

// `push sreg` opcodes indexed by segment id (index 7 zero-initialized).
static const uint32_t x86OpcodePushSReg[8] = {
  Opcode::k000000 | 0x00, // None.
  Opcode::k000000 | 0x06, // Push ES.
  Opcode::k000000 | 0x0E, // Push CS.
  Opcode::k000000 | 0x16, // Push SS.
  Opcode::k000000 | 0x1E, // Push DS.
  Opcode::k000F00 | 0xA0, // Push FS.
  Opcode::k000F00 | 0xA8  // Push GS.
};

// `pop sreg` opcodes indexed by segment id (index 7 zero-initialized).
// Note that `pop cs` does not exist; its slot is zero.
static const uint32_t x86OpcodePopSReg[8] = {
  Opcode::k000000 | 0x00, // None.
  Opcode::k000000 | 0x07, // Pop ES.
  Opcode::k000000 | 0x00, // Pop CS.
  Opcode::k000000 | 0x17, // Pop SS.
  Opcode::k000000 | 0x1F, // Pop DS.
  Opcode::k000F00 | 0xA1, // Pop FS.
  Opcode::k000F00 | 0xA9  // Pop GS.
};
147
// ============================================================================
// [asmjit::X86MemInfo | X86VEXPrefix | X86LLByRegType | X86CDisp8Table]
// ============================================================================

//! Memory operand's info bits.
//!
//! A lookup table that contains various information based on the BASE and INDEX
//! information of a memory operand. This is much better and safer than playing
//! with IFs in the code and can check for errors much faster and better.
enum X86MemInfo_Enum {
  kX86MemInfo_0         = 0x00,

  kX86MemInfo_BaseGp    = 0x01, //!< Has BASE reg, REX.B can be 1, compatible with REX.B byte.
  kX86MemInfo_Index     = 0x02, //!< Has INDEX reg, REX.X can be 1, compatible with REX.X byte.

  kX86MemInfo_BaseLabel = 0x10, //!< Base is Label.
  kX86MemInfo_BaseRip   = 0x20, //!< Base is RIP.

  kX86MemInfo_67H_X86   = 0x40, //!< Address-size override in 32-bit mode.
  kX86MemInfo_67H_X64   = 0x80, //!< Address-size override in 64-bit mode.
  kX86MemInfo_67H_Mask  = 0xC0  //!< Contains all address-size override bits.
};

// Compile-time generator of one `x86MemInfo` entry; `X` packs the BASE (low
// 5 bits) and INDEX (next 5 bits) register types of a memory operand.
template<uint32_t X>
struct X86MemInfo_T {
  enum {
    B = (X     ) & 0x1F,
    I = (X >> 5) & 0x1F,

    kBase  = (B >= Reg::kTypeGpw && B <= Reg::kTypeGpq ) ? kX86MemInfo_BaseGp    :
             (B == Reg::kTypeRip                       ) ? kX86MemInfo_BaseRip   :
             (B == Label::kLabelTag                    ) ? kX86MemInfo_BaseLabel : 0,

    kIndex = (I >= Reg::kTypeGpw && I <= Reg::kTypeGpq ) ? kX86MemInfo_Index     :
             (I >= Reg::kTypeXmm && I <= Reg::kTypeZmm ) ? kX86MemInfo_Index     : 0,

    k67H   = (B == Reg::kTypeGpw  && I == Reg::kTypeNone) ? kX86MemInfo_67H_X86  :
             (B == Reg::kTypeGpd  && I == Reg::kTypeNone) ? kX86MemInfo_67H_X64  :
             (B == Reg::kTypeNone && I == Reg::kTypeGpw ) ? kX86MemInfo_67H_X86  :
             (B == Reg::kTypeNone && I == Reg::kTypeGpd ) ? kX86MemInfo_67H_X64  :
             (B == Reg::kTypeGpw  && I == Reg::kTypeGpw ) ? kX86MemInfo_67H_X86  :
             (B == Reg::kTypeGpd  && I == Reg::kTypeGpd ) ? kX86MemInfo_67H_X64  :
             (B == Reg::kTypeGpw  && I == Reg::kTypeXmm ) ? kX86MemInfo_67H_X86  :
             (B == Reg::kTypeGpd  && I == Reg::kTypeXmm ) ? kX86MemInfo_67H_X64  :
             (B == Reg::kTypeGpw  && I == Reg::kTypeYmm ) ? kX86MemInfo_67H_X86  :
             (B == Reg::kTypeGpd  && I == Reg::kTypeYmm ) ? kX86MemInfo_67H_X64  :
             (B == Reg::kTypeGpw  && I == Reg::kTypeZmm ) ? kX86MemInfo_67H_X86  :
             (B == Reg::kTypeGpd  && I == Reg::kTypeZmm ) ? kX86MemInfo_67H_X64  :
             (B == Label::kLabelTag && I == Reg::kTypeGpw) ? kX86MemInfo_67H_X86 :
             (B == Label::kLabelTag && I == Reg::kTypeGpd) ? kX86MemInfo_67H_X64 : 0,

    // NOTE(review): bits 0x04|0x08 are always set here - presumably the
    // REX.R/REX.W pass-through mentioned in the note below; confirm against
    // how `x86MemInfo` values are masked by the emitter.
    kValue = kBase | kIndex | k67H | 0x04 | 0x08
  };
};

// The result stored in the LUT is a combination of
//   - 67H - Address override prefix - depends on BASE+INDEX register types and
//           the target architecture.
//   - REX - A possible combination of REX.[B|X|R|W] bits in REX prefix where
//           REX.B and REX.X are possibly masked out, but REX.R and REX.W are
//           kept as is.
#define VALUE(X) X86MemInfo_T<X>::kValue
static const uint8_t x86MemInfo[] = { ASMJIT_LOOKUP_TABLE_1024(VALUE, 0) };
#undef VALUE

// VEX3 or XOP xor bits applied to the opcode before emitted. The index to this
// table is 'mmmmm' value, which contains all we need. This is only used by a
// 3 BYTE VEX and XOP prefixes, 2 BYTE VEX prefix is handled differently. The
// idea is to minimize the difference between VEX3 vs XOP when encoding VEX
// or XOP instruction. This should minimize the code required to emit such
// instructions and should also make it faster as we don't need any branch to
// decide between VEX3 vs XOP.
//            ____    ___
// [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP]
#define VALUE(X) ((X & 0x08) ? kX86ByteXop3 : kX86ByteVex3) | (0xF << 19) | (0x7 << 13)
static const uint32_t x86VEXPrefix[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
#undef VALUE

// Table that contains LL opcode field addressed by a register size / 16. It's
// used to propagate L.256 or L.512 when YMM or ZMM registers are used,
// respectively.
#define VALUE(X) (X & (64 >> 4)) ? Opcode::kLL_2 : \
                 (X & (32 >> 4)) ? Opcode::kLL_1 : Opcode::kLL_0
static const uint32_t x86LLBySizeDiv16[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
#undef VALUE

// Table that contains LL opcode field addressed by a register type. It's
// used to propagate L.256 or L.512 when YMM or ZMM registers are used,
// respectively.
#define VALUE(X) X == Reg::kTypeZmm ? Opcode::kLL_2 : \
                 X == Reg::kTypeYmm ? Opcode::kLL_1 : Opcode::kLL_0
static const uint32_t x86LLByRegType[] = { ASMJIT_LOOKUP_TABLE_16(VALUE, 0) };
#undef VALUE

// Table that contains a scale (shift left) based on 'TTWLL' field and
// the instruction's tuple-type (TT) field. The scale is then applied to
// the BASE-N stored in each opcode to calculate the final compressed
// displacement used by all EVEX encoded instructions.
template<uint32_t X>
struct X86CDisp8SHL_T {
  enum {
    TT = (X >> 3) << Opcode::kCDTT_Shift,
    LL = (X >> 0) & 0x3,
    W  = (X >> 2) & 0x1,

    kValue = (TT == Opcode::kCDTT_None ? ((LL==0) ? 0 : (LL==1) ? 0   : 0  ) :
              TT == Opcode::kCDTT_ByLL ? ((LL==0) ? 0 : (LL==1) ? 1   : 2  ) :
              TT == Opcode::kCDTT_T1W  ? ((LL==0) ? W : (LL==1) ? 1+W : 2+W) :
              TT == Opcode::kCDTT_DUP  ? ((LL==0) ? 0 : (LL==1) ? 2   : 3  ) : 0) << Opcode::kCDSHL_Shift
  };
};

#define VALUE(X) X86CDisp8SHL_T<X>::kValue
static const uint32_t x86CDisp8SHL[] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) };
#undef VALUE

// Table that contains MOD byte of a 16-bit [BASE + disp] address.
// 0xFF == Invalid.
static const uint8_t x86Mod16BaseTable[8] = {
  0xFF, // AX -> N/A.
  0xFF, // CX -> N/A.
  0xFF, // DX -> N/A.
  0x07, // BX -> 111.
  0xFF, // SP -> N/A.
  0x06, // BP -> 110.
  0x04, // SI -> 100.
  0x05  // DI -> 101.
};

// Table that contains MOD byte of a 16-bit [BASE + INDEX + disp] combination.
// 0xFF == Invalid.
template<uint32_t X>
struct X86Mod16BaseIndexTable_T {
  enum {
    B = X >> 3,
    I = X & 0x7,

    kValue = ((B == Gp::kIdBx && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBx)) ? 0x00 :
             ((B == Gp::kIdBx && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBx)) ? 0x01 :
             ((B == Gp::kIdBp && I == Gp::kIdSi) || (B == Gp::kIdSi && I == Gp::kIdBp)) ? 0x02 :
             ((B == Gp::kIdBp && I == Gp::kIdDi) || (B == Gp::kIdDi && I == Gp::kIdBp)) ? 0x03 : 0xFF
  };
};

#define VALUE(X) X86Mod16BaseIndexTable_T<X>::kValue
static const uint8_t x86Mod16BaseIndexTable[] = { ASMJIT_LOOKUP_TABLE_64(VALUE, 0) };
#undef VALUE
295
296 // ============================================================================
297 // [asmjit::x86::Assembler - Helpers]
298 // ============================================================================
299
300 static ASMJIT_INLINE bool x86IsJmpOrCall(uint32_t instId) noexcept {
301 return instId == Inst::kIdJmp || instId == Inst::kIdCall;
302 }
303
//! Tests whether `op` is a memory operand whose base register id equals
//! `base` and that carries no displacement.
static ASMJIT_INLINE bool x86IsImplicitMem(const Operand_& op, uint32_t base) noexcept {
  return op.isMem() && op.as<Mem>().baseId() == base && !op.as<Mem>().hasOffset();
}
307
//! Combine `regId` and `vvvvvId` into a single value (used by AVX and AVX-512).
static ASMJIT_INLINE uint32_t x86PackRegAndVvvvv(uint32_t regId, uint32_t vvvvvId) noexcept {
  return regId + (vvvvvId << kVexVVVVVShift);
}

//! Returns the `LL` opcode field for a vector memory operand, derived from
//! the type of its INDEX register (via `x86LLByRegType`).
static ASMJIT_INLINE uint32_t x86OpcodeLByVMem(const Operand_& op) noexcept {
  return x86LLByRegType[op.as<Mem>().indexType()];
}

//! Returns the `LL` opcode field selected by register `size` in bytes
//! (32 -> L.256, 64 -> L.512, see `x86LLBySizeDiv16`).
static ASMJIT_INLINE uint32_t x86OpcodeLBySize(uint32_t size) noexcept {
  return x86LLBySizeDiv16[size / 16];
}
320
321 //! Encode MOD byte.
322 static ASMJIT_INLINE uint32_t x86EncodeMod(uint32_t m, uint32_t o, uint32_t rm) noexcept {
323 ASMJIT_ASSERT(m <= 3);
324 ASMJIT_ASSERT(o <= 7);
325 ASMJIT_ASSERT(rm <= 7);
326 return (m << 6) + (o << 3) + rm;
327 }
328
329 //! Encode SIB byte.
330 static ASMJIT_INLINE uint32_t x86EncodeSib(uint32_t s, uint32_t i, uint32_t b) noexcept {
331 ASMJIT_ASSERT(s <= 3);
332 ASMJIT_ASSERT(i <= 7);
333 ASMJIT_ASSERT(b <= 7);
334 return (s << 6) + (i << 3) + b;
335 }
336
//! Tests whether the accumulated REX value mixes the invalid-REX marker
//! (`kX86ByteInvalidRex`) with actual REX bits, which means a REX prefix was
//! required in a mode that forbids it.
static ASMJIT_INLINE bool x86IsRexInvalid(uint32_t rex) noexcept {
  // Validates the following possibilities:
  //   REX == 0x00      -> OKAY (X86_32 / X86_64).
  //   REX == 0x40-0x4F -> OKAY (X86_64).
  //   REX == 0x80      -> OKAY (X86_32 mode, rex prefix not used).
  //   REX == 0x81-0xCF -> BAD  (X86_32 mode, rex prefix used).
  return rex > kX86ByteInvalidRex;
}

//! Sign-extends the low 32 bits of `imm` to the full width of `T`.
template<typename T>
static constexpr T x86SignExtendI32(T imm) noexcept { return T(int64_t(int32_t(imm & T(0xFFFFFFFF)))); }

//! Returns the alternative opcode of the instruction described by `info`
//! (looked up through `InstDB::_altOpcodeTable`).
static ASMJIT_INLINE uint32_t x86AltOpcodeOf(const InstDB::InstInfo* info) noexcept {
  return InstDB::_altOpcodeTable[info->_altOpcodeIndex];
}
352
353 // ============================================================================
354 // [asmjit::X86BufferWriter]
355 // ============================================================================
356
//! X86-specific code-buffer writer that adds prefix/opcode/immediate emission
//! helpers on top of the generic `CodeBufferWriter`.
class X86BufferWriter : public CodeBufferWriter {
public:
  ASMJIT_INLINE explicit X86BufferWriter(Assembler* a) noexcept
    : CodeBufferWriter(a) {}

  //! Emits the mandatory prefix byte (66/F3/F2/9B) selected by the opcode's
  //! PP field; emits nothing for PP index 0.
  ASMJIT_INLINE void emitPP(uint32_t opcode) noexcept {
    uint32_t ppIndex = (opcode              >> Opcode::kPP_Shift) &
                       (Opcode::kPP_FPUMask >> Opcode::kPP_Shift) ;
    emit8If(x86OpcodePP[ppIndex], ppIndex != 0);
  }

  //! Emits the 0..2 opcode escape bytes selected by the opcode's MM field,
  //! followed by the main opcode byte itself.
  ASMJIT_INLINE void emitMMAndOpcode(uint32_t opcode) noexcept {
    uint32_t mmIndex = (opcode & Opcode::kMM_Mask) >> Opcode::kMM_Shift;
    const X86OpcodeMM& mmCode = x86OpcodeMM[mmIndex];

    emit8If(mmCode.data[0], mmCode.size > 0);
    emit8If(mmCode.data[1], mmCode.size > 1);
    emit8(opcode);
  }

  //! Emits a segment-override prefix for `segmentId`; id 0 (no segment)
  //! emits nothing.
  ASMJIT_INLINE void emitSegmentOverride(uint32_t segmentId) noexcept {
    ASMJIT_ASSERT(segmentId < ASMJIT_ARRAY_SIZE(x86SegmentPrefix));

    FastUInt8 prefix = x86SegmentPrefix[segmentId];
    emit8If(prefix, prefix != 0);
  }

  //! Conditionally emits the 67H address-size override prefix.
  template<typename CondT>
  ASMJIT_INLINE void emitAddressOverride(CondT condition) noexcept {
    emit8If(0x67, condition);
  }

  //! Emits an immediate that is known to be either 1 or 4 bytes (or absent
  //! when `immSize` is 0), little-endian.
  ASMJIT_INLINE void emitImmByteOrDWord(uint64_t immValue, FastUInt8 immSize) noexcept {
    if (!immSize)
      return;

    ASMJIT_ASSERT(immSize == 1 || immSize == 4);

#if ASMJIT_ARCH_BITS >= 64
    uint64_t imm = uint64_t(immValue);
#else
    // 32-bit host: only the low DWORD can ever be emitted here.
    uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
#endif

    // Many instructions just use a single byte immediate, so make it fast.
    emit8(imm & 0xFFu);
    if (immSize == 1) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    imm >>= 8;
    emit8(imm & 0xFFu);
    imm >>= 8;
    emit8(imm & 0xFFu);
  }

  //! Emits an immediate of `immSize` bytes (1, 2, 4, or 8), little-endian.
  ASMJIT_INLINE void emitImmediate(uint64_t immValue, FastUInt8 immSize) noexcept {
    if (!immSize)
      return;

#if ASMJIT_ARCH_BITS >= 64
    uint64_t imm = uint64_t(immValue);
#else
    // 32-bit host: keep the low DWORD in a native register, the high DWORD
    // (8-byte immediates only) is recomputed from `immValue` at the end.
    uint32_t imm = uint32_t(immValue & 0xFFFFFFFFu);
#endif

    // Many instructions just use a single byte immediate, so make it fast.
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    imm >>= 8;
    emit8(imm & 0xFFu);
    if (--immSize == 0) return;

    // Can be 1, 2, 4 or 8 bytes, this handles the remaining high DWORD of an 8-byte immediate.
    ASMJIT_ASSERT(immSize == 4);

#if ASMJIT_ARCH_BITS >= 64
    imm >>= 8;
    emit32uLE(uint32_t(imm));
#else
    emit32uLE(uint32_t((uint64_t(immValue) >> 32) & 0xFFFFFFFFu));
#endif
  }
};
450
// If the operand is BPL|SPL|SIL|DIL|R8B-15B
//   - Force REX prefix
// If the operand is AH|BH|CH|DH
//   - patch its index from 0..3 to 4..7 as encoded by X86.
//   - Disallow REX prefix.
#define FIXUP_GPB(REG_OP, REG_ID)                               \
  do {                                                          \
    if (!static_cast<const Gp&>(REG_OP).isGpbHi()) {            \
      options |= (REG_ID >= 4) ? uint32_t(Inst::kOptionRex)     \
                               : uint32_t(0);                   \
    }                                                           \
    else {                                                      \
      options |= Inst::_kOptionInvalidRex;                      \
      REG_ID += 4;                                              \
    }                                                           \
  } while (0)

// Packs up to 4 operand types into a single signature value (3 bits per
// operand), matched against `isign3` in the emitter's dispatch.
#define ENC_OPS1(OP0) ((Operand::kOp##OP0))
#define ENC_OPS2(OP0, OP1) ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3))
#define ENC_OPS3(OP0, OP1, OP2) ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6))
#define ENC_OPS4(OP0, OP1, OP2, OP3) ((Operand::kOp##OP0) + ((Operand::kOp##OP1) << 3) + ((Operand::kOp##OP2) << 6) + ((Operand::kOp##OP3) << 9))
472
473 // ============================================================================
474 // [asmjit::x86::Assembler - Movabs Heuristics]
475 // ============================================================================
476
477 static ASMJIT_INLINE bool x86GetMovAbsInstSize64Bit(uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept {
478 uint32_t segmentPrefixSize = rmRel.segmentId() != 0;
479 uint32_t _66hPrefixSize = regSize == 2;
480 uint32_t rexPrefixSize = (regSize == 8) || ((options & Inst::kOptionRex) != 0);
481 uint32_t opCodeByteSize = 1;
482 uint32_t immediateSize = 8;
483
484 return segmentPrefixSize + _66hPrefixSize + rexPrefixSize + opCodeByteSize + immediateSize;
485 }
486
//! Decides the effective address type used by a MOVABS-style access: keeps
//! `kAddrTypeDefault` when the target offset is representable, otherwise
//! forces `kAddrTypeAbs` (64-bit absolute address form).
static ASMJIT_INLINE uint32_t x86GetMovAbsAddrType(Assembler* self, X86BufferWriter& writer, uint32_t regSize, uint32_t options, const Mem& rmRel) noexcept {
  uint32_t addrType = rmRel.addrType();
  int64_t addrValue = rmRel.offset();

  if (addrType == BaseMem::kAddrTypeDefault && !(options & Inst::kOptionModMR)) {
    if (self->is64Bit()) {
      uint64_t baseAddress = self->codeInfo().baseAddress();
      if (baseAddress != Globals::kNoBaseAddress && !rmRel.hasSegment()) {
        // Base address is known - compute the RIP right after this
        // instruction and check whether the target fits a signed 32-bit
        // RIP-relative displacement.
        uint32_t instructionSize = x86GetMovAbsInstSize64Bit(regSize, options, rmRel);
        uint64_t virtualOffset = uint64_t(writer.offsetFrom(self->_bufferData));
        uint64_t rip64 = baseAddress + self->_section->offset() + virtualOffset + instructionSize;
        uint64_t rel64 = uint64_t(addrValue) - rip64;

        if (!Support::isInt32(int64_t(rel64)))
          addrType = BaseMem::kAddrTypeAbs;
      }
      else {
        // No known base address (or a segment override is present) - only
        // the absolute offset itself can be checked against the signed
        // 32-bit range.
        if (!Support::isInt32(addrValue))
          addrType = BaseMem::kAddrTypeAbs;
      }
    }
    else {
      // Not a 64-bit target - always use the absolute form.
      addrType = BaseMem::kAddrTypeAbs;
    }
  }

  return addrType;
}
515
516 // ============================================================================
517 // [asmjit::x86::Assembler - Construction / Destruction]
518 // ============================================================================
519
//! Creates an `x86::Assembler` and, when `code` is non-null, attaches it to
//! the given `CodeHolder`.
Assembler::Assembler(CodeHolder* code) noexcept : BaseAssembler() {
  if (code)
    code->attach(this);
}
Assembler::~Assembler() noexcept {}
525
526 // ============================================================================
527 // [asmjit::x86::Assembler - Emit (Low-Level)]
528 // ============================================================================
529
530 ASMJIT_FAVOR_SPEED Error Assembler::_emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) {
531 constexpr uint32_t kVSHR_W = Opcode::kW_Shift - 23;
532 constexpr uint32_t kVSHR_PP = Opcode::kPP_Shift - 16;
533 constexpr uint32_t kVSHR_PP_EW = Opcode::kPP_Shift - 16;
534
535 constexpr uint32_t kRequiresSpecialHandling =
536 Inst::kOptionReserved | // Logging/Validation/Error.
537 Inst::kOptionRep | // REP/REPE prefix.
538 Inst::kOptionRepne | // REPNE prefix.
539 Inst::kOptionLock | // LOCK prefix.
540 Inst::kOptionXAcquire | // XACQUIRE prefix.
541 Inst::kOptionXRelease ; // XRELEASE prefix.
542
543 Error err;
544
545 Opcode opcode; // Instruction opcode.
546 uint32_t options; // Instruction options.
547 uint32_t isign3; // A combined signature of first 3 operands.
548
549 const Operand_* rmRel; // Memory operand or operand that holds Label|Imm.
550 uint32_t rmInfo; // Memory operand's info based on x86MemInfo.
551 uint32_t rbReg; // Memory base or modRM register.
552 uint32_t rxReg; // Memory index register.
553 uint32_t opReg; // ModR/M opcode or register id.
554
555 LabelEntry* label; // Label entry.
556 RelocEntry* re = nullptr; // Relocation entry.
557 int32_t relOffset; // Relative offset
558 FastUInt8 relSize = 0; // Relative size.
559 uint8_t* memOpAOMark = nullptr; // Marker that points before 'address-override prefix' is emitted.
560
561 int64_t immValue = 0; // Immediate value (must be 64-bit).
562 FastUInt8 immSize = 0; // Immediate size.
563
564 X86BufferWriter writer(this);
565
566 if (instId >= Inst::_kIdCount)
567 instId = 0;
568
569 const InstDB::InstInfo* instInfo = &InstDB::_instInfoTable[instId];
570 const InstDB::CommonInfo* commonInfo = &instInfo->commonInfo();
571
572 // Signature of the first 3 operands.
573 isign3 = o0.opType() + (o1.opType() << 3) + (o2.opType() << 6);
574
575 // Combine all instruction options and also check whether the instruction
576 // is valid. All options that require special handling (including invalid
577 // instruction) are handled by the next branch.
578 options = uint32_t(instId == 0);
579 options |= uint32_t((size_t)(_bufferEnd - writer.cursor()) < 16);
580 options |= uint32_t(instOptions() | globalInstOptions());
581
582 // Handle failure and rare cases first.
583 if (ASMJIT_UNLIKELY(options & kRequiresSpecialHandling)) {
584 if (ASMJIT_UNLIKELY(!_code))
585 return DebugUtils::errored(kErrorNotInitialized);
586
587 // Unknown instruction.
588 if (ASMJIT_UNLIKELY(instId == 0))
589 goto InvalidInstruction;
590
591 // Grow request, happens rarely.
592 err = writer.ensureSpace(this, 16);
593 if (ASMJIT_UNLIKELY(err))
594 goto Failed;
595
596 // Strict validation.
597 #ifndef ASMJIT_NO_VALIDATION
598 if (hasEmitterOption(kOptionStrictValidation)) {
599 Operand_ opArray[Globals::kMaxOpCount];
600
601 opArray[0].copyFrom(o0);
602 opArray[1].copyFrom(o1);
603 opArray[2].copyFrom(o2);
604 opArray[3].copyFrom(o3);
605
606 if (options & Inst::kOptionOp4Op5Used) {
607 opArray[4].copyFrom(_op4);
608 opArray[5].copyFrom(_op5);
609 }
610 else {
611 opArray[4].reset();
612 opArray[5].reset();
613 }
614
615 err = InstAPI::validate(archId(), BaseInst(instId, options, _extraReg), opArray, Globals::kMaxOpCount);
616 if (ASMJIT_UNLIKELY(err)) goto Failed;
617 }
618 #endif
619
620 uint32_t iFlags = instInfo->flags();
621
622 // LOCK, XACQUIRE, and XRELEASE prefixes.
623 if (options & Inst::kOptionLock) {
624 bool xAcqRel = (options & (Inst::kOptionXAcquire | Inst::kOptionXRelease)) != 0;
625
626 if (ASMJIT_UNLIKELY(!(iFlags & (InstDB::kFlagLock)) && !xAcqRel))
627 goto InvalidLockPrefix;
628
629 if (xAcqRel) {
630 if (ASMJIT_UNLIKELY((options & Inst::kOptionXAcquire) && !(iFlags & InstDB::kFlagXAcquire)))
631 goto InvalidXAcquirePrefix;
632
633 if (ASMJIT_UNLIKELY((options & Inst::kOptionXRelease) && !(iFlags & InstDB::kFlagXRelease)))
634 goto InvalidXReleasePrefix;
635
636 writer.emit8((options & Inst::kOptionXAcquire) ? 0xF2 : 0xF3);
637 }
638
639 writer.emit8(0xF0);
640 }
641
642 // REP and REPNE prefixes.
643 if (options & (Inst::kOptionRep | Inst::kOptionRepne)) {
644 if (ASMJIT_UNLIKELY(!(iFlags & InstDB::kFlagRep)))
645 goto InvalidRepPrefix;
646
647 if (_extraReg.isReg() && ASMJIT_UNLIKELY(_extraReg.group() != Reg::kGroupGp || _extraReg.id() != Gp::kIdCx))
648 goto InvalidRepPrefix;
649
650 writer.emit8((options & Inst::kOptionRepne) ? 0xF2 : 0xF3);
651 }
652 }
653
654 // This sequence seems to be the fastest.
655 opcode = InstDB::_mainOpcodeTable[instInfo->_mainOpcodeIndex];
656 opReg = opcode.extractO();
657 opcode |= instInfo->_mainOpcodeValue;
658
659 // --------------------------------------------------------------------------
660 // [Encoding Scope]
661 // --------------------------------------------------------------------------
662
663 switch (instInfo->_encoding) {
664 case InstDB::kEncodingNone:
665 goto EmitDone;
666
667 // ------------------------------------------------------------------------
668 // [X86]
669 // ------------------------------------------------------------------------
670
671 case InstDB::kEncodingX86Op:
672 goto EmitX86Op;
673
674 case InstDB::kEncodingX86Op_O_I8:
675 if (ASMJIT_UNLIKELY(isign3 != ENC_OPS1(Imm)))
676 goto InvalidInstruction;
677
678 immValue = o0.as<Imm>().u8();
679 immSize = 1;
680 ASMJIT_FALLTHROUGH;
681
682 case InstDB::kEncodingX86Op_O:
683 rbReg = 0;
684 goto EmitX86R;
685
686 case InstDB::kEncodingX86Op_xAddr:
687 if (ASMJIT_UNLIKELY(!o0.isReg()))
688 goto InvalidInstruction;
689
690 rmInfo = x86MemInfo[o0.as<Reg>().type()];
691 writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
692 goto EmitX86Op;
693
694 case InstDB::kEncodingX86Op_xAX:
695 if (isign3 == 0)
696 goto EmitX86Op;
697
698 if (isign3 == ENC_OPS1(Reg) && o0.id() == Gp::kIdAx)
699 goto EmitX86Op;
700 break;
701
702 case InstDB::kEncodingX86Op_xDX_xAX:
703 if (isign3 == 0)
704 goto EmitX86Op;
705
706 if (isign3 == ENC_OPS2(Reg, Reg) && o0.id() == Gp::kIdDx && o1.id() == Gp::kIdAx)
707 goto EmitX86Op;
708 break;
709
710 case InstDB::kEncodingX86Op_MemZAX:
711 if (isign3 == 0)
712 goto EmitX86Op;
713
714 rmRel = &o0;
715 if (isign3 == ENC_OPS1(Mem) && x86IsImplicitMem(o0, Gp::kIdAx))
716 goto EmitX86OpImplicitMem;
717
718 break;
719
720 case InstDB::kEncodingX86I_xAX:
721 // Implicit form.
722 if (isign3 == ENC_OPS1(Imm)) {
723 immValue = o0.as<Imm>().u8();
724 immSize = 1;
725 goto EmitX86Op;
726 }
727
728 // Explicit form.
729 if (isign3 == ENC_OPS2(Reg, Imm) && o0.id() == Gp::kIdAx) {
730 immValue = o1.as<Imm>().u8();
731 immSize = 1;
732 goto EmitX86Op;
733 }
734 break;
735
736 case InstDB::kEncodingX86M:
737 opcode.addPrefixBySize(o0.size());
738 ASMJIT_FALLTHROUGH;
739
740 case InstDB::kEncodingX86M_NoSize:
741 rbReg = o0.id();
742 if (isign3 == ENC_OPS1(Reg))
743 goto EmitX86R;
744
745 rmRel = &o0;
746 if (isign3 == ENC_OPS1(Mem))
747 goto EmitX86M;
748 break;
749
750 case InstDB::kEncodingX86M_GPB_MulDiv:
751 CaseX86M_GPB_MulDiv:
752 // Explicit form?
753 if (isign3 > 0x7) {
754 // [AX] <- [AX] div|mul r8.
755 if (isign3 == ENC_OPS2(Reg, Reg)) {
756 if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx) || !Reg::isGpb(o1)))
757 goto InvalidInstruction;
758
759 rbReg = o1.id();
760 FIXUP_GPB(o1, rbReg);
761 goto EmitX86R;
762 }
763
764 // [AX] <- [AX] div|mul m8.
765 if (isign3 == ENC_OPS2(Reg, Mem)) {
766 if (ASMJIT_UNLIKELY(!Reg::isGpw(o0, Gp::kIdAx)))
767 goto InvalidInstruction;
768
769 rmRel = &o1;
770 goto EmitX86M;
771 }
772
773 // [?DX:?AX] <- [?DX:?AX] div|mul r16|r32|r64
774 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
775 if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
776 goto InvalidInstruction;
777
778 opcode.addArithBySize(o0.size());
779 rbReg = o2.id();
780 goto EmitX86R;
781 }
782
783 // [?DX:?AX] <- [?DX:?AX] div|mul m16|m32|m64
784 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
785 if (ASMJIT_UNLIKELY(o0.size() != o1.size()))
786 goto InvalidInstruction;
787
788 opcode.addArithBySize(o0.size());
789 rmRel = &o2;
790 goto EmitX86M;
791 }
792
793 goto InvalidInstruction;
794 }
795
796 ASMJIT_FALLTHROUGH;
797
798 case InstDB::kEncodingX86M_GPB:
799 if (isign3 == ENC_OPS1(Reg)) {
800 opcode.addArithBySize(o0.size());
801 rbReg = o0.id();
802
803 if (o0.size() != 1)
804 goto EmitX86R;
805
806 FIXUP_GPB(o0, rbReg);
807 goto EmitX86R;
808 }
809
810 if (isign3 == ENC_OPS1(Mem)) {
811 if (ASMJIT_UNLIKELY(o0.size() == 0))
812 goto AmbiguousOperandSize;
813
814 opcode.addArithBySize(o0.size());
815 rmRel = &o0;
816 goto EmitX86M;
817 }
818 break;
819
820 case InstDB::kEncodingX86M_Only:
821 if (isign3 == ENC_OPS1(Mem)) {
822 rmRel = &o0;
823 goto EmitX86M;
824 }
825 break;
826
827 case InstDB::kEncodingX86M_Nop:
828 if (isign3 == ENC_OPS1(None))
829 goto EmitX86Op;
830
831 // Multi-byte NOP instruction "0F 1F /0".
832 opcode = Opcode::k000F00 | 0x1F;
833 opReg = 0;
834
835 if (isign3 == ENC_OPS1(Reg)) {
836 opcode.add66hBySize(o0.size());
837 rbReg = o0.id();
838 goto EmitX86R;
839 }
840
841 if (isign3 == ENC_OPS1(Mem)) {
842 opcode.add66hBySize(o0.size());
843 rmRel = &o0;
844 goto EmitX86M;
845 }
846 break;
847
848 case InstDB::kEncodingX86R_Native:
849 if (isign3 == ENC_OPS1(Reg)) {
850 rbReg = o0.id();
851 goto EmitX86R;
852 }
853 break;
854
855 case InstDB::kEncodingX86Rm:
856 opcode.addPrefixBySize(o0.size());
857 ASMJIT_FALLTHROUGH;
858
859 case InstDB::kEncodingX86Rm_NoSize:
860 if (isign3 == ENC_OPS2(Reg, Reg)) {
861 opReg = o0.id();
862 rbReg = o1.id();
863 goto EmitX86R;
864 }
865
866 if (isign3 == ENC_OPS2(Reg, Mem)) {
867 opReg = o0.id();
868 rmRel = &o1;
869 goto EmitX86M;
870 }
871 break;
872
873 case InstDB::kEncodingX86Rm_Raw66H:
874 // We normally emit either [66|F2|F3], this instruction requires 66+[F2|F3].
875 if (isign3 == ENC_OPS2(Reg, Reg)) {
876 opReg = o0.id();
877 rbReg = o1.id();
878
879 if (o0.size() == 2)
880 writer.emit8(0x66);
881 else
882 opcode.addWBySize(o0.size());
883 goto EmitX86R;
884 }
885
886 if (isign3 == ENC_OPS2(Reg, Mem)) {
887 opReg = o0.id();
888 rmRel = &o1;
889
890 if (o0.size() == 2)
891 writer.emit8(0x66);
892 else
893 opcode.addWBySize(o0.size());
894 goto EmitX86M;
895 }
896 break;
897
898 case InstDB::kEncodingX86Mr:
899 opcode.addPrefixBySize(o0.size());
900 ASMJIT_FALLTHROUGH;
901
902 case InstDB::kEncodingX86Mr_NoSize:
903 if (isign3 == ENC_OPS2(Reg, Reg)) {
904 rbReg = o0.id();
905 opReg = o1.id();
906 goto EmitX86R;
907 }
908
909 if (isign3 == ENC_OPS2(Mem, Reg)) {
910 rmRel = &o0;
911 opReg = o1.id();
912 goto EmitX86M;
913 }
914 break;
915
916 case InstDB::kEncodingX86Arith:
917 if (isign3 == ENC_OPS2(Reg, Reg)) {
918 opcode += 2;
919 opcode.addArithBySize(o0.size());
920
921 if (o0.size() != o1.size())
922 goto OperandSizeMismatch;
923
924 opReg = o0.id();
925 rbReg = o1.id();
926
927 if (o0.size() == 1) {
928 FIXUP_GPB(o0, opReg);
929 FIXUP_GPB(o1, rbReg);
930
931 if (!(options & Inst::kOptionModMR))
932 goto EmitX86R;
933
934 opcode -= 2;
935 std::swap(opReg, rbReg);
936 goto EmitX86R;
937 }
938 else {
939 if (!(options & Inst::kOptionModMR))
940 goto EmitX86R;
941
942 opcode -= 2;
943 std::swap(opReg, rbReg);
944 goto EmitX86R;
945 }
946 }
947
948 if (isign3 == ENC_OPS2(Reg, Mem)) {
949 opcode += 2;
950 opcode.addArithBySize(o0.size());
951
952 opReg = o0.id();
953 rmRel = &o1;
954
955 if (o0.size() != 1)
956 goto EmitX86M;
957
958 FIXUP_GPB(o0, opReg);
959 goto EmitX86M;
960 }
961
962 if (isign3 == ENC_OPS2(Mem, Reg)) {
963 opcode.addArithBySize(o1.size());
964 opReg = o1.id();
965 rmRel = &o0;
966
967 if (o1.size() != 1)
968 goto EmitX86M;
969
970 FIXUP_GPB(o1, opReg);
971 goto EmitX86M;
972 }
973
974 // The remaining instructions use 0x80 opcode.
975 opcode = 0x80;
976
977 if (isign3 == ENC_OPS2(Reg, Imm)) {
978 uint32_t size = o0.size();
979
980 rbReg = o0.id();
981 immValue = o1.as<Imm>().i64();
982
983 if (size == 1) {
984 FIXUP_GPB(o0, rbReg);
985 immSize = 1;
986 }
987 else {
988 if (size == 2) {
989 opcode |= Opcode::kPP_66;
990 }
991 else if (size == 4) {
992 // Sign extend so isInt8 returns the right result.
993 immValue = x86SignExtendI32<int64_t>(immValue);
994 }
995 else if (size == 8) {
996 bool canTransformTo32Bit = instId == Inst::kIdAnd && Support::isUInt32(immValue);
997
998 if (!Support::isInt32(immValue)) {
999 // We would do this by default when `kOptionOptimizedForSize` is
1000 // enabled, however, in this case we just force this as otherwise
1001 // we would have to fail.
1002 if (canTransformTo32Bit)
1003 size = 4;
1004 else
1005 goto InvalidImmediate;
1006 }
1007 else if (canTransformTo32Bit && hasEmitterOption(kOptionOptimizedForSize)) {
1008 // This is a code-size optimization.
1009 size = 4;
1010 }
1011
1012 opcode.addWBySize(size);
1013 }
1014
1015 immSize = FastUInt8(Support::min<uint32_t>(size, 4));
1016 if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
1017 immSize = 1;
1018 }
1019
1020 // Short form - AL, AX, EAX, RAX.
1021 if (rbReg == 0 && (size == 1 || immSize != 1) && !(options & Inst::kOptionLongForm)) {
1022 opcode &= Opcode::kPP_66 | Opcode::kW;
1023 opcode |= ((opReg << 3) | (0x04 + (size != 1)));
1024 immSize = FastUInt8(Support::min<uint32_t>(size, 4));
1025 goto EmitX86Op;
1026 }
1027
1028 opcode += size != 1 ? (immSize != 1 ? 1 : 3) : 0;
1029 goto EmitX86R;
1030 }
1031
1032 if (isign3 == ENC_OPS2(Mem, Imm)) {
1033 uint32_t memSize = o0.size();
1034
1035 if (ASMJIT_UNLIKELY(memSize == 0))
1036 goto AmbiguousOperandSize;
1037
1038 immValue = o1.as<Imm>().i64();
1039 immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));
1040
1041 // Sign extend so isInt8 returns the right result.
1042 if (memSize == 4)
1043 immValue = x86SignExtendI32<int64_t>(immValue);
1044
1045 if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
1046 immSize = 1;
1047
1048 opcode += memSize != 1 ? (immSize != 1 ? 1 : 3) : 0;
1049 opcode.addPrefixBySize(memSize);
1050
1051 rmRel = &o0;
1052 goto EmitX86M;
1053 }
1054 break;
1055
1056 case InstDB::kEncodingX86Bswap:
1057 if (isign3 == ENC_OPS1(Reg)) {
1058 if (ASMJIT_UNLIKELY(o0.size() == 1))
1059 goto InvalidInstruction;
1060
1061 opReg = o0.id();
1062 opcode.addPrefixBySize(o0.size());
1063 goto EmitX86OpReg;
1064 }
1065 break;
1066
1067 case InstDB::kEncodingX86Bt:
1068 if (isign3 == ENC_OPS2(Reg, Reg)) {
1069 opcode.addPrefixBySize(o1.size());
1070 opReg = o1.id();
1071 rbReg = o0.id();
1072 goto EmitX86R;
1073 }
1074
1075 if (isign3 == ENC_OPS2(Mem, Reg)) {
1076 opcode.addPrefixBySize(o1.size());
1077 opReg = o1.id();
1078 rmRel = &o0;
1079 goto EmitX86M;
1080 }
1081
1082 // The remaining instructions use the secondary opcode/r.
1083 immValue = o1.as<Imm>().i64();
1084 immSize = 1;
1085
1086 opcode = x86AltOpcodeOf(instInfo);
1087 opcode.addPrefixBySize(o0.size());
1088 opReg = opcode.extractO();
1089
1090 if (isign3 == ENC_OPS2(Reg, Imm)) {
1091 rbReg = o0.id();
1092 goto EmitX86R;
1093 }
1094
1095 if (isign3 == ENC_OPS2(Mem, Imm)) {
1096 if (ASMJIT_UNLIKELY(o0.size() == 0))
1097 goto AmbiguousOperandSize;
1098
1099 rmRel = &o0;
1100 goto EmitX86M;
1101 }
1102 break;
1103
1104 case InstDB::kEncodingX86Call:
1105 if (isign3 == ENC_OPS1(Reg)) {
1106 rbReg = o0.id();
1107 goto EmitX86R;
1108 }
1109
1110 rmRel = &o0;
1111 if (isign3 == ENC_OPS1(Mem))
1112 goto EmitX86M;
1113
1114 // Call with 32-bit displacement use 0xE8 opcode. Call with 8-bit
1115 // displacement is not encodable so the alternative opcode field
1116 // in X86DB must be zero.
1117 opcode = 0xE8;
1118 opReg = 0;
1119 goto EmitJmpCall;
1120
1121 case InstDB::kEncodingX86Cmpxchg: {
1122 // Convert explicit to implicit.
1123 if (isign3 & (0x7 << 6)) {
1124 if (!Reg::isGp(o2) || o2.id() != Gp::kIdAx)
1125 goto InvalidInstruction;
1126 isign3 &= 0x3F;
1127 }
1128
1129 if (isign3 == ENC_OPS2(Reg, Reg)) {
1130 if (o0.size() != o1.size())
1131 goto OperandSizeMismatch;
1132
1133 opcode.addArithBySize(o0.size());
1134 rbReg = o0.id();
1135 opReg = o1.id();
1136
1137 if (o0.size() != 1)
1138 goto EmitX86R;
1139
1140 FIXUP_GPB(o0, rbReg);
1141 FIXUP_GPB(o1, opReg);
1142 goto EmitX86R;
1143 }
1144
1145 if (isign3 == ENC_OPS2(Mem, Reg)) {
1146 opcode.addArithBySize(o1.size());
1147 opReg = o1.id();
1148 rmRel = &o0;
1149
1150 if (o1.size() != 1)
1151 goto EmitX86M;
1152
1153 FIXUP_GPB(o0, opReg);
1154 goto EmitX86M;
1155 }
1156 break;
1157 }
1158
1159 case InstDB::kEncodingX86Cmpxchg8b_16b: {
1160 if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
1161 if (o3.isReg() && _op4.isReg()) {
1162 rmRel = &o0;
1163 goto EmitX86M;
1164 }
1165 }
1166
1167 if (isign3 == ENC_OPS1(Mem)) {
1168 rmRel = &o0;
1169 goto EmitX86M;
1170 }
1171 break;
1172 }
1173
1174 case InstDB::kEncodingX86Crc:
1175 opReg = o0.id();
1176 opcode.addWBySize(o0.size());
1177
1178 if (isign3 == ENC_OPS2(Reg, Reg)) {
1179 rbReg = o1.id();
1180
1181 if (o1.size() == 1) {
1182 FIXUP_GPB(o1, rbReg);
1183 goto EmitX86R;
1184 }
1185 else {
1186 // This seems to be the only exception of encoding '66F2' prefix.
1187 if (o1.size() == 2) writer.emit8(0x66);
1188
1189 opcode.add(1);
1190 goto EmitX86R;
1191 }
1192 }
1193
1194 if (isign3 == ENC_OPS2(Reg, Mem)) {
1195 rmRel = &o1;
1196 if (o1.size() == 0)
1197 goto AmbiguousOperandSize;
1198
1199 // This seems to be the only exception of encoding '66F2' prefix.
1200 if (o1.size() == 2) writer.emit8(0x66);
1201
1202 opcode += o1.size() != 1;
1203 goto EmitX86M;
1204 }
1205 break;
1206
1207 case InstDB::kEncodingX86Enter:
1208 if (isign3 == ENC_OPS2(Imm, Imm)) {
1209 uint32_t iw = o0.as<Imm>().u16();
1210 uint32_t ib = o1.as<Imm>().u8();
1211
1212 immValue = iw | (ib << 16);
1213 immSize = 3;
1214 goto EmitX86Op;
1215 }
1216 break;
1217
1218 case InstDB::kEncodingX86Imul:
1219 // First process all forms distinct of `kEncodingX86M_OptB_MulDiv`.
1220 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
1221 opcode = 0x6B;
1222 opcode.addPrefixBySize(o0.size());
1223
1224 immValue = o2.as<Imm>().i64();
1225 immSize = 1;
1226
1227 if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1228 opcode -= 2;
1229 immSize = o0.size() == 2 ? 2 : 4;
1230 }
1231
1232 opReg = o0.id();
1233 rbReg = o1.id();
1234
1235 goto EmitX86R;
1236 }
1237
1238 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
1239 opcode = 0x6B;
1240 opcode.addPrefixBySize(o0.size());
1241
1242 immValue = o2.as<Imm>().i64();
1243 immSize = 1;
1244
1245 // Sign extend so isInt8 returns the right result.
1246 if (o0.size() == 4)
1247 immValue = x86SignExtendI32<int64_t>(immValue);
1248
1249 if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1250 opcode -= 2;
1251 immSize = o0.size() == 2 ? 2 : 4;
1252 }
1253
1254 opReg = o0.id();
1255 rmRel = &o1;
1256
1257 goto EmitX86M;
1258 }
1259
1260 if (isign3 == ENC_OPS2(Reg, Reg)) {
1261 // Must be explicit 'ax, r8' form.
1262 if (o1.size() == 1)
1263 goto CaseX86M_GPB_MulDiv;
1264
1265 if (o0.size() != o1.size())
1266 goto OperandSizeMismatch;
1267
1268 opReg = o0.id();
1269 rbReg = o1.id();
1270
1271 opcode = Opcode::k000F00 | 0xAF;
1272 opcode.addPrefixBySize(o0.size());
1273 goto EmitX86R;
1274 }
1275
1276 if (isign3 == ENC_OPS2(Reg, Mem)) {
1277 // Must be explicit 'ax, m8' form.
1278 if (o1.size() == 1)
1279 goto CaseX86M_GPB_MulDiv;
1280
1281 opReg = o0.id();
1282 rmRel = &o1;
1283
1284 opcode = Opcode::k000F00 | 0xAF;
1285 opcode.addPrefixBySize(o0.size());
1286 goto EmitX86M;
1287 }
1288
1289 // Shorthand to imul 'reg, reg, imm'.
1290 if (isign3 == ENC_OPS2(Reg, Imm)) {
1291 opcode = 0x6B;
1292 opcode.addPrefixBySize(o0.size());
1293
1294 immValue = o1.as<Imm>().i64();
1295 immSize = 1;
1296
1297 // Sign extend so isInt8 returns the right result.
1298 if (o0.size() == 4)
1299 immValue = x86SignExtendI32<int64_t>(immValue);
1300
1301 if (!Support::isInt8(immValue) || (options & Inst::kOptionLongForm)) {
1302 opcode -= 2;
1303 immSize = o0.size() == 2 ? 2 : 4;
1304 }
1305
1306 opReg = rbReg = o0.id();
1307 goto EmitX86R;
1308 }
1309
1310 // Try implicit form.
1311 goto CaseX86M_GPB_MulDiv;
1312
1313 case InstDB::kEncodingX86In:
1314 if (isign3 == ENC_OPS2(Reg, Imm)) {
1315 if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
1316 goto InvalidInstruction;
1317
1318 immValue = o1.as<Imm>().u8();
1319 immSize = 1;
1320
1321 opcode = x86AltOpcodeOf(instInfo) + (o0.size() != 1);
1322 opcode.add66hBySize(o0.size());
1323 goto EmitX86Op;
1324 }
1325
1326 if (isign3 == ENC_OPS2(Reg, Reg)) {
1327 if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx || o1.id() != Gp::kIdDx))
1328 goto InvalidInstruction;
1329
1330 opcode += o0.size() != 1;
1331 opcode.add66hBySize(o0.size());
1332 goto EmitX86Op;
1333 }
1334 break;
1335
1336 case InstDB::kEncodingX86Ins:
1337 if (isign3 == ENC_OPS2(Mem, Reg)) {
1338 if (ASMJIT_UNLIKELY(!x86IsImplicitMem(o0, Gp::kIdDi) || o1.id() != Gp::kIdDx))
1339 goto InvalidInstruction;
1340
1341 uint32_t size = o0.size();
1342 if (ASMJIT_UNLIKELY(size == 0))
1343 goto AmbiguousOperandSize;
1344
1345 rmRel = &o0;
1346 opcode += (size != 1);
1347
1348 opcode.add66hBySize(size);
1349 goto EmitX86OpImplicitMem;
1350 }
1351 break;
1352
1353 case InstDB::kEncodingX86IncDec:
1354 if (isign3 == ENC_OPS1(Reg)) {
1355 rbReg = o0.id();
1356
1357 if (o0.size() == 1) {
1358 FIXUP_GPB(o0, rbReg);
1359 goto EmitX86R;
1360 }
1361
1362 if (is32Bit()) {
1363 // INC r16|r32 is only encodable in 32-bit mode (collides with REX).
1364 opcode = x86AltOpcodeOf(instInfo) + (rbReg & 0x07);
1365 opcode.add66hBySize(o0.size());
1366 goto EmitX86Op;
1367 }
1368 else {
1369 opcode.addArithBySize(o0.size());
1370 goto EmitX86R;
1371 }
1372 }
1373
1374 if (isign3 == ENC_OPS1(Mem)) {
1375 opcode.addArithBySize(o0.size());
1376 rmRel = &o0;
1377 goto EmitX86M;
1378 }
1379 break;
1380
1381 case InstDB::kEncodingX86Int:
1382 if (isign3 == ENC_OPS1(Imm)) {
1383 immValue = o0.as<Imm>().i64();
1384 immSize = 1;
1385 goto EmitX86Op;
1386 }
1387 break;
1388
1389 case InstDB::kEncodingX86Jcc:
1390 if (_emitterOptions & kOptionPredictedJumps) {
1391 if (options & Inst::kOptionTaken)
1392 writer.emit8(0x3E);
1393 if (options & Inst::kOptionNotTaken)
1394 writer.emit8(0x2E);
1395 }
1396
1397 rmRel = &o0;
1398 opReg = 0;
1399 goto EmitJmpCall;
1400
1401 case InstDB::kEncodingX86JecxzLoop:
1402 rmRel = &o0;
1403 // Explicit jecxz|loop [r|e]cx, dst
1404 if (o0.isReg()) {
1405 if (ASMJIT_UNLIKELY(!Reg::isGp(o0, Gp::kIdCx)))
1406 goto InvalidInstruction;
1407
1408 writer.emitAddressOverride((is32Bit() && o0.size() == 2) || (is64Bit() && o0.size() == 4));
1409 rmRel = &o1;
1410 }
1411
1412 opReg = 0;
1413 goto EmitJmpCall;
1414
1415 case InstDB::kEncodingX86Jmp:
1416 if (isign3 == ENC_OPS1(Reg)) {
1417 rbReg = o0.id();
1418 goto EmitX86R;
1419 }
1420
1421 rmRel = &o0;
1422 if (isign3 == ENC_OPS1(Mem))
1423 goto EmitX86M;
1424
1425 // Jump encoded with 32-bit displacement use 0xE9 opcode. Jump encoded
1426 // with 8-bit displacement's opcode is stored as an alternative opcode.
1427 opcode = 0xE9;
1428 opReg = 0;
1429 goto EmitJmpCall;
1430
1431 case InstDB::kEncodingX86JmpRel:
1432 rmRel = &o0;
1433 goto EmitJmpCall;
1434
1435 case InstDB::kEncodingX86Lea:
1436 if (isign3 == ENC_OPS2(Reg, Mem)) {
1437 opcode.addPrefixBySize(o0.size());
1438 opReg = o0.id();
1439 rmRel = &o1;
1440 goto EmitX86M;
1441 }
1442 break;
1443
1444 case InstDB::kEncodingX86Mov:
1445 // Reg <- Reg
1446 if (isign3 == ENC_OPS2(Reg, Reg)) {
1447 opReg = o0.id();
1448 rbReg = o1.id();
1449
1450 // Asmjit uses segment registers indexed from 1 to 6, leaving zero as
1451 // "no segment register used". We have to fix this (decrement the index
1452 // of the register) when emitting MOV instructions which move to/from
1453 // a segment register. The segment register is always `opReg`, because
1454 // the MOV instruction uses either RM or MR encoding.
1455
1456 // GP <- ??
1457 if (Reg::isGp(o0)) {
1458 // GP <- GP
1459 if (Reg::isGp(o1)) {
1460 uint32_t size0 = o0.size();
1461 uint32_t size1 = o1.size();
1462
1463 if (size0 != size1) {
1464 // We allow 'mov r64, r32' as it's basically zero-extend.
1465 if (size0 == 8 && size1 == 4)
1466 size0 = 4; // Zero extend, don't promote to 64-bit.
1467 else
1468 goto InvalidInstruction;
1469 }
1470
1471 if (size0 == 1) {
1472 FIXUP_GPB(o0, opReg);
1473 FIXUP_GPB(o1, rbReg);
1474 opcode = 0x8A;
1475
1476 if (!(options & Inst::kOptionModMR))
1477 goto EmitX86R;
1478
1479 opcode -= 2;
1480 std::swap(opReg, rbReg);
1481 goto EmitX86R;
1482 }
1483 else {
1484 opcode = 0x8B;
1485 opcode.addPrefixBySize(size0);
1486
1487 if (!(options & Inst::kOptionModMR))
1488 goto EmitX86R;
1489
1490 opcode -= 2;
1491 std::swap(opReg, rbReg);
1492 goto EmitX86R;
1493 }
1494 }
1495
1496 opReg = rbReg;
1497 rbReg = o0.id();
1498
1499 // GP <- SReg
1500 if (Reg::isSReg(o1)) {
1501 opcode = 0x8C;
1502 opcode.addPrefixBySize(o0.size());
1503 opReg--;
1504 goto EmitX86R;
1505 }
1506
1507 // GP <- CReg
1508 if (Reg::isCReg(o1)) {
1509 opcode = Opcode::k000F00 | 0x20;
1510
1511 // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
1512 if ((opReg & 0x8) && is32Bit()) {
1513 writer.emit8(0xF0);
1514 opReg &= 0x7;
1515 }
1516 goto EmitX86R;
1517 }
1518
1519 // GP <- DReg
1520 if (Reg::isDReg(o1)) {
1521 opcode = Opcode::k000F00 | 0x21;
1522 goto EmitX86R;
1523 }
1524 }
1525 else {
1526 // ?? <- GP
1527 if (!Reg::isGp(o1))
1528 goto InvalidInstruction;
1529
1530 // SReg <- GP
1531 if (Reg::isSReg(o0)) {
1532 opcode = 0x8E;
1533 opcode.addPrefixBySize(o1.size());
1534 opReg--;
1535 goto EmitX86R;
1536 }
1537
1538 // CReg <- GP
1539 if (Reg::isCReg(o0)) {
1540 opcode = Opcode::k000F00 | 0x22;
1541
1542 // Use `LOCK MOV` in 32-bit mode if CR8+ register is accessed (AMD extension).
1543 if ((opReg & 0x8) && is32Bit()) {
1544 writer.emit8(0xF0);
1545 opReg &= 0x7;
1546 }
1547 goto EmitX86R;
1548 }
1549
1550 // DReg <- GP
1551 if (Reg::isDReg(o0)) {
1552 opcode = Opcode::k000F00 | 0x23;
1553 goto EmitX86R;
1554 }
1555 }
1556
1557 goto InvalidInstruction;
1558 }
1559
1560 if (isign3 == ENC_OPS2(Reg, Mem)) {
1561 opReg = o0.id();
1562 rmRel = &o1;
1563
1564 // SReg <- Mem
1565 if (Reg::isSReg(o0)) {
1566 opcode = 0x8E;
1567 opcode.addPrefixBySize(o1.size());
1568 opReg--;
1569 goto EmitX86M;
1570 }
1571 // Reg <- Mem
1572 else {
1573 opcode = 0;
1574 opcode.addArithBySize(o0.size());
1575
1576 if (o0.size() == 1)
1577 FIXUP_GPB(o0, opReg);
1578
1579 // Handle a special form of `mov al|ax|eax|rax, [ptr64]` that doesn't use MOD.
1580 if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
1581 immValue = rmRel->as<Mem>().offset();
1582 if (x86GetMovAbsAddrType(this, writer, o0.size(), options, rmRel->as<Mem>()) == BaseMem::kAddrTypeAbs) {
1583 opcode += 0xA0;
1584 goto EmitX86OpMovAbs;
1585 }
1586 }
1587
1588 opcode += 0x8A;
1589 goto EmitX86M;
1590 }
1591 }
1592
1593 if (isign3 == ENC_OPS2(Mem, Reg)) {
1594 opReg = o1.id();
1595 rmRel = &o0;
1596
1597 // Mem <- SReg
1598 if (Reg::isSReg(o1)) {
1599 opcode = 0x8C;
1600 opcode.addPrefixBySize(o0.size());
1601 opReg--;
1602 goto EmitX86M;
1603 }
1604 // Mem <- Reg
1605 else {
1606 opcode = 0;
1607 opcode.addArithBySize(o1.size());
1608
1609 if (o1.size() == 1)
1610 FIXUP_GPB(o1, opReg);
1611
1612 // Handle a special form of `mov [ptr64], al|ax|eax|rax` that doesn't use MOD.
1613 if (opReg == Gp::kIdAx && !rmRel->as<Mem>().hasBaseOrIndex()) {
1614 immValue = rmRel->as<Mem>().offset();
1615 if (x86GetMovAbsAddrType(this, writer, o1.size(), options, rmRel->as<Mem>()) == BaseMem::kAddrTypeAbs) {
1616 opcode += 0xA2;
1617 goto EmitX86OpMovAbs;
1618 }
1619 }
1620
1621 opcode += 0x88;
1622 goto EmitX86M;
1623 }
1624 }
1625
1626 if (isign3 == ENC_OPS2(Reg, Imm)) {
1627 opReg = o0.id();
1628 immSize = FastUInt8(o0.size());
1629
1630 if (immSize == 1) {
1631 FIXUP_GPB(o0, opReg);
1632
1633 opcode = 0xB0;
1634 immValue = o1.as<Imm>().u8();
1635 goto EmitX86OpReg;
1636 }
1637 else {
1638 // 64-bit immediate in 64-bit mode is allowed.
1639 immValue = o1.as<Imm>().i64();
1640
1641 // Optimize the instruction size by using a 32-bit immediate if possible.
1642 if (immSize == 8 && !(options & Inst::kOptionLongForm)) {
1643 if (Support::isUInt32(immValue) && hasEmitterOption(kOptionOptimizedForSize)) {
1644 // Zero-extend by using a 32-bit GPD destination instead of a 64-bit GPQ.
1645 immSize = 4;
1646 }
1647 else if (Support::isInt32(immValue)) {
1648 // Sign-extend, uses 'C7 /0' opcode.
1649 rbReg = opReg;
1650
1651 opcode = Opcode::kW | 0xC7;
1652 opReg = 0;
1653
1654 immSize = 4;
1655 goto EmitX86R;
1656 }
1657 }
1658
1659 opcode = 0xB8;
1660 opcode.addPrefixBySize(immSize);
1661 goto EmitX86OpReg;
1662 }
1663 }
1664
1665 if (isign3 == ENC_OPS2(Mem, Imm)) {
1666 uint32_t memSize = o0.size();
1667 if (ASMJIT_UNLIKELY(memSize == 0))
1668 goto AmbiguousOperandSize;
1669
1670 opcode = 0xC6 + (memSize != 1);
1671 opcode.addPrefixBySize(memSize);
1672 opReg = 0;
1673 rmRel = &o0;
1674
1675 immValue = o1.as<Imm>().i64();
1676 immSize = FastUInt8(Support::min<uint32_t>(memSize, 4));
1677 goto EmitX86M;
1678 }
1679 break;
1680
1681 case InstDB::kEncodingX86MovsxMovzx:
1682 opcode.add(o1.size() != 1);
1683 opcode.addPrefixBySize(o0.size());
1684
1685 if (isign3 == ENC_OPS2(Reg, Reg)) {
1686 opReg = o0.id();
1687 rbReg = o1.id();
1688
1689 if (o1.size() != 1)
1690 goto EmitX86R;
1691
1692 FIXUP_GPB(o1, rbReg);
1693 goto EmitX86R;
1694 }
1695
1696 if (isign3 == ENC_OPS2(Reg, Mem)) {
1697 opReg = o0.id();
1698 rmRel = &o1;
1699 goto EmitX86M;
1700 }
1701 break;
1702
1703 case InstDB::kEncodingX86MovntiMovdiri:
1704 if (isign3 == ENC_OPS2(Mem, Reg)) {
1705 opcode.addWIf(Reg::isGpq(o1));
1706
1707 opReg = o1.id();
1708 rmRel = &o0;
1709 goto EmitX86M;
1710 }
1711 break;
1712
1713 case InstDB::kEncodingX86EnqcmdMovdir64b:
1714 if (isign3 == ENC_OPS2(Mem, Mem)) {
1715 const Mem& m0 = o0.as<Mem>();
1716 // This is the only required validation, the rest is handled afterwards.
1717 if (ASMJIT_UNLIKELY(m0.baseType() != o1.as<Mem>().baseType() ||
1718 m0.hasIndex() ||
1719 m0.hasOffset() ||
1720 (m0.hasSegment() && m0.segmentId() != SReg::kIdEs)))
1721 goto InvalidInstruction;
1722
1723 // The first memory operand is passed via register, the second memory operand is RM.
1724 opReg = o0.as<Mem>().baseId();
1725 rmRel = &o1;
1726 goto EmitX86M;
1727 }
1728 break;
1729
1730 case InstDB::kEncodingX86Out:
1731 if (isign3 == ENC_OPS2(Imm, Reg)) {
1732 if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdAx))
1733 goto InvalidInstruction;
1734
1735 opcode = x86AltOpcodeOf(instInfo) + (o1.size() != 1);
1736 opcode.add66hBySize(o1.size());
1737
1738 immValue = o0.as<Imm>().u8();
1739 immSize = 1;
1740 goto EmitX86Op;
1741 }
1742
1743 if (isign3 == ENC_OPS2(Reg, Reg)) {
1744 if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || o1.id() != Gp::kIdAx))
1745 goto InvalidInstruction;
1746
1747 opcode.add(o1.size() != 1);
1748 opcode.add66hBySize(o1.size());
1749 goto EmitX86Op;
1750 }
1751 break;
1752
1753 case InstDB::kEncodingX86Outs:
1754 if (isign3 == ENC_OPS2(Reg, Mem)) {
1755 if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdDx || !x86IsImplicitMem(o1, Gp::kIdSi)))
1756 goto InvalidInstruction;
1757
1758 uint32_t size = o1.size();
1759 if (ASMJIT_UNLIKELY(size == 0))
1760 goto AmbiguousOperandSize;
1761
1762 rmRel = &o1;
1763 opcode.add(size != 1);
1764 opcode.add66hBySize(size);
1765 goto EmitX86OpImplicitMem;
1766 }
1767 break;
1768
1769 case InstDB::kEncodingX86Push:
1770 if (isign3 == ENC_OPS1(Reg)) {
1771 if (Reg::isSReg(o0)) {
1772 uint32_t segment = o0.id();
1773 if (ASMJIT_UNLIKELY(segment >= SReg::kIdCount))
1774 goto InvalidSegment;
1775
1776 opcode = x86OpcodePushSReg[segment];
1777 goto EmitX86Op;
1778 }
1779 else {
1780 goto CaseX86PushPop_Gp;
1781 }
1782 }
1783
1784 if (isign3 == ENC_OPS1(Imm)) {
1785 immValue = o0.as<Imm>().i64();
1786 immSize = 4;
1787
1788 if (Support::isInt8(immValue) && !(options & Inst::kOptionLongForm))
1789 immSize = 1;
1790
1791 opcode = immSize == 1 ? 0x6A : 0x68;
1792 goto EmitX86Op;
1793 }
1794 ASMJIT_FALLTHROUGH;
1795
1796 case InstDB::kEncodingX86Pop:
1797 if (isign3 == ENC_OPS1(Reg)) {
1798 if (Reg::isSReg(o0)) {
1799 uint32_t segment = o0.id();
1800 if (ASMJIT_UNLIKELY(segment == SReg::kIdCs || segment >= SReg::kIdCount))
1801 goto InvalidSegment;
1802
1803 opcode = x86OpcodePopSReg[segment];
1804 goto EmitDone;
1805 }
1806 else {
1807 CaseX86PushPop_Gp:
1808 // We allow 2 byte, 4 byte, and 8 byte register sizes, although PUSH
1809 // and POP only allow 2 bytes or native size. On 64-bit we simply
1810 // PUSH/POP 64-bit register even if 32-bit register was given.
1811 if (ASMJIT_UNLIKELY(o0.size() < 2))
1812 goto InvalidInstruction;
1813
1814 opcode = x86AltOpcodeOf(instInfo);
1815 opcode.add66hBySize(o0.size());
1816 opReg = o0.id();
1817 goto EmitX86OpReg;
1818 }
1819 }
1820
1821 if (isign3 == ENC_OPS1(Mem)) {
1822 if (ASMJIT_UNLIKELY(o0.size() == 0))
1823 goto AmbiguousOperandSize;
1824
1825 if (ASMJIT_UNLIKELY(o0.size() != 2 && o0.size() != gpSize()))
1826 goto InvalidInstruction;
1827
1828 opcode.add66hBySize(o0.size());
1829 rmRel = &o0;
1830 goto EmitX86M;
1831 }
1832 break;
1833
1834 case InstDB::kEncodingX86Ret:
1835 if (isign3 == 0) {
1836 // 'ret' without immediate, change C2 to C3.
1837 opcode.add(1);
1838 goto EmitX86Op;
1839 }
1840
1841 if (isign3 == ENC_OPS1(Imm)) {
1842 immValue = o0.as<Imm>().i64();
1843 if (immValue == 0 && !(options & Inst::kOptionLongForm)) {
1844 // 'ret' without immediate, change C2 to C3.
1845 opcode.add(1);
1846 goto EmitX86Op;
1847 }
1848 else {
1849 immSize = 2;
1850 goto EmitX86Op;
1851 }
1852 }
1853 break;
1854
1855 case InstDB::kEncodingX86Rot:
1856 if (o0.isReg()) {
1857 opcode.addArithBySize(o0.size());
1858 rbReg = o0.id();
1859
1860 if (o0.size() == 1)
1861 FIXUP_GPB(o0, rbReg);
1862
1863 if (isign3 == ENC_OPS2(Reg, Reg)) {
1864 if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
1865 goto InvalidInstruction;
1866
1867 opcode += 2;
1868 goto EmitX86R;
1869 }
1870
1871 if (isign3 == ENC_OPS2(Reg, Imm)) {
1872 immValue = o1.as<Imm>().i64() & 0xFF;
1873 immSize = 0;
1874
1875 if (immValue == 1 && !(options & Inst::kOptionLongForm))
1876 goto EmitX86R;
1877
1878 opcode -= 0x10;
1879 immSize = 1;
1880 goto EmitX86R;
1881 }
1882 }
1883 else {
1884 opcode.addArithBySize(o0.size());
1885
1886 if (isign3 == ENC_OPS2(Mem, Reg)) {
1887 if (ASMJIT_UNLIKELY(o1.id() != Gp::kIdCx))
1888 goto InvalidInstruction;
1889
1890 opcode += 2;
1891 rmRel = &o0;
1892 goto EmitX86M;
1893 }
1894
1895 if (isign3 == ENC_OPS2(Mem, Imm)) {
1896 if (ASMJIT_UNLIKELY(o0.size() == 0))
1897 goto AmbiguousOperandSize;
1898
1899 rmRel = &o0;
1900 immValue = o1.as<Imm>().i64() & 0xFF;
1901 immSize = 0;
1902
1903 if (immValue == 1 && !(options & Inst::kOptionLongForm))
1904 goto EmitX86M;
1905
1906 opcode -= 0x10;
1907 immSize = 1;
1908 goto EmitX86M;
1909 }
1910 }
1911 break;
1912
1913 case InstDB::kEncodingX86Set:
1914 if (isign3 == ENC_OPS1(Reg)) {
1915 rbReg = o0.id();
1916 FIXUP_GPB(o0, rbReg);
1917 goto EmitX86R;
1918 }
1919
1920 if (isign3 == ENC_OPS1(Mem)) {
1921 rmRel = &o0;
1922 goto EmitX86M;
1923 }
1924 break;
1925
1926 case InstDB::kEncodingX86ShldShrd:
1927 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
1928 opcode.addPrefixBySize(o0.size());
1929 opReg = o1.id();
1930 rbReg = o0.id();
1931
1932 immValue = o2.as<Imm>().i64();
1933 immSize = 1;
1934 goto EmitX86R;
1935 }
1936
1937 if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
1938 opcode.addPrefixBySize(o1.size());
1939 opReg = o1.id();
1940 rmRel = &o0;
1941
1942 immValue = o2.as<Imm>().i64();
1943 immSize = 1;
1944 goto EmitX86M;
1945 }
1946
1947 // The following instructions use opcode + 1.
1948 opcode.add(1);
1949
1950 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
1951 if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
1952 goto InvalidInstruction;
1953
1954 opcode.addPrefixBySize(o0.size());
1955 opReg = o1.id();
1956 rbReg = o0.id();
1957 goto EmitX86R;
1958 }
1959
1960 if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
1961 if (ASMJIT_UNLIKELY(o2.id() != Gp::kIdCx))
1962 goto InvalidInstruction;
1963
1964 opcode.addPrefixBySize(o1.size());
1965 opReg = o1.id();
1966 rmRel = &o0;
1967 goto EmitX86M;
1968 }
1969 break;
1970
1971 case InstDB::kEncodingX86StrRm:
1972 if (isign3 == ENC_OPS2(Reg, Mem)) {
1973 rmRel = &o1;
1974 if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o0.as<Reg>(), Gp::kIdAx)))
1975 goto InvalidInstruction;
1976
1977 uint32_t size = o0.size();
1978 if (o1.hasSize() && ASMJIT_UNLIKELY(o1.size() != size))
1979 goto OperandSizeMismatch;
1980
1981 opcode.addArithBySize(size);
1982 goto EmitX86OpImplicitMem;
1983 }
1984 break;
1985
1986 case InstDB::kEncodingX86StrMr:
1987 if (isign3 == ENC_OPS2(Mem, Reg)) {
1988 rmRel = &o0;
1989 if (ASMJIT_UNLIKELY(rmRel->as<Mem>().offsetLo32() || !Reg::isGp(o1.as<Reg>(), Gp::kIdAx)))
1990 goto InvalidInstruction;
1991
1992 uint32_t size = o1.size();
1993 if (o0.hasSize() && ASMJIT_UNLIKELY(o0.size() != size))
1994 goto OperandSizeMismatch;
1995
1996 opcode.addArithBySize(size);
1997 goto EmitX86OpImplicitMem;
1998 }
1999 break;
2000
2001 case InstDB::kEncodingX86StrMm:
2002 if (isign3 == ENC_OPS2(Mem, Mem)) {
2003 if (ASMJIT_UNLIKELY(o0.as<Mem>().baseAndIndexTypes() !=
2004 o1.as<Mem>().baseAndIndexTypes()))
2005 goto InvalidInstruction;
2006
2007 rmRel = &o1;
2008 if (ASMJIT_UNLIKELY(o0.as<Mem>().hasOffset()))
2009 goto InvalidInstruction;
2010
2011 uint32_t size = o1.size();
2012 if (ASMJIT_UNLIKELY(size == 0))
2013 goto AmbiguousOperandSize;
2014
2015 if (ASMJIT_UNLIKELY(o0.size() != size))
2016 goto OperandSizeMismatch;
2017
2018 opcode.addArithBySize(size);
2019 goto EmitX86OpImplicitMem;
2020 }
2021 break;
2022
2023 case InstDB::kEncodingX86Test:
2024 if (isign3 == ENC_OPS2(Reg, Reg)) {
2025 if (o0.size() != o1.size())
2026 goto OperandSizeMismatch;
2027
2028 opcode.addArithBySize(o0.size());
2029 rbReg = o0.id();
2030 opReg = o1.id();
2031
2032 if (o0.size() != 1)
2033 goto EmitX86R;
2034
2035 FIXUP_GPB(o0, rbReg);
2036 FIXUP_GPB(o1, opReg);
2037 goto EmitX86R;
2038 }
2039
2040 if (isign3 == ENC_OPS2(Mem, Reg)) {
2041 opcode.addArithBySize(o1.size());
2042 opReg = o1.id();
2043 rmRel = &o0;
2044
2045 if (o1.size() != 1)
2046 goto EmitX86M;
2047
2048 FIXUP_GPB(o1, opReg);
2049 goto EmitX86M;
2050 }
2051
2052 // The following instructions use the secondary opcode.
2053 opcode = x86AltOpcodeOf(instInfo);
2054 opReg = opcode.extractO();
2055
2056 if (isign3 == ENC_OPS2(Reg, Imm)) {
2057 opcode.addArithBySize(o0.size());
2058 rbReg = o0.id();
2059
2060 if (o0.size() == 1) {
2061 FIXUP_GPB(o0, rbReg);
2062 immValue = o1.as<Imm>().u8();
2063 immSize = 1;
2064 }
2065 else {
2066 immValue = o1.as<Imm>().i64();
2067 immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
2068 }
2069
2070 // Short form - AL, AX, EAX, RAX.
2071 if (rbReg == 0 && !(options & Inst::kOptionLongForm)) {
2072 opcode &= Opcode::kPP_66 | Opcode::kW;
2073 opcode |= 0xA8 + (o0.size() != 1);
2074 goto EmitX86Op;
2075 }
2076
2077 goto EmitX86R;
2078 }
2079
2080 if (isign3 == ENC_OPS2(Mem, Imm)) {
2081 if (ASMJIT_UNLIKELY(o0.size() == 0))
2082 goto AmbiguousOperandSize;
2083
2084 opcode.addArithBySize(o0.size());
2085 rmRel = &o0;
2086
2087 immValue = o1.as<Imm>().i64();
2088 immSize = FastUInt8(Support::min<uint32_t>(o0.size(), 4));
2089 goto EmitX86M;
2090 }
2091 break;
2092
2093 case InstDB::kEncodingX86Xchg:
2094 if (isign3 == ENC_OPS2(Reg, Mem)) {
2095 opcode.addArithBySize(o0.size());
2096 opReg = o0.id();
2097 rmRel = &o1;
2098
2099 if (o0.size() != 1)
2100 goto EmitX86M;
2101
2102 FIXUP_GPB(o0, opReg);
2103 goto EmitX86M;
2104 }
2105 ASMJIT_FALLTHROUGH;
2106
2107 case InstDB::kEncodingX86Xadd:
2108 if (isign3 == ENC_OPS2(Reg, Reg)) {
2109 opcode.addArithBySize(o0.size());
2110 rbReg = o0.id();
2111 opReg = o1.id();
2112
2113 if (o0.size() != o1.size())
2114 goto OperandSizeMismatch;
2115
2116 if (o0.size() == 1) {
2117 FIXUP_GPB(o0, rbReg);
2118 FIXUP_GPB(o1, opReg);
2119 goto EmitX86R;
2120 }
2121
2122 // Special opcode for 'xchg ?ax, reg'.
2123 if (instId == Inst::kIdXchg && (opReg == 0 || rbReg == 0)) {
2124 opcode &= Opcode::kPP_66 | Opcode::kW;
2125 opcode |= 0x90;
2126 // One of `xchg a, b` or `xchg b, a` is AX/EAX/RAX.
2127 opReg += rbReg;
2128 goto EmitX86OpReg;
2129 }
2130 else {
2131 goto EmitX86R;
2132 }
2133 }
2134
2135 if (isign3 == ENC_OPS2(Mem, Reg)) {
2136 opcode.addArithBySize(o1.size());
2137 opReg = o1.id();
2138 rmRel = &o0;
2139
2140 if (o1.size() == 1) {
2141 FIXUP_GPB(o1, opReg);
2142 }
2143
2144 goto EmitX86M;
2145 }
2146 break;
2147
2148 case InstDB::kEncodingX86Fence:
2149 rbReg = 0;
2150 goto EmitX86R;
2151
2152 case InstDB::kEncodingX86Bndmov:
2153 if (isign3 == ENC_OPS2(Reg, Reg)) {
2154 opReg = o0.id();
2155 rbReg = o1.id();
2156
2157 // ModRM encoding:
2158 if (!(options & Inst::kOptionModMR))
2159 goto EmitX86R;
2160
2161 // ModMR encoding:
2162 opcode = x86AltOpcodeOf(instInfo);
2163 std::swap(opReg, rbReg);
2164 goto EmitX86R;
2165 }
2166
2167 if (isign3 == ENC_OPS2(Reg, Mem)) {
2168 opReg = o0.id();
2169 rmRel = &o1;
2170 goto EmitX86M;
2171 }
2172
2173 if (isign3 == ENC_OPS2(Mem, Reg)) {
2174 opcode = x86AltOpcodeOf(instInfo);
2175
2176 rmRel = &o0;
2177 opReg = o1.id();
2178 goto EmitX86M;
2179 }
2180 break;
2181
2182 // ------------------------------------------------------------------------
2183 // [FPU]
2184 // ------------------------------------------------------------------------
2185
2186 case InstDB::kEncodingFpuOp:
2187 goto EmitFpuOp;
2188
2189 case InstDB::kEncodingFpuArith:
2190 if (isign3 == ENC_OPS2(Reg, Reg)) {
2191 opReg = o0.id();
2192 rbReg = o1.id();
2193
2194 // We switch to the alternative opcode if the first operand is zero.
2195 if (opReg == 0) {
2196 CaseFpuArith_Reg:
2197 opcode = ((0xD8 << Opcode::kFPU_2B_Shift) ) +
2198 ((opcode >> Opcode::kFPU_2B_Shift) & 0xFF) + rbReg;
2199 goto EmitFpuOp;
2200 }
2201 else if (rbReg == 0) {
2202 rbReg = opReg;
2203 opcode = ((0xDC << Opcode::kFPU_2B_Shift) ) +
2204 ((opcode ) & 0xFF) + rbReg;
2205 goto EmitFpuOp;
2206 }
2207 else {
2208 goto InvalidInstruction;
2209 }
2210 }
2211
2212 if (isign3 == ENC_OPS1(Mem)) {
2213 CaseFpuArith_Mem:
2214 // 0xD8/0xDC, depends on the size of the memory operand; opReg is valid.
2215 opcode = (o0.size() == 4) ? 0xD8 : 0xDC;
2216 // Clear compressed displacement before going to EmitX86M.
2217 opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2218
2219 rmRel = &o0;
2220 goto EmitX86M;
2221 }
2222 break;
2223
2224 case InstDB::kEncodingFpuCom:
2225 if (isign3 == 0) {
2226 rbReg = 1;
2227 goto CaseFpuArith_Reg;
2228 }
2229
2230 if (isign3 == ENC_OPS1(Reg)) {
2231 rbReg = o0.id();
2232 goto CaseFpuArith_Reg;
2233 }
2234
2235 if (isign3 == ENC_OPS1(Mem)) {
2236 goto CaseFpuArith_Mem;
2237 }
2238 break;
2239
2240 case InstDB::kEncodingFpuFldFst:
2241 if (isign3 == ENC_OPS1(Mem)) {
2242 rmRel = &o0;
2243
2244 if (o0.size() == 4 && commonInfo->hasFlag(InstDB::kFlagFpuM32)) {
2245 goto EmitX86M;
2246 }
2247
2248 if (o0.size() == 8 && commonInfo->hasFlag(InstDB::kFlagFpuM64)) {
2249 opcode += 4;
2250 goto EmitX86M;
2251 }
2252
2253 if (o0.size() == 10 && commonInfo->hasFlag(InstDB::kFlagFpuM80)) {
2254 opcode = x86AltOpcodeOf(instInfo);
2255 opReg = opcode.extractO();
2256 goto EmitX86M;
2257 }
2258 }
2259
2260 if (isign3 == ENC_OPS1(Reg)) {
2261 if (instId == Inst::kIdFld ) { opcode = (0xD9 << Opcode::kFPU_2B_Shift) + 0xC0 + o0.id(); goto EmitFpuOp; }
2262 if (instId == Inst::kIdFst ) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD0 + o0.id(); goto EmitFpuOp; }
2263 if (instId == Inst::kIdFstp) { opcode = (0xDD << Opcode::kFPU_2B_Shift) + 0xD8 + o0.id(); goto EmitFpuOp; }
2264 }
2265 break;
2266
2267 case InstDB::kEncodingFpuM:
2268 if (isign3 == ENC_OPS1(Mem)) {
2269 // Clear compressed displacement before going to EmitX86M.
2270 opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2271
2272 rmRel = &o0;
2273 if (o0.size() == 2 && commonInfo->hasFlag(InstDB::kFlagFpuM16)) {
2274 opcode += 4;
2275 goto EmitX86M;
2276 }
2277
2278 if (o0.size() == 4 && commonInfo->hasFlag(InstDB::kFlagFpuM32)) {
2279 goto EmitX86M;
2280 }
2281
2282 if (o0.size() == 8 && commonInfo->hasFlag(InstDB::kFlagFpuM64)) {
2283 opcode = x86AltOpcodeOf(instInfo) & ~uint32_t(Opcode::kCDSHL_Mask);
2284 opReg = opcode.extractO();
2285 goto EmitX86M;
2286 }
2287 }
2288 break;
2289
2290 case InstDB::kEncodingFpuRDef:
2291 if (isign3 == 0) {
2292 opcode += 1;
2293 goto EmitFpuOp;
2294 }
2295 ASMJIT_FALLTHROUGH;
2296
2297 case InstDB::kEncodingFpuR:
2298 if (isign3 == ENC_OPS1(Reg)) {
2299 opcode += o0.id();
2300 goto EmitFpuOp;
2301 }
2302 break;
2303
2304 case InstDB::kEncodingFpuStsw:
2305 if (isign3 == ENC_OPS1(Reg)) {
2306 if (ASMJIT_UNLIKELY(o0.id() != Gp::kIdAx))
2307 goto InvalidInstruction;
2308
2309 opcode = x86AltOpcodeOf(instInfo);
2310 goto EmitFpuOp;
2311 }
2312
2313 if (isign3 == ENC_OPS1(Mem)) {
2314 // Clear compressed displacement before going to EmitX86M.
2315 opcode &= ~uint32_t(Opcode::kCDSHL_Mask);
2316
2317 rmRel = &o0;
2318 goto EmitX86M;
2319 }
2320 break;
2321
2322 // ------------------------------------------------------------------------
2323 // [Ext]
2324 // ------------------------------------------------------------------------
2325
2326 case InstDB::kEncodingExtPextrw:
2327 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2328 opcode.add66hIf(Reg::isXmm(o1));
2329
2330 immValue = o2.as<Imm>().i64();
2331 immSize = 1;
2332
2333 opReg = o0.id();
2334 rbReg = o1.id();
2335 goto EmitX86R;
2336 }
2337
2338 if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2339 // Secondary opcode of 'pextrw' instruction (SSE4.1).
2340 opcode = x86AltOpcodeOf(instInfo);
2341 opcode.add66hIf(Reg::isXmm(o1));
2342
2343 immValue = o2.as<Imm>().i64();
2344 immSize = 1;
2345
2346 opReg = o1.id();
2347 rmRel = &o0;
2348 goto EmitX86M;
2349 }
2350 break;
2351
2352 case InstDB::kEncodingExtExtract:
2353 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2354 opcode.add66hIf(Reg::isXmm(o1));
2355
2356 immValue = o2.as<Imm>().i64();
2357 immSize = 1;
2358
2359 opReg = o1.id();
2360 rbReg = o0.id();
2361 goto EmitX86R;
2362 }
2363
2364 if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2365 opcode.add66hIf(Reg::isXmm(o1));
2366
2367 immValue = o2.as<Imm>().i64();
2368 immSize = 1;
2369
2370 opReg = o1.id();
2371 rmRel = &o0;
2372 goto EmitX86M;
2373 }
2374 break;
2375
2376 case InstDB::kEncodingExtMov:
2377 // GP|MM|XMM <- GP|MM|XMM
2378 if (isign3 == ENC_OPS2(Reg, Reg)) {
2379 opReg = o0.id();
2380 rbReg = o1.id();
2381
2382 if (!(options & Inst::kOptionModMR) || !instInfo->_altOpcodeIndex)
2383 goto EmitX86R;
2384
2385 opcode = x86AltOpcodeOf(instInfo);
2386 std::swap(opReg, rbReg);
2387 goto EmitX86R;
2388 }
2389
2390 // GP|MM|XMM <- Mem
2391 if (isign3 == ENC_OPS2(Reg, Mem)) {
2392 opReg = o0.id();
2393 rmRel = &o1;
2394 goto EmitX86M;
2395 }
2396
2397      // The following instruction uses the secondary opcode.
2398 opcode = x86AltOpcodeOf(instInfo);
2399
2400 // Mem <- GP|MM|XMM
2401 if (isign3 == ENC_OPS2(Mem, Reg)) {
2402 opReg = o1.id();
2403 rmRel = &o0;
2404 goto EmitX86M;
2405 }
2406 break;
2407
2408 case InstDB::kEncodingExtMovbe:
2409 if (isign3 == ENC_OPS2(Reg, Mem)) {
2410 if (o0.size() == 1)
2411 goto InvalidInstruction;
2412
2413 opcode.addPrefixBySize(o0.size());
2414 opReg = o0.id();
2415 rmRel = &o1;
2416 goto EmitX86M;
2417 }
2418
2419 // The following instruction uses the secondary opcode.
2420 opcode = x86AltOpcodeOf(instInfo);
2421
2422 if (isign3 == ENC_OPS2(Mem, Reg)) {
2423 if (o1.size() == 1)
2424 goto InvalidInstruction;
2425
2426 opcode.addPrefixBySize(o1.size());
2427 opReg = o1.id();
2428 rmRel = &o0;
2429 goto EmitX86M;
2430 }
2431 break;
2432
2433 case InstDB::kEncodingExtMovd:
2434 CaseExtMovd:
2435 opReg = o0.id();
2436 opcode.add66hIf(Reg::isXmm(o0));
2437
2438 // MM/XMM <- Gp
2439 if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1)) {
2440 rbReg = o1.id();
2441 goto EmitX86R;
2442 }
2443
2444 // MM/XMM <- Mem
2445 if (isign3 == ENC_OPS2(Reg, Mem)) {
2446 rmRel = &o1;
2447 goto EmitX86M;
2448 }
2449
2450 // The following instructions use the secondary opcode.
2451 opcode &= Opcode::kW;
2452 opcode |= x86AltOpcodeOf(instInfo);
2453 opReg = o1.id();
2454 opcode.add66hIf(Reg::isXmm(o1));
2455
2456 // GP <- MM/XMM
2457 if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o0)) {
2458 rbReg = o0.id();
2459 goto EmitX86R;
2460 }
2461
2462 // Mem <- MM/XMM
2463 if (isign3 == ENC_OPS2(Mem, Reg)) {
2464 rmRel = &o0;
2465 goto EmitX86M;
2466 }
2467 break;
2468
2469 case InstDB::kEncodingExtMovq:
2470 if (isign3 == ENC_OPS2(Reg, Reg)) {
2471 opReg = o0.id();
2472 rbReg = o1.id();
2473
2474 // MM <- MM
2475 if (Reg::isMm(o0) && Reg::isMm(o1)) {
2476 opcode = Opcode::k000F00 | 0x6F;
2477
2478 if (!(options & Inst::kOptionModMR))
2479 goto EmitX86R;
2480
2481 opcode += 0x10;
2482 std::swap(opReg, rbReg);
2483 goto EmitX86R;
2484 }
2485
2486 // XMM <- XMM
2487 if (Reg::isXmm(o0) && Reg::isXmm(o1)) {
2488 opcode = Opcode::kF30F00 | 0x7E;
2489
2490 if (!(options & Inst::kOptionModMR))
2491 goto EmitX86R;
2492
2493 opcode = Opcode::k660F00 | 0xD6;
2494 std::swap(opReg, rbReg);
2495 goto EmitX86R;
2496 }
2497 }
2498
2499 if (isign3 == ENC_OPS2(Reg, Mem)) {
2500 opReg = o0.id();
2501 rmRel = &o1;
2502
2503 // MM <- Mem
2504 if (Reg::isMm(o0)) {
2505 opcode = Opcode::k000F00 | 0x6F;
2506 goto EmitX86M;
2507 }
2508
2509 // XMM <- Mem
2510 if (Reg::isXmm(o0)) {
2511 opcode = Opcode::kF30F00 | 0x7E;
2512 goto EmitX86M;
2513 }
2514 }
2515
2516 if (isign3 == ENC_OPS2(Mem, Reg)) {
2517 opReg = o1.id();
2518 rmRel = &o0;
2519
2520 // Mem <- MM
2521 if (Reg::isMm(o1)) {
2522 opcode = Opcode::k000F00 | 0x7F;
2523 goto EmitX86M;
2524 }
2525
2526 // Mem <- XMM
2527 if (Reg::isXmm(o1)) {
2528 opcode = Opcode::k660F00 | 0xD6;
2529 goto EmitX86M;
2530 }
2531 }
2532
2533      // MOVQ in all other cases is simply a MOVD instruction promoted to 64-bit.
2534 opcode |= Opcode::kW;
2535 goto CaseExtMovd;
2536
2537 case InstDB::kEncodingExtRm_XMM0:
2538 if (ASMJIT_UNLIKELY(!o2.isNone() && !Reg::isXmm(o2, 0)))
2539 goto InvalidInstruction;
2540
2541 isign3 &= 0x3F;
2542 goto CaseExtRm;
2543
2544 case InstDB::kEncodingExtRm_ZDI:
2545 if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
2546 goto InvalidInstruction;
2547
2548 isign3 &= 0x3F;
2549 goto CaseExtRm;
2550
2551 case InstDB::kEncodingExtRm_Wx:
2552 opcode.addWIf(Reg::isGpq(o0) || o1.size() == 8);
2553 ASMJIT_FALLTHROUGH;
2554
2555 case InstDB::kEncodingExtRm:
2556 CaseExtRm:
2557 if (isign3 == ENC_OPS2(Reg, Reg)) {
2558 opReg = o0.id();
2559 rbReg = o1.id();
2560 goto EmitX86R;
2561 }
2562
2563 if (isign3 == ENC_OPS2(Reg, Mem)) {
2564 opReg = o0.id();
2565 rmRel = &o1;
2566 goto EmitX86M;
2567 }
2568 break;
2569
2570 case InstDB::kEncodingExtRm_P:
2571 if (isign3 == ENC_OPS2(Reg, Reg)) {
2572 opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2573
2574 opReg = o0.id();
2575 rbReg = o1.id();
2576 goto EmitX86R;
2577 }
2578
2579 if (isign3 == ENC_OPS2(Reg, Mem)) {
2580 opcode.add66hIf(Reg::isXmm(o0));
2581
2582 opReg = o0.id();
2583 rmRel = &o1;
2584 goto EmitX86M;
2585 }
2586 break;
2587
2588 case InstDB::kEncodingExtRmRi:
2589 if (isign3 == ENC_OPS2(Reg, Reg)) {
2590 opReg = o0.id();
2591 rbReg = o1.id();
2592 goto EmitX86R;
2593 }
2594
2595 if (isign3 == ENC_OPS2(Reg, Mem)) {
2596 opReg = o0.id();
2597 rmRel = &o1;
2598 goto EmitX86M;
2599 }
2600
2601 // The following instruction uses the secondary opcode.
2602 opcode = x86AltOpcodeOf(instInfo);
2603 opReg = opcode.extractO();
2604
2605 if (isign3 == ENC_OPS2(Reg, Imm)) {
2606 immValue = o1.as<Imm>().i64();
2607 immSize = 1;
2608
2609 rbReg = o0.id();
2610 goto EmitX86R;
2611 }
2612 break;
2613
2614 case InstDB::kEncodingExtRmRi_P:
2615 if (isign3 == ENC_OPS2(Reg, Reg)) {
2616 opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2617
2618 opReg = o0.id();
2619 rbReg = o1.id();
2620 goto EmitX86R;
2621 }
2622
2623 if (isign3 == ENC_OPS2(Reg, Mem)) {
2624 opcode.add66hIf(Reg::isXmm(o0));
2625
2626 opReg = o0.id();
2627 rmRel = &o1;
2628 goto EmitX86M;
2629 }
2630
2631 // The following instruction uses the secondary opcode.
2632 opcode = x86AltOpcodeOf(instInfo);
2633 opReg = opcode.extractO();
2634
2635 if (isign3 == ENC_OPS2(Reg, Imm)) {
2636 opcode.add66hIf(Reg::isXmm(o0));
2637
2638 immValue = o1.as<Imm>().i64();
2639 immSize = 1;
2640
2641 rbReg = o0.id();
2642 goto EmitX86R;
2643 }
2644 break;
2645
2646 case InstDB::kEncodingExtRmi:
2647 immValue = o2.as<Imm>().i64();
2648 immSize = 1;
2649
2650 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2651 opReg = o0.id();
2652 rbReg = o1.id();
2653 goto EmitX86R;
2654 }
2655
2656 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2657 opReg = o0.id();
2658 rmRel = &o1;
2659 goto EmitX86M;
2660 }
2661 break;
2662
2663 case InstDB::kEncodingExtRmi_P:
2664 immValue = o2.as<Imm>().i64();
2665 immSize = 1;
2666
2667 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2668 opcode.add66hIf(Reg::isXmm(o0) | Reg::isXmm(o1));
2669
2670 opReg = o0.id();
2671 rbReg = o1.id();
2672 goto EmitX86R;
2673 }
2674
2675 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2676 opcode.add66hIf(Reg::isXmm(o0));
2677
2678 opReg = o0.id();
2679 rmRel = &o1;
2680 goto EmitX86M;
2681 }
2682 break;
2683
2684 // ------------------------------------------------------------------------
2685 // [Extrq / Insertq (SSE4A)]
2686 // ------------------------------------------------------------------------
2687
2688 case InstDB::kEncodingExtExtrq:
2689 opReg = o0.id();
2690 rbReg = o1.id();
2691
2692 if (isign3 == ENC_OPS2(Reg, Reg))
2693 goto EmitX86R;
2694
2695 // The following instruction uses the secondary opcode.
2696 opcode = x86AltOpcodeOf(instInfo);
2697
2698 if (isign3 == ENC_OPS3(Reg, Imm, Imm)) {
2699 immValue = (o1.as<Imm>().u32() ) +
2700 (o2.as<Imm>().u32() << 8) ;
2701 immSize = 2;
2702
2703 rbReg = opcode.extractO();
2704 goto EmitX86R;
2705 }
2706 break;
2707
2708 case InstDB::kEncodingExtInsertq: {
2709 const uint32_t isign4 = isign3 + (o3.opType() << 9);
2710 opReg = o0.id();
2711 rbReg = o1.id();
2712
2713 if (isign4 == ENC_OPS2(Reg, Reg))
2714 goto EmitX86R;
2715
2716 // The following instruction uses the secondary opcode.
2717 opcode = x86AltOpcodeOf(instInfo);
2718
2719 if (isign4 == ENC_OPS4(Reg, Reg, Imm, Imm)) {
2720 immValue = (o2.as<Imm>().u32() ) +
2721 (o3.as<Imm>().u32() << 8) ;
2722 immSize = 2;
2723 goto EmitX86R;
2724 }
2725 break;
2726 }
2727
2728 // ------------------------------------------------------------------------
2729 // [3dNow]
2730 // ------------------------------------------------------------------------
2731
2732 case InstDB::kEncodingExt3dNow:
2733      // Every 3dNow instruction starts with 0x0F0F and the actual opcode is
2734      // stored as an 8-bit immediate.
2735 immValue = opcode.v & 0xFFu;
2736 immSize = 1;
2737
2738 opcode = Opcode::k000F00 | 0x0F;
2739 opReg = o0.id();
2740
2741 if (isign3 == ENC_OPS2(Reg, Reg)) {
2742 rbReg = o1.id();
2743 goto EmitX86R;
2744 }
2745
2746 if (isign3 == ENC_OPS2(Reg, Mem)) {
2747 rmRel = &o1;
2748 goto EmitX86M;
2749 }
2750 break;
2751
2752 // ------------------------------------------------------------------------
2753 // [VEX/EVEX]
2754 // ------------------------------------------------------------------------
2755
2756 case InstDB::kEncodingVexOp:
2757 goto EmitVexEvexOp;
2758
2759 case InstDB::kEncodingVexKmov:
2760 if (isign3 == ENC_OPS2(Reg, Reg)) {
2761 opReg = o0.id();
2762 rbReg = o1.id();
2763
2764 // Form 'k, reg'.
2765 if (Reg::isGp(o1)) {
2766 opcode = x86AltOpcodeOf(instInfo);
2767 goto EmitVexEvexR;
2768 }
2769
2770 // Form 'reg, k'.
2771 if (Reg::isGp(o0)) {
2772 opcode = x86AltOpcodeOf(instInfo) + 1;
2773 goto EmitVexEvexR;
2774 }
2775
2776 // Form 'k, k'.
2777 if (!(options & Inst::kOptionModMR))
2778 goto EmitVexEvexR;
2779
2780 opcode.add(1);
2781 std::swap(opReg, rbReg);
2782 goto EmitVexEvexR;
2783 }
2784
2785 if (isign3 == ENC_OPS2(Reg, Mem)) {
2786 opReg = o0.id();
2787 rmRel = &o1;
2788
2789 goto EmitVexEvexM;
2790 }
2791
2792 if (isign3 == ENC_OPS2(Mem, Reg)) {
2793 opcode.add(1);
2794 opReg = o1.id();
2795 rmRel = &o0;
2796 goto EmitVexEvexM;
2797 }
2798 break;
2799
2800 case InstDB::kEncodingVexR_Wx:
2801 if (isign3 == ENC_OPS1(Reg)) {
2802 rbReg = o0.id();
2803 opcode.addWIf(o0.as<Reg>().isGpq());
2804 goto EmitVexEvexR;
2805 }
2806 break;
2807
2808 case InstDB::kEncodingVexM:
2809 if (isign3 == ENC_OPS1(Mem)) {
2810 rmRel = &o0;
2811 goto EmitVexEvexM;
2812 }
2813 break;
2814
2815 case InstDB::kEncodingVexM_VM:
2816 if (isign3 == ENC_OPS1(Mem)) {
2817 opcode |= x86OpcodeLByVMem(o0);
2818 rmRel = &o0;
2819 goto EmitVexEvexM;
2820 }
2821 break;
2822
2823 case InstDB::kEncodingVexMr_Lx:
2824 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2825
2826 if (isign3 == ENC_OPS2(Reg, Reg)) {
2827 opReg = o1.id();
2828 rbReg = o0.id();
2829 goto EmitVexEvexR;
2830 }
2831
2832 if (isign3 == ENC_OPS2(Mem, Reg)) {
2833 opReg = o1.id();
2834 rmRel = &o0;
2835 goto EmitVexEvexM;
2836 }
2837 break;
2838
2839 case InstDB::kEncodingVexMr_VM:
2840 if (isign3 == ENC_OPS2(Mem, Reg)) {
2841 opcode |= Support::max(x86OpcodeLByVMem(o0), x86OpcodeLBySize(o1.size()));
2842
2843 opReg = o1.id();
2844 rmRel = &o0;
2845 goto EmitVexEvexM;
2846 }
2847 break;
2848
2849 case InstDB::kEncodingVexMri_Lx:
2850 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2851 ASMJIT_FALLTHROUGH;
2852
2853 case InstDB::kEncodingVexMri:
2854 immValue = o2.as<Imm>().i64();
2855 immSize = 1;
2856
2857 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2858 opReg = o1.id();
2859 rbReg = o0.id();
2860 goto EmitVexEvexR;
2861 }
2862
2863 if (isign3 == ENC_OPS3(Mem, Reg, Imm)) {
2864 opReg = o1.id();
2865 rmRel = &o0;
2866 goto EmitVexEvexM;
2867 }
2868 break;
2869
2870 case InstDB::kEncodingVexRm_ZDI:
2871 if (ASMJIT_UNLIKELY(!o2.isNone() && !x86IsImplicitMem(o2, Gp::kIdDi)))
2872 goto InvalidInstruction;
2873
2874 isign3 &= 0x3F;
2875 goto CaseVexRm;
2876
2877 case InstDB::kEncodingVexRm_Wx:
2878 opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
2879 goto CaseVexRm;
2880
2881 case InstDB::kEncodingVexRm_Lx_Bcst:
2882 if (isign3 == ENC_OPS2(Reg, Reg) && Reg::isGp(o1.as<Reg>())) {
2883 opcode = x86AltOpcodeOf(instInfo) | x86OpcodeLBySize(o0.size() | o1.size());
2884 opReg = o0.id();
2885 rbReg = o1.id();
2886 goto EmitVexEvexR;
2887 }
2888 ASMJIT_FALLTHROUGH;
2889
2890 case InstDB::kEncodingVexRm_Lx:
2891 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2892 ASMJIT_FALLTHROUGH;
2893
2894 case InstDB::kEncodingVexRm:
2895 CaseVexRm:
2896 if (isign3 == ENC_OPS2(Reg, Reg)) {
2897 opReg = o0.id();
2898 rbReg = o1.id();
2899 goto EmitVexEvexR;
2900 }
2901
2902 if (isign3 == ENC_OPS2(Reg, Mem)) {
2903 opReg = o0.id();
2904 rmRel = &o1;
2905 goto EmitVexEvexM;
2906 }
2907 break;
2908
2909 case InstDB::kEncodingVexRm_VM:
2910 if (isign3 == ENC_OPS2(Reg, Mem)) {
2911 opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
2912 opReg = o0.id();
2913 rmRel = &o1;
2914 goto EmitVexEvexM;
2915 }
2916 break;
2917
2918 case InstDB::kEncodingVexRm_T1_4X: {
2919 if (!(options & Inst::kOptionOp4Op5Used))
2920 goto InvalidInstruction;
2921
2922 if (Reg::isZmm(o0 ) && Reg::isZmm(o1) &&
2923 Reg::isZmm(o2 ) && Reg::isZmm(o3) &&
2924 Reg::isZmm(_op4) && _op5.isMem()) {
2925
2926 // Registers [o1, o2, o3, _op4] must start aligned and must be consecutive.
2927 uint32_t i1 = o1.id();
2928 uint32_t i2 = o2.id();
2929 uint32_t i3 = o3.id();
2930 uint32_t i4 = _op4.id();
2931
2932 if (ASMJIT_UNLIKELY((i1 & 0x3) != 0 || i2 != i1 + 1 || i3 != i1 + 2 || i4 != i1 + 3))
2933 goto NotConsecutiveRegs;
2934
2935 opReg = o0.id();
2936 rmRel = &_op5;
2937 goto EmitVexEvexM;
2938 }
2939 break;
2940 }
2941
2942 case InstDB::kEncodingVexRmi_Wx:
2943 opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
2944 goto CaseVexRmi;
2945
2946 case InstDB::kEncodingVexRmi_Lx:
2947 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2948 ASMJIT_FALLTHROUGH;
2949
2950 case InstDB::kEncodingVexRmi:
2951 CaseVexRmi:
2952 immValue = o2.as<Imm>().i64();
2953 immSize = 1;
2954
2955 if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
2956 opReg = o0.id();
2957 rbReg = o1.id();
2958 goto EmitVexEvexR;
2959 }
2960
2961 if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
2962 opReg = o0.id();
2963 rmRel = &o1;
2964 goto EmitVexEvexM;
2965 }
2966 break;
2967
2968 case InstDB::kEncodingVexRvm:
2969 CaseVexRvm:
2970 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
2971 CaseVexRvm_R:
2972 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
2973 rbReg = o2.id();
2974 goto EmitVexEvexR;
2975 }
2976
2977 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
2978 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
2979 rmRel = &o2;
2980 goto EmitVexEvexM;
2981 }
2982 break;
2983
2984 case InstDB::kEncodingVexRvm_ZDX_Wx:
2985 if (ASMJIT_UNLIKELY(!o3.isNone() && !Reg::isGp(o3, Gp::kIdDx)))
2986 goto InvalidInstruction;
2987 ASMJIT_FALLTHROUGH;
2988
2989 case InstDB::kEncodingVexRvm_Wx:
2990 opcode.addWIf(Reg::isGpq(o0) | (o2.size() == 8));
2991 goto CaseVexRvm;
2992
2993 case InstDB::kEncodingVexRvm_Lx:
2994 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2995 goto CaseVexRvm;
2996
2997 case InstDB::kEncodingVexRvmr_Lx:
2998 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
2999 ASMJIT_FALLTHROUGH;
3000
3001 case InstDB::kEncodingVexRvmr: {
3002 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3003 immValue = o3.id() << 4;
3004 immSize = 1;
3005
3006 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
3007 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3008 rbReg = o2.id();
3009 goto EmitVexEvexR;
3010 }
3011
3012 if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
3013 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3014 rmRel = &o2;
3015 goto EmitVexEvexM;
3016 }
3017 break;
3018 }
3019
3020 case InstDB::kEncodingVexRvmi_Lx:
3021 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3022 ASMJIT_FALLTHROUGH;
3023
3024 case InstDB::kEncodingVexRvmi: {
3025 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3026 immValue = o3.as<Imm>().i64();
3027 immSize = 1;
3028
3029 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
3030 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3031 rbReg = o2.id();
3032 goto EmitVexEvexR;
3033 }
3034
3035 if (isign4 == ENC_OPS4(Reg, Reg, Mem, Imm)) {
3036 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3037 rmRel = &o2;
3038 goto EmitVexEvexM;
3039 }
3040 break;
3041 }
3042
3043 case InstDB::kEncodingVexRmv_Wx:
3044 opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o2));
3045 ASMJIT_FALLTHROUGH;
3046
3047 case InstDB::kEncodingVexRmv:
3048 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3049 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3050 rbReg = o1.id();
3051 goto EmitVexEvexR;
3052 }
3053
3054 if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3055 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3056 rmRel = &o1;
3057 goto EmitVexEvexM;
3058 }
3059 break;
3060
3061 case InstDB::kEncodingVexRmvRm_VM:
3062 if (isign3 == ENC_OPS2(Reg, Mem)) {
3063 opcode = x86AltOpcodeOf(instInfo);
3064 opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size()));
3065
3066 opReg = o0.id();
3067 rmRel = &o1;
3068 goto EmitVexEvexM;
3069 }
3070
3071 ASMJIT_FALLTHROUGH;
3072
3073 case InstDB::kEncodingVexRmv_VM:
3074 if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3075 opcode |= Support::max(x86OpcodeLByVMem(o1), x86OpcodeLBySize(o0.size() | o2.size()));
3076
3077 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3078 rmRel = &o1;
3079 goto EmitVexEvexM;
3080 }
3081 break;
3082
3083
3084 case InstDB::kEncodingVexRmvi: {
3085 const uint32_t isign4 = isign3 + (o3.opType() << 9);
3086 immValue = o3.as<Imm>().i64();
3087 immSize = 1;
3088
3089 if (isign4 == ENC_OPS4(Reg, Reg, Reg, Imm)) {
3090 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3091 rbReg = o1.id();
3092 goto EmitVexEvexR;
3093 }
3094
3095 if (isign4 == ENC_OPS4(Reg, Mem, Reg, Imm)) {
3096 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3097 rmRel = &o1;
3098 goto EmitVexEvexM;
3099 }
3100 break;
3101 }
3102
3103 case InstDB::kEncodingVexMovdMovq:
3104 if (isign3 == ENC_OPS2(Reg, Reg)) {
3105 if (Reg::isGp(o0)) {
3106 opcode = x86AltOpcodeOf(instInfo);
3107 opcode.addWBySize(o0.size());
3108 opReg = o1.id();
3109 rbReg = o0.id();
3110 goto EmitVexEvexR;
3111 }
3112
3113 if (Reg::isGp(o1)) {
3114 opcode.addWBySize(o1.size());
3115 opReg = o0.id();
3116 rbReg = o1.id();
3117 goto EmitVexEvexR;
3118 }
3119
3120 // If this is a 'W' version (movq) then allow also vmovq 'xmm|xmm' form.
3121 if (opcode & Opcode::kEvex_W_1) {
3122 opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3123 opcode |= (Opcode::kF30F00 | 0x7E);
3124
3125 opReg = o0.id();
3126 rbReg = o1.id();
3127 goto EmitVexEvexR;
3128 }
3129 }
3130
3131 if (isign3 == ENC_OPS2(Reg, Mem)) {
3132 if (opcode & Opcode::kEvex_W_1) {
3133 opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3134 opcode |= (Opcode::kF30F00 | 0x7E);
3135 }
3136
3137 opReg = o0.id();
3138 rmRel = &o1;
3139 goto EmitVexEvexM;
3140 }
3141
3142 // The following instruction uses the secondary opcode.
3143 opcode = x86AltOpcodeOf(instInfo);
3144
3145 if (isign3 == ENC_OPS2(Mem, Reg)) {
3146 if (opcode & Opcode::kEvex_W_1) {
3147 opcode &= ~(Opcode::kPP_VEXMask | Opcode::kMM_Mask | 0xFF);
3148 opcode |= (Opcode::k660F00 | 0xD6);
3149 }
3150
3151 opReg = o1.id();
3152 rmRel = &o0;
3153 goto EmitVexEvexM;
3154 }
3155 break;
3156
3157 case InstDB::kEncodingVexRmMr_Lx:
3158 opcode |= x86OpcodeLBySize(o0.size() | o1.size());
3159 ASMJIT_FALLTHROUGH;
3160
3161 case InstDB::kEncodingVexRmMr:
3162 if (isign3 == ENC_OPS2(Reg, Reg)) {
3163 opReg = o0.id();
3164 rbReg = o1.id();
3165 goto EmitVexEvexR;
3166 }
3167
3168 if (isign3 == ENC_OPS2(Reg, Mem)) {
3169 opReg = o0.id();
3170 rmRel = &o1;
3171 goto EmitVexEvexM;
3172 }
3173
3174 // The following instruction uses the secondary opcode.
3175 opcode &= Opcode::kLL_Mask;
3176 opcode |= x86AltOpcodeOf(instInfo);
3177
3178 if (isign3 == ENC_OPS2(Mem, Reg)) {
3179 opReg = o1.id();
3180 rmRel = &o0;
3181 goto EmitVexEvexM;
3182 }
3183 break;
3184
3185 case InstDB::kEncodingVexRvmRmv:
3186 if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
3187 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3188 rbReg = o1.id();
3189
3190 if (!(options & Inst::kOptionModMR))
3191 goto EmitVexEvexR;
3192
3193 opcode.addW();
3194 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3195 rbReg = o2.id();
3196 goto EmitVexEvexR;
3197 }
3198
3199 if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
3200 opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
3201 rmRel = &o1;
3202 goto EmitVexEvexM;
3203 }
3204
3205 if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
3206 opcode.addW();
3207 opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
3208 rmRel = &o2;
3209 goto EmitVexEvexM;
3210 }
3211 break;
3212
    case InstDB::kEncodingVexRvmRmi_Lx:
      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
      ASMJIT_FALLTHROUGH;

    // VEX instruction encodable either as RVM (reg, vvvv, reg/mem) or, via
    // the secondary opcode, as RMI (reg, reg/mem, imm8).
    case InstDB::kEncodingVexRvmRmi:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;
        goto EmitVexEvexM;
      }

      // The following instructions use the secondary opcode.
      opcode &= Opcode::kLL_Mask;
      opcode |= x86AltOpcodeOf(instInfo);

      // NOTE: `o2` is read as an immediate unconditionally, but the value is
      // only consumed if one of the Imm signatures below matches; otherwise
      // control falls through to `break` and the instruction is rejected.
      immValue = o2.as<Imm>().i64();
      immSize = 1;

      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = o0.id();
        rbReg = o1.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = o0.id();
        rmRel = &o1;
        goto EmitVexEvexM;
      }
      break;
3249
    // Combination of the RvmRmv operand orders (W selects which operand is in
    // VEX.vvvv vs ModRM) with an additional RMI (imm8) form encoded by the
    // secondary opcode.
    case InstDB::kEncodingVexRvmRmvRmi:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
        rbReg = o1.id();

        if (!(options & Inst::kOptionModMR))
          goto EmitVexEvexR;

        // MODMR option - encode the alternative (W=1) operand order.
        opcode.addW();
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o2.id());
        rmRel = &o1;
        goto EmitVexEvexM;
      }

      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opcode.addW();
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;
        goto EmitVexEvexM;
      }

      // The following instructions use the secondary opcode.
      // NOTE: Unlike most other Vex* cases the alternative opcode replaces the
      // opcode completely here (no LL bits are carried over).
      opcode = x86AltOpcodeOf(instInfo);

      immValue = o2.as<Imm>().i64();
      immSize = 1;

      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = o0.id();
        rbReg = o1.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = o0.id();
        rmRel = &o1;
        goto EmitVexEvexM;
      }
      break;
3295
    // VEX instruction with a three-operand RVM form and, through the secondary
    // opcode (which replaces the opcode completely), a two-operand MR store form.
    case InstDB::kEncodingVexRvmMr:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;
        goto EmitVexEvexM;
      }

      // The following instructions use the secondary opcode.
      opcode = x86AltOpcodeOf(instInfo);

      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = o1.id();
        rbReg = o0.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opReg = o1.id();
        rmRel = &o0;
        goto EmitVexEvexM;
      }
      break;
3324
    case InstDB::kEncodingVexRvmMvr_Lx:
      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
      ASMJIT_FALLTHROUGH;

    // VEX instruction with an RVM form and, through the secondary opcode, an
    // MVR form (mem, vvvv, reg) used when the destination is memory.
    case InstDB::kEncodingVexRvmMvr:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;
        goto EmitVexEvexM;
      }

      // The following instruction uses the secondary opcode.
      // Keep only the LL (vector length) bits and merge in the alternative opcode.
      opcode &= Opcode::kLL_Mask;
      opcode |= x86AltOpcodeOf(instInfo);

      if (isign3 == ENC_OPS3(Mem, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o2.id(), o1.id());
        rmRel = &o0;
        goto EmitVexEvexM;
      }
      break;
3352
    case InstDB::kEncodingVexRvmVmi_Lx:
      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
      ASMJIT_FALLTHROUGH;

    // VEX instruction with an RVM form and, through the secondary opcode, a
    // VMI form (vvvv, reg/mem, imm8) where the ModRM reg field carries the
    // opcode extension (/N) extracted from the alternative opcode.
    case InstDB::kEncodingVexRvmVmi:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;
        goto EmitVexEvexM;
      }

      // The following instruction uses the secondary opcode.
      opcode &= Opcode::kLL_Mask;
      opcode |= x86AltOpcodeOf(instInfo);
      // The ModRM reg field comes from the opcode's /O extension.
      opReg = opcode.extractO();

      immValue = o2.as<Imm>().i64();
      immSize = 1;

      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.id());
        rbReg = o1.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.id());
        rmRel = &o1;
        goto EmitVexEvexM;
      }
      break;
3390
    case InstDB::kEncodingVexVm_Wx:
      // Bitwise `|` (not `||`) on purpose - both checks are cheap and this
      // keeps the evaluation branchless.
      opcode.addWIf(Reg::isGpq(o0) | Reg::isGpq(o1));
      ASMJIT_FALLTHROUGH;

    // VM encoding - `o0` is packed into VEX.vvvv and `o1` is the ModRM
    // reg/mem operand; `opReg` is packed in as the ModRM reg part
    // (presumably the opcode extension set before this switch - not visible
    // in this chunk).
    case InstDB::kEncodingVexVm:
      if (isign3 == ENC_OPS2(Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.id());
        rbReg = o1.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.id());
        rmRel = &o1;
        goto EmitVexEvexM;
      }
      break;
3408
    case InstDB::kEncodingVexEvexVmi_Lx:
      // The (Reg, Mem, Imm) signature of this instruction requires EVEX.
      if (isign3 == ENC_OPS3(Reg, Mem, Imm))
        opcode |= Opcode::kMM_ForceEvex;
      ASMJIT_FALLTHROUGH;

    case InstDB::kEncodingVexVmi_Lx:
      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
      ASMJIT_FALLTHROUGH;

    // VMI encoding - `o0` in VEX.vvvv, `o1` in ModRM, `o2` is an 8-bit
    // immediate.
    case InstDB::kEncodingVexVmi:
      immValue = o2.as<Imm>().i64();
      immSize = 1;

    CaseVexVmi_AfterImm:
      if (isign3 == ENC_OPS3(Reg, Reg, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.id());
        rbReg = o1.id();
        goto EmitVexEvexR;
      }

      if (isign3 == ENC_OPS3(Reg, Mem, Imm)) {
        opReg = x86PackRegAndVvvvv(opReg, o0.id());
        rmRel = &o1;
        goto EmitVexEvexM;
      }
      break;

    // Like VexVmi, but with a 4-byte immediate; W is set for 64-bit operands.
    case InstDB::kEncodingVexVmi4_Wx:
      opcode.addWIf(Reg::isGpq(o0) || o1.size() == 8);
      immValue = o2.as<Imm>().i64();
      immSize = 4;
      goto CaseVexVmi_AfterImm;
3441
    case InstDB::kEncodingVexRvrmRvmr_Lx:
      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
      ASMJIT_FALLTHROUGH;

    // Four-operand VEX encoding (RVRM / RVMR) - the fourth register operand is
    // carried in the high nibble of a trailing imm8 ("/is4" byte); W selects
    // which source may be memory (W=0 -> o2, W=1 -> o3).
    case InstDB::kEncodingVexRvrmRvmr: {
      const uint32_t isign4 = isign3 + (o3.opType() << 9);

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();

        immValue = o3.id() << 4;
        immSize = 1;
        goto EmitVexEvexR;
      }

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
        opcode.addW();
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o3;

        immValue = o2.id() << 4;
        immSize = 1;
        goto EmitVexEvexM;
      }

      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;

        immValue = o3.id() << 4;
        immSize = 1;
        goto EmitVexEvexM;
      }
      break;
    }
3478
    // Five-operand VEX encoding - like RvrmRvmr, but the trailing imm8 also
    // carries a 4-bit immediate (taken from `_op4`) in its low nibble.
    case InstDB::kEncodingVexRvrmiRvmri_Lx: {
      // The fifth operand must be present and must be an immediate.
      if (!(options & Inst::kOptionOp4Op5Used) || !_op4.isImm())
        goto InvalidInstruction;

      const uint32_t isign4 = isign3 + (o3.opType() << 9);
      opcode |= x86OpcodeLBySize(o0.size() | o1.size() | o2.size() | o3.size());

      // Low nibble = 4-bit immediate, high nibble = fourth register id (set below).
      immValue = _op4.as<Imm>().u8() & 0x0F;
      immSize = 1;

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();

        immValue |= o3.id() << 4;
        goto EmitVexEvexR;
      }

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
        opcode.addW();
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o3;

        immValue |= o2.id() << 4;
        goto EmitVexEvexM;
      }

      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;

        immValue |= o3.id() << 4;
        goto EmitVexEvexM;
      }
      break;
    }
3515
    // VMOVSS/VMOVSD - reg-reg moves use the RVM path (shared CaseVexRvm_R,
    // defined earlier in this function); load/store forms use plain RM/MR,
    // with the store form encoded by the secondary opcode.
    case InstDB::kEncodingVexMovssMovsd:
      if (isign3 == ENC_OPS3(Reg, Reg, Reg)) {
        goto CaseVexRvm_R;
      }

      if (isign3 == ENC_OPS2(Reg, Mem)) {
        opReg = o0.id();
        rmRel = &o1;
        goto EmitVexEvexM;
      }

      if (isign3 == ENC_OPS2(Mem, Reg)) {
        opcode = x86AltOpcodeOf(instInfo);
        opReg = o1.id();
        rmRel = &o0;
        goto EmitVexEvexM;
      }
      break;
3534
3535 // ------------------------------------------------------------------------
3536 // [FMA4]
3537 // ------------------------------------------------------------------------
3538
    case InstDB::kEncodingFma4_Lx:
      // It's fine to just check the first operand, second is just for sanity.
      opcode |= x86OpcodeLBySize(o0.size() | o1.size());
      ASMJIT_FALLTHROUGH;

    // FMA4 - four-operand encoding where the fourth register is carried in the
    // high nibble of a trailing imm8 and W selects which source operand may be
    // memory (W=0 -> o2, W=1 -> o3).
    case InstDB::kEncodingFma4: {
      const uint32_t isign4 = isign3 + (o3.opType() << 9);

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rbReg = o2.id();

        immValue = o3.id() << 4;
        immSize = 1;
        goto EmitVexEvexR;
      }

      if (isign4 == ENC_OPS4(Reg, Reg, Reg, Mem)) {
        opcode.addW();
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o3;

        immValue = o2.id() << 4;
        immSize = 1;
        goto EmitVexEvexM;
      }

      if (isign4 == ENC_OPS4(Reg, Reg, Mem, Reg)) {
        opReg = x86PackRegAndVvvvv(o0.id(), o1.id());
        rmRel = &o2;

        immValue = o3.id() << 4;
        immSize = 1;
        goto EmitVexEvexM;
      }
      break;
    }
3576 }
3577
3578 goto InvalidInstruction;
3579
3580 // --------------------------------------------------------------------------
3581 // [Emit - X86]
3582 // --------------------------------------------------------------------------
3583
EmitX86OpMovAbs:
  // 'mov' with an absolute address (moffs form) - the absolute address is
  // emitted through the immediate path below, sized by the native GP width.
  immSize = FastUInt8(gpSize());
  writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());

EmitX86Op:
  // Simplest form: prefixes + opcode [+ immediate]; no ModRM byte.
  // Emit mandatory instruction prefix.
  writer.emitPP(opcode.v);

  // Emit REX prefix (64-bit only).
  {
    uint32_t rex = opcode.extractRex(options);
    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
      goto InvalidRexPrefix;
    // Clear the internal "invalid REX" marker bit; emit REX only if non-zero.
    rex &= ~kX86ByteInvalidRex & 0xFF;
    writer.emit8If(rex | kX86ByteRex, rex != 0);
  }

  // Emit instruction opcodes.
  writer.emitMMAndOpcode(opcode.v);
  writer.emitImmediate(uint64_t(immValue), immSize);
  goto EmitDone;
3605
EmitX86OpReg:
  // Opcode with the register id encoded in its low 3 bits (short forms like
  // 'push r' / 'mov r, imm') - no ModRM byte is emitted.
  // Emit mandatory instruction prefix.
  writer.emitPP(opcode.v);

  // Emit REX prefix (64-bit only).
  {
    uint32_t rex = opcode.extractRex(options) | (opReg >> 3); // Rex.B (0x01).
    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
      goto InvalidRexPrefix;
    rex &= ~kX86ByteInvalidRex & 0xFF;
    writer.emit8If(rex | kX86ByteRex, rex != 0);

    // Only the low 3 bits go into the opcode; the 4th bit went to REX.B.
    opReg &= 0x7;
  }

  // Emit instruction opcodes.
  opcode += opReg;
  writer.emitMMAndOpcode(opcode.v);
  writer.emitImmediate(uint64_t(immValue), immSize);
  goto EmitDone;
3626
EmitX86OpImplicitMem:
  // Instruction with an implicit memory operand - the operand only selects
  // segment/address-size override prefixes and is not encoded in ModRM.
  // NOTE: Don't change the emit order here, it's compatible with KeyStone/LLVM.
  rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
  // Implicit memory operands must have neither an offset nor an index register.
  if (ASMJIT_UNLIKELY(rmRel->as<Mem>().hasOffset() || (rmInfo & kX86MemInfo_Index)))
    goto InvalidInstruction;

  // Emit mandatory instruction prefix.
  writer.emitPP(opcode.v);

  // Emit REX prefix (64-bit only).
  {
    uint32_t rex = opcode.extractRex(options);
    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
      goto InvalidRexPrefix;
    rex &= ~kX86ByteInvalidRex & 0xFF;
    writer.emit8If(rex | kX86ByteRex, rex != 0);
  }

  writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
  writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);

  // Emit instruction opcodes.
  writer.emitMMAndOpcode(opcode.v);
  writer.emitImmediate(uint64_t(immValue), immSize);
  goto EmitDone;
3652
EmitX86R:
  // Register-to-register form - prefixes, opcode, then a ModRM byte with
  // MOD=3 (register addressing).
  // Mandatory instruction prefix.
  writer.emitPP(opcode.v);

  // Rex prefix (64-bit only).
  {
    uint32_t rex = opcode.extractRex(options) |
                   ((opReg & 0x08) >> 1) | // REX.R (0x04).
                   ((rbReg       ) >> 3) ; // REX.B (0x01).

    if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
      goto InvalidRexPrefix;
    rex &= ~kX86ByteInvalidRex & 0xFF;
    writer.emit8If(rex | kX86ByteRex, rex != 0);

    // High bits moved to REX; ModRM fields are 3 bits each.
    opReg &= 0x07;
    rbReg &= 0x07;
  }

  // Instruction opcodes.
  writer.emitMMAndOpcode(opcode.v);
  // ModR.
  writer.emit8(x86EncodeMod(3, opReg, rbReg));
  writer.emitImmediate(uint64_t(immValue), immSize);
  goto EmitDone;
3678
3679 EmitX86M:
3680 // `rmRel` operand must be memory.
3681 ASMJIT_ASSERT(rmRel != nullptr);
3682 ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem);
3683 ASMJIT_ASSERT((opcode & Opcode::kCDSHL_Mask) == 0);
3684
3685 rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
3686 writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
3687
3688 memOpAOMark = writer.cursor();
3689 writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
3690
3691 // Mandatory instruction prefix.
3692 writer.emitPP(opcode.v);
3693
3694 rbReg = rmRel->as<Mem>().baseId();
3695 rxReg = rmRel->as<Mem>().indexId();
3696
3697 // REX prefix (64-bit only).
3698 {
3699 uint32_t rex;
3700
3701 rex = (rbReg >> 3) & 0x01; // REX.B (0x01).
3702 rex |= (rxReg >> 2) & 0x02; // REX.X (0x02).
3703 rex |= (opReg >> 1) & 0x04; // REX.R (0x04).
3704
3705 rex &= rmInfo;
3706 rex |= opcode.extractRex(options);
3707
3708 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
3709 goto InvalidRexPrefix;
3710 rex &= ~kX86ByteInvalidRex & 0xFF;
3711 writer.emit8If(rex | kX86ByteRex, rex != 0);
3712
3713 opReg &= 0x07;
3714 }
3715
3716 // Instruction opcodes.
3717 writer.emitMMAndOpcode(opcode.v);
3718 // ... Fall through ...
3719
3720 // --------------------------------------------------------------------------
3721 // [Emit - MOD/SIB]
3722 // --------------------------------------------------------------------------
3723
3724 EmitModSib:
3725 if (!(rmInfo & (kX86MemInfo_Index | kX86MemInfo_67H_X86))) {
3726 // ==========|> [BASE + DISP8|DISP32].
3727 if (rmInfo & kX86MemInfo_BaseGp) {
3728 rbReg &= 0x7;
3729 relOffset = rmRel->as<Mem>().offsetLo32();
3730
3731 uint32_t mod = x86EncodeMod(0, opReg, rbReg);
3732 if (rbReg == Gp::kIdSp) {
3733 // [XSP|R12].
3734 if (relOffset == 0) {
3735 writer.emit8(mod);
3736 writer.emit8(x86EncodeSib(0, 4, 4));
3737 }
3738 // [XSP|R12 + DISP8|DISP32].
3739 else {
3740 uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
3741 int32_t cdOffset = relOffset >> cdShift;
3742
3743 if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
3744 writer.emit8(mod + 0x40); // <- MOD(1, opReg, rbReg).
3745 writer.emit8(x86EncodeSib(0, 4, 4));
3746 writer.emit8(cdOffset & 0xFF);
3747 }
3748 else {
3749 writer.emit8(mod + 0x80); // <- MOD(2, opReg, rbReg).
3750 writer.emit8(x86EncodeSib(0, 4, 4));
3751 writer.emit32uLE(uint32_t(relOffset));
3752 }
3753 }
3754 }
3755 else if (rbReg != Gp::kIdBp && relOffset == 0) {
3756 // [BASE].
3757 writer.emit8(mod);
3758 }
3759 else {
3760 // [BASE + DISP8|DISP32].
3761 uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
3762 int32_t cdOffset = relOffset >> cdShift;
3763
3764 if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
3765 writer.emit8(mod + 0x40);
3766 writer.emit8(cdOffset & 0xFF);
3767 }
3768 else {
3769 writer.emit8(mod + 0x80);
3770 writer.emit32uLE(uint32_t(relOffset));
3771 }
3772 }
3773 }
3774 // ==========|> [ABSOLUTE | DISP32].
3775 else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
3776 uint32_t addrType = rmRel->as<Mem>().addrType();
3777 relOffset = rmRel->as<Mem>().offsetLo32();
3778
3779 if (is32Bit()) {
3780 // Explicit relative addressing doesn't work in 32-bit mode.
3781 if (ASMJIT_UNLIKELY(addrType == BaseMem::kAddrTypeRel))
3782 goto InvalidAddress;
3783
3784 writer.emit8(x86EncodeMod(0, opReg, 5));
3785 writer.emit32uLE(uint32_t(relOffset));
3786 }
3787 else {
3788 bool isOffsetI32 = rmRel->as<Mem>().offsetHi32() == (relOffset >> 31);
3789 bool isOffsetU32 = rmRel->as<Mem>().offsetHi32() == 0;
3790 uint64_t baseAddress = codeInfo().baseAddress();
3791
3792 // If relative addressing was not explicitly set then we can try to guess.
3793 // By guessing we check some properties of the memory operand and try to
3794 // base the decision on the segment prefix and the address type.
3795 if (addrType == BaseMem::kAddrTypeDefault) {
3796 if (baseAddress == Globals::kNoBaseAddress) {
3797 // Prefer absolute addressing mode if the offset is 32-bit.
3798 addrType = isOffsetI32 || isOffsetU32 ? BaseMem::kAddrTypeAbs
3799 : BaseMem::kAddrTypeRel;
3800 }
3801 else {
3802 // Prefer absolute addressing mode if FS|GS segment override is present.
3803 bool hasFsGs = rmRel->as<Mem>().segmentId() >= SReg::kIdFs;
3804 // Prefer absolute addressing mode if this is LEA with 32-bit immediate.
3805 bool isLea32 = (instId == Inst::kIdLea) && (isOffsetI32 || isOffsetU32);
3806
3807 addrType = hasFsGs || isLea32 ? BaseMem::kAddrTypeAbs
3808 : BaseMem::kAddrTypeRel;
3809 }
3810 }
3811
3812 if (addrType == BaseMem::kAddrTypeRel) {
3813 uint32_t kModRel32Size = 5;
3814 uint64_t virtualOffset = uint64_t(writer.offsetFrom(_bufferData)) + immSize + kModRel32Size;
3815
3816 if (baseAddress == Globals::kNoBaseAddress) {
3817 // Create a new RelocEntry as we cannot calculate the offset right now.
3818 err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel, 4);
3819 if (ASMJIT_UNLIKELY(err))
3820 goto Failed;
3821
3822 writer.emit8(x86EncodeMod(0, opReg, 5));
3823 writer.emit32uLE(0);
3824
3825 re->_sourceSectionId = _section->id();
3826 re->_sourceOffset = offset();
3827 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 4);
3828 re->_trailingSize = uint8_t(immSize);
3829 re->_payload = uint64_t(rmRel->as<Mem>().offset());
3830
3831 writer.emitImmediate(uint64_t(immValue), immSize);
3832 goto EmitDone;
3833 }
3834 else {
3835 uint64_t rip64 = baseAddress + _section->offset() + virtualOffset;
3836 uint64_t rel64 = uint64_t(rmRel->as<Mem>().offset()) - rip64;
3837
3838 if (Support::isInt32(int64_t(rel64))) {
3839 writer.emit8(x86EncodeMod(0, opReg, 5));
3840 writer.emit32uLE(uint32_t(rel64 & 0xFFFFFFFFu));
3841 writer.emitImmediate(uint64_t(immValue), immSize);
3842 goto EmitDone;
3843 }
3844 else {
3845 // We must check the original address type as we have modified
3846 // `addrType`. We failed if the original address type is 'rel'.
3847 if (ASMJIT_UNLIKELY(rmRel->as<Mem>().isRel()))
3848 goto InvalidAddress;
3849 }
3850 }
3851 }
3852
3853 // Handle unsigned 32-bit address that doesn't work with sign extension.
3854 // Consider the following instructions:
3855 //
3856 // 1. lea rax, [-1] - Sign extended to 0xFFFFFFFFFFFFFFFF
3857 // 2. lea rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
3858 // 3. add rax, [-1] - Sign extended to 0xFFFFFFFFFFFFFFFF
3859 // 4. add rax, [0xFFFFFFFF] - Zero extended to 0x00000000FFFFFFFF
3860 //
3861 // Sign extension is naturally performed by the CPU so we don't have to
3862 // bother, however, zero extension requires address-size override prefix,
3863 // which we probably don't have at this moment. So to make the address
3864 // valid we need to insert it at `memOpAOMark` if it's not already there.
3865 //
3866 // If this is 'lea' instruction then it's possible to remove REX.W part
3867 // from REX prefix (if it's there), which would be one-byte shorter than
3868 // inserting address-size override.
3869 //
3870 // NOTE: If we don't do this then these instructions are unencodable.
3871 if (!isOffsetI32) {
3872 // 64-bit absolute address is unencodable.
3873 if (ASMJIT_UNLIKELY(!isOffsetU32))
3874 goto InvalidAddress64Bit;
3875
3876 // We only patch the existing code if we don't have address-size override.
3877 if (*memOpAOMark != 0x67) {
3878 if (instId == Inst::kIdLea) {
3879 // LEA: Remove REX.W, if present. This is easy as we know that 'lea'
3880 // doesn't use any PP prefix so if REX prefix was emitted it would be
3881 // at `memOpAOMark`.
3882 uint32_t rex = *memOpAOMark;
3883 if (rex & kX86ByteRex) {
3884 rex &= (~kX86ByteRexW) & 0xFF;
3885 *memOpAOMark = uint8_t(rex);
3886
3887 // We can remove the REX prefix completely if it was not forced.
3888 if (rex == kX86ByteRex && !(options & Inst::kOptionRex))
3889 writer.remove8(memOpAOMark);
3890 }
3891 }
3892 else {
3893 // Any other instruction: Insert address-size override prefix.
3894 writer.insert8(memOpAOMark, 0x67);
3895 }
3896 }
3897 }
3898
3899 // Emit 32-bit absolute address.
3900 writer.emit8(x86EncodeMod(0, opReg, 4));
3901 writer.emit8(x86EncodeSib(0, 4, 5));
3902 writer.emit32uLE(uint32_t(relOffset));
3903 }
3904 }
3905 // ==========|> [LABEL|RIP + DISP32]
3906 else {
3907 writer.emit8(x86EncodeMod(0, opReg, 5));
3908
3909 if (is32Bit()) {
3910 EmitModSib_LabelRip_X86:
3911 if (ASMJIT_UNLIKELY(_code->_relocations.willGrow(_code->allocator()) != kErrorOk))
3912 goto OutOfMemory;
3913
3914 relOffset = rmRel->as<Mem>().offsetLo32();
3915 if (rmInfo & kX86MemInfo_BaseLabel) {
3916 // [LABEL->ABS].
3917 label = _code->labelEntry(rmRel->as<Mem>().baseId());
3918 if (ASMJIT_UNLIKELY(!label))
3919 goto InvalidLabel;
3920
3921 err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4);
3922 if (ASMJIT_UNLIKELY(err))
3923 goto Failed;
3924
3925 re->_sourceSectionId = _section->id();
3926 re->_sourceOffset = offset();
3927 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr));
3928 re->_trailingSize = uint8_t(immSize);
3929 re->_payload = uint64_t(int64_t(relOffset));
3930
3931 if (label->isBound()) {
3932 // Label bound to the current section.
3933 re->_payload += label->offset();
3934 re->_targetSectionId = label->section()->id();
3935 writer.emit32uLE(0);
3936 }
3937 else {
3938 // Non-bound label or label bound to a different section.
3939 relOffset = -4 - immSize;
3940 relSize = 4;
3941 goto EmitRel;
3942 }
3943 }
3944 else {
3945 // [RIP->ABS].
3946 err = _code->newRelocEntry(&re, RelocEntry::kTypeRelToAbs, 4);
3947 if (ASMJIT_UNLIKELY(err))
3948 goto Failed;
3949
3950 re->_sourceSectionId = _section->id();
3951 re->_targetSectionId = _section->id();
3952 re->_sourceOffset = offset();
3953 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr));
3954 re->_trailingSize = uint8_t(immSize);
3955 re->_payload = re->_sourceOffset + re->_leadingSize + 4 + re->_trailingSize + uint64_t(int64_t(relOffset));
3956
3957 writer.emit32uLE(0);
3958 }
3959 }
3960 else {
3961 relOffset = rmRel->as<Mem>().offsetLo32();
3962 if (rmInfo & kX86MemInfo_BaseLabel) {
3963 // [RIP].
3964 label = _code->labelEntry(rmRel->as<Mem>().baseId());
3965 if (ASMJIT_UNLIKELY(!label))
3966 goto InvalidLabel;
3967
3968 relOffset -= (4 + immSize);
3969 if (label->isBoundTo(_section)) {
3970 // Label bound to the current section.
3971 relOffset += int32_t(label->offset() - writer.offsetFrom(_bufferData));
3972 writer.emit32uLE(uint32_t(relOffset));
3973 }
3974 else {
3975 // Non-bound label or label bound to a different section.
3976 relSize = 4;
3977 goto EmitRel;
3978 }
3979 }
3980 else {
3981 // [RIP].
3982 writer.emit32uLE(uint32_t(relOffset));
3983 }
3984 }
3985 }
3986 }
3987 else if (!(rmInfo & kX86MemInfo_67H_X86)) {
3988 // ESP|RSP can't be used as INDEX in pure SIB mode, however, VSIB mode
3989 // allows XMM4|YMM4|ZMM4 (that's why the check is before the label).
3990 if (ASMJIT_UNLIKELY(rxReg == Gp::kIdSp))
3991 goto InvalidAddressIndex;
3992
3993 EmitModVSib:
3994 rxReg &= 0x7;
3995
3996 // ==========|> [BASE + INDEX + DISP8|DISP32].
3997 if (rmInfo & kX86MemInfo_BaseGp) {
3998 rbReg &= 0x7;
3999 relOffset = rmRel->as<Mem>().offsetLo32();
4000
4001 uint32_t mod = x86EncodeMod(0, opReg, 4);
4002 uint32_t sib = x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, rbReg);
4003
4004 if (relOffset == 0 && rbReg != Gp::kIdBp) {
4005 // [BASE + INDEX << SHIFT].
4006 writer.emit8(mod);
4007 writer.emit8(sib);
4008 }
4009 else {
4010 uint32_t cdShift = (opcode & Opcode::kCDSHL_Mask) >> Opcode::kCDSHL_Shift;
4011 int32_t cdOffset = relOffset >> cdShift;
4012
4013 if (Support::isInt8(cdOffset) && relOffset == int32_t(uint32_t(cdOffset) << cdShift)) {
4014 // [BASE + INDEX << SHIFT + DISP8].
4015 writer.emit8(mod + 0x40); // <- MOD(1, opReg, 4).
4016 writer.emit8(sib);
4017 writer.emit8(uint32_t(cdOffset));
4018 }
4019 else {
4020 // [BASE + INDEX << SHIFT + DISP32].
4021 writer.emit8(mod + 0x80); // <- MOD(2, opReg, 4).
4022 writer.emit8(sib);
4023 writer.emit32uLE(uint32_t(relOffset));
4024 }
4025 }
4026 }
4027 // ==========|> [INDEX + DISP32].
4028 else if (!(rmInfo & (kX86MemInfo_BaseLabel | kX86MemInfo_BaseRip))) {
4029 // [INDEX << SHIFT + DISP32].
4030 writer.emit8(x86EncodeMod(0, opReg, 4));
4031 writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
4032
4033 relOffset = rmRel->as<Mem>().offsetLo32();
4034 writer.emit32uLE(uint32_t(relOffset));
4035 }
4036 // ==========|> [LABEL|RIP + INDEX + DISP32].
4037 else {
4038 if (is32Bit()) {
4039 writer.emit8(x86EncodeMod(0, opReg, 4));
4040 writer.emit8(x86EncodeSib(rmRel->as<Mem>().shift(), rxReg, 5));
4041 goto EmitModSib_LabelRip_X86;
4042 }
4043 else {
4044 // NOTE: This also handles VSIB+RIP, which is not allowed in 64-bit mode.
4045 goto InvalidAddress;
4046 }
4047 }
4048 }
4049 else {
4050 // 16-bit address mode (32-bit mode with 67 override prefix).
4051 relOffset = (int32_t(rmRel->as<Mem>().offsetLo32()) << 16) >> 16;
4052
4053 // NOTE: 16-bit addresses don't use SIB byte and their encoding differs. We
4054 // use a table-based approach to calculate the proper MOD byte as it's easier.
4055 // Also, not all BASE [+ INDEX] combinations are supported in 16-bit mode, so
4056 // this may fail.
4057 const uint32_t kBaseGpIdx = (kX86MemInfo_BaseGp | kX86MemInfo_Index);
4058
4059 if (rmInfo & kBaseGpIdx) {
4060 // ==========|> [BASE + INDEX + DISP16].
4061 uint32_t mod;
4062
4063 rbReg &= 0x7;
4064 rxReg &= 0x7;
4065
4066 if ((rmInfo & kBaseGpIdx) == kBaseGpIdx) {
4067 uint32_t shf = rmRel->as<Mem>().shift();
4068 if (ASMJIT_UNLIKELY(shf != 0))
4069 goto InvalidAddress;
4070 mod = x86Mod16BaseIndexTable[(rbReg << 3) + rxReg];
4071 }
4072 else {
4073 if (rmInfo & kX86MemInfo_Index)
4074 rbReg = rxReg;
4075 mod = x86Mod16BaseTable[rbReg];
4076 }
4077
4078 if (ASMJIT_UNLIKELY(mod == 0xFF))
4079 goto InvalidAddress;
4080
4081 mod += opReg << 3;
4082 if (relOffset == 0 && mod != 0x06) {
4083 writer.emit8(mod);
4084 }
4085 else if (Support::isInt8(relOffset)) {
4086 writer.emit8(mod + 0x40);
4087 writer.emit8(uint32_t(relOffset));
4088 }
4089 else {
4090 writer.emit8(mod + 0x80);
4091 writer.emit16uLE(uint32_t(relOffset));
4092 }
4093 }
4094 else {
4095 // Not supported in 16-bit addresses.
4096 if (rmInfo & (kX86MemInfo_BaseRip | kX86MemInfo_BaseLabel))
4097 goto InvalidAddress;
4098
4099 // ==========|> [DISP16].
4100 writer.emit8(opReg | 0x06);
4101 writer.emit16uLE(uint32_t(relOffset));
4102 }
4103 }
4104
4105 writer.emitImmediate(uint64_t(immValue), immSize);
4106 goto EmitDone;
4107
4108 // --------------------------------------------------------------------------
4109 // [Emit - FPU]
4110 // --------------------------------------------------------------------------
4111
EmitFpuOp:
  // x87 FPU path - no ModRM byte and no immediate are emitted here.
  // Mandatory instruction prefix.
  writer.emitPP(opcode.v);

  // FPU instructions consist of two opcodes.
  writer.emit8(opcode.v >> Opcode::kFPU_2B_Shift);
  writer.emit8(opcode.v);
  goto EmitDone;
4120
4121 // --------------------------------------------------------------------------
4122 // [Emit - VEX / EVEX]
4123 // --------------------------------------------------------------------------
4124
EmitVexEvexOp:
  {
    // These don't use immediate.
    ASMJIT_ASSERT(immSize == 0);

    // Only 'vzeroall' and 'vzeroupper' instructions use this encoding, they
    // don't define 'W' to be '1' so we can just check the 'mmmmm' field. Both
    // functions can encode by using VEX2 prefix so VEX3 is basically only used
    // when specified as instruction option.
    ASMJIT_ASSERT((opcode & Opcode::kW) == 0);

    uint32_t x = ((opcode & Opcode::kMM_Mask      ) >> (Opcode::kMM_Shift     )) |
                 ((opcode & Opcode::kLL_Mask      ) >> (Opcode::kLL_Shift - 10)) |
                 ((opcode & Opcode::kPP_VEXMask   ) >> (Opcode::kPP_Shift -  8)) |
                 ((options & Inst::kOptionVex3    ) >> (Opcode::kMM_Shift     )) ;
    // Bit 0x04 is set either by 'mmmmm' != 1 or by the VEX3 option.
    if (x & 0x04u) {
      x  = (x & (0x4 ^ 0xFFFF)) << 8;                    // [00000000|00000Lpp|0000m0mm|00000000].
      x ^= (kX86ByteVex3) |                              // [........|00000Lpp|0000m0mm|__VEX3__].
           (0x07u  << 13) |                              // [........|00000Lpp|1110m0mm|__VEX3__].
           (0x0Fu  << 19) |                              // [........|01111Lpp|1110m0mm|__VEX3__].
           (opcode << 24) ;                              // [_OPCODE_|01111Lpp|1110m0mm|__VEX3__].

      writer.emit32uLE(x);
      goto EmitDone;
    }
    else {
      // VEX2 - fold the third prefix byte into the second and invert the
      // inverted-by-definition bits (R|vvvv|L default to 1s).
      x = ((x >> 8) ^ x) ^ 0xF9;
      writer.emit8(kX86ByteVex2);
      writer.emit8(x);
      writer.emit8(opcode.v);
      goto EmitDone;
    }
  }
4158
4159 EmitVexEvexR:
4160 {
4161 // Construct `x` - a complete EVEX|VEX prefix.
4162 uint32_t x = ((opReg << 4) & 0xF980u) | // [........|........|Vvvvv..R|R.......].
4163 ((rbReg << 2) & 0x0060u) | // [........|........|........|.BB.....].
4164 (opcode.extractLLMM(options)) | // [........|.LL.....|Vvvvv..R|RBBmmmmm].
4165 (_extraReg.id() << 16); // [........|.LL..aaa|Vvvvv..R|RBBmmmmm].
4166 opReg &= 0x7;
4167
4168 // Handle AVX512 options by a single branch.
4169 const uint32_t kAvx512Options = Inst::kOptionZMask | Inst::kOptionER | Inst::kOptionSAE;
4170 if (options & kAvx512Options) {
4171 uint32_t kBcstMask = 0x1 << 20;
4172 uint32_t kLLMask10 = 0x2 << 21;
4173 uint32_t kLLMask11 = 0x3 << 21;
4174
4175 // Designed to be easily encodable so the position must be exact.
4176 // The {rz-sae} is encoded as {11}, so it should match the mask.
4177 ASMJIT_ASSERT(Inst::kOptionRZ_SAE == kLLMask11);
4178
4179 x |= options & Inst::kOptionZMask; // [........|zLLb.aaa|Vvvvv..R|RBBmmmmm].
4180
4181 // Support embedded-rounding {er} and suppress-all-exceptions {sae}.
4182 if (options & (Inst::kOptionER | Inst::kOptionSAE)) {
4183 // Embedded rounding is only encodable if the instruction is either
4184 // scalar or it's a 512-bit operation as the {er} rounding predicate
4185 // collides with LL part of the instruction.
4186 if ((x & kLLMask11) != kLLMask10) {
4187 // Ok, so LL is not 10, thus the instruction must be scalar.
4188 // Scalar instructions don't support broadcast so if this
4189 // instruction supports it {er} nor {sae} would be encodable.
4190 if (ASMJIT_UNLIKELY(commonInfo->hasAvx512B()))
4191 goto InvalidEROrSAE;
4192 }
4193
4194 if (options & Inst::kOptionER) {
4195 if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512ER()))
4196 goto InvalidEROrSAE;
4197
4198 x &=~kLLMask11; // [........|.00..aaa|Vvvvv..R|RBBmmmmm].
4199 x |= kBcstMask | (options & kLLMask11); // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
4200 }
4201 else {
4202 if (ASMJIT_UNLIKELY(!commonInfo->hasAvx512SAE()))
4203 goto InvalidEROrSAE;
4204
4205 x |= kBcstMask; // [........|.LLb.aaa|Vvvvv..R|RBBmmmmm].
4206 }
4207 }
4208 }
4209
4210 // Check if EVEX is required by checking bits in `x` : [........|xx.x.xxx|x......x|.x.x....].
4211 if (x & 0x00D78150u) {
4212 uint32_t y = ((x << 4) & 0x00080000u) | // [........|...bV...|........|........].
4213 ((x >> 4) & 0x00000010u) ; // [........|...bV...|........|...R....].
4214 x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RBBR00mm].
4215 x = x << 8; // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
4216 x |= (opcode >> kVSHR_W ) & 0x00800000u; // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
4217 x |= (opcode >> kVSHR_PP_EW) & 0x00830000u; // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000] (added PP and EVEX.W).
4218 // _ ____ ____
4219 x ^= 0x087CF000u | kX86ByteEvex; // [zLLbVaaa|Wvvvv1pp|RBBR00mm|01100010].
4220
4221 writer.emit32uLE(x);
4222 writer.emit8(opcode.v);
4223
4224 rbReg &= 0x7;
4225 writer.emit8(x86EncodeMod(3, opReg, rbReg));
4226 writer.emitImmByteOrDWord(immValue, immSize);
4227 goto EmitDone;
4228 }
4229
4230 // Not EVEX, prepare `x` for VEX2 or VEX3: x = [........|00L00000|0vvvv000|R0B0mmmm].
4231 x |= ((opcode >> (kVSHR_W + 8)) & 0x8000u) | // [00000000|00L00000|Wvvvv000|R0B0mmmm].
4232 ((opcode >> (kVSHR_PP + 8)) & 0x0300u) | // [00000000|00L00000|0vvvv0pp|R0B0mmmm].
4233 ((x >> 11 ) & 0x0400u) ; // [00000000|00L00000|WvvvvLpp|R0B0mmmm].
4234
4235 // Check if VEX3 is required / forced: [........|........|x.......|..x..x..].
4236 if (x & 0x0008024u) {
4237 uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
4238
4239 // Clear 'FORCE-VEX3' bit and all high bits.
4240 x = (x & (0x4 ^ 0xFFFF)) << 8; // [00000000|WvvvvLpp|R0B0m0mm|00000000].
4241 // ____ _ _
4242 x ^= xorMsk; // [_OPCODE_|WvvvvLpp|R1Bmmmmm|VEX3|XOP].
4243 writer.emit32uLE(x);
4244
4245 rbReg &= 0x7;
4246 writer.emit8(x86EncodeMod(3, opReg, rbReg));
4247 writer.emitImmByteOrDWord(immValue, immSize);
4248 goto EmitDone;
4249 }
4250 else {
4251 // 'mmmmm' must be '00001'.
4252 ASMJIT_ASSERT((x & 0x1F) == 0x01);
4253
4254 x = ((x >> 8) ^ x) ^ 0xF9;
4255 writer.emit8(kX86ByteVex2);
4256 writer.emit8(x);
4257 writer.emit8(opcode.v);
4258
4259 rbReg &= 0x7;
4260 writer.emit8(x86EncodeMod(3, opReg, rbReg));
4261 writer.emitImmByteOrDWord(immValue, immSize);
4262 goto EmitDone;
4263 }
4264 }
4265
4266 EmitVexEvexM:
4267 ASMJIT_ASSERT(rmRel != nullptr);
4268 ASMJIT_ASSERT(rmRel->opType() == Operand::kOpMem);
4269
4270 rmInfo = x86MemInfo[rmRel->as<Mem>().baseAndIndexTypes()];
4271 writer.emitSegmentOverride(rmRel->as<Mem>().segmentId());
4272
4273 memOpAOMark = writer.cursor();
4274 writer.emitAddressOverride((rmInfo & _addressOverrideMask()) != 0);
4275
4276 rbReg = rmRel->as<Mem>().hasBaseReg() ? rmRel->as<Mem>().baseId() : uint32_t(0);
4277 rxReg = rmRel->as<Mem>().hasIndexReg() ? rmRel->as<Mem>().indexId() : uint32_t(0);
4278
4279 {
4280 uint32_t broadcastBit = uint32_t(rmRel->as<Mem>().hasBroadcast());
4281
4282 // Construct `x` - a complete EVEX|VEX prefix.
4283 uint32_t x = ((opReg << 4) & 0x0000F980u) | // [........|........|Vvvvv..R|R.......].
4284 ((rxReg << 3) & 0x00000040u) | // [........|........|........|.X......].
4285 ((rxReg << 15) & 0x00080000u) | // [........|....X...|........|........].
4286 ((rbReg << 2) & 0x00000020u) | // [........|........|........|..B.....].
4287 opcode.extractLLMM(options) | // [........|.LL.X...|Vvvvv..R|RXBmmmmm].
4288 (_extraReg.id() << 16) | // [........|.LL.Xaaa|Vvvvv..R|RXBmmmmm].
4289 (broadcastBit << 20) ; // [........|.LLbXaaa|Vvvvv..R|RXBmmmmm].
4290 opReg &= 0x07u;
4291
4292 // Mark invalid VEX (force EVEX) case: // [@.......|.LLbXaaa|Vvvvv..R|RXBmmmmm].
4293 x |= (~commonInfo->flags() & InstDB::kFlagVex) << (31 - Support::constCtz(InstDB::kFlagVex));
4294
4295 // Handle AVX512 options by a single branch.
4296 const uint32_t kAvx512Options = Inst::kOptionZMask |
4297 Inst::kOptionER |
4298 Inst::kOptionSAE ;
4299 if (options & kAvx512Options) {
4300 // {er} and {sae} are both invalid if memory operand is used.
4301 if (ASMJIT_UNLIKELY(options & (Inst::kOptionER | Inst::kOptionSAE)))
4302 goto InvalidEROrSAE;
4303
4304 x |= options & (Inst::kOptionZMask); // [@.......|zLLbXaaa|Vvvvv..R|RXBmmmmm].
4305 }
4306
4307 // Check if EVEX is required by checking bits in `x` : [@.......|xx.xxxxx|x......x|...x....].
4308 if (x & 0x80DF8110u) {
4309 uint32_t y = ((x << 4) & 0x00080000u) | // [@.......|....V...|........|........].
4310 ((x >> 4) & 0x00000010u) ; // [@.......|....V...|........|...R....].
4311 x = (x & 0x00FF78E3u) | y; // [........|zLLbVaaa|0vvvv000|RXBR00mm].
4312 x = x << 8; // [zLLbVaaa|0vvvv000|RBBR00mm|00000000].
4313 x |= (opcode >> kVSHR_W ) & 0x00800000u; // [zLLbVaaa|Wvvvv000|RBBR00mm|00000000].
4314 x |= (opcode >> kVSHR_PP_EW) & 0x00830000u; // [zLLbVaaa|Wvvvv0pp|RBBR00mm|00000000] (added PP and EVEX.W).
4315 // _ ____ ____
4316 x ^= 0x087CF000u | kX86ByteEvex; // [zLLbVaaa|Wvvvv1pp|RBBR00mm|01100010].
4317
4318 writer.emit32uLE(x);
4319 writer.emit8(opcode.v);
4320
4321 if (x & 0x10000000u) {
4322 // Broadcast, change the compressed displacement scale to either x4 (SHL 2) or x8 (SHL 3)
4323 // depending on instruction's W. If 'W' is 1 'SHL' must be 3, otherwise it must be 2.
4324 opcode &=~uint32_t(Opcode::kCDSHL_Mask);
4325 opcode |= ((x & 0x00800000u) ? 3u : 2u) << Opcode::kCDSHL_Shift;
4326 }
4327 else {
4328 // Add the compressed displacement 'SHF' to the opcode based on 'TTWLL'.
4329 // The index to `x86CDisp8SHL` is composed as `CDTT[4:3] | W[2] | LL[1:0]`.
4330 uint32_t TTWLL = ((opcode >> (Opcode::kCDTT_Shift - 3)) & 0x18) +
4331 ((opcode >> (Opcode::kW_Shift - 2)) & 0x04) +
4332 ((x >> 29) & 0x3);
4333 opcode += x86CDisp8SHL[TTWLL];
4334 }
4335 }
4336 else {
4337 // Not EVEX, prepare `x` for VEX2 or VEX3: x = [........|00L00000|0vvvv000|RXB0mmmm].
4338 x |= ((opcode >> (kVSHR_W + 8)) & 0x8000u) | // [00000000|00L00000|Wvvvv000|RXB0mmmm].
4339 ((opcode >> (kVSHR_PP + 8)) & 0x0300u) | // [00000000|00L00000|Wvvvv0pp|RXB0mmmm].
4340 ((x >> 11 ) & 0x0400u) ; // [00000000|00L00000|WvvvvLpp|RXB0mmmm].
4341
4342 // Clear a possible CDisp specified by EVEX.
4343 opcode &= ~Opcode::kCDSHL_Mask;
4344
4345 // Check if VEX3 is required / forced: [........|........|x.......|.xx..x..].
4346 if (x & 0x0008064u) {
4347 uint32_t xorMsk = x86VEXPrefix[x & 0xF] | (opcode << 24);
4348
4349 // Clear 'FORCE-VEX3' bit and all high bits.
4350 x = (x & (0x4 ^ 0xFFFF)) << 8; // [00000000|WvvvvLpp|RXB0m0mm|00000000].
4351 // ____ ___
4352 x ^= xorMsk; // [_OPCODE_|WvvvvLpp|RXBmmmmm|VEX3_XOP].
4353 writer.emit32uLE(x);
4354 }
4355 else {
4356 // 'mmmmm' must be '00001'.
4357 ASMJIT_ASSERT((x & 0x1F) == 0x01);
4358
4359 x = ((x >> 8) ^ x) ^ 0xF9;
4360 writer.emit8(kX86ByteVex2);
4361 writer.emit8(x);
4362 writer.emit8(opcode.v);
4363 }
4364 }
4365 }
4366
4367 // MOD|SIB address.
4368 if (!commonInfo->hasFlag(InstDB::kFlagVsib))
4369 goto EmitModSib;
4370
4371 // MOD|VSIB address without INDEX is invalid.
4372 if (rmInfo & kX86MemInfo_Index)
4373 goto EmitModVSib;
4374 goto InvalidInstruction;
4375
4376 // --------------------------------------------------------------------------
4377 // [Emit - Jmp/Jcc/Call]
4378 // --------------------------------------------------------------------------
4379
4380 EmitJmpCall:
4381 {
4382 // Emit REX prefix if asked for (64-bit only).
4383 uint32_t rex = opcode.extractRex(options);
4384 if (ASMJIT_UNLIKELY(x86IsRexInvalid(rex)))
4385 goto InvalidRexPrefix;
4386 rex &= ~kX86ByteInvalidRex & 0xFF;
4387 writer.emit8If(rex | kX86ByteRex, rex != 0);
4388
4389 uint64_t ip = uint64_t(writer.offsetFrom(_bufferData));
4390 uint32_t rel32 = 0;
4391 uint32_t opCode8 = x86AltOpcodeOf(instInfo);
4392
4393 uint32_t inst8Size = 1 + 1; // OPCODE + REL8 .
4394 uint32_t inst32Size = 1 + 4; // [PREFIX] OPCODE + REL32.
4395
4396 // Jcc instructions with 32-bit displacement use 0x0F prefix,
4397 // other instructions don't. No other prefixes are used by X86.
4398 ASMJIT_ASSERT((opCode8 & Opcode::kMM_Mask) == 0);
4399 ASMJIT_ASSERT((opcode & Opcode::kMM_Mask) == 0 ||
4400 (opcode & Opcode::kMM_Mask) == Opcode::kMM_0F);
4401
4402 // Only one of these should be used at the same time.
4403 inst32Size += uint32_t(opReg != 0);
4404 inst32Size += uint32_t((opcode & Opcode::kMM_Mask) == Opcode::kMM_0F);
4405
4406 if (rmRel->isLabel()) {
4407 label = _code->labelEntry(rmRel->as<Label>());
4408 if (ASMJIT_UNLIKELY(!label))
4409 goto InvalidLabel;
4410
4411 if (label->isBoundTo(_section)) {
4412 // Label bound to the current section.
4413 rel32 = uint32_t((label->offset() - ip - inst32Size) & 0xFFFFFFFFu);
4414 goto EmitJmpCallRel;
4415 }
4416 else {
4417 // Non-bound label or label bound to a different section.
4418 if (opCode8 && (!opcode.v || (options & Inst::kOptionShortForm))) {
4419 writer.emit8(opCode8);
4420
4421 // Record DISP8 (non-bound label).
4422 relOffset = -1;
4423 relSize = 1;
4424 goto EmitRel;
4425 }
4426 else {
4427 // Refuse also 'short' prefix, if specified.
4428 if (ASMJIT_UNLIKELY(!opcode.v || (options & Inst::kOptionShortForm) != 0))
4429 goto InvalidDisplacement;
4430
4431 writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0);// Emit 0F prefix.
4432 writer.emit8(opcode.v); // Emit opcode.
4433 writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
4434
4435 // Record DISP32 (non-bound label).
4436 relOffset = -4;
4437 relSize = 4;
4438 goto EmitRel;
4439 }
4440 }
4441 }
4442
4443 if (rmRel->isImm()) {
4444 uint64_t baseAddress = codeInfo().baseAddress();
4445 uint64_t jumpAddress = rmRel->as<Imm>().u64();
4446
4447 // If the base-address is known calculate a relative displacement and
4448 // check if it fits in 32 bits (which is always true in 32-bit mode).
4449 // Emit relative displacement as it was a bound label if all checks are ok.
4450 if (baseAddress != Globals::kNoBaseAddress) {
4451 uint64_t rel64 = jumpAddress - (ip + baseAddress) - inst32Size;
4452 if (archId() == ArchInfo::kIdX86 || Support::isInt32(int64_t(rel64))) {
4453 rel32 = uint32_t(rel64 & 0xFFFFFFFFu);
4454 goto EmitJmpCallRel;
4455 }
4456 else {
4457 // Relative displacement exceeds 32-bits - relocator can only
4458 // insert trampoline for jmp/call, but not for jcc/jecxz.
4459 if (ASMJIT_UNLIKELY(!x86IsJmpOrCall(instId)))
4460 goto InvalidDisplacement;
4461 }
4462 }
4463
4464 err = _code->newRelocEntry(&re, RelocEntry::kTypeAbsToRel, 0);
4465 if (ASMJIT_UNLIKELY(err))
4466 goto Failed;
4467
4468 re->_sourceOffset = offset();
4469 re->_sourceSectionId = _section->id();
4470 re->_payload = jumpAddress;
4471
4472 if (ASMJIT_LIKELY(opcode.v)) {
4473 // 64-bit: Emit REX prefix so the instruction can be patched later.
4474 // REX prefix does nothing if not patched, but allows to patch the
4475 // instruction to use MOD/M and to point to a memory where the final
4476 // 64-bit address is stored.
4477 if (archId() != ArchInfo::kIdX86 && x86IsJmpOrCall(instId)) {
4478 if (!rex)
4479 writer.emit8(kX86ByteRex);
4480
4481 err = _code->addAddressToAddressTable(jumpAddress);
4482 if (ASMJIT_UNLIKELY(err))
4483 goto Failed;
4484
4485 re->_relocType = RelocEntry::kTypeX64AddressEntry;
4486 }
4487
4488 writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0); // Emit 0F prefix.
4489 writer.emit8(opcode.v); // Emit opcode.
4490 writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
4491 writer.emit32uLE(0); // Emit DISP32.
4492
4493 re->_valueSize = 4;
4494 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 4);
4495 re->_trailingSize = uint8_t(immSize);
4496 }
4497 else {
4498 writer.emit8(opCode8); // Emit opcode.
4499 writer.emit8(0); // Emit DISP8 (zero).
4500
4501 re->_valueSize = 1;
4502 re->_leadingSize = uint8_t(writer.offsetFrom(_bufferPtr) - 1);
4503 re->_trailingSize = uint8_t(immSize);
4504 }
4505 goto EmitDone;
4506 }
4507
4508 // Not Label|Imm -> Invalid.
4509 goto InvalidInstruction;
4510
4511 // Emit jmp/call with relative displacement known at assembly-time. Decide
4512 // between 8-bit and 32-bit displacement encoding. Some instructions only
4513 // allow either 8-bit or 32-bit encoding, others allow both encodings.
4514 EmitJmpCallRel:
4515 if (Support::isInt8(int32_t(rel32 + inst32Size - inst8Size)) && opCode8 && !(options & Inst::kOptionLongForm)) {
4516 options |= Inst::kOptionShortForm;
4517 writer.emit8(opCode8); // Emit opcode
4518 writer.emit8(rel32 + inst32Size - inst8Size); // Emit DISP8.
4519 goto EmitDone;
4520 }
4521 else {
4522 if (ASMJIT_UNLIKELY(!opcode.v || (options & Inst::kOptionShortForm) != 0))
4523 goto InvalidDisplacement;
4524
4525 options &= ~Inst::kOptionShortForm;
4526 writer.emit8If(0x0F, (opcode & Opcode::kMM_Mask) != 0); // Emit 0x0F prefix.
4527 writer.emit8(opcode.v); // Emit Opcode.
4528 writer.emit8If(x86EncodeMod(3, opReg, 0), opReg != 0); // Emit MOD.
4529 writer.emit32uLE(rel32); // Emit DISP32.
4530 goto EmitDone;
4531 }
4532 }
4533
4534 // --------------------------------------------------------------------------
4535 // [Emit - Relative]
4536 // --------------------------------------------------------------------------
4537
4538 EmitRel:
4539 {
4540 ASMJIT_ASSERT(relSize == 1 || relSize == 4);
4541
4542 // Chain with label.
4543 size_t offset = size_t(writer.offsetFrom(_bufferData));
4544 LabelLink* link = _code->newLabelLink(label, _section->id(), offset, relOffset);
4545
4546 if (ASMJIT_UNLIKELY(!link))
4547 goto OutOfMemory;
4548
4549 if (re)
4550 link->relocId = re->id();
4551
4552 // Emit label size as dummy data.
4553 if (relSize == 1)
4554 writer.emit8(0x01);
4555 else // if (relSize == 4)
4556 writer.emit32uLE(0x04040404);
4557 }
4558 writer.emitImmediate(uint64_t(immValue), immSize);
4559
4560 // --------------------------------------------------------------------------
4561 // [Done]
4562 // --------------------------------------------------------------------------
4563
4564 EmitDone:
4565 if (ASMJIT_UNLIKELY(options & Inst::kOptionReserved)) {
4566 #ifndef ASMJIT_NO_LOGGING
4567 if (hasEmitterOption(kOptionLoggingEnabled))
4568 _emitLog(instId, options, o0, o1, o2, o3, relSize, immSize, writer.cursor());
4569 #endif
4570 }
4571
4572 resetInstOptions();
4573 resetExtraReg();
4574 resetInlineComment();
4575
4576 writer.done(this);
4577 return kErrorOk;
4578
4579 // --------------------------------------------------------------------------
4580 // [Error Cases]
4581 // --------------------------------------------------------------------------
4582
4583 #define ERROR_HANDLER(ERROR) \
4584 ERROR: \
4585 err = DebugUtils::errored(kError##ERROR); \
4586 goto Failed;
4587
4588 ERROR_HANDLER(OutOfMemory)
4589 ERROR_HANDLER(InvalidLabel)
4590 ERROR_HANDLER(InvalidInstruction)
4591 ERROR_HANDLER(InvalidLockPrefix)
4592 ERROR_HANDLER(InvalidXAcquirePrefix)
4593 ERROR_HANDLER(InvalidXReleasePrefix)
4594 ERROR_HANDLER(InvalidRepPrefix)
4595 ERROR_HANDLER(InvalidRexPrefix)
4596 ERROR_HANDLER(InvalidEROrSAE)
4597 ERROR_HANDLER(InvalidAddress)
4598 ERROR_HANDLER(InvalidAddressIndex)
4599 ERROR_HANDLER(InvalidAddress64Bit)
4600 ERROR_HANDLER(InvalidDisplacement)
4601 ERROR_HANDLER(InvalidSegment)
4602 ERROR_HANDLER(InvalidImmediate)
4603 ERROR_HANDLER(OperandSizeMismatch)
4604 ERROR_HANDLER(AmbiguousOperandSize)
4605 ERROR_HANDLER(NotConsecutiveRegs)
4606
4607 #undef ERROR_HANDLER
4608
4609 Failed:
4610 return _emitFailed(err, instId, options, o0, o1, o2, o3);
4611 }
4612
4613 // ============================================================================
4614 // [asmjit::x86::Assembler - Align]
4615 // ============================================================================
4616
4617 Error Assembler::align(uint32_t alignMode, uint32_t alignment) {
4618 if (ASMJIT_UNLIKELY(alignMode >= kAlignCount))
4619 return reportError(DebugUtils::errored(kErrorInvalidArgument));
4620
4621 if (alignment <= 1)
4622 return kErrorOk;
4623
4624 if (ASMJIT_UNLIKELY(!Support::isPowerOf2(alignment) || alignment > Globals::kMaxAlignment))
4625 return reportError(DebugUtils::errored(kErrorInvalidArgument));
4626
4627 uint32_t i = uint32_t(Support::alignUpDiff<size_t>(offset(), alignment));
4628 if (i > 0) {
4629 CodeBufferWriter writer(this);
4630 ASMJIT_PROPAGATE(writer.ensureSpace(this, i));
4631
4632 uint8_t pattern = 0x00;
4633 switch (alignMode) {
4634 case kAlignCode: {
4635 if (hasEmitterOption(kOptionOptimizedAlign)) {
4636 // Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 2B (NOP).
4637 enum { kMaxNopSize = 9 };
4638
4639 static const uint8_t nopData[kMaxNopSize][kMaxNopSize] = {
4640 { 0x90 },
4641 { 0x66, 0x90 },
4642 { 0x0F, 0x1F, 0x00 },
4643 { 0x0F, 0x1F, 0x40, 0x00 },
4644 { 0x0F, 0x1F, 0x44, 0x00, 0x00 },
4645 { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
4646 { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
4647 { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
4648 { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
4649 };
4650
4651 do {
4652 uint32_t n = Support::min<uint32_t>(i, kMaxNopSize);
4653 const uint8_t* src = nopData[n - 1];
4654
4655 i -= n;
4656 do {
4657 writer.emit8(*src++);
4658 } while (--n);
4659 } while (i);
4660 }
4661
4662 pattern = 0x90;
4663 break;
4664 }
4665
4666 case kAlignData:
4667 pattern = 0xCC;
4668 break;
4669
4670 case kAlignZero:
4671 // Pattern already set to zero.
4672 break;
4673 }
4674
4675 while (i) {
4676 writer.emit8(pattern);
4677 i--;
4678 }
4679
4680 writer.done(this);
4681 }
4682
4683 #ifndef ASMJIT_NO_LOGGING
4684 if (hasEmitterOption(kOptionLoggingEnabled)) {
4685 Logger* logger = _code->logger();
4686 StringTmp<128> sb;
4687 sb.appendChars(' ', logger->indentation(FormatOptions::kIndentationCode));
4688 sb.appendFormat("align %u\n", alignment);
4689 logger->log(sb);
4690 }
4691 #endif
4692
4693 return kErrorOk;
4694 }
4695
4696 // ============================================================================
4697 // [asmjit::x86::Assembler - Events]
4698 // ============================================================================
4699
4700 Error Assembler::onAttach(CodeHolder* code) noexcept {
4701 uint32_t archId = code->archId();
4702 if (!ArchInfo::isX86Family(archId))
4703 return DebugUtils::errored(kErrorInvalidArch);
4704
4705 ASMJIT_PROPAGATE(Base::onAttach(code));
4706
4707 if (archId == ArchInfo::kIdX86) {
4708 // 32 bit architecture - X86.
4709 _gpRegInfo.setSignature(Gpd::kSignature);
4710 _globalInstOptions |= Inst::_kOptionInvalidRex;
4711 _setAddressOverrideMask(kX86MemInfo_67H_X86);
4712 }
4713 else {
4714 // 64 bit architecture - X64.
4715 _gpRegInfo.setSignature(Gpq::kSignature);
4716 _globalInstOptions &= ~Inst::_kOptionInvalidRex;
4717 _setAddressOverrideMask(kX86MemInfo_67H_X64);
4718 }
4719
4720 return kErrorOk;
4721 }
4722
// Called when the assembler is detached from `code`; no X86-specific state
// needs to be reset here, so this only delegates to the base implementation.
Error Assembler::onDetach(CodeHolder* code) noexcept {
  return Base::onDetach(code);
}
4726
4727 ASMJIT_END_SUB_NAMESPACE
4728
4729 #endif // ASMJIT_BUILD_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86ASSEMBLER_H
7 #define _ASMJIT_X86_X86ASSEMBLER_H
8
9 #include "../core/assembler.h"
10 #include "../x86/x86emitter.h"
11 #include "../x86/x86operand.h"
12
13 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
14
15 //! \addtogroup asmjit_x86
16 //! \{
17
18 // ============================================================================
19 // [asmjit::Assembler]
20 // ============================================================================
21
//! Assembler (X86).
//!
//! Emits X86 machine-code into buffers managed by `CodeHolder`.
class ASMJIT_VIRTAPI Assembler
  : public BaseAssembler,
    public EmitterImplicitT<Assembler> {
public:
  ASMJIT_NONCOPYABLE(Assembler)
  //! Base-class alias used by `onAttach()` / `onDetach()` implementations.
  typedef BaseAssembler Base;

  //! \name Construction & Destruction
  //! \{

  //! Creates the assembler and optionally attaches it to `code`.
  ASMJIT_API explicit Assembler(CodeHolder* code = nullptr) noexcept;
  ASMJIT_API virtual ~Assembler() noexcept;

  //! \}

  //! \cond INTERNAL
  //! \name Internal
  //! \{

  // NOTE: x86::Assembler uses _privateData to store 'address-override' bit that
  // is used to decide whether to emit address-override (67H) prefix based on
  // the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or
  // `kX86MemInfo_67H_X64`.

  //! Returns the current address-override mask (stored in `_privateData`).
  inline uint32_t _addressOverrideMask() const noexcept { return _privateData; }
  //! Sets the address-override mask to `m` (stored in `_privateData`).
  inline void _setAddressOverrideMask(uint32_t m) noexcept { _privateData = m; }

  //! \}
  //! \endcond

  //! \cond INTERNAL
  //! \name Emit
  //! \{

  using BaseEmitter::_emit;
  //! Emits a single instruction `instId` with up to four operands.
  ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_& o3) override;

  //! \}
  //! \endcond

  //! \name Align
  //! \{

  //! Aligns the current buffer position to `alignment` bytes using `alignMode`.
  ASMJIT_API Error align(uint32_t alignMode, uint32_t alignment) override;

  //! \}

  //! \name Events
  //! \{

  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
  ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;

  //! \}
};
79
80 //! \}
81
82 ASMJIT_END_SUB_NAMESPACE
83
84 #endif // _ASMJIT_X86_X86ASSEMBLER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_NO_COMPILER)
8
9 #include "../x86/x86assembler.h"
10 #include "../x86/x86builder.h"
11
12 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
13
14 // ============================================================================
15 // [asmjit::x86::Builder - Construction / Destruction]
16 // ============================================================================
17
// Creates the builder and, when `code` is provided, attaches it immediately.
Builder::Builder(CodeHolder* code) noexcept : BaseBuilder() {
  if (code)
    code->attach(this);
}
Builder::~Builder() noexcept {}
23
24 // ============================================================================
25 // [asmjit::x86::Builder - Finalize]
26 // ============================================================================
27
28 Error Builder::finalize() {
29 ASMJIT_PROPAGATE(runPasses());
30 Assembler a(_code);
31 return serialize(&a);
32 }
33
34 // ============================================================================
35 // [asmjit::x86::Builder - Events]
36 // ============================================================================
37
38 Error Builder::onAttach(CodeHolder* code) noexcept {
39 uint32_t archId = code->archId();
40 if (!ArchInfo::isX86Family(archId))
41 return DebugUtils::errored(kErrorInvalidArch);
42
43 ASMJIT_PROPAGATE(Base::onAttach(code));
44
45 _gpRegInfo.setSignature(archId == ArchInfo::kIdX86 ? uint32_t(Gpd::kSignature) : uint32_t(Gpq::kSignature));
46 return kErrorOk;
47 }
48
49 ASMJIT_END_SUB_NAMESPACE
50
51 #endif // ASMJIT_BUILD_X86 && !ASMJIT_NO_COMPILER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86BUILDER_H
7 #define _ASMJIT_X86_X86BUILDER_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_BUILDER
11
12 #include "../core/builder.h"
13 #include "../core/datatypes.h"
14 #include "../x86/x86emitter.h"
15
16 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
17
18 //! \addtogroup asmjit_x86
19 //! \{
20
21 // ============================================================================
22 // [asmjit::x86::Builder]
23 // ============================================================================
24
//! Architecture-dependent asm-builder (X86).
class ASMJIT_VIRTAPI Builder
  : public BaseBuilder,
    public EmitterImplicitT<Builder> {
public:
  ASMJIT_NONCOPYABLE(Builder)
  //! Base-class alias used by the `onAttach()` implementation.
  typedef BaseBuilder Base;

  //! \name Construction & Destruction
  //! \{

  //! Creates the builder and optionally attaches it to `code`.
  ASMJIT_API explicit Builder(CodeHolder* code = nullptr) noexcept;
  ASMJIT_API virtual ~Builder() noexcept;

  //! \}

  //! \name Finalize
  //! \{

  //! Runs all registered passes and serializes the recorded nodes to machine code.
  ASMJIT_API Error finalize() override;

  //! \}

  //! \name Events
  //! \{

  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;

  //! \}
};
55
56 //! \}
57
58 ASMJIT_END_SUB_NAMESPACE
59
60 #endif // !ASMJIT_NO_BUILDER
61 #endif // _ASMJIT_X86_X86BUILDER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifdef ASMJIT_BUILD_X86
8
9 #include "../x86/x86callconv_p.h"
10 #include "../x86/x86operand.h"
11
12 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
13
14 // ============================================================================
15 // [asmjit::x86::CallConvInternal - Init]
16 // ============================================================================
17
// Applies properties shared by all 32-bit X86 calling conventions handled
// below - 4-byte natural stack alignment, X86 architecture id, and
// BX|SP|BP|SI|DI marked as callee-preserved GP registers.
static inline void CallConv_initX86Common(CallConv& cc) noexcept {
  cc.setNaturalStackAlignment(4);
  cc.setArchType(ArchInfo::kIdX86);
  cc.setPreservedRegs(Reg::kGroupGp, Support::bitMask(Gp::kIdBx, Gp::kIdSp, Gp::kIdBp, Gp::kIdSi, Gp::kIdDi));
}
23
// Initializes `cc` to the calling convention identified by `ccId`.
//
// Returns `kErrorInvalidArgument` when `ccId` is not recognized; on success
// `cc.id()` is set to `ccId` as the final step.
ASMJIT_FAVOR_SIZE Error CallConvInternal::init(CallConv& cc, uint32_t ccId) noexcept {
  // Shortcuts for register groups and GP register ids used below.
  constexpr uint32_t kGroupGp = Reg::kGroupGp;
  constexpr uint32_t kGroupVec = Reg::kGroupVec;
  constexpr uint32_t kGroupMm = Reg::kGroupMm;
  constexpr uint32_t kGroupKReg = Reg::kGroupKReg;

  constexpr uint32_t kZax = Gp::kIdAx;
  constexpr uint32_t kZbx = Gp::kIdBx;
  constexpr uint32_t kZcx = Gp::kIdCx;
  constexpr uint32_t kZdx = Gp::kIdDx;
  constexpr uint32_t kZsp = Gp::kIdSp;
  constexpr uint32_t kZbp = Gp::kIdBp;
  constexpr uint32_t kZsi = Gp::kIdSi;
  constexpr uint32_t kZdi = Gp::kIdDi;

  switch (ccId) {
    // X86 'stdcall' - callee pops the stack, no register argument order set.
    case CallConv::kIdX86StdCall:
      cc.setFlags(CallConv::kFlagCalleePopsStack);
      CallConv_initX86Common(cc);
      break;

    // MS 'thiscall' - callee pops the stack, first GP argument in ECX.
    case CallConv::kIdX86MsThisCall:
      cc.setFlags(CallConv::kFlagCalleePopsStack);
      cc.setPassedOrder(kGroupGp, kZcx);
      CallConv_initX86Common(cc);
      break;

    // 'fastcall' (MS and GCC variants) - callee pops the stack, first two
    // GP arguments in ECX and EDX.
    case CallConv::kIdX86MsFastCall:
    case CallConv::kIdX86GccFastCall:
      cc.setFlags(CallConv::kFlagCalleePopsStack);
      cc.setPassedOrder(kGroupGp, kZcx, kZdx);
      CallConv_initX86Common(cc);
      break;

    // GCC 'regparm(1..3)' - first N GP arguments in EAX[, EDX[, ECX]].
    case CallConv::kIdX86GccRegParm1:
      cc.setPassedOrder(kGroupGp, kZax);
      CallConv_initX86Common(cc);
      break;

    case CallConv::kIdX86GccRegParm2:
      cc.setPassedOrder(kGroupGp, kZax, kZdx);
      CallConv_initX86Common(cc);
      break;

    case CallConv::kIdX86GccRegParm3:
      cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx);
      CallConv_initX86Common(cc);
      break;

    // X86 'cdecl' - only the common X86 defaults.
    case CallConv::kIdX86CDecl:
      CallConv_initX86Common(cc);
      break;

    // Windows X64 - GP arguments in RCX, RDX, R8, R9; vector arguments in
    // XMM0..3; 32-byte spill zone; Win64-specific assignment strategy.
    case CallConv::kIdX86Win64:
      cc.setArchType(ArchInfo::kIdX64);
      cc.setStrategy(CallConv::kStrategyWin64);
      cc.setFlags(CallConv::kFlagPassFloatsByVec | CallConv::kFlagIndirectVecArgs);
      cc.setNaturalStackAlignment(16);
      cc.setSpillZoneSize(32);
      cc.setPassedOrder(kGroupGp, kZcx, kZdx, 8, 9);
      cc.setPassedOrder(kGroupVec, 0, 1, 2, 3);
      cc.setPreservedRegs(kGroupGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
      cc.setPreservedRegs(kGroupVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
      break;

    // System V X64 - GP arguments in RDI, RSI, RDX, RCX, R8, R9; vector
    // arguments in XMM0..7; 128-byte red zone.
    case CallConv::kIdX86SysV64:
      cc.setArchType(ArchInfo::kIdX64);
      cc.setFlags(CallConv::kFlagPassFloatsByVec);
      cc.setNaturalStackAlignment(16);
      cc.setRedZoneSize(128);
      cc.setPassedOrder(kGroupGp, kZdi, kZsi, kZdx, kZcx, 8, 9);
      cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7);
      cc.setPreservedRegs(kGroupGp, Support::bitMask(kZbx, kZsp, kZbp, 12, 13, 14, 15));
      break;

    // AsmJit-specific X86 'lightcall' conventions - the first `n` vector
    // registers are not preserved (n = 2, 3 or 4 based on `ccId`).
    case CallConv::kIdX86LightCall2:
    case CallConv::kIdX86LightCall3:
    case CallConv::kIdX86LightCall4: {
      uint32_t n = (ccId - CallConv::kIdX86LightCall2) + 2;

      cc.setArchType(ArchInfo::kIdX86);
      cc.setFlags(CallConv::kFlagPassFloatsByVec);
      cc.setNaturalStackAlignment(16);
      cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx, kZsi, kZdi);
      cc.setPassedOrder(kGroupMm, 0, 1, 2, 3, 4, 5, 6, 7);
      cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7);
      cc.setPassedOrder(kGroupKReg, 0, 1, 2, 3, 4, 5, 6, 7);

      cc.setPreservedRegs(kGroupGp , Support::lsbMask<uint32_t>(8));
      cc.setPreservedRegs(kGroupVec , Support::lsbMask<uint32_t>(8) & ~Support::lsbMask<uint32_t>(n));
      break;
    }

    // AsmJit-specific X64 'lightcall' conventions - same idea as above, with
    // 16 preserved GP registers and all vector registers above `n` preserved.
    case CallConv::kIdX64LightCall2:
    case CallConv::kIdX64LightCall3:
    case CallConv::kIdX64LightCall4: {
      uint32_t n = (ccId - CallConv::kIdX64LightCall2) + 2;

      cc.setArchType(ArchInfo::kIdX64);
      cc.setFlags(CallConv::kFlagPassFloatsByVec);
      cc.setNaturalStackAlignment(16);
      cc.setPassedOrder(kGroupGp, kZax, kZdx, kZcx, kZsi, kZdi);
      cc.setPassedOrder(kGroupMm, 0, 1, 2, 3, 4, 5, 6, 7);
      cc.setPassedOrder(kGroupVec, 0, 1, 2, 3, 4, 5, 6, 7);
      cc.setPassedOrder(kGroupKReg, 0, 1, 2, 3, 4, 5, 6, 7);

      cc.setPreservedRegs(kGroupGp , Support::lsbMask<uint32_t>(16));
      cc.setPreservedRegs(kGroupVec ,~Support::lsbMask<uint32_t>(n));
      break;
    }

    default:
      return DebugUtils::errored(kErrorInvalidArgument);
  }

  cc.setId(ccId);
  return kErrorOk;
}
142
143 ASMJIT_END_SUB_NAMESPACE
144
145 #endif // ASMJIT_BUILD_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86CALLCONV_P_H
7 #define _ASMJIT_X86_X86CALLCONV_P_H
8
9 #include "../core/callconv.h"
10
11 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
12
13 //! \cond INTERNAL
14 //! \addtogroup asmjit_x86
15 //! \{
16
17 // ============================================================================
18 // [asmjit::x86::CallConvInternal]
19 // ============================================================================
20
//! X86-specific function API (calling conventions and other utilities).
namespace CallConvInternal {
  //! Initialize `CallConv` structure (X86 specific).
  //!
  //! Returns `kErrorInvalidArgument` when `ccId` is not a recognized
  //! X86/X64 calling-convention id.
  Error init(CallConv& cc, uint32_t ccId) noexcept;
}
26
27 //! \}
28 //! \endcond
29
30 ASMJIT_END_SUB_NAMESPACE
31
32 #endif // _ASMJIT_X86_X86CALLCONV_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_NO_COMPILER)
8
9 #include "../x86/x86assembler.h"
10 #include "../x86/x86compiler.h"
11 #include "../x86/x86rapass_p.h"
12
13 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
14
15 // ============================================================================
16 // [asmjit::x86::Compiler - Construction / Destruction]
17 // ============================================================================
18
// Creates the compiler and, when `code` is provided, attaches it immediately.
Compiler::Compiler(CodeHolder* code) noexcept : BaseCompiler() {
  if (code)
    code->attach(this);
}
Compiler::~Compiler() noexcept {}
24
25 // ============================================================================
26 // [asmjit::x86::Compiler - Finalize]
27 // ============================================================================
28
29 Error Compiler::finalize() {
30 ASMJIT_PROPAGATE(runPasses());
31 Assembler a(_code);
32 return serialize(&a);
33 }
34
35 // ============================================================================
36 // [asmjit::x86::Compiler - Events]
37 // ============================================================================
38
39 Error Compiler::onAttach(CodeHolder* code) noexcept {
40 uint32_t archId = code->archId();
41 if (!ArchInfo::isX86Family(archId))
42 return DebugUtils::errored(kErrorInvalidArch);
43
44 ASMJIT_PROPAGATE(Base::onAttach(code));
45 _gpRegInfo.setSignature(archId == ArchInfo::kIdX86 ? uint32_t(Gpd::kSignature) : uint32_t(Gpq::kSignature));
46
47 Error err = addPassT<X86RAPass>();
48 if (ASMJIT_UNLIKELY(err)) {
49 onDetach(code);
50 return err;
51 }
52
53 return kErrorOk;
54 }
55
56 ASMJIT_END_SUB_NAMESPACE
57
58 #endif // ASMJIT_BUILD_X86 && !ASMJIT_NO_COMPILER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86COMPILER_H
7 #define _ASMJIT_X86_X86COMPILER_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/compiler.h"
13 #include "../core/datatypes.h"
14 #include "../core/type.h"
15 #include "../x86/x86emitter.h"
16
17 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
18
19 //! \addtogroup asmjit_x86
20 //! \{
21
22 // ============================================================================
23 // [asmjit::x86::Compiler]
24 // ============================================================================
25
26 //! Architecture-dependent asm-compiler (X86).
//! Architecture-dependent asm-compiler (X86).
//!
//! Combines the architecture-independent `BaseCompiler` infrastructure with
//! the explicit X86/X64 instruction interface provided by `EmitterExplicitT`.
class ASMJIT_VIRTAPI Compiler
  : public BaseCompiler,
    public EmitterExplicitT<Compiler> {
public:
  ASMJIT_NONCOPYABLE(Compiler)
  typedef BaseCompiler Base;

  //! \name Construction & Destruction
  //! \{

  //! Creates the compiler and optionally attaches it to `code`.
  ASMJIT_API explicit Compiler(CodeHolder* code = nullptr) noexcept;
  ASMJIT_API virtual ~Compiler() noexcept;

  //! \}

  //! \name Virtual Registers
  //! \{

// ASMJIT_NEW_REG_FMT - creates a virtual register with a formatted debug
// name. When logging is compiled in, the format string and its arguments are
// forwarded to `_newRegFmt()`; otherwise the format is discarded and the
// plain `_newReg()` is used (the register gets no name).
#ifndef ASMJIT_NO_LOGGING
# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \
    _newRegFmt(OUT, PARAM, FORMAT, ARGS)
#else
# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \
    ASMJIT_UNUSED(FORMAT); \
    _newReg(OUT, PARAM)
#endif

// ASMJIT_NEW_REG_CUSTOM - defines `FUNC(typeId)` and `FUNC(typeId, fmt, ...)`
// overloads that create a new virtual register of C++ type `REG` from a
// runtime `Type::kId...` type-id supplied by the caller.
#define ASMJIT_NEW_REG_CUSTOM(FUNC, REG) \
  inline REG FUNC(uint32_t typeId) { \
    REG reg(Globals::NoInit); \
    _newReg(reg, typeId); \
    return reg; \
  } \
  \
  template<typename... Args> \
  inline REG FUNC(uint32_t typeId, const char* fmt, Args&&... args) { \
    REG reg(Globals::NoInit); \
    ASMJIT_NEW_REG_FMT(reg, typeId, fmt, std::forward<Args>(args)...); \
    return reg; \
  }

// ASMJIT_NEW_REG_TYPED - like ASMJIT_NEW_REG_CUSTOM, but the type-id is
// baked into the function (`TYPE_ID`), so callers only optionally pass a name.
#define ASMJIT_NEW_REG_TYPED(FUNC, REG, TYPE_ID) \
  inline REG FUNC() { \
    REG reg(Globals::NoInit); \
    _newReg(reg, TYPE_ID); \
    return reg; \
  } \
  \
  template<typename... Args> \
  inline REG FUNC(const char* fmt, Args&&... args) { \
    REG reg(Globals::NoInit); \
    ASMJIT_NEW_REG_FMT(reg, TYPE_ID, fmt, std::forward<Args>(args)...); \
    return reg; \
  }

  //! Creates a new virtual register that has the same register type as `ref`.
  template<typename RegT>
  inline RegT newSimilarReg(const RegT& ref) {
    RegT reg(Globals::NoInit);
    _newReg(reg, ref);
    return reg;
  }

  //! \overload Creates a named virtual register similar to `ref`.
  template<typename RegT, typename... Args>
  inline RegT newSimilarReg(const RegT& ref, const char* fmt, Args&&... args) {
    RegT reg(Globals::NoInit);
    ASMJIT_NEW_REG_FMT(reg, ref, fmt, std::forward<Args>(args)...);
    return reg;
  }

  ASMJIT_NEW_REG_CUSTOM(newReg    , Reg )
  ASMJIT_NEW_REG_CUSTOM(newGp     , Gp  )
  ASMJIT_NEW_REG_CUSTOM(newVec    , Vec )
  ASMJIT_NEW_REG_CUSTOM(newK      , KReg)

  ASMJIT_NEW_REG_TYPED(newI8     , Gp  , Type::kIdI8     )
  ASMJIT_NEW_REG_TYPED(newU8     , Gp  , Type::kIdU8     )
  ASMJIT_NEW_REG_TYPED(newI16    , Gp  , Type::kIdI16    )
  ASMJIT_NEW_REG_TYPED(newU16    , Gp  , Type::kIdU16    )
  ASMJIT_NEW_REG_TYPED(newI32    , Gp  , Type::kIdI32    )
  ASMJIT_NEW_REG_TYPED(newU32    , Gp  , Type::kIdU32    )
  ASMJIT_NEW_REG_TYPED(newI64    , Gp  , Type::kIdI64    )
  ASMJIT_NEW_REG_TYPED(newU64    , Gp  , Type::kIdU64    )
  ASMJIT_NEW_REG_TYPED(newInt8   , Gp  , Type::kIdI8     )
  ASMJIT_NEW_REG_TYPED(newUInt8  , Gp  , Type::kIdU8     )
  ASMJIT_NEW_REG_TYPED(newInt16  , Gp  , Type::kIdI16    )
  ASMJIT_NEW_REG_TYPED(newUInt16 , Gp  , Type::kIdU16    )
  ASMJIT_NEW_REG_TYPED(newInt32  , Gp  , Type::kIdI32    )
  ASMJIT_NEW_REG_TYPED(newUInt32 , Gp  , Type::kIdU32    )
  ASMJIT_NEW_REG_TYPED(newInt64  , Gp  , Type::kIdI64    )
  ASMJIT_NEW_REG_TYPED(newUInt64 , Gp  , Type::kIdU64    )
  ASMJIT_NEW_REG_TYPED(newIntPtr , Gp  , Type::kIdIntPtr )
  ASMJIT_NEW_REG_TYPED(newUIntPtr, Gp  , Type::kIdUIntPtr)

  ASMJIT_NEW_REG_TYPED(newGpb    , Gp  , Type::kIdU8     )
  ASMJIT_NEW_REG_TYPED(newGpw    , Gp  , Type::kIdU16    )
  ASMJIT_NEW_REG_TYPED(newGpd    , Gp  , Type::kIdU32    )
  ASMJIT_NEW_REG_TYPED(newGpq    , Gp  , Type::kIdU64    )
  ASMJIT_NEW_REG_TYPED(newGpz    , Gp  , Type::kIdUIntPtr)
  ASMJIT_NEW_REG_TYPED(newXmm    , Xmm , Type::kIdI32x4  )
  ASMJIT_NEW_REG_TYPED(newXmmSs  , Xmm , Type::kIdF32x1  )
  ASMJIT_NEW_REG_TYPED(newXmmSd  , Xmm , Type::kIdF64x1  )
  ASMJIT_NEW_REG_TYPED(newXmmPs  , Xmm , Type::kIdF32x4  )
  ASMJIT_NEW_REG_TYPED(newXmmPd  , Xmm , Type::kIdF64x2  )
  ASMJIT_NEW_REG_TYPED(newYmm    , Ymm , Type::kIdI32x8  )
  ASMJIT_NEW_REG_TYPED(newYmmPs  , Ymm , Type::kIdF32x8  )
  ASMJIT_NEW_REG_TYPED(newYmmPd  , Ymm , Type::kIdF64x4  )
  ASMJIT_NEW_REG_TYPED(newZmm    , Zmm , Type::kIdI32x16 )
  ASMJIT_NEW_REG_TYPED(newZmmPs  , Zmm , Type::kIdF32x16 )
  ASMJIT_NEW_REG_TYPED(newZmmPd  , Zmm , Type::kIdF64x8  )
  ASMJIT_NEW_REG_TYPED(newMm     , Mm  , Type::kIdMmx64  )
  ASMJIT_NEW_REG_TYPED(newKb     , KReg, Type::kIdMask8  )
  ASMJIT_NEW_REG_TYPED(newKw     , KReg, Type::kIdMask16 )
  ASMJIT_NEW_REG_TYPED(newKd     , KReg, Type::kIdMask32 )
  ASMJIT_NEW_REG_TYPED(newKq     , KReg, Type::kIdMask64 )

// The helper macros are local to this class definition, undefine them so
// they don't leak into code that includes this header.
#undef ASMJIT_NEW_REG_TYPED
#undef ASMJIT_NEW_REG_CUSTOM
#undef ASMJIT_NEW_REG_FMT

  //! \}

  //! \name Stack
  //! \{

  //! Creates a new memory chunk allocated on the current function's stack.
  inline Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) {
    Mem m(Globals::NoInit);
    _newStack(m, size, alignment, name);
    return m;
  }

  //! \}

  //! \name Constants
  //! \{

  //! Put data to a constant-pool and get a memory reference to it.
  //!
  //! NOTE(review): `scope` presumably selects the constant-pool scope
  //! (local vs global) handled by `_newConst()` - verify against its impl.
  inline Mem newConst(uint32_t scope, const void* data, size_t size) {
    Mem m(Globals::NoInit);
    _newConst(m, scope, data, size);
    return m;
  }

  //! Put a BYTE `val` to a constant-pool.
  inline Mem newByteConst(uint32_t scope, uint8_t val) noexcept { return newConst(scope, &val, 1); }
  //! Put a WORD `val` to a constant-pool.
  inline Mem newWordConst(uint32_t scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
  //! Put a DWORD `val` to a constant-pool.
  inline Mem newDWordConst(uint32_t scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
  //! Put a QWORD `val` to a constant-pool.
  inline Mem newQWordConst(uint32_t scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }

  //! Put a WORD `val` to a constant-pool.
  inline Mem newInt16Const(uint32_t scope, int16_t val) noexcept { return newConst(scope, &val, 2); }
  //! Put a WORD `val` to a constant-pool.
  inline Mem newUInt16Const(uint32_t scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
  //! Put a DWORD `val` to a constant-pool.
  inline Mem newInt32Const(uint32_t scope, int32_t val) noexcept { return newConst(scope, &val, 4); }
  //! Put a DWORD `val` to a constant-pool.
  inline Mem newUInt32Const(uint32_t scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
  //! Put a QWORD `val` to a constant-pool.
  inline Mem newInt64Const(uint32_t scope, int64_t val) noexcept { return newConst(scope, &val, 8); }
  //! Put a QWORD `val` to a constant-pool.
  inline Mem newUInt64Const(uint32_t scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }

  //! Put a SP-FP `val` to a constant-pool.
  inline Mem newFloatConst(uint32_t scope, float val) noexcept { return newConst(scope, &val, 4); }
  //! Put a DP-FP `val` to a constant-pool.
  inline Mem newDoubleConst(uint32_t scope, double val) noexcept { return newConst(scope, &val, 8); }

  //! Put a MMX `val` to a constant-pool.
  inline Mem newMmConst(uint32_t scope, const Data64& val) noexcept { return newConst(scope, &val, 8); }
  //! Put a XMM `val` to a constant-pool.
  inline Mem newXmmConst(uint32_t scope, const Data128& val) noexcept { return newConst(scope, &val, 16); }
  //! Put a YMM `val` to a constant-pool.
  inline Mem newYmmConst(uint32_t scope, const Data256& val) noexcept { return newConst(scope, &val, 32); }

  //! \}

  //! \name Instruction Options
  //! \{

  //! Force the compiler to not follow the conditional or unconditional jump.
  inline Compiler& unfollow() noexcept { _instOptions |= Inst::kOptionUnfollow; return *this; }
  //! Tell the compiler that the destination variable will be overwritten.
  inline Compiler& overwrite() noexcept { _instOptions |= Inst::kOptionOverwrite; return *this; }

  //! \}

  //! \name Function Call & Ret Intrinsics
  //! \{

  //! Call a function (creates a \ref FuncCallNode managed by the compiler).
  inline FuncCallNode* call(const Gp& dst, const FuncSignature& sign) { return addCall(Inst::kIdCall, dst, sign); }
  //! \overload
  inline FuncCallNode* call(const Mem& dst, const FuncSignature& sign) { return addCall(Inst::kIdCall, dst, sign); }
  //! \overload
  inline FuncCallNode* call(const Label& label, const FuncSignature& sign) { return addCall(Inst::kIdCall, label, sign); }
  //! \overload
  inline FuncCallNode* call(const Imm& dst, const FuncSignature& sign) { return addCall(Inst::kIdCall, dst, sign); }
  //! \overload The unsigned address is reinterpreted as a signed immediate.
  inline FuncCallNode* call(uint64_t dst, const FuncSignature& sign) { return addCall(Inst::kIdCall, Imm(int64_t(dst)), sign); }

  //! Return.
  inline FuncRetNode* ret() { return addRet(Operand(), Operand()); }
  //! \overload
  inline FuncRetNode* ret(const BaseReg& o0) { return addRet(o0, Operand()); }
  //! \overload
  inline FuncRetNode* ret(const BaseReg& o0, const BaseReg& o1) { return addRet(o0, o1); }

  //! \}

  //! \name Finalize
  //! \{

  ASMJIT_API Error finalize() override;

  //! \}

  //! \name Events
  //! \{

  ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;

  //! \}
};
253
254 //! \}
255
256 ASMJIT_END_SUB_NAMESPACE
257
258 #endif // !ASMJIT_NO_COMPILER
259 #endif // _ASMJIT_X86_X86COMPILER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86EMITTER_H
7 #define _ASMJIT_X86_X86EMITTER_H
8
9 #include "../core/emitter.h"
10 #include "../core/support.h"
11 #include "../x86/x86globals.h"
12 #include "../x86/x86operand.h"
13
14 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
15
// The ASMJIT_INST_* macros below generate the per-instruction inline wrappers
// of EmitterExplicitT. Naming convention:
//   - `Nx`  - N register/memory operands, forwarded as-is.
//   - `Ni`  - like `Nx`, plus overloads that accept the last operand as a
//             native integer (converted via `Support::asInt`).
//   - `Nii` - like `Nx`, plus an overload taking the last TWO operands as
//             integers (the first wrapped in `Imm`).
//   - `Nc`  - condition-code family: one generic overload taking a `cc`
//             code mapped through CONV, plus one suffixed wrapper per
//             X86 condition (a, ae, b, ..., z) pasted onto the Inst id.

// Emits an instruction having no operands.
#define ASMJIT_INST_0x(NAME, ID) \
  inline Error NAME() { return _emitter()->emit(Inst::kId##ID); }

// Emits an instruction having a single operand.
#define ASMJIT_INST_1x(NAME, ID, T0) \
  inline Error NAME(const T0& o0) { return _emitter()->emit(Inst::kId##ID, o0); }

// Emits an instruction having a single operand that can also be an immediate.
#define ASMJIT_INST_1i(NAME, ID, T0) \
  inline Error NAME(const T0& o0) { return _emitter()->emit(Inst::kId##ID, o0); } \
  inline Error NAME(int o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); } \
  inline Error NAME(unsigned int o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); } \
  inline Error NAME(int64_t o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); } \
  inline Error NAME(uint64_t o0) { return _emitter()->emit(Inst::kId##ID, Support::asInt(o0)); }

// Emits a single-operand instruction parameterized by an X86 condition code.
#define ASMJIT_INST_1c(NAME, ID, CONV, T0) \
  inline Error NAME(uint32_t cc, const T0& o0) { return _emitter()->emit(CONV(cc), o0); } \
  inline Error NAME##a(const T0& o0) { return _emitter()->emit(Inst::kId##ID##a, o0); } \
  inline Error NAME##ae(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ae, o0); } \
  inline Error NAME##b(const T0& o0) { return _emitter()->emit(Inst::kId##ID##b, o0); } \
  inline Error NAME##be(const T0& o0) { return _emitter()->emit(Inst::kId##ID##be, o0); } \
  inline Error NAME##c(const T0& o0) { return _emitter()->emit(Inst::kId##ID##c, o0); } \
  inline Error NAME##e(const T0& o0) { return _emitter()->emit(Inst::kId##ID##e, o0); } \
  inline Error NAME##g(const T0& o0) { return _emitter()->emit(Inst::kId##ID##g, o0); } \
  inline Error NAME##ge(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ge, o0); } \
  inline Error NAME##l(const T0& o0) { return _emitter()->emit(Inst::kId##ID##l, o0); } \
  inline Error NAME##le(const T0& o0) { return _emitter()->emit(Inst::kId##ID##le, o0); } \
  inline Error NAME##na(const T0& o0) { return _emitter()->emit(Inst::kId##ID##na, o0); } \
  inline Error NAME##nae(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nae, o0); } \
  inline Error NAME##nb(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nb, o0); } \
  inline Error NAME##nbe(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nbe, o0); } \
  inline Error NAME##nc(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nc, o0); } \
  inline Error NAME##ne(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ne, o0); } \
  inline Error NAME##ng(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ng, o0); } \
  inline Error NAME##nge(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nge, o0); } \
  inline Error NAME##nl(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nl, o0); } \
  inline Error NAME##nle(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nle, o0); } \
  inline Error NAME##no(const T0& o0) { return _emitter()->emit(Inst::kId##ID##no, o0); } \
  inline Error NAME##np(const T0& o0) { return _emitter()->emit(Inst::kId##ID##np, o0); } \
  inline Error NAME##ns(const T0& o0) { return _emitter()->emit(Inst::kId##ID##ns, o0); } \
  inline Error NAME##nz(const T0& o0) { return _emitter()->emit(Inst::kId##ID##nz, o0); } \
  inline Error NAME##o(const T0& o0) { return _emitter()->emit(Inst::kId##ID##o, o0); } \
  inline Error NAME##p(const T0& o0) { return _emitter()->emit(Inst::kId##ID##p, o0); } \
  inline Error NAME##pe(const T0& o0) { return _emitter()->emit(Inst::kId##ID##pe, o0); } \
  inline Error NAME##po(const T0& o0) { return _emitter()->emit(Inst::kId##ID##po, o0); } \
  inline Error NAME##s(const T0& o0) { return _emitter()->emit(Inst::kId##ID##s, o0); } \
  inline Error NAME##z(const T0& o0) { return _emitter()->emit(Inst::kId##ID##z, o0); }

// Emits an instruction having two operands.
#define ASMJIT_INST_2x(NAME, ID, T0, T1) \
  inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID, o0, o1); }

// Emits a two-operand instruction whose second operand can be an immediate.
#define ASMJIT_INST_2i(NAME, ID, T0, T1) \
  inline Error NAME(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID, o0, o1); } \
  inline Error NAME(const T0& o0, int o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); } \
  inline Error NAME(const T0& o0, unsigned int o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); } \
  inline Error NAME(const T0& o0, int64_t o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); } \
  inline Error NAME(const T0& o0, uint64_t o1) { return _emitter()->emit(Inst::kId##ID, o0, Support::asInt(o1)); }

// Emits a two-operand instruction parameterized by an X86 condition code.
#define ASMJIT_INST_2c(NAME, ID, CONV, T0, T1) \
  inline Error NAME(uint32_t cc, const T0& o0, const T1& o1) { return _emitter()->emit(CONV(cc), o0, o1); } \
  inline Error NAME##a(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##a, o0, o1); } \
  inline Error NAME##ae(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ae, o0, o1); } \
  inline Error NAME##b(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##b, o0, o1); } \
  inline Error NAME##be(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##be, o0, o1); } \
  inline Error NAME##c(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##c, o0, o1); } \
  inline Error NAME##e(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##e, o0, o1); } \
  inline Error NAME##g(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##g, o0, o1); } \
  inline Error NAME##ge(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ge, o0, o1); } \
  inline Error NAME##l(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##l, o0, o1); } \
  inline Error NAME##le(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##le, o0, o1); } \
  inline Error NAME##na(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##na, o0, o1); } \
  inline Error NAME##nae(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nae, o0, o1); } \
  inline Error NAME##nb(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nb, o0, o1); } \
  inline Error NAME##nbe(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nbe, o0, o1); } \
  inline Error NAME##nc(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nc, o0, o1); } \
  inline Error NAME##ne(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ne, o0, o1); } \
  inline Error NAME##ng(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ng, o0, o1); } \
  inline Error NAME##nge(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nge, o0, o1); } \
  inline Error NAME##nl(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nl, o0, o1); } \
  inline Error NAME##nle(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nle, o0, o1); } \
  inline Error NAME##no(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##no, o0, o1); } \
  inline Error NAME##np(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##np, o0, o1); } \
  inline Error NAME##ns(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##ns, o0, o1); } \
  inline Error NAME##nz(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##nz, o0, o1); } \
  inline Error NAME##o(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##o, o0, o1); } \
  inline Error NAME##p(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##p, o0, o1); } \
  inline Error NAME##pe(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##pe, o0, o1); } \
  inline Error NAME##po(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##po, o0, o1); } \
  inline Error NAME##s(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##s, o0, o1); } \
  inline Error NAME##z(const T0& o0, const T1& o1) { return _emitter()->emit(Inst::kId##ID##z, o0, o1); }

// Emits an instruction having three operands.
#define ASMJIT_INST_3x(NAME, ID, T0, T1, T2) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2); }

// Emits a three-operand instruction whose third operand can be an immediate.
#define ASMJIT_INST_3i(NAME, ID, T0, T1, T2) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2); } \
  inline Error NAME(const T0& o0, const T1& o1, int o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); } \
  inline Error NAME(const T0& o0, const T1& o1, unsigned int o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); } \
  inline Error NAME(const T0& o0, const T1& o1, int64_t o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); } \
  inline Error NAME(const T0& o0, const T1& o1, uint64_t o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, Support::asInt(o2)); }

// Emits a three-operand instruction whose last two operands can be immediates.
#define ASMJIT_INST_3ii(NAME, ID, T0, T1, T2) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2); } \
  inline Error NAME(const T0& o0, int o1, int o2) { return _emitter()->emit(Inst::kId##ID, o0, Imm(o1), Support::asInt(o2)); }

// Emits an instruction having four operands.
#define ASMJIT_INST_4x(NAME, ID, T0, T1, T2, T3) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3); }

// Emits a four-operand instruction whose fourth operand can be an immediate.
#define ASMJIT_INST_4i(NAME, ID, T0, T1, T2, T3) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, int o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, unsigned int o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, int64_t o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, uint64_t o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, Support::asInt(o3)); }

// Emits a four-operand instruction whose last two operands can be immediates.
#define ASMJIT_INST_4ii(NAME, ID, T0, T1, T2, T3) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3); } \
  inline Error NAME(const T0& o0, const T1& o1, int o2, int o3) { return _emitter()->emit(Inst::kId##ID, o0, o1, Imm(o2), Support::asInt(o3)); }

// Emits an instruction having five operands.
#define ASMJIT_INST_5x(NAME, ID, T0, T1, T2, T3, T4) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, o4); }

// Emits a five-operand instruction whose fifth operand can be an immediate.
#define ASMJIT_INST_5i(NAME, ID, T0, T1, T2, T3, T4) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, o4); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, int o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, unsigned int o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, int64_t o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); } \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, uint64_t o4) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, Support::asInt(o4)); }

// Emits an instruction having six operands.
#define ASMJIT_INST_6x(NAME, ID, T0, T1, T2, T3, T4, T5) \
  inline Error NAME(const T0& o0, const T1& o1, const T2& o2, const T3& o3, const T4& o4, const T5& o5) { return _emitter()->emit(Inst::kId##ID, o0, o1, o2, o3, o4, o5); }
145
146 //! \addtogroup asmjit_x86
147 //! \{
148
149 // ============================================================================
150 // [asmjit::x86::EmitterExplicitT]
151 // ============================================================================
152
153 template<typename This>
154 struct EmitterExplicitT {
  //! \cond
  // These typedefs are used to describe implicit operands passed explicitly.
  typedef Gp AL;
  typedef Gp AH;
  typedef Gp CL;
  typedef Gp AX;
  typedef Gp DX;

  typedef Gp EAX;
  typedef Gp EBX;
  typedef Gp ECX;
  typedef Gp EDX;

  typedef Gp RAX;
  typedef Gp RBX;
  typedef Gp RCX;
  typedef Gp RDX;

  typedef Gp ZAX;
  typedef Gp ZBX;
  typedef Gp ZCX;
  typedef Gp ZDX;

  typedef Mem DS_ZAX; // ds:[zax]
  typedef Mem DS_ZDI; // ds:[zdi]
  typedef Mem ES_ZDI; // es:[zdi]
  typedef Mem DS_ZSI; // ds:[zsi]

  typedef Xmm XMM0;

  // Downcasts `this` to the most derived emitter type `This` (CRTP).
  //
  // These two are unfortunately reported by the sanitizer. We know what we do,
  // however, the sanitizer doesn't. I have tried to use reinterpret_cast instead,
  // but that would generate bad code when compiled by MSC.
  ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF inline This* _emitter() noexcept { return static_cast<This*>(this); }
  ASMJIT_ATTRIBUTE_NO_SANITIZE_UNDEF inline const This* _emitter() const noexcept { return static_cast<const This*>(this); }

  //! \endcond
192
  //! \name Native Registers
  //! \{

  //! Returns either GPD or GPQ register of the given `id` depending on the emitter's architecture.
  inline Gp gpz(uint32_t id) const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), id); }

  // The z-prefixed accessors below return the native-width (32/64-bit)
  // variant of the corresponding GP register, again based on the emitter's
  // `_gpRegInfo` signature.
  inline Gp zax() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdAx); }
  inline Gp zcx() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdCx); }
  inline Gp zdx() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdDx); }
  inline Gp zbx() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdBx); }
  inline Gp zsp() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdSp); }
  inline Gp zbp() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdBp); }
  inline Gp zsi() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdSi); }
  inline Gp zdi() const noexcept { return Gp(_emitter()->_gpRegInfo.signature(), Gp::kIdDi); }

  //! \}
209
  //! \name Native Pointers
  //! \{

  //! Creates a target dependent pointer of which base register's id is `baseId`.
  inline Mem ptr_base(uint32_t baseId, int32_t off = 0, uint32_t size = 0) const noexcept {
    // The base register type comes from the emitter, so the same code works
    // for both 32-bit and 64-bit targets.
    return Mem(Mem::Decomposed { _emitter()->_gpRegInfo.type(), baseId, 0, 0, off, size, 0 });
  }

  inline Mem ptr_zax(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdAx, off, size); }
  inline Mem ptr_zcx(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdCx, off, size); }
  inline Mem ptr_zdx(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdDx, off, size); }
  inline Mem ptr_zbx(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdBx, off, size); }
  inline Mem ptr_zsp(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdSp, off, size); }
  inline Mem ptr_zbp(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdBp, off, size); }
  inline Mem ptr_zsi(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdSi, off, size); }
  inline Mem ptr_zdi(int32_t off = 0, uint32_t size = 0) const noexcept { return ptr_base(Gp::kIdDi, off, size); }

  //! Creates an `intptr_t` memory operand depending on the current architecture.
  //!
  //! The operand size equals the native GP register size (4 or 8 bytes).
  inline Mem intptr_ptr(const Gp& base, int32_t offset = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, offset, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, index, shift, offset, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, index, shift, offset, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(const Label& base, int32_t offset = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, offset, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(const Label& base, const Gp& index, uint32_t shift, int32_t offset = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, index, shift, offset, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(const Label& base, const Vec& index, uint32_t shift, int32_t offset = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, index, shift, offset, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(const Rip& rip, int32_t offset = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(rip, offset, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(uint64_t base) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, nativeGpSize);
  }
  //! \overload
  inline Mem intptr_ptr(uint64_t base, const Gp& index, uint32_t shift = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, index, shift, nativeGpSize);
  }
  //! \overload Uses an absolute (non-relative) address encoding.
  inline Mem intptr_ptr_abs(uint64_t base) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, nativeGpSize, BaseMem::kSignatureMemAbs);
  }
  //! \overload
  inline Mem intptr_ptr_abs(uint64_t base, const Gp& index, uint32_t shift = 0) const noexcept {
    uint32_t nativeGpSize = static_cast<const This*>(this)->gpSize();
    return Mem(base, index, shift, nativeGpSize, BaseMem::kSignatureMemAbs);
  }

  //! \}
284
  //! \name Embed
  //! \{

  // All embed helpers copy the value's bytes directly into the CodeBuffer
  // via `embed()` - host byte order, no relocation.

  //! Adds 8-bit integer data to the CodeBuffer.
  inline Error db(uint8_t x) { return static_cast<This*>(this)->embed(&x, 1); }
  //! Adds 16-bit integer data to the CodeBuffer.
  inline Error dw(uint16_t x) { return static_cast<This*>(this)->embed(&x, 2); }
  //! Adds 32-bit integer data to the CodeBuffer.
  inline Error dd(uint32_t x) { return static_cast<This*>(this)->embed(&x, 4); }
  //! Adds 64-bit integer data to the CodeBuffer.
  inline Error dq(uint64_t x) { return static_cast<This*>(this)->embed(&x, 8); }

  //! Adds 8-bit integer data to the CodeBuffer.
  inline Error dint8(int8_t x) { return static_cast<This*>(this)->embed(&x, sizeof(int8_t)); }
  //! Adds 8-bit integer data to the CodeBuffer.
  inline Error duint8(uint8_t x) { return static_cast<This*>(this)->embed(&x, sizeof(uint8_t)); }

  //! Adds 16-bit integer data to the CodeBuffer.
  inline Error dint16(int16_t x) { return static_cast<This*>(this)->embed(&x, sizeof(int16_t)); }
  //! Adds 16-bit integer data to the CodeBuffer.
  inline Error duint16(uint16_t x) { return static_cast<This*>(this)->embed(&x, sizeof(uint16_t)); }

  //! Adds 32-bit integer data to the CodeBuffer.
  inline Error dint32(int32_t x) { return static_cast<This*>(this)->embed(&x, sizeof(int32_t)); }
  //! Adds 32-bit integer data to the CodeBuffer.
  inline Error duint32(uint32_t x) { return static_cast<This*>(this)->embed(&x, sizeof(uint32_t)); }

  //! Adds 64-bit integer data to the CodeBuffer.
  inline Error dint64(int64_t x) { return static_cast<This*>(this)->embed(&x, sizeof(int64_t)); }
  //! Adds 64-bit integer data to the CodeBuffer.
  inline Error duint64(uint64_t x) { return static_cast<This*>(this)->embed(&x, sizeof(uint64_t)); }

  //! Adds float data to the CodeBuffer.
  inline Error dfloat(float x) { return static_cast<This*>(this)->embed(&x, sizeof(float)); }
  //! Adds double data to the CodeBuffer.
  inline Error ddouble(double x) { return static_cast<This*>(this)->embed(&x, sizeof(double)); }

  //! Adds MMX data to the CodeBuffer.
  inline Error dmm(const Data64& x) { return static_cast<This*>(this)->embed(&x, sizeof(Data64)); }
  //! Adds XMM data to the CodeBuffer.
  inline Error dxmm(const Data128& x) { return static_cast<This*>(this)->embed(&x, sizeof(Data128)); }
  //! Adds YMM data to the CodeBuffer.
  inline Error dymm(const Data256& x) { return static_cast<This*>(this)->embed(&x, sizeof(Data256)); }

  //! Adds data in a given structure instance to the CodeBuffer.
  //!
  //! NOTE: `T` must be trivially copyable - its raw bytes (including any
  //! padding) are embedded as-is.
  template<typename T>
  inline Error dstruct(const T& x) { return static_cast<This*>(this)->embed(&x, uint32_t(sizeof(T))); }

  //! \}
334
protected:
  //! \cond
  //! ORs `options` into the options of the next emitted instruction and
  //! returns the derived emitter so option setters can be chained fluently.
  inline This& _addInstOptions(uint32_t options) noexcept {
    static_cast<This*>(this)->addInstOptions(options);
    return *static_cast<This*>(this);
  }
  //! \endcond
342
343 public:
344 //! \name Short/Long Form Options
345 //! \{
346
347 //! Force short form of jmp/jcc instruction.
348 inline This& short_() noexcept { return _addInstOptions(Inst::kOptionShortForm); }
349 //! Force long form of jmp/jcc instruction.
350 inline This& long_() noexcept { return _addInstOptions(Inst::kOptionLongForm); }
351
352 //! \}
353
354 //! \name Encoding Options
355 //! \{
356
357 //! Prefer MOD_MR encoding over MOD_RM (the default) when encoding instruction
358 //! that allows both. This option is only applicable to instructions where both
359 //! operands are registers.
360 inline This& mod_mr() noexcept { return _addInstOptions(Inst::kOptionModMR); }
361
362 //! \}
363
364 //! \name Prefix Options
365 //! \{
366
367 //! Condition is likely to be taken (has only benefit on P4).
368 inline This& taken() noexcept { return _addInstOptions(Inst::kOptionTaken); }
369 //! Condition is unlikely to be taken (has only benefit on P4).
370 inline This& notTaken() noexcept { return _addInstOptions(Inst::kOptionNotTaken); }
371
372 //! Use LOCK prefix.
373 inline This& lock() noexcept { return _addInstOptions(Inst::kOptionLock); }
374 //! Use XACQUIRE prefix.
375 inline This& xacquire() noexcept { return _addInstOptions(Inst::kOptionXAcquire); }
376 //! Use XRELEASE prefix.
377 inline This& xrelease() noexcept { return _addInstOptions(Inst::kOptionXRelease); }
378
379 //! Use BND/REPNE prefix.
380 //!
381 //! \note This is the same as using `repne()` or `repnz()` prefix.
382 inline This& bnd() noexcept { return _addInstOptions(Inst::kOptionRepne); }
383
384 //! Use REP/REPZ prefix.
385 //!
386 //! \note This is the same as using `repe()` or `repz()` prefix.
387 inline This& rep(const Gp& zcx) noexcept {
388 static_cast<This*>(this)->_extraReg.init(zcx);
389 return _addInstOptions(Inst::kOptionRep);
390 }
391
392 //! Use REP/REPE prefix.
393 //!
394 //! \note This is the same as using `rep()` or `repz()` prefix.
395 inline This& repe(const Gp& zcx) noexcept { return rep(zcx); }
396
397 //! Use REP/REPE prefix.
398 //!
399 //! \note This is the same as using `rep()` or `repe()` prefix.
400 inline This& repz(const Gp& zcx) noexcept { return rep(zcx); }
401
402 //! Use REPNE prefix.
403 //!
404 //! \note This is the same as using `bnd()` or `repnz()` prefix.
405 inline This& repne(const Gp& zcx) noexcept {
406 static_cast<This*>(this)->_extraReg.init(zcx);
407 return _addInstOptions(Inst::kOptionRepne);
408 }
409
410 //! Use REPNZ prefix.
411 //!
412 //! \note This is the same as using `bnd()` or `repne()` prefix.
413 inline This& repnz(const Gp& zcx) noexcept { return repne(zcx); }
414
415 //! \}
416
417 //! \name REX Options
418 //! \{
419
420 //! Force REX prefix to be emitted even when it's not needed (X86_64).
421 //!
422 //! \note Don't use when using high 8-bit registers as REX prefix makes them
423 //! inaccessible and `x86::Assembler` would fail to encode such instruction.
424 inline This& rex() noexcept { return _addInstOptions(Inst::kOptionRex); }
425
426 //! Force REX.B prefix (X64) [It exists for special purposes only].
427 inline This& rex_b() noexcept { return _addInstOptions(Inst::kOptionOpCodeB); }
428 //! Force REX.X prefix (X64) [It exists for special purposes only].
429 inline This& rex_x() noexcept { return _addInstOptions(Inst::kOptionOpCodeX); }
430 //! Force REX.R prefix (X64) [It exists for special purposes only].
431 inline This& rex_r() noexcept { return _addInstOptions(Inst::kOptionOpCodeR); }
432 //! Force REX.W prefix (X64) [It exists for special purposes only].
433 inline This& rex_w() noexcept { return _addInstOptions(Inst::kOptionOpCodeW); }
434
435 //! \}
436
437 //! \name VEX and EVEX Options
438 //! \{
439
440 //! Force 3-byte VEX prefix (AVX+).
441 inline This& vex3() noexcept { return _addInstOptions(Inst::kOptionVex3); }
442 //! Force 4-byte EVEX prefix (AVX512+).
443 inline This& evex() noexcept { return _addInstOptions(Inst::kOptionEvex); }
444
445 //! \}
446
447 //! \name AVX-512 Options & Masking
448 //! \{
449
450 //! Use masking {k} (AVX512+).
451 inline This& k(const KReg& kreg) noexcept {
452 static_cast<This*>(this)->_extraReg.init(kreg);
453 return *static_cast<This*>(this);
454 }
455
456 //! Use zeroing instead of merging (AVX512+).
457 inline This& z() noexcept { return _addInstOptions(Inst::kOptionZMask); }
458
459 //! Suppress all exceptions (AVX512+).
460 inline This& sae() noexcept { return _addInstOptions(Inst::kOptionSAE); }
461 //! Static rounding mode {rn} (round-to-nearest even) and {sae} (AVX512+).
462 inline This& rn_sae() noexcept { return _addInstOptions(Inst::kOptionER | Inst::kOptionRN_SAE); }
463 //! Static rounding mode {rd} (round-down, toward -inf) and {sae} (AVX512+).
464 inline This& rd_sae() noexcept { return _addInstOptions(Inst::kOptionER | Inst::kOptionRD_SAE); }
465 //! Static rounding mode {ru} (round-up, toward +inf) and {sae} (AVX512+).
466 inline This& ru_sae() noexcept { return _addInstOptions(Inst::kOptionER | Inst::kOptionRU_SAE); }
467 //! Static rounding mode {rz} (round-toward-zero, truncate) and {sae} (AVX512+).
468 inline This& rz_sae() noexcept { return _addInstOptions(Inst::kOptionER | Inst::kOptionRZ_SAE); }
469
470 //! \}
471
472 //! \name Base Instructions & GP Extensions
473 //! \{
474
475 ASMJIT_INST_2x(adc, Adc, Gp, Gp) // ANY
476 ASMJIT_INST_2x(adc, Adc, Gp, Mem) // ANY
477 ASMJIT_INST_2i(adc, Adc, Gp, Imm) // ANY
478 ASMJIT_INST_2x(adc, Adc, Mem, Gp) // ANY
479 ASMJIT_INST_2i(adc, Adc, Mem, Imm) // ANY
480 ASMJIT_INST_2x(add, Add, Gp, Gp) // ANY
481 ASMJIT_INST_2x(add, Add, Gp, Mem) // ANY
482 ASMJIT_INST_2i(add, Add, Gp, Imm) // ANY
483 ASMJIT_INST_2x(add, Add, Mem, Gp) // ANY
484 ASMJIT_INST_2i(add, Add, Mem, Imm) // ANY
485 ASMJIT_INST_2x(and_, And, Gp, Gp) // ANY
486 ASMJIT_INST_2x(and_, And, Gp, Mem) // ANY
487 ASMJIT_INST_2i(and_, And, Gp, Imm) // ANY
488 ASMJIT_INST_2x(and_, And, Mem, Gp) // ANY
489 ASMJIT_INST_2i(and_, And, Mem, Imm) // ANY
490 ASMJIT_INST_2x(arpl, Arpl, Gp, Gp) // X86
491 ASMJIT_INST_2x(arpl, Arpl, Mem, Gp) // X86
492 ASMJIT_INST_2x(bound, Bound, Gp, Mem) // X86
493 ASMJIT_INST_2x(bsf, Bsf, Gp, Gp) // ANY
494 ASMJIT_INST_2x(bsf, Bsf, Gp, Mem) // ANY
495 ASMJIT_INST_2x(bsr, Bsr, Gp, Gp) // ANY
496 ASMJIT_INST_2x(bsr, Bsr, Gp, Mem) // ANY
497 ASMJIT_INST_1x(bswap, Bswap, Gp) // ANY
498 ASMJIT_INST_2x(bt, Bt, Gp, Gp) // ANY
499 ASMJIT_INST_2i(bt, Bt, Gp, Imm) // ANY
500 ASMJIT_INST_2x(bt, Bt, Mem, Gp) // ANY
501 ASMJIT_INST_2i(bt, Bt, Mem, Imm) // ANY
502 ASMJIT_INST_2x(btc, Btc, Gp, Gp) // ANY
503 ASMJIT_INST_2i(btc, Btc, Gp, Imm) // ANY
504 ASMJIT_INST_2x(btc, Btc, Mem, Gp) // ANY
505 ASMJIT_INST_2i(btc, Btc, Mem, Imm) // ANY
506 ASMJIT_INST_2x(btr, Btr, Gp, Gp) // ANY
507 ASMJIT_INST_2i(btr, Btr, Gp, Imm) // ANY
508 ASMJIT_INST_2x(btr, Btr, Mem, Gp) // ANY
509 ASMJIT_INST_2i(btr, Btr, Mem, Imm) // ANY
510 ASMJIT_INST_2x(bts, Bts, Gp, Gp) // ANY
511 ASMJIT_INST_2i(bts, Bts, Gp, Imm) // ANY
512 ASMJIT_INST_2x(bts, Bts, Mem, Gp) // ANY
513 ASMJIT_INST_2i(bts, Bts, Mem, Imm) // ANY
514 ASMJIT_INST_1x(cbw, Cbw, AX) // ANY [EXPLICIT] AX <- Sign Extend AL
515 ASMJIT_INST_2x(cdq, Cdq, EDX, EAX) // ANY [EXPLICIT] EDX:EAX <- Sign Extend EAX
516 ASMJIT_INST_1x(cdqe, Cdqe, EAX) // X64 [EXPLICIT] RAX <- Sign Extend EAX
517 ASMJIT_INST_2x(cqo, Cqo, RDX, RAX) // X64 [EXPLICIT] RDX:RAX <- Sign Extend RAX
518 ASMJIT_INST_2x(cwd, Cwd, DX, AX) // ANY [EXPLICIT] DX:AX <- Sign Extend AX
519 ASMJIT_INST_1x(cwde, Cwde, EAX) // ANY [EXPLICIT] EAX <- Sign Extend AX
520 ASMJIT_INST_1x(call, Call, Gp) // ANY
521 ASMJIT_INST_1x(call, Call, Mem) // ANY
522 ASMJIT_INST_1x(call, Call, Label) // ANY
523 ASMJIT_INST_1i(call, Call, Imm) // ANY
524 ASMJIT_INST_0x(clc, Clc) // ANY
525 ASMJIT_INST_0x(cld, Cld) // ANY
526 ASMJIT_INST_0x(cli, Cli) // ANY
527 ASMJIT_INST_0x(clts, Clts) // ANY
528 ASMJIT_INST_0x(cmc, Cmc) // ANY
529 ASMJIT_INST_2c(cmov, Cmov, Condition::toCmovcc, Gp, Gp) // CMOV
530 ASMJIT_INST_2c(cmov, Cmov, Condition::toCmovcc, Gp, Mem) // CMOV
531 ASMJIT_INST_2x(cmp, Cmp, Gp, Gp) // ANY
532 ASMJIT_INST_2x(cmp, Cmp, Gp, Mem) // ANY
533 ASMJIT_INST_2i(cmp, Cmp, Gp, Imm) // ANY
534 ASMJIT_INST_2x(cmp, Cmp, Mem, Gp) // ANY
535 ASMJIT_INST_2i(cmp, Cmp, Mem, Imm) // ANY
536 ASMJIT_INST_2x(cmps, Cmps, DS_ZSI, ES_ZDI) // ANY [EXPLICIT]
537 ASMJIT_INST_3x(cmpxchg, Cmpxchg, Gp, Gp, ZAX) // I486 [EXPLICIT]
538 ASMJIT_INST_3x(cmpxchg, Cmpxchg, Mem, Gp, ZAX) // I486 [EXPLICIT]
539 ASMJIT_INST_5x(cmpxchg16b, Cmpxchg16b, Mem, RDX, RAX, RCX, RBX); // CMPXCHG16B[EXPLICIT] m == RDX:RAX ? m <- RCX:RBX
540 ASMJIT_INST_5x(cmpxchg8b, Cmpxchg8b, Mem, EDX, EAX, ECX, EBX); // CMPXCHG8B [EXPLICIT] m == EDX:EAX ? m <- ECX:EBX
541 ASMJIT_INST_4x(cpuid, Cpuid, EAX, EBX, ECX, EDX) // I486 [EXPLICIT] EAX:EBX:ECX:EDX <- CPUID[EAX:ECX]
542 ASMJIT_INST_1x(daa, Daa, Gp) // X86 [EXPLICIT]
543 ASMJIT_INST_1x(das, Das, Gp) // X86 [EXPLICIT]
544 ASMJIT_INST_1x(dec, Dec, Gp) // ANY
545 ASMJIT_INST_1x(dec, Dec, Mem) // ANY
546 ASMJIT_INST_2x(div, Div, Gp, Gp) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / r8
547 ASMJIT_INST_2x(div, Div, Gp, Mem) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / m8
548 ASMJIT_INST_3x(div, Div, Gp, Gp, Gp) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64
549 ASMJIT_INST_3x(div, Div, Gp, Gp, Mem) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64
550 ASMJIT_INST_0x(emms, Emms) // MMX
551 ASMJIT_INST_2x(enter, Enter, Imm, Imm) // ANY
552 ASMJIT_INST_0x(hlt, Hlt) // ANY
553 ASMJIT_INST_2x(idiv, Idiv, Gp, Gp) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / r8
554 ASMJIT_INST_2x(idiv, Idiv, Gp, Mem) // ANY [EXPLICIT] AH[Rem]: AL[Quot] <- AX / m8
555 ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Gp) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / r16|r32|r64
556 ASMJIT_INST_3x(idiv, Idiv, Gp, Gp, Mem) // ANY [EXPLICIT] xDX[Rem]:xAX[Quot] <- xDX:xAX / m16|m32|m64
557 ASMJIT_INST_2x(imul, Imul, Gp, Gp) // ANY [EXPLICIT] AX <- AL * r8 | ra <- ra * rb
558 ASMJIT_INST_2x(imul, Imul, Gp, Mem) // ANY [EXPLICIT] AX <- AL * m8 | ra <- ra * m16|m32|m64
559 ASMJIT_INST_2i(imul, Imul, Gp, Imm) // ANY
560 ASMJIT_INST_3i(imul, Imul, Gp, Gp, Imm) // ANY
561 ASMJIT_INST_3i(imul, Imul, Gp, Mem, Imm) // ANY
562 ASMJIT_INST_3x(imul, Imul, Gp, Gp, Gp) // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64
563 ASMJIT_INST_3x(imul, Imul, Gp, Gp, Mem) // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64
564 ASMJIT_INST_2i(in, In, ZAX, Imm) // ANY
565 ASMJIT_INST_2x(in, In, ZAX, DX) // ANY
566 ASMJIT_INST_1x(inc, Inc, Gp) // ANY
567 ASMJIT_INST_1x(inc, Inc, Mem) // ANY
568 ASMJIT_INST_2x(ins, Ins, ES_ZDI, DX) // ANY
569 ASMJIT_INST_1i(int_, Int, Imm) // ANY
570 ASMJIT_INST_0x(int3, Int3) // ANY
571 ASMJIT_INST_0x(into, Into) // ANY
572 ASMJIT_INST_0x(invd, Invd) // ANY
573 ASMJIT_INST_1x(invlpg, Invlpg, Mem) // ANY
574 ASMJIT_INST_2x(invpcid, Invpcid, Gp, Mem) // ANY
575 ASMJIT_INST_1c(j, J, Condition::toJcc, Label) // ANY
576 ASMJIT_INST_1c(j, J, Condition::toJcc, Imm) // ANY
577 ASMJIT_INST_1c(j, J, Condition::toJcc, uint64_t) // ANY
578 ASMJIT_INST_2x(jecxz, Jecxz, Gp, Label) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero.
579 ASMJIT_INST_2x(jecxz, Jecxz, Gp, Imm) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero.
580 ASMJIT_INST_2x(jecxz, Jecxz, Gp, uint64_t) // ANY [EXPLICIT] Short jump if CX/ECX/RCX is zero.
581 ASMJIT_INST_1x(jmp, Jmp, Gp) // ANY
582 ASMJIT_INST_1x(jmp, Jmp, Mem) // ANY
583 ASMJIT_INST_1x(jmp, Jmp, Label) // ANY
584 ASMJIT_INST_1x(jmp, Jmp, Imm) // ANY
585 ASMJIT_INST_1x(jmp, Jmp, uint64_t) // ANY
586 ASMJIT_INST_1x(lahf, Lahf, AH) // LAHFSAHF [EXPLICIT] AH <- EFL
587 ASMJIT_INST_2x(lar, Lar, Gp, Gp) // ANY
588 ASMJIT_INST_2x(lar, Lar, Gp, Mem) // ANY
589 ASMJIT_INST_1x(ldmxcsr, Ldmxcsr, Mem) // SSE
590 ASMJIT_INST_2x(lds, Lds, Gp, Mem) // X86
591 ASMJIT_INST_2x(lea, Lea, Gp, Mem) // ANY
592 ASMJIT_INST_0x(leave, Leave) // ANY
593 ASMJIT_INST_2x(les, Les, Gp, Mem) // X86
594 ASMJIT_INST_0x(lfence, Lfence) // SSE2
595 ASMJIT_INST_2x(lfs, Lfs, Gp, Mem) // ANY
596 ASMJIT_INST_1x(lgdt, Lgdt, Mem) // ANY
597 ASMJIT_INST_2x(lgs, Lgs, Gp, Mem) // ANY
598 ASMJIT_INST_1x(lidt, Lidt, Mem) // ANY
599 ASMJIT_INST_1x(lldt, Lldt, Gp) // ANY
600 ASMJIT_INST_1x(lldt, Lldt, Mem) // ANY
601 ASMJIT_INST_1x(lmsw, Lmsw, Gp) // ANY
602 ASMJIT_INST_1x(lmsw, Lmsw, Mem) // ANY
603 ASMJIT_INST_2x(lods, Lods, ZAX, DS_ZSI) // ANY [EXPLICIT]
604 ASMJIT_INST_2x(loop, Loop, ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0.
605 ASMJIT_INST_2x(loop, Loop, ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0.
606 ASMJIT_INST_2x(loop, Loop, ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0.
607 ASMJIT_INST_2x(loope, Loope, ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
608 ASMJIT_INST_2x(loope, Loope, ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
609 ASMJIT_INST_2x(loope, Loope, ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
610 ASMJIT_INST_2x(loopne, Loopne, ZCX, Label) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
611 ASMJIT_INST_2x(loopne, Loopne, ZCX, Imm) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
612 ASMJIT_INST_2x(loopne, Loopne, ZCX, uint64_t) // ANY [EXPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
613 ASMJIT_INST_2x(lsl, Lsl, Gp, Gp) // ANY
614 ASMJIT_INST_2x(lsl, Lsl, Gp, Mem) // ANY
615 ASMJIT_INST_2x(lss, Lss, Gp, Mem) // ANY
616 ASMJIT_INST_1x(ltr, Ltr, Gp) // ANY
617 ASMJIT_INST_1x(ltr, Ltr, Mem) // ANY
618 ASMJIT_INST_0x(mfence, Mfence) // SSE2
619 ASMJIT_INST_2x(mov, Mov, Gp, Gp) // ANY
620 ASMJIT_INST_2x(mov, Mov, Gp, Mem) // ANY
621 ASMJIT_INST_2i(mov, Mov, Gp, Imm) // ANY
622 ASMJIT_INST_2x(mov, Mov, Mem, Gp) // ANY
623 ASMJIT_INST_2i(mov, Mov, Mem, Imm) // ANY
624 ASMJIT_INST_2x(mov, Mov, Gp, CReg) // ANY
625 ASMJIT_INST_2x(mov, Mov, CReg, Gp) // ANY
626 ASMJIT_INST_2x(mov, Mov, Gp, DReg) // ANY
627 ASMJIT_INST_2x(mov, Mov, DReg, Gp) // ANY
628 ASMJIT_INST_2x(mov, Mov, Gp, SReg) // ANY
629 ASMJIT_INST_2x(mov, Mov, Mem, SReg) // ANY
630 ASMJIT_INST_2x(mov, Mov, SReg, Gp) // ANY
631 ASMJIT_INST_2x(mov, Mov, SReg, Mem) // ANY
632 ASMJIT_INST_2x(movnti, Movnti, Mem, Gp) // SSE2
633 ASMJIT_INST_2x(movs, Movs, ES_ZDI, DS_ZSI) // ANY [EXPLICIT]
634 ASMJIT_INST_2x(movsx, Movsx, Gp, Gp) // ANY
635 ASMJIT_INST_2x(movsx, Movsx, Gp, Mem) // ANY
636 ASMJIT_INST_2x(movsxd, Movsxd, Gp, Gp) // X64
637 ASMJIT_INST_2x(movsxd, Movsxd, Gp, Mem) // X64
638 ASMJIT_INST_2x(movzx, Movzx, Gp, Gp) // ANY
639 ASMJIT_INST_2x(movzx, Movzx, Gp, Mem) // ANY
640 ASMJIT_INST_2x(mul, Mul, AX, Gp) // ANY [EXPLICIT] AX <- AL * r8
641 ASMJIT_INST_2x(mul, Mul, AX, Mem) // ANY [EXPLICIT] AX <- AL * m8
642 ASMJIT_INST_3x(mul, Mul, ZDX, ZAX, Gp) // ANY [EXPLICIT] xDX:xAX <- xAX * r16|r32|r64
643 ASMJIT_INST_3x(mul, Mul, ZDX, ZAX, Mem) // ANY [EXPLICIT] xDX:xAX <- xAX * m16|m32|m64
644 ASMJIT_INST_1x(neg, Neg, Gp) // ANY
645 ASMJIT_INST_1x(neg, Neg, Mem) // ANY
646 ASMJIT_INST_0x(nop, Nop) // ANY
647 ASMJIT_INST_1x(nop, Nop, Gp) // ANY
648 ASMJIT_INST_1x(nop, Nop, Mem) // ANY
649 ASMJIT_INST_1x(not_, Not, Gp) // ANY
650 ASMJIT_INST_1x(not_, Not, Mem) // ANY
651 ASMJIT_INST_2x(or_, Or, Gp, Gp) // ANY
652 ASMJIT_INST_2x(or_, Or, Gp, Mem) // ANY
653 ASMJIT_INST_2i(or_, Or, Gp, Imm) // ANY
654 ASMJIT_INST_2x(or_, Or, Mem, Gp) // ANY
655 ASMJIT_INST_2i(or_, Or, Mem, Imm) // ANY
656 ASMJIT_INST_2x(out, Out, Imm, ZAX) // ANY
657 ASMJIT_INST_2x(out, Out, DX, ZAX) // ANY
658 ASMJIT_INST_2x(outs, Outs, DX, DS_ZSI) // ANY
659 ASMJIT_INST_0x(pause, Pause) // SSE2
660 ASMJIT_INST_1x(pop, Pop, Gp) // ANY
661 ASMJIT_INST_1x(pop, Pop, Mem) // ANY
662 ASMJIT_INST_1x(pop, Pop, SReg); // ANY
663 ASMJIT_INST_0x(popa, Popa) // X86
664 ASMJIT_INST_0x(popad, Popad) // X86
665 ASMJIT_INST_0x(popf, Popf) // ANY
666 ASMJIT_INST_0x(popfd, Popfd) // X86
667 ASMJIT_INST_0x(popfq, Popfq) // X64
668 ASMJIT_INST_1x(prefetch, Prefetch, Mem) // 3DNOW
669 ASMJIT_INST_1x(prefetchnta, Prefetchnta, Mem) // SSE
670 ASMJIT_INST_1x(prefetcht0, Prefetcht0, Mem) // SSE
671 ASMJIT_INST_1x(prefetcht1, Prefetcht1, Mem) // SSE
672 ASMJIT_INST_1x(prefetcht2, Prefetcht2, Mem) // SSE
673 ASMJIT_INST_1x(prefetchw, Prefetchw, Mem) // PREFETCHW
674 ASMJIT_INST_1x(prefetchwt1, Prefetchwt1, Mem) // PREFETCHW1
675 ASMJIT_INST_1x(push, Push, Gp) // ANY
676 ASMJIT_INST_1x(push, Push, Mem) // ANY
677 ASMJIT_INST_1x(push, Push, SReg) // ANY
678 ASMJIT_INST_1i(push, Push, Imm) // ANY
679 ASMJIT_INST_0x(pusha, Pusha) // X86
680 ASMJIT_INST_0x(pushad, Pushad) // X86
681 ASMJIT_INST_0x(pushf, Pushf) // ANY
682 ASMJIT_INST_0x(pushfd, Pushfd) // X86
683 ASMJIT_INST_0x(pushfq, Pushfq) // X64
684 ASMJIT_INST_2x(rcl, Rcl, Gp, CL) // ANY
685 ASMJIT_INST_2x(rcl, Rcl, Mem, CL) // ANY
686 ASMJIT_INST_2i(rcl, Rcl, Gp, Imm) // ANY
687 ASMJIT_INST_2i(rcl, Rcl, Mem, Imm) // ANY
688 ASMJIT_INST_2x(rcr, Rcr, Gp, CL) // ANY
689 ASMJIT_INST_2x(rcr, Rcr, Mem, CL) // ANY
690 ASMJIT_INST_2i(rcr, Rcr, Gp, Imm) // ANY
691 ASMJIT_INST_2i(rcr, Rcr, Mem, Imm) // ANY
692 ASMJIT_INST_3x(rdmsr, Rdmsr, EDX, EAX, ECX) // MSR [EXPLICIT] EDX:EAX <- MSR[ECX]
693 ASMJIT_INST_3x(rdpmc, Rdpmc, EDX, EAX, ECX) // ANY [EXPLICIT] EDX:EAX <- PMC[ECX]
694 ASMJIT_INST_2x(rdtsc, Rdtsc, EDX, EAX) // RDTSC [EXPLICIT] EDX:EAX <- Counter
695 ASMJIT_INST_3x(rdtscp, Rdtscp, EDX, EAX, ECX) // RDTSCP [EXPLICIT] EDX:EAX:ECX <- Counter
696 ASMJIT_INST_2x(rol, Rol, Gp, CL) // ANY
697 ASMJIT_INST_2x(rol, Rol, Mem, CL) // ANY
698 ASMJIT_INST_2i(rol, Rol, Gp, Imm) // ANY
699 ASMJIT_INST_2i(rol, Rol, Mem, Imm) // ANY
700 ASMJIT_INST_2x(ror, Ror, Gp, CL) // ANY
701 ASMJIT_INST_2x(ror, Ror, Mem, CL) // ANY
702 ASMJIT_INST_2i(ror, Ror, Gp, Imm) // ANY
703 ASMJIT_INST_2i(ror, Ror, Mem, Imm) // ANY
704 ASMJIT_INST_0x(rsm, Rsm) // X86
705 ASMJIT_INST_2x(sbb, Sbb, Gp, Gp) // ANY
706 ASMJIT_INST_2x(sbb, Sbb, Gp, Mem) // ANY
707 ASMJIT_INST_2i(sbb, Sbb, Gp, Imm) // ANY
708 ASMJIT_INST_2x(sbb, Sbb, Mem, Gp) // ANY
709 ASMJIT_INST_2i(sbb, Sbb, Mem, Imm) // ANY
710 ASMJIT_INST_1x(sahf, Sahf, AH) // LAHFSAHF [EXPLICIT] EFL <- AH
711 ASMJIT_INST_2x(sal, Sal, Gp, CL) // ANY
712 ASMJIT_INST_2x(sal, Sal, Mem, CL) // ANY
713 ASMJIT_INST_2i(sal, Sal, Gp, Imm) // ANY
714 ASMJIT_INST_2i(sal, Sal, Mem, Imm) // ANY
715 ASMJIT_INST_2x(sar, Sar, Gp, CL) // ANY
716 ASMJIT_INST_2x(sar, Sar, Mem, CL) // ANY
717 ASMJIT_INST_2i(sar, Sar, Gp, Imm) // ANY
718 ASMJIT_INST_2i(sar, Sar, Mem, Imm) // ANY
719 ASMJIT_INST_2x(scas, Scas, ZAX, ES_ZDI) // ANY [EXPLICIT]
720 ASMJIT_INST_1c(set, Set, Condition::toSetcc, Gp) // ANY
721 ASMJIT_INST_1c(set, Set, Condition::toSetcc, Mem) // ANY
722 ASMJIT_INST_0x(sfence, Sfence) // SSE
723 ASMJIT_INST_1x(sgdt, Sgdt, Mem) // ANY
724 ASMJIT_INST_2x(shl, Shl, Gp, CL) // ANY
725 ASMJIT_INST_2x(shl, Shl, Mem, CL) // ANY
726 ASMJIT_INST_2i(shl, Shl, Gp, Imm) // ANY
727 ASMJIT_INST_2i(shl, Shl, Mem, Imm) // ANY
728 ASMJIT_INST_2x(shr, Shr, Gp, CL) // ANY
729 ASMJIT_INST_2x(shr, Shr, Mem, CL) // ANY
730 ASMJIT_INST_2i(shr, Shr, Gp, Imm) // ANY
731 ASMJIT_INST_2i(shr, Shr, Mem, Imm) // ANY
732 ASMJIT_INST_3x(shld, Shld, Gp, Gp, CL) // ANY
733 ASMJIT_INST_3x(shld, Shld, Mem, Gp, CL) // ANY
734 ASMJIT_INST_3i(shld, Shld, Gp, Gp, Imm) // ANY
735 ASMJIT_INST_3i(shld, Shld, Mem, Gp, Imm) // ANY
736 ASMJIT_INST_3x(shrd, Shrd, Gp, Gp, CL) // ANY
737 ASMJIT_INST_3x(shrd, Shrd, Mem, Gp, CL) // ANY
738 ASMJIT_INST_3i(shrd, Shrd, Gp, Gp, Imm) // ANY
739 ASMJIT_INST_3i(shrd, Shrd, Mem, Gp, Imm) // ANY
740 ASMJIT_INST_1x(sidt, Sidt, Mem) // ANY
741 ASMJIT_INST_1x(sldt, Sldt, Gp) // ANY
742 ASMJIT_INST_1x(sldt, Sldt, Mem) // ANY
743 ASMJIT_INST_1x(smsw, Smsw, Gp) // ANY
744 ASMJIT_INST_1x(smsw, Smsw, Mem) // ANY
745 ASMJIT_INST_0x(stc, Stc) // ANY
746 ASMJIT_INST_0x(std, Std) // ANY
747 ASMJIT_INST_0x(sti, Sti) // ANY
748 ASMJIT_INST_1x(stmxcsr, Stmxcsr, Mem) // SSE
749 ASMJIT_INST_2x(stos, Stos, ES_ZDI, ZAX) // ANY [EXPLICIT]
750 ASMJIT_INST_1x(str, Str, Gp) // ANY
751 ASMJIT_INST_1x(str, Str, Mem) // ANY
752 ASMJIT_INST_2x(sub, Sub, Gp, Gp) // ANY
753 ASMJIT_INST_2x(sub, Sub, Gp, Mem) // ANY
754 ASMJIT_INST_2i(sub, Sub, Gp, Imm) // ANY
755 ASMJIT_INST_2x(sub, Sub, Mem, Gp) // ANY
756 ASMJIT_INST_2i(sub, Sub, Mem, Imm) // ANY
757 ASMJIT_INST_0x(swapgs, Swapgs) // X64
758 ASMJIT_INST_2x(test, Test, Gp, Gp) // ANY
759 ASMJIT_INST_2i(test, Test, Gp, Imm) // ANY
760 ASMJIT_INST_2x(test, Test, Mem, Gp) // ANY
761 ASMJIT_INST_2i(test, Test, Mem, Imm) // ANY
762 ASMJIT_INST_0x(ud2, Ud2) // ANY
763 ASMJIT_INST_1x(verr, Verr, Gp) // ANY
764 ASMJIT_INST_1x(verr, Verr, Mem) // ANY
765 ASMJIT_INST_1x(verw, Verw, Gp) // ANY
766 ASMJIT_INST_1x(verw, Verw, Mem) // ANY
767 ASMJIT_INST_3x(wrmsr, Wrmsr, EDX, EAX, ECX) // MSR [EXPLICIT] EDX:EAX -> MSR[ECX]
768 ASMJIT_INST_2x(xadd, Xadd, Gp, Gp) // ANY
769 ASMJIT_INST_2x(xadd, Xadd, Mem, Gp) // ANY
770 ASMJIT_INST_2x(xchg, Xchg, Gp, Gp) // ANY
771 ASMJIT_INST_2x(xchg, Xchg, Mem, Gp) // ANY
772 ASMJIT_INST_2x(xchg, Xchg, Gp, Mem) // ANY
773 ASMJIT_INST_2x(xor_, Xor, Gp, Gp) // ANY
774 ASMJIT_INST_2x(xor_, Xor, Gp, Mem) // ANY
775 ASMJIT_INST_2i(xor_, Xor, Gp, Imm) // ANY
776 ASMJIT_INST_2x(xor_, Xor, Mem, Gp) // ANY
777 ASMJIT_INST_2i(xor_, Xor, Mem, Imm) // ANY
778
779 //! \}
780
781 //! \name ADX Instructions
782 //! \{
783
784 ASMJIT_INST_2x(adcx, Adcx, Gp, Gp) // ADX
785 ASMJIT_INST_2x(adcx, Adcx, Gp, Mem) // ADX
786 ASMJIT_INST_2x(adox, Adox, Gp, Gp) // ADX
787 ASMJIT_INST_2x(adox, Adox, Gp, Mem) // ADX
788
789 //! \}
790
791 //! \name BMI Instructions
792 //! \{
793
794 ASMJIT_INST_3x(andn, Andn, Gp, Gp, Gp) // BMI
795 ASMJIT_INST_3x(andn, Andn, Gp, Gp, Mem) // BMI
796 ASMJIT_INST_3x(bextr, Bextr, Gp, Gp, Gp) // BMI
797 ASMJIT_INST_3x(bextr, Bextr, Gp, Mem, Gp) // BMI
798 ASMJIT_INST_2x(blsi, Blsi, Gp, Gp) // BMI
799 ASMJIT_INST_2x(blsi, Blsi, Gp, Mem) // BMI
800 ASMJIT_INST_2x(blsmsk, Blsmsk, Gp, Gp) // BMI
801 ASMJIT_INST_2x(blsmsk, Blsmsk, Gp, Mem) // BMI
802 ASMJIT_INST_2x(blsr, Blsr, Gp, Gp) // BMI
803 ASMJIT_INST_2x(blsr, Blsr, Gp, Mem) // BMI
804 ASMJIT_INST_2x(tzcnt, Tzcnt, Gp, Gp) // BMI
805 ASMJIT_INST_2x(tzcnt, Tzcnt, Gp, Mem) // BMI
806
807 //! \}
808
809 //! \name BMI2 Instructions
810 //! \{
811
812 ASMJIT_INST_3x(bzhi, Bzhi, Gp, Gp, Gp) // BMI2
813 ASMJIT_INST_3x(bzhi, Bzhi, Gp, Mem, Gp) // BMI2
814 ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Gp, ZDX) // BMI2 [EXPLICIT]
815 ASMJIT_INST_4x(mulx, Mulx, Gp, Gp, Mem, ZDX) // BMI2 [EXPLICIT]
816 ASMJIT_INST_3x(pdep, Pdep, Gp, Gp, Gp) // BMI2
817 ASMJIT_INST_3x(pdep, Pdep, Gp, Gp, Mem) // BMI2
818 ASMJIT_INST_3x(pext, Pext, Gp, Gp, Gp) // BMI2
819 ASMJIT_INST_3x(pext, Pext, Gp, Gp, Mem) // BMI2
820 ASMJIT_INST_3i(rorx, Rorx, Gp, Gp, Imm) // BMI2
821 ASMJIT_INST_3i(rorx, Rorx, Gp, Mem, Imm) // BMI2
822 ASMJIT_INST_3x(sarx, Sarx, Gp, Gp, Gp) // BMI2
823 ASMJIT_INST_3x(sarx, Sarx, Gp, Mem, Gp) // BMI2
824 ASMJIT_INST_3x(shlx, Shlx, Gp, Gp, Gp) // BMI2
825 ASMJIT_INST_3x(shlx, Shlx, Gp, Mem, Gp) // BMI2
826 ASMJIT_INST_3x(shrx, Shrx, Gp, Gp, Gp) // BMI2
827 ASMJIT_INST_3x(shrx, Shrx, Gp, Mem, Gp) // BMI2
828
829 //! \}
830
831 //! \name CL Instructions
832 //! \{
833
834 ASMJIT_INST_1x(cldemote, Cldemote, Mem) // CLDEMOTE
835 ASMJIT_INST_1x(clflush, Clflush, Mem) // CLFLUSH
836 ASMJIT_INST_1x(clflushopt, Clflushopt, Mem) // CLFLUSH_OPT
837 ASMJIT_INST_1x(clwb, Clwb, Mem) // CLWB
838 ASMJIT_INST_1x(clzero, Clzero, DS_ZAX) // CLZERO [EXPLICIT]
839 ASMJIT_INST_0x(wbnoinvd, Wbnoinvd) // WBNOINVD
840
841 //! \}
842
843 //! \name CRC32 Instructions
844 //! \{
845
846 ASMJIT_INST_2x(crc32, Crc32, Gp, Gp) // SSE4_2
847 ASMJIT_INST_2x(crc32, Crc32, Gp, Mem) // SSE4_2
848
849 //! \}
850
851 //! \name ENQCMD Instructions
852 //! \{
853
854 ASMJIT_INST_2x(enqcmd, Enqcmd, Mem, Mem) // ENQCMD
855 ASMJIT_INST_2x(enqcmds, Enqcmds, Mem, Mem) // ENQCMD
856
857 //! \}
858
859 //! \name FSGSBASE Instructions
860 //! \{
861
862 ASMJIT_INST_1x(rdfsbase, Rdfsbase, Gp) // FSGSBASE
863 ASMJIT_INST_1x(rdgsbase, Rdgsbase, Gp) // FSGSBASE
864 ASMJIT_INST_1x(wrfsbase, Wrfsbase, Gp) // FSGSBASE
865 ASMJIT_INST_1x(wrgsbase, Wrgsbase, Gp) // FSGSBASE
866
867 //! \}
868
869 //! \name FXSR & XSAVE Instructions
870 //! \{
871
872 ASMJIT_INST_1x(fxrstor, Fxrstor, Mem) // FXSR
873 ASMJIT_INST_1x(fxrstor64, Fxrstor64, Mem) // FXSR
874 ASMJIT_INST_1x(fxsave, Fxsave, Mem) // FXSR
875 ASMJIT_INST_1x(fxsave64, Fxsave64, Mem) // FXSR
876 ASMJIT_INST_3x(xgetbv, Xgetbv, EDX, EAX, ECX) // XSAVE [EXPLICIT] EDX:EAX <- XCR[ECX]
877 ASMJIT_INST_3x(xsetbv, Xsetbv, EDX, EAX, ECX) // XSAVE [EXPLICIT] XCR[ECX] <- EDX:EAX
878
879 //! \}
880
881 //! \name LWP Instructions
882 //! \{
883
884 ASMJIT_INST_1x(llwpcb, Llwpcb, Gp) // LWP
885 ASMJIT_INST_3i(lwpins, Lwpins, Gp, Gp, Imm) // LWP
886 ASMJIT_INST_3i(lwpins, Lwpins, Gp, Mem, Imm) // LWP
887 ASMJIT_INST_3i(lwpval, Lwpval, Gp, Gp, Imm) // LWP
888 ASMJIT_INST_3i(lwpval, Lwpval, Gp, Mem, Imm) // LWP
889 ASMJIT_INST_1x(slwpcb, Slwpcb, Gp) // LWP
890
891 //! \}
892
893 //! \name LZCNT Instructions
894 //! \{
895
896 ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Gp) // LZCNT
897 ASMJIT_INST_2x(lzcnt, Lzcnt, Gp, Mem) // LZCNT
898
899 //! \}
900
901 //! \name MOVBE Instructions
902 //! \{
903
904 ASMJIT_INST_2x(movbe, Movbe, Gp, Mem) // MOVBE
905 ASMJIT_INST_2x(movbe, Movbe, Mem, Gp) // MOVBE
906
907 //! \}
908
909 //! \name MOVDIRI & MOVDIR64B Instructions
910 //! \{
911
912 ASMJIT_INST_2x(movdiri, Movdiri, Mem, Gp) // MOVDIRI
913 ASMJIT_INST_2x(movdir64b, Movdir64b, Mem, Mem) // MOVDIR64B
914
915 //! \}
916
917 //! \name MPX Extensions
918 //! \{
919
920 ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Gp) // MPX
921 ASMJIT_INST_2x(bndcl, Bndcl, Bnd, Mem) // MPX
922 ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Gp) // MPX
923 ASMJIT_INST_2x(bndcn, Bndcn, Bnd, Mem) // MPX
924 ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Gp) // MPX
925 ASMJIT_INST_2x(bndcu, Bndcu, Bnd, Mem) // MPX
926 ASMJIT_INST_2x(bndldx, Bndldx, Bnd, Mem) // MPX
927 ASMJIT_INST_2x(bndmk, Bndmk, Bnd, Mem) // MPX
928 ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Bnd) // MPX
929 ASMJIT_INST_2x(bndmov, Bndmov, Bnd, Mem) // MPX
930 ASMJIT_INST_2x(bndmov, Bndmov, Mem, Bnd) // MPX
931 ASMJIT_INST_2x(bndstx, Bndstx, Mem, Bnd) // MPX
932
933 //! \}
934
935 //! \name POPCNT Instructions
936 //! \{
937
938 ASMJIT_INST_2x(popcnt, Popcnt, Gp, Gp) // POPCNT
939 ASMJIT_INST_2x(popcnt, Popcnt, Gp, Mem) // POPCNT
940
941 //! \}
942
943 //! \name RDRAND & RDSEED Instructions
944 //! \{
945
946 ASMJIT_INST_1x(rdrand, Rdrand, Gp) // RDRAND
947 ASMJIT_INST_1x(rdseed, Rdseed, Gp) // RDSEED
948
949 //! \}
950
951 //! \name RTM & TSX Instructions
952 //! \{
953
954 ASMJIT_INST_0x(xabort, Xabort) // RTM
955 ASMJIT_INST_1x(xbegin, Xbegin, Label) // RTM
956 ASMJIT_INST_1x(xbegin, Xbegin, Imm) // RTM
957 ASMJIT_INST_1x(xbegin, Xbegin, uint64_t) // RTM
958 ASMJIT_INST_0x(xend, Xend) // RTM
959 ASMJIT_INST_0x(xtest, Xtest) // TSX
960
961 //! \}
962
963 //! \name SMAP Instructions
964 //! \{
965
966 ASMJIT_INST_0x(clac, Clac) // SMAP
967 ASMJIT_INST_0x(stac, Stac) // SMAP
968
969 //! \}
970
971 //! \name SVM Instructions
972 //! \{
973
974 ASMJIT_INST_0x(clgi, Clgi) // SVM
975 ASMJIT_INST_2x(invlpga, Invlpga, Gp, Gp) // SVM [EXPLICIT] <eax|rax, ecx>
976 ASMJIT_INST_1x(skinit, Skinit, Gp) // SKINIT [EXPLICIT] <eax>
977 ASMJIT_INST_0x(stgi, Stgi) // SKINIT
978 ASMJIT_INST_1x(vmload, Vmload, Gp) // SVM [EXPLICIT] <zax>
979 ASMJIT_INST_0x(vmmcall, Vmmcall) // SVM
980 ASMJIT_INST_1x(vmrun, Vmrun, Gp) // SVM [EXPLICIT] <zax>
981 ASMJIT_INST_1x(vmsave, Vmsave, Gp) // SVM [EXPLICIT] <zax>
982
983 //! \}
984
985 //! \name TBM Instructions
986 //! \{
987
988 ASMJIT_INST_2x(blcfill, Blcfill, Gp, Gp) // TBM
989 ASMJIT_INST_2x(blcfill, Blcfill, Gp, Mem) // TBM
990 ASMJIT_INST_2x(blci, Blci, Gp, Gp) // TBM
991 ASMJIT_INST_2x(blci, Blci, Gp, Mem) // TBM
992 ASMJIT_INST_2x(blcic, Blcic, Gp, Gp) // TBM
993 ASMJIT_INST_2x(blcic, Blcic, Gp, Mem) // TBM
994 ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Gp) // TBM
995 ASMJIT_INST_2x(blcmsk, Blcmsk, Gp, Mem) // TBM
996 ASMJIT_INST_2x(blcs, Blcs, Gp, Gp) // TBM
997 ASMJIT_INST_2x(blcs, Blcs, Gp, Mem) // TBM
998 ASMJIT_INST_2x(blsfill, Blsfill, Gp, Gp) // TBM
999 ASMJIT_INST_2x(blsfill, Blsfill, Gp, Mem) // TBM
1000 ASMJIT_INST_2x(blsic, Blsic, Gp, Gp) // TBM
1001 ASMJIT_INST_2x(blsic, Blsic, Gp, Mem) // TBM
1002 ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Gp) // TBM
1003 ASMJIT_INST_2x(t1mskc, T1mskc, Gp, Mem) // TBM
1004 ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Gp) // TBM
1005 ASMJIT_INST_2x(tzmsk, Tzmsk, Gp, Mem) // TBM
1006
1007 //! \}
1008
1009 //! \name VMX Instructions
1010 //! \{
1011
1012 ASMJIT_INST_2x(invept, Invept, Gp, Mem) // VMX
1013 ASMJIT_INST_2x(invvpid, Invvpid, Gp, Mem) // VMX
1014 ASMJIT_INST_0x(vmcall, Vmcall) // VMX
1015 ASMJIT_INST_1x(vmclear, Vmclear, Mem) // VMX
1016 ASMJIT_INST_0x(vmfunc, Vmfunc) // VMX
1017 ASMJIT_INST_0x(vmlaunch, Vmlaunch) // VMX
1018 ASMJIT_INST_1x(vmptrld, Vmptrld, Mem) // VMX
1019 ASMJIT_INST_1x(vmptrst, Vmptrst, Mem) // VMX
1020 ASMJIT_INST_2x(vmread, Vmread, Mem, Gp) // VMX
1021 ASMJIT_INST_0x(vmresume, Vmresume) // VMX
1022 ASMJIT_INST_2x(vmwrite, Vmwrite, Gp, Mem) // VMX
1023 ASMJIT_INST_1x(vmxon, Vmxon, Mem) // VMX
1024
1025 //! \}
1026
1027 //! \name Other GP Instructions
1028 //! \{
1029
1030 ASMJIT_INST_0x(getsec, Getsec) // SMX
1031 ASMJIT_INST_0x(pcommit, Pcommit) // PCOMMIT
1032 ASMJIT_INST_1x(rdpid, Rdpid, Gp) // RDPID
1033
1034 //! \}
1035
1036 //! \name FPU Instructions
1037 //! \{
1038
1039 ASMJIT_INST_0x(f2xm1, F2xm1) // FPU
1040 ASMJIT_INST_0x(fabs, Fabs) // FPU
1041 ASMJIT_INST_2x(fadd, Fadd, St, St) // FPU
1042 ASMJIT_INST_1x(fadd, Fadd, Mem) // FPU
1043 ASMJIT_INST_1x(faddp, Faddp, St) // FPU
1044 ASMJIT_INST_0x(faddp, Faddp) // FPU
1045 ASMJIT_INST_1x(fbld, Fbld, Mem) // FPU
1046 ASMJIT_INST_1x(fbstp, Fbstp, Mem) // FPU
1047 ASMJIT_INST_0x(fchs, Fchs) // FPU
1048 ASMJIT_INST_0x(fclex, Fclex) // FPU
1049 ASMJIT_INST_1x(fcmovb, Fcmovb, St) // FPU
1050 ASMJIT_INST_1x(fcmovbe, Fcmovbe, St) // FPU
1051 ASMJIT_INST_1x(fcmove, Fcmove, St) // FPU
1052 ASMJIT_INST_1x(fcmovnb, Fcmovnb, St) // FPU
1053 ASMJIT_INST_1x(fcmovnbe, Fcmovnbe, St) // FPU
1054 ASMJIT_INST_1x(fcmovne, Fcmovne, St) // FPU
1055 ASMJIT_INST_1x(fcmovnu, Fcmovnu, St) // FPU
1056 ASMJIT_INST_1x(fcmovu, Fcmovu, St) // FPU
1057 ASMJIT_INST_1x(fcom, Fcom, St) // FPU
1058 ASMJIT_INST_0x(fcom, Fcom) // FPU
1059 ASMJIT_INST_1x(fcom, Fcom, Mem) // FPU
1060 ASMJIT_INST_1x(fcomp, Fcomp, St) // FPU
1061 ASMJIT_INST_0x(fcomp, Fcomp) // FPU
1062 ASMJIT_INST_1x(fcomp, Fcomp, Mem) // FPU
1063 ASMJIT_INST_0x(fcompp, Fcompp) // FPU
1064 ASMJIT_INST_1x(fcomi, Fcomi, St) // FPU
1065 ASMJIT_INST_1x(fcomip, Fcomip, St) // FPU
1066 ASMJIT_INST_0x(fcos, Fcos) // FPU
1067 ASMJIT_INST_0x(fdecstp, Fdecstp) // FPU
1068 ASMJIT_INST_2x(fdiv, Fdiv, St, St) // FPU
1069 ASMJIT_INST_1x(fdiv, Fdiv, Mem) // FPU
1070 ASMJIT_INST_1x(fdivp, Fdivp, St) // FPU
1071 ASMJIT_INST_0x(fdivp, Fdivp) // FPU
1072 ASMJIT_INST_2x(fdivr, Fdivr, St, St) // FPU
1073 ASMJIT_INST_1x(fdivr, Fdivr, Mem) // FPU
1074 ASMJIT_INST_1x(fdivrp, Fdivrp, St) // FPU
1075 ASMJIT_INST_0x(fdivrp, Fdivrp) // FPU
1076 ASMJIT_INST_1x(ffree, Ffree, St) // FPU
1077 ASMJIT_INST_1x(fiadd, Fiadd, Mem) // FPU
1078 ASMJIT_INST_1x(ficom, Ficom, Mem) // FPU
1079 ASMJIT_INST_1x(ficomp, Ficomp, Mem) // FPU
1080 ASMJIT_INST_1x(fidiv, Fidiv, Mem) // FPU
1081 ASMJIT_INST_1x(fidivr, Fidivr, Mem) // FPU
1082 ASMJIT_INST_1x(fild, Fild, Mem) // FPU
1083 ASMJIT_INST_1x(fimul, Fimul, Mem) // FPU
1084 ASMJIT_INST_0x(fincstp, Fincstp) // FPU
1085 ASMJIT_INST_0x(finit, Finit) // FPU
1086 ASMJIT_INST_1x(fisub, Fisub, Mem) // FPU
1087 ASMJIT_INST_1x(fisubr, Fisubr, Mem) // FPU
1088 ASMJIT_INST_0x(fninit, Fninit) // FPU
1089 ASMJIT_INST_1x(fist, Fist, Mem) // FPU
1090 ASMJIT_INST_1x(fistp, Fistp, Mem) // FPU
1091 ASMJIT_INST_1x(fisttp, Fisttp, Mem) // FPU+SSE3
1092 ASMJIT_INST_1x(fld, Fld, Mem) // FPU
1093 ASMJIT_INST_1x(fld, Fld, St) // FPU
1094 ASMJIT_INST_0x(fld1, Fld1) // FPU
1095 ASMJIT_INST_0x(fldl2t, Fldl2t) // FPU
1096 ASMJIT_INST_0x(fldl2e, Fldl2e) // FPU
1097 ASMJIT_INST_0x(fldpi, Fldpi) // FPU
1098 ASMJIT_INST_0x(fldlg2, Fldlg2) // FPU
1099 ASMJIT_INST_0x(fldln2, Fldln2) // FPU
1100 ASMJIT_INST_0x(fldz, Fldz) // FPU
1101 ASMJIT_INST_1x(fldcw, Fldcw, Mem) // FPU
1102 ASMJIT_INST_1x(fldenv, Fldenv, Mem) // FPU
1103 ASMJIT_INST_2x(fmul, Fmul, St, St) // FPU
1104 ASMJIT_INST_1x(fmul, Fmul, Mem) // FPU
1105 ASMJIT_INST_1x(fmulp, Fmulp, St) // FPU
1106 ASMJIT_INST_0x(fmulp, Fmulp) // FPU
1107 ASMJIT_INST_0x(fnclex, Fnclex) // FPU
1108 ASMJIT_INST_0x(fnop, Fnop) // FPU
1109 ASMJIT_INST_1x(fnsave, Fnsave, Mem) // FPU
1110 ASMJIT_INST_1x(fnstenv, Fnstenv, Mem) // FPU
1111 ASMJIT_INST_1x(fnstcw, Fnstcw, Mem) // FPU
1112 ASMJIT_INST_0x(fpatan, Fpatan) // FPU
1113 ASMJIT_INST_0x(fprem, Fprem) // FPU
1114 ASMJIT_INST_0x(fprem1, Fprem1) // FPU
1115 ASMJIT_INST_0x(fptan, Fptan) // FPU
1116 ASMJIT_INST_0x(frndint, Frndint) // FPU
1117 ASMJIT_INST_1x(frstor, Frstor, Mem) // FPU
1118 ASMJIT_INST_1x(fsave, Fsave, Mem) // FPU
1119 ASMJIT_INST_0x(fscale, Fscale) // FPU
1120 ASMJIT_INST_0x(fsin, Fsin) // FPU
1121 ASMJIT_INST_0x(fsincos, Fsincos) // FPU
1122 ASMJIT_INST_0x(fsqrt, Fsqrt) // FPU
1123 ASMJIT_INST_1x(fst, Fst, Mem) // FPU
1124 ASMJIT_INST_1x(fst, Fst, St) // FPU
1125 ASMJIT_INST_1x(fstp, Fstp, Mem) // FPU
1126 ASMJIT_INST_1x(fstp, Fstp, St) // FPU
1127 ASMJIT_INST_1x(fstcw, Fstcw, Mem) // FPU
1128 ASMJIT_INST_1x(fstenv, Fstenv, Mem) // FPU
1129 ASMJIT_INST_2x(fsub, Fsub, St, St) // FPU
1130 ASMJIT_INST_1x(fsub, Fsub, Mem) // FPU
1131 ASMJIT_INST_1x(fsubp, Fsubp, St) // FPU
1132 ASMJIT_INST_0x(fsubp, Fsubp) // FPU
1133 ASMJIT_INST_2x(fsubr, Fsubr, St, St) // FPU
1134 ASMJIT_INST_1x(fsubr, Fsubr, Mem) // FPU
1135 ASMJIT_INST_1x(fsubrp, Fsubrp, St) // FPU
1136 ASMJIT_INST_0x(fsubrp, Fsubrp) // FPU
1137 ASMJIT_INST_0x(ftst, Ftst) // FPU
1138 ASMJIT_INST_1x(fucom, Fucom, St) // FPU
1139 ASMJIT_INST_0x(fucom, Fucom) // FPU
1140 ASMJIT_INST_1x(fucomi, Fucomi, St) // FPU
1141 ASMJIT_INST_1x(fucomip, Fucomip, St) // FPU
1142 ASMJIT_INST_1x(fucomp, Fucomp, St) // FPU
1143 ASMJIT_INST_0x(fucomp, Fucomp) // FPU
1144 ASMJIT_INST_0x(fucompp, Fucompp) // FPU
1145 ASMJIT_INST_0x(fwait, Fwait) // FPU
1146 ASMJIT_INST_0x(fxam, Fxam) // FPU
1147 ASMJIT_INST_1x(fxch, Fxch, St) // FPU
1148 ASMJIT_INST_0x(fxtract, Fxtract) // FPU
1149 ASMJIT_INST_0x(fyl2x, Fyl2x) // FPU
1150 ASMJIT_INST_0x(fyl2xp1, Fyl2xp1) // FPU
1151 ASMJIT_INST_1x(fstsw, Fstsw, Gp) // FPU
1152 ASMJIT_INST_1x(fstsw, Fstsw, Mem) // FPU
1153 ASMJIT_INST_1x(fnstsw, Fnstsw, Gp) // FPU
1154 ASMJIT_INST_1x(fnstsw, Fnstsw, Mem) // FPU
1155
1156 //! \}
1157
1158 //! \name MMX & SSE+ Instructions
1159 //! \{
1160
1161 ASMJIT_INST_2x(addpd, Addpd, Xmm, Xmm) // SSE2
1162 ASMJIT_INST_2x(addpd, Addpd, Xmm, Mem) // SSE2
1163 ASMJIT_INST_2x(addps, Addps, Xmm, Xmm) // SSE
1164 ASMJIT_INST_2x(addps, Addps, Xmm, Mem) // SSE
1165 ASMJIT_INST_2x(addsd, Addsd, Xmm, Xmm) // SSE2
1166 ASMJIT_INST_2x(addsd, Addsd, Xmm, Mem) // SSE2
1167 ASMJIT_INST_2x(addss, Addss, Xmm, Xmm) // SSE
1168 ASMJIT_INST_2x(addss, Addss, Xmm, Mem) // SSE
1169 ASMJIT_INST_2x(addsubpd, Addsubpd, Xmm, Xmm) // SSE3
1170 ASMJIT_INST_2x(addsubpd, Addsubpd, Xmm, Mem) // SSE3
1171 ASMJIT_INST_2x(addsubps, Addsubps, Xmm, Xmm) // SSE3
1172 ASMJIT_INST_2x(addsubps, Addsubps, Xmm, Mem) // SSE3
1173 ASMJIT_INST_2x(andnpd, Andnpd, Xmm, Xmm) // SSE2
1174 ASMJIT_INST_2x(andnpd, Andnpd, Xmm, Mem) // SSE2
1175 ASMJIT_INST_2x(andnps, Andnps, Xmm, Xmm) // SSE
1176 ASMJIT_INST_2x(andnps, Andnps, Xmm, Mem) // SSE
1177 ASMJIT_INST_2x(andpd, Andpd, Xmm, Xmm) // SSE2
1178 ASMJIT_INST_2x(andpd, Andpd, Xmm, Mem) // SSE2
1179 ASMJIT_INST_2x(andps, Andps, Xmm, Xmm) // SSE
1180 ASMJIT_INST_2x(andps, Andps, Xmm, Mem) // SSE
1181 ASMJIT_INST_3i(blendpd, Blendpd, Xmm, Xmm, Imm) // SSE4_1
1182 ASMJIT_INST_3i(blendpd, Blendpd, Xmm, Mem, Imm) // SSE4_1
1183 ASMJIT_INST_3i(blendps, Blendps, Xmm, Xmm, Imm) // SSE4_1
1184 ASMJIT_INST_3i(blendps, Blendps, Xmm, Mem, Imm) // SSE4_1
1185 ASMJIT_INST_3x(blendvpd, Blendvpd, Xmm, Xmm, XMM0) // SSE4_1 [EXPLICIT]
1186 ASMJIT_INST_3x(blendvpd, Blendvpd, Xmm, Mem, XMM0) // SSE4_1 [EXPLICIT]
1187 ASMJIT_INST_3x(blendvps, Blendvps, Xmm, Xmm, XMM0) // SSE4_1 [EXPLICIT]
1188 ASMJIT_INST_3x(blendvps, Blendvps, Xmm, Mem, XMM0) // SSE4_1 [EXPLICIT]
1189 ASMJIT_INST_3i(cmppd, Cmppd, Xmm, Xmm, Imm) // SSE2
1190 ASMJIT_INST_3i(cmppd, Cmppd, Xmm, Mem, Imm) // SSE2
1191 ASMJIT_INST_3i(cmpps, Cmpps, Xmm, Xmm, Imm) // SSE
1192 ASMJIT_INST_3i(cmpps, Cmpps, Xmm, Mem, Imm) // SSE
1193 ASMJIT_INST_3i(cmpsd, Cmpsd, Xmm, Xmm, Imm) // SSE2
1194 ASMJIT_INST_3i(cmpsd, Cmpsd, Xmm, Mem, Imm) // SSE2
1195 ASMJIT_INST_3i(cmpss, Cmpss, Xmm, Xmm, Imm) // SSE
1196 ASMJIT_INST_3i(cmpss, Cmpss, Xmm, Mem, Imm) // SSE
1197 ASMJIT_INST_2x(comisd, Comisd, Xmm, Xmm) // SSE2
1198 ASMJIT_INST_2x(comisd, Comisd, Xmm, Mem) // SSE2
1199 ASMJIT_INST_2x(comiss, Comiss, Xmm, Xmm) // SSE
1200 ASMJIT_INST_2x(comiss, Comiss, Xmm, Mem) // SSE
1201 ASMJIT_INST_2x(cvtdq2pd, Cvtdq2pd, Xmm, Xmm) // SSE2
1202 ASMJIT_INST_2x(cvtdq2pd, Cvtdq2pd, Xmm, Mem) // SSE2
1203 ASMJIT_INST_2x(cvtdq2ps, Cvtdq2ps, Xmm, Xmm) // SSE2
1204 ASMJIT_INST_2x(cvtdq2ps, Cvtdq2ps, Xmm, Mem) // SSE2
1205 ASMJIT_INST_2x(cvtpd2dq, Cvtpd2dq, Xmm, Xmm) // SSE2
1206 ASMJIT_INST_2x(cvtpd2dq, Cvtpd2dq, Xmm, Mem) // SSE2
1207 ASMJIT_INST_2x(cvtpd2pi, Cvtpd2pi, Mm, Xmm) // SSE2
1208 ASMJIT_INST_2x(cvtpd2pi, Cvtpd2pi, Mm, Mem) // SSE2
1209 ASMJIT_INST_2x(cvtpd2ps, Cvtpd2ps, Xmm, Xmm) // SSE2
1210 ASMJIT_INST_2x(cvtpd2ps, Cvtpd2ps, Xmm, Mem) // SSE2
1211 ASMJIT_INST_2x(cvtpi2pd, Cvtpi2pd, Xmm, Mm) // SSE2
1212 ASMJIT_INST_2x(cvtpi2pd, Cvtpi2pd, Xmm, Mem) // SSE2
1213 ASMJIT_INST_2x(cvtpi2ps, Cvtpi2ps, Xmm, Mm) // SSE
1214 ASMJIT_INST_2x(cvtpi2ps, Cvtpi2ps, Xmm, Mem) // SSE
1215 ASMJIT_INST_2x(cvtps2dq, Cvtps2dq, Xmm, Xmm) // SSE2
1216 ASMJIT_INST_2x(cvtps2dq, Cvtps2dq, Xmm, Mem) // SSE2
1217 ASMJIT_INST_2x(cvtps2pd, Cvtps2pd, Xmm, Xmm) // SSE2
1218 ASMJIT_INST_2x(cvtps2pd, Cvtps2pd, Xmm, Mem) // SSE2
1219 ASMJIT_INST_2x(cvtps2pi, Cvtps2pi, Mm, Xmm) // SSE
1220 ASMJIT_INST_2x(cvtps2pi, Cvtps2pi, Mm, Mem) // SSE
1221 ASMJIT_INST_2x(cvtsd2si, Cvtsd2si, Gp, Xmm) // SSE2
1222 ASMJIT_INST_2x(cvtsd2si, Cvtsd2si, Gp, Mem) // SSE2
1223 ASMJIT_INST_2x(cvtsd2ss, Cvtsd2ss, Xmm, Xmm) // SSE2
1224 ASMJIT_INST_2x(cvtsd2ss, Cvtsd2ss, Xmm, Mem) // SSE2
1225 ASMJIT_INST_2x(cvtsi2sd, Cvtsi2sd, Xmm, Gp) // SSE2
1226 ASMJIT_INST_2x(cvtsi2sd, Cvtsi2sd, Xmm, Mem) // SSE2
1227 ASMJIT_INST_2x(cvtsi2ss, Cvtsi2ss, Xmm, Gp) // SSE
1228 ASMJIT_INST_2x(cvtsi2ss, Cvtsi2ss, Xmm, Mem) // SSE
1229 ASMJIT_INST_2x(cvtss2sd, Cvtss2sd, Xmm, Xmm) // SSE2
1230 ASMJIT_INST_2x(cvtss2sd, Cvtss2sd, Xmm, Mem) // SSE2
1231 ASMJIT_INST_2x(cvtss2si, Cvtss2si, Gp, Xmm) // SSE
1232 ASMJIT_INST_2x(cvtss2si, Cvtss2si, Gp, Mem) // SSE
1233 ASMJIT_INST_2x(cvttpd2pi, Cvttpd2pi, Mm, Xmm) // SSE2
1234 ASMJIT_INST_2x(cvttpd2pi, Cvttpd2pi, Mm, Mem) // SSE2
1235 ASMJIT_INST_2x(cvttpd2dq, Cvttpd2dq, Xmm, Xmm) // SSE2
1236 ASMJIT_INST_2x(cvttpd2dq, Cvttpd2dq, Xmm, Mem) // SSE2
1237 ASMJIT_INST_2x(cvttps2dq, Cvttps2dq, Xmm, Xmm) // SSE2
1238 ASMJIT_INST_2x(cvttps2dq, Cvttps2dq, Xmm, Mem) // SSE2
1239 ASMJIT_INST_2x(cvttps2pi, Cvttps2pi, Mm, Xmm) // SSE
1240 ASMJIT_INST_2x(cvttps2pi, Cvttps2pi, Mm, Mem) // SSE
1241 ASMJIT_INST_2x(cvttsd2si, Cvttsd2si, Gp, Xmm) // SSE2
1242 ASMJIT_INST_2x(cvttsd2si, Cvttsd2si, Gp, Mem) // SSE2
1243 ASMJIT_INST_2x(cvttss2si, Cvttss2si, Gp, Xmm) // SSE
1244 ASMJIT_INST_2x(cvttss2si, Cvttss2si, Gp, Mem) // SSE
1245 ASMJIT_INST_2x(divpd, Divpd, Xmm, Xmm) // SSE2
1246 ASMJIT_INST_2x(divpd, Divpd, Xmm, Mem) // SSE2
1247 ASMJIT_INST_2x(divps, Divps, Xmm, Xmm) // SSE
1248 ASMJIT_INST_2x(divps, Divps, Xmm, Mem) // SSE
1249 ASMJIT_INST_2x(divsd, Divsd, Xmm, Xmm) // SSE2
1250 ASMJIT_INST_2x(divsd, Divsd, Xmm, Mem) // SSE2
1251 ASMJIT_INST_2x(divss, Divss, Xmm, Xmm) // SSE
1252 ASMJIT_INST_2x(divss, Divss, Xmm, Mem) // SSE
1253 ASMJIT_INST_3i(dppd, Dppd, Xmm, Xmm, Imm) // SSE4_1
1254 ASMJIT_INST_3i(dppd, Dppd, Xmm, Mem, Imm) // SSE4_1
1255 ASMJIT_INST_3i(dpps, Dpps, Xmm, Xmm, Imm) // SSE4_1
1256 ASMJIT_INST_3i(dpps, Dpps, Xmm, Mem, Imm) // SSE4_1
1257 ASMJIT_INST_3i(extractps, Extractps, Gp, Xmm, Imm) // SSE4_1
1258 ASMJIT_INST_3i(extractps, Extractps, Mem, Xmm, Imm) // SSE4_1
1259 ASMJIT_INST_2x(extrq, Extrq, Xmm, Xmm) // SSE4A
1260 ASMJIT_INST_3ii(extrq, Extrq, Xmm, Imm, Imm) // SSE4A
1261 ASMJIT_INST_3i(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Xmm, Imm) // GFNI
1262 ASMJIT_INST_3i(gf2p8affineinvqb, Gf2p8affineinvqb, Xmm, Mem, Imm) // GFNI
1263 ASMJIT_INST_3i(gf2p8affineqb, Gf2p8affineqb, Xmm, Xmm, Imm) // GFNI
1264 ASMJIT_INST_3i(gf2p8affineqb, Gf2p8affineqb, Xmm, Mem, Imm) // GFNI
1265 ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Xmm) // GFNI
1266 ASMJIT_INST_2x(gf2p8mulb, Gf2p8mulb, Xmm, Mem) // GFNI
1267 ASMJIT_INST_2x(haddpd, Haddpd, Xmm, Xmm) // SSE3
1268 ASMJIT_INST_2x(haddpd, Haddpd, Xmm, Mem) // SSE3
1269 ASMJIT_INST_2x(haddps, Haddps, Xmm, Xmm) // SSE3
1270 ASMJIT_INST_2x(haddps, Haddps, Xmm, Mem) // SSE3
1271 ASMJIT_INST_2x(hsubpd, Hsubpd, Xmm, Xmm) // SSE3
1272 ASMJIT_INST_2x(hsubpd, Hsubpd, Xmm, Mem) // SSE3
1273 ASMJIT_INST_2x(hsubps, Hsubps, Xmm, Xmm) // SSE3
1274 ASMJIT_INST_2x(hsubps, Hsubps, Xmm, Mem) // SSE3
1275 ASMJIT_INST_3i(insertps, Insertps, Xmm, Xmm, Imm) // SSE4_1
1276 ASMJIT_INST_3i(insertps, Insertps, Xmm, Mem, Imm) // SSE4_1
1277 ASMJIT_INST_2x(insertq, Insertq, Xmm, Xmm) // SSE4A
1278 ASMJIT_INST_4ii(insertq, Insertq, Xmm, Xmm, Imm, Imm) // SSE4A
1279 ASMJIT_INST_2x(lddqu, Lddqu, Xmm, Mem) // SSE3
1280 ASMJIT_INST_3x(maskmovq, Maskmovq, Mm, Mm, DS_ZDI) // SSE [EXPLICIT]
1281 ASMJIT_INST_3x(maskmovdqu, Maskmovdqu, Xmm, Xmm, DS_ZDI) // SSE2 [EXPLICIT]
1282 ASMJIT_INST_2x(maxpd, Maxpd, Xmm, Xmm) // SSE2
1283 ASMJIT_INST_2x(maxpd, Maxpd, Xmm, Mem) // SSE2
1284 ASMJIT_INST_2x(maxps, Maxps, Xmm, Xmm) // SSE
1285 ASMJIT_INST_2x(maxps, Maxps, Xmm, Mem) // SSE
1286 ASMJIT_INST_2x(maxsd, Maxsd, Xmm, Xmm) // SSE2
1287 ASMJIT_INST_2x(maxsd, Maxsd, Xmm, Mem) // SSE2
1288 ASMJIT_INST_2x(maxss, Maxss, Xmm, Xmm) // SSE
1289 ASMJIT_INST_2x(maxss, Maxss, Xmm, Mem) // SSE
1290 ASMJIT_INST_2x(minpd, Minpd, Xmm, Xmm) // SSE2
1291 ASMJIT_INST_2x(minpd, Minpd, Xmm, Mem) // SSE2
1292 ASMJIT_INST_2x(minps, Minps, Xmm, Xmm) // SSE
1293 ASMJIT_INST_2x(minps, Minps, Xmm, Mem) // SSE
1294 ASMJIT_INST_2x(minsd, Minsd, Xmm, Xmm) // SSE2
1295 ASMJIT_INST_2x(minsd, Minsd, Xmm, Mem) // SSE2
1296 ASMJIT_INST_2x(minss, Minss, Xmm, Xmm) // SSE
1297 ASMJIT_INST_2x(minss, Minss, Xmm, Mem) // SSE
1298 ASMJIT_INST_2x(movapd, Movapd, Xmm, Xmm) // SSE2
1299 ASMJIT_INST_2x(movapd, Movapd, Xmm, Mem) // SSE2
1300 ASMJIT_INST_2x(movapd, Movapd, Mem, Xmm) // SSE2
1301 ASMJIT_INST_2x(movaps, Movaps, Xmm, Xmm) // SSE
1302 ASMJIT_INST_2x(movaps, Movaps, Xmm, Mem) // SSE
1303 ASMJIT_INST_2x(movaps, Movaps, Mem, Xmm) // SSE
1304 ASMJIT_INST_2x(movd, Movd, Mem, Mm) // MMX
1305 ASMJIT_INST_2x(movd, Movd, Mem, Xmm) // SSE2
1306 ASMJIT_INST_2x(movd, Movd, Gp, Mm) // MMX
1307 ASMJIT_INST_2x(movd, Movd, Gp, Xmm) // SSE2
1308 ASMJIT_INST_2x(movd, Movd, Mm, Mem) // MMX
1309 ASMJIT_INST_2x(movd, Movd, Xmm, Mem) // SSE2
1310 ASMJIT_INST_2x(movd, Movd, Mm, Gp) // MMX
1311 ASMJIT_INST_2x(movd, Movd, Xmm, Gp) // SSE2
1312 ASMJIT_INST_2x(movddup, Movddup, Xmm, Xmm) // SSE3
1313 ASMJIT_INST_2x(movddup, Movddup, Xmm, Mem) // SSE3
1314 ASMJIT_INST_2x(movdq2q, Movdq2q, Mm, Xmm) // SSE2
1315 ASMJIT_INST_2x(movdqa, Movdqa, Xmm, Xmm) // SSE2
1316 ASMJIT_INST_2x(movdqa, Movdqa, Xmm, Mem) // SSE2
1317 ASMJIT_INST_2x(movdqa, Movdqa, Mem, Xmm) // SSE2
1318 ASMJIT_INST_2x(movdqu, Movdqu, Xmm, Xmm) // SSE2
1319 ASMJIT_INST_2x(movdqu, Movdqu, Xmm, Mem) // SSE2
1320 ASMJIT_INST_2x(movdqu, Movdqu, Mem, Xmm) // SSE2
1321 ASMJIT_INST_2x(movhlps, Movhlps, Xmm, Xmm) // SSE
1322 ASMJIT_INST_2x(movhpd, Movhpd, Xmm, Mem) // SSE2
1323 ASMJIT_INST_2x(movhpd, Movhpd, Mem, Xmm) // SSE2
1324 ASMJIT_INST_2x(movhps, Movhps, Xmm, Mem) // SSE
1325 ASMJIT_INST_2x(movhps, Movhps, Mem, Xmm) // SSE
1326 ASMJIT_INST_2x(movlhps, Movlhps, Xmm, Xmm) // SSE
1327 ASMJIT_INST_2x(movlpd, Movlpd, Xmm, Mem) // SSE2
1328 ASMJIT_INST_2x(movlpd, Movlpd, Mem, Xmm) // SSE2
1329 ASMJIT_INST_2x(movlps, Movlps, Xmm, Mem) // SSE
1330 ASMJIT_INST_2x(movlps, Movlps, Mem, Xmm) // SSE
1331 ASMJIT_INST_2x(movmskps, Movmskps, Gp, Xmm) // SSE
1332 ASMJIT_INST_2x(movmskpd, Movmskpd, Gp, Xmm) // SSE2
1333 ASMJIT_INST_2x(movntdq, Movntdq, Mem, Xmm) // SSE2
1334 ASMJIT_INST_2x(movntdqa, Movntdqa, Xmm, Mem) // SSE4_1
1335 ASMJIT_INST_2x(movntpd, Movntpd, Mem, Xmm) // SSE2
1336 ASMJIT_INST_2x(movntps, Movntps, Mem, Xmm) // SSE
1337 ASMJIT_INST_2x(movntsd, Movntsd, Mem, Xmm) // SSE4A
1338 ASMJIT_INST_2x(movntss, Movntss, Mem, Xmm) // SSE4A
1339 ASMJIT_INST_2x(movntq, Movntq, Mem, Mm) // SSE
1340 ASMJIT_INST_2x(movq, Movq, Mm, Mm) // MMX
1341 ASMJIT_INST_2x(movq, Movq, Xmm, Xmm) // SSE2
1342 ASMJIT_INST_2x(movq, Movq, Mem, Mm) // MMX
1343 ASMJIT_INST_2x(movq, Movq, Mem, Xmm) // SSE2
1344 ASMJIT_INST_2x(movq, Movq, Mm, Mem) // MMX
1345 ASMJIT_INST_2x(movq, Movq, Xmm, Mem) // SSE2
1346 ASMJIT_INST_2x(movq, Movq, Gp, Mm) // MMX
1347 ASMJIT_INST_2x(movq, Movq, Gp, Xmm) // SSE2+X64.
1348 ASMJIT_INST_2x(movq, Movq, Mm, Gp) // MMX
1349 ASMJIT_INST_2x(movq, Movq, Xmm, Gp) // SSE2+X64.
1350 ASMJIT_INST_2x(movq2dq, Movq2dq, Xmm, Mm) // SSE2
1351 ASMJIT_INST_2x(movsd, Movsd, Xmm, Xmm) // SSE2
1352 ASMJIT_INST_2x(movsd, Movsd, Xmm, Mem) // SSE2
1353 ASMJIT_INST_2x(movsd, Movsd, Mem, Xmm) // SSE2
1354 ASMJIT_INST_2x(movshdup, Movshdup, Xmm, Xmm) // SSE3
1355 ASMJIT_INST_2x(movshdup, Movshdup, Xmm, Mem) // SSE3
1356 ASMJIT_INST_2x(movsldup, Movsldup, Xmm, Xmm) // SSE3
1357 ASMJIT_INST_2x(movsldup, Movsldup, Xmm, Mem) // SSE3
1358 ASMJIT_INST_2x(movss, Movss, Xmm, Xmm) // SSE
1359 ASMJIT_INST_2x(movss, Movss, Xmm, Mem) // SSE
1360 ASMJIT_INST_2x(movss, Movss, Mem, Xmm) // SSE
1361 ASMJIT_INST_2x(movupd, Movupd, Xmm, Xmm) // SSE2
1362 ASMJIT_INST_2x(movupd, Movupd, Xmm, Mem) // SSE2
1363 ASMJIT_INST_2x(movupd, Movupd, Mem, Xmm) // SSE2
1364 ASMJIT_INST_2x(movups, Movups, Xmm, Xmm) // SSE
1365 ASMJIT_INST_2x(movups, Movups, Xmm, Mem) // SSE
1366 ASMJIT_INST_2x(movups, Movups, Mem, Xmm) // SSE
1367 ASMJIT_INST_3i(mpsadbw, Mpsadbw, Xmm, Xmm, Imm) // SSE4_1
1368 ASMJIT_INST_3i(mpsadbw, Mpsadbw, Xmm, Mem, Imm) // SSE4_1
1369 ASMJIT_INST_2x(mulpd, Mulpd, Xmm, Xmm) // SSE2
1370 ASMJIT_INST_2x(mulpd, Mulpd, Xmm, Mem) // SSE2
1371 ASMJIT_INST_2x(mulps, Mulps, Xmm, Xmm) // SSE
1372 ASMJIT_INST_2x(mulps, Mulps, Xmm, Mem) // SSE
1373 ASMJIT_INST_2x(mulsd, Mulsd, Xmm, Xmm) // SSE2
1374 ASMJIT_INST_2x(mulsd, Mulsd, Xmm, Mem) // SSE2
1375 ASMJIT_INST_2x(mulss, Mulss, Xmm, Xmm) // SSE
1376 ASMJIT_INST_2x(mulss, Mulss, Xmm, Mem) // SSE
1377 ASMJIT_INST_2x(orpd, Orpd, Xmm, Xmm) // SSE2
1378 ASMJIT_INST_2x(orpd, Orpd, Xmm, Mem) // SSE2
1379 ASMJIT_INST_2x(orps, Orps, Xmm, Xmm) // SSE
1380 ASMJIT_INST_2x(orps, Orps, Xmm, Mem) // SSE
1381 ASMJIT_INST_2x(packssdw, Packssdw, Mm, Mm) // MMX
1382 ASMJIT_INST_2x(packssdw, Packssdw, Mm, Mem) // MMX
1383 ASMJIT_INST_2x(packssdw, Packssdw, Xmm, Xmm) // SSE2
1384 ASMJIT_INST_2x(packssdw, Packssdw, Xmm, Mem) // SSE2
1385 ASMJIT_INST_2x(packsswb, Packsswb, Mm, Mm) // MMX
1386 ASMJIT_INST_2x(packsswb, Packsswb, Mm, Mem) // MMX
1387 ASMJIT_INST_2x(packsswb, Packsswb, Xmm, Xmm) // SSE2
1388 ASMJIT_INST_2x(packsswb, Packsswb, Xmm, Mem) // SSE2
1389 ASMJIT_INST_2x(packusdw, Packusdw, Xmm, Xmm) // SSE4_1
1390 ASMJIT_INST_2x(packusdw, Packusdw, Xmm, Mem) // SSE4_1
1391 ASMJIT_INST_2x(packuswb, Packuswb, Mm, Mm) // MMX
1392 ASMJIT_INST_2x(packuswb, Packuswb, Mm, Mem) // MMX
1393 ASMJIT_INST_2x(packuswb, Packuswb, Xmm, Xmm) // SSE2
1394 ASMJIT_INST_2x(packuswb, Packuswb, Xmm, Mem) // SSE2
1395 ASMJIT_INST_2x(pabsb, Pabsb, Mm, Mm) // SSSE3
1396 ASMJIT_INST_2x(pabsb, Pabsb, Mm, Mem) // SSSE3
1397 ASMJIT_INST_2x(pabsb, Pabsb, Xmm, Xmm) // SSSE3
1398 ASMJIT_INST_2x(pabsb, Pabsb, Xmm, Mem) // SSSE3
1399 ASMJIT_INST_2x(pabsd, Pabsd, Mm, Mm) // SSSE3
1400 ASMJIT_INST_2x(pabsd, Pabsd, Mm, Mem) // SSSE3
1401 ASMJIT_INST_2x(pabsd, Pabsd, Xmm, Xmm) // SSSE3
1402 ASMJIT_INST_2x(pabsd, Pabsd, Xmm, Mem) // SSSE3
1403 ASMJIT_INST_2x(pabsw, Pabsw, Mm, Mm) // SSSE3
1404 ASMJIT_INST_2x(pabsw, Pabsw, Mm, Mem) // SSSE3
1405 ASMJIT_INST_2x(pabsw, Pabsw, Xmm, Xmm) // SSSE3
1406 ASMJIT_INST_2x(pabsw, Pabsw, Xmm, Mem) // SSSE3
1407 ASMJIT_INST_2x(paddb, Paddb, Mm, Mm) // MMX
1408 ASMJIT_INST_2x(paddb, Paddb, Mm, Mem) // MMX
1409 ASMJIT_INST_2x(paddb, Paddb, Xmm, Xmm) // SSE2
1410 ASMJIT_INST_2x(paddb, Paddb, Xmm, Mem) // SSE2
1411 ASMJIT_INST_2x(paddd, Paddd, Mm, Mm) // MMX
1412 ASMJIT_INST_2x(paddd, Paddd, Mm, Mem) // MMX
1413 ASMJIT_INST_2x(paddd, Paddd, Xmm, Xmm) // SSE2
1414 ASMJIT_INST_2x(paddd, Paddd, Xmm, Mem) // SSE2
1415 ASMJIT_INST_2x(paddq, Paddq, Mm, Mm) // SSE2
1416 ASMJIT_INST_2x(paddq, Paddq, Mm, Mem) // SSE2
1417 ASMJIT_INST_2x(paddq, Paddq, Xmm, Xmm) // SSE2
1418 ASMJIT_INST_2x(paddq, Paddq, Xmm, Mem) // SSE2
1419 ASMJIT_INST_2x(paddsb, Paddsb, Mm, Mm) // MMX
1420 ASMJIT_INST_2x(paddsb, Paddsb, Mm, Mem) // MMX
1421 ASMJIT_INST_2x(paddsb, Paddsb, Xmm, Xmm) // SSE2
1422 ASMJIT_INST_2x(paddsb, Paddsb, Xmm, Mem) // SSE2
1423 ASMJIT_INST_2x(paddsw, Paddsw, Mm, Mm) // MMX
1424 ASMJIT_INST_2x(paddsw, Paddsw, Mm, Mem) // MMX
1425 ASMJIT_INST_2x(paddsw, Paddsw, Xmm, Xmm) // SSE2
1426 ASMJIT_INST_2x(paddsw, Paddsw, Xmm, Mem) // SSE2
1427 ASMJIT_INST_2x(paddusb, Paddusb, Mm, Mm) // MMX
1428 ASMJIT_INST_2x(paddusb, Paddusb, Mm, Mem) // MMX
1429 ASMJIT_INST_2x(paddusb, Paddusb, Xmm, Xmm) // SSE2
1430 ASMJIT_INST_2x(paddusb, Paddusb, Xmm, Mem) // SSE2
1431 ASMJIT_INST_2x(paddusw, Paddusw, Mm, Mm) // MMX
1432 ASMJIT_INST_2x(paddusw, Paddusw, Mm, Mem) // MMX
1433 ASMJIT_INST_2x(paddusw, Paddusw, Xmm, Xmm) // SSE2
1434 ASMJIT_INST_2x(paddusw, Paddusw, Xmm, Mem) // SSE2
1435 ASMJIT_INST_2x(paddw, Paddw, Mm, Mm) // MMX
1436 ASMJIT_INST_2x(paddw, Paddw, Mm, Mem) // MMX
1437 ASMJIT_INST_2x(paddw, Paddw, Xmm, Xmm) // SSE2
1438 ASMJIT_INST_2x(paddw, Paddw, Xmm, Mem) // SSE2
1439 ASMJIT_INST_3i(palignr, Palignr, Mm, Mm, Imm) // SSSE3
1440 ASMJIT_INST_3i(palignr, Palignr, Mm, Mem, Imm) // SSSE3
1441 ASMJIT_INST_3i(palignr, Palignr, Xmm, Xmm, Imm) // SSSE3
1442 ASMJIT_INST_3i(palignr, Palignr, Xmm, Mem, Imm) // SSSE3
1443 ASMJIT_INST_2x(pand, Pand, Mm, Mm) // MMX
1444 ASMJIT_INST_2x(pand, Pand, Mm, Mem) // MMX
1445 ASMJIT_INST_2x(pand, Pand, Xmm, Xmm) // SSE2
1446 ASMJIT_INST_2x(pand, Pand, Xmm, Mem) // SSE2
1447 ASMJIT_INST_2x(pandn, Pandn, Mm, Mm) // MMX
1448 ASMJIT_INST_2x(pandn, Pandn, Mm, Mem) // MMX
1449 ASMJIT_INST_2x(pandn, Pandn, Xmm, Xmm) // SSE2
1450 ASMJIT_INST_2x(pandn, Pandn, Xmm, Mem) // SSE2
1451 ASMJIT_INST_2x(pavgb, Pavgb, Mm, Mm) // SSE
1452 ASMJIT_INST_2x(pavgb, Pavgb, Mm, Mem) // SSE
1453 ASMJIT_INST_2x(pavgb, Pavgb, Xmm, Xmm) // SSE2
1454 ASMJIT_INST_2x(pavgb, Pavgb, Xmm, Mem) // SSE2
1455 ASMJIT_INST_2x(pavgw, Pavgw, Mm, Mm) // SSE
1456 ASMJIT_INST_2x(pavgw, Pavgw, Mm, Mem) // SSE
1457 ASMJIT_INST_2x(pavgw, Pavgw, Xmm, Xmm) // SSE2
1458 ASMJIT_INST_2x(pavgw, Pavgw, Xmm, Mem) // SSE2
1459 ASMJIT_INST_3x(pblendvb, Pblendvb, Xmm, Xmm, XMM0) // SSE4_1 [EXPLICIT]
1460 ASMJIT_INST_3x(pblendvb, Pblendvb, Xmm, Mem, XMM0) // SSE4_1 [EXPLICIT]
1461 ASMJIT_INST_3i(pblendw, Pblendw, Xmm, Xmm, Imm) // SSE4_1
1462 ASMJIT_INST_3i(pblendw, Pblendw, Xmm, Mem, Imm) // SSE4_1
1463 ASMJIT_INST_3i(pclmulqdq, Pclmulqdq, Xmm, Xmm, Imm) // PCLMULQDQ.
1464 ASMJIT_INST_3i(pclmulqdq, Pclmulqdq, Xmm, Mem, Imm) // PCLMULQDQ.
1465 ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Xmm, Imm, ECX, EAX, EDX) // SSE4_2 [EXPLICIT]
1466 ASMJIT_INST_6x(pcmpestri, Pcmpestri, Xmm, Mem, Imm, ECX, EAX, EDX) // SSE4_2 [EXPLICIT]
1467 ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Xmm, Imm, XMM0, EAX, EDX) // SSE4_2 [EXPLICIT]
1468 ASMJIT_INST_6x(pcmpestrm, Pcmpestrm, Xmm, Mem, Imm, XMM0, EAX, EDX) // SSE4_2 [EXPLICIT]
1469 ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Mm, Mm) // MMX
1470 ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Mm, Mem) // MMX
1471 ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Xmm, Xmm) // SSE2
1472 ASMJIT_INST_2x(pcmpeqb, Pcmpeqb, Xmm, Mem) // SSE2
1473 ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Mm, Mm) // MMX
1474 ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Mm, Mem) // MMX
1475 ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Xmm, Xmm) // SSE2
1476 ASMJIT_INST_2x(pcmpeqd, Pcmpeqd, Xmm, Mem) // SSE2
1477 ASMJIT_INST_2x(pcmpeqq, Pcmpeqq, Xmm, Xmm) // SSE4_1
1478 ASMJIT_INST_2x(pcmpeqq, Pcmpeqq, Xmm, Mem) // SSE4_1
1479 ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Mm, Mm) // MMX
1480 ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Mm, Mem) // MMX
1481 ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Xmm, Xmm) // SSE2
1482 ASMJIT_INST_2x(pcmpeqw, Pcmpeqw, Xmm, Mem) // SSE2
1483 ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Mm, Mm) // MMX
1484 ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Mm, Mem) // MMX
1485 ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Xmm, Xmm) // SSE2
1486 ASMJIT_INST_2x(pcmpgtb, Pcmpgtb, Xmm, Mem) // SSE2
1487 ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Mm, Mm) // MMX
1488 ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Mm, Mem) // MMX
1489 ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Xmm, Xmm) // SSE2
1490 ASMJIT_INST_2x(pcmpgtd, Pcmpgtd, Xmm, Mem) // SSE2
1491 ASMJIT_INST_2x(pcmpgtq, Pcmpgtq, Xmm, Xmm) // SSE4_2.
1492 ASMJIT_INST_2x(pcmpgtq, Pcmpgtq, Xmm, Mem) // SSE4_2.
1493 ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Mm, Mm) // MMX
1494 ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Mm, Mem) // MMX
1495 ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Xmm, Xmm) // SSE2
1496 ASMJIT_INST_2x(pcmpgtw, Pcmpgtw, Xmm, Mem) // SSE2
1497 ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Xmm, Imm, ECX) // SSE4_2 [EXPLICIT]
1498 ASMJIT_INST_4x(pcmpistri, Pcmpistri, Xmm, Mem, Imm, ECX) // SSE4_2 [EXPLICIT]
1499 ASMJIT_INST_4x(pcmpistrm, Pcmpistrm, Xmm, Xmm, Imm, XMM0) // SSE4_2 [EXPLICIT]
1500 ASMJIT_INST_4x(pcmpistrm, Pcmpistrm, Xmm, Mem, Imm, XMM0) // SSE4_2 [EXPLICIT]
1501 ASMJIT_INST_3i(pextrb, Pextrb, Gp, Xmm, Imm) // SSE4_1
1502 ASMJIT_INST_3i(pextrb, Pextrb, Mem, Xmm, Imm) // SSE4_1
1503 ASMJIT_INST_3i(pextrd, Pextrd, Gp, Xmm, Imm) // SSE4_1
1504 ASMJIT_INST_3i(pextrd, Pextrd, Mem, Xmm, Imm) // SSE4_1
1505 ASMJIT_INST_3i(pextrq, Pextrq, Gp, Xmm, Imm) // SSE4_1
1506 ASMJIT_INST_3i(pextrq, Pextrq, Mem, Xmm, Imm) // SSE4_1
1507 ASMJIT_INST_3i(pextrw, Pextrw, Gp, Mm, Imm) // SSE
1508 ASMJIT_INST_3i(pextrw, Pextrw, Gp, Xmm, Imm) // SSE2
1509 ASMJIT_INST_3i(pextrw, Pextrw, Mem, Xmm, Imm) // SSE4_1
1510 ASMJIT_INST_2x(phaddd, Phaddd, Mm, Mm) // SSSE3
1511 ASMJIT_INST_2x(phaddd, Phaddd, Mm, Mem) // SSSE3
1512 ASMJIT_INST_2x(phaddd, Phaddd, Xmm, Xmm) // SSSE3
1513 ASMJIT_INST_2x(phaddd, Phaddd, Xmm, Mem) // SSSE3
1514 ASMJIT_INST_2x(phaddsw, Phaddsw, Mm, Mm) // SSSE3
1515 ASMJIT_INST_2x(phaddsw, Phaddsw, Mm, Mem) // SSSE3
1516 ASMJIT_INST_2x(phaddsw, Phaddsw, Xmm, Xmm) // SSSE3
1517 ASMJIT_INST_2x(phaddsw, Phaddsw, Xmm, Mem) // SSSE3
1518 ASMJIT_INST_2x(phaddw, Phaddw, Mm, Mm) // SSSE3
1519 ASMJIT_INST_2x(phaddw, Phaddw, Mm, Mem) // SSSE3
1520 ASMJIT_INST_2x(phaddw, Phaddw, Xmm, Xmm) // SSSE3
1521 ASMJIT_INST_2x(phaddw, Phaddw, Xmm, Mem) // SSSE3
1522 ASMJIT_INST_2x(phminposuw, Phminposuw, Xmm, Xmm) // SSE4_1
1523 ASMJIT_INST_2x(phminposuw, Phminposuw, Xmm, Mem) // SSE4_1
1524 ASMJIT_INST_2x(phsubd, Phsubd, Mm, Mm) // SSSE3
1525 ASMJIT_INST_2x(phsubd, Phsubd, Mm, Mem) // SSSE3
1526 ASMJIT_INST_2x(phsubd, Phsubd, Xmm, Xmm) // SSSE3
1527 ASMJIT_INST_2x(phsubd, Phsubd, Xmm, Mem) // SSSE3
1528 ASMJIT_INST_2x(phsubsw, Phsubsw, Mm, Mm) // SSSE3
1529 ASMJIT_INST_2x(phsubsw, Phsubsw, Mm, Mem) // SSSE3
1530 ASMJIT_INST_2x(phsubsw, Phsubsw, Xmm, Xmm) // SSSE3
1531 ASMJIT_INST_2x(phsubsw, Phsubsw, Xmm, Mem) // SSSE3
1532 ASMJIT_INST_2x(phsubw, Phsubw, Mm, Mm) // SSSE3
1533 ASMJIT_INST_2x(phsubw, Phsubw, Mm, Mem) // SSSE3
1534 ASMJIT_INST_2x(phsubw, Phsubw, Xmm, Xmm) // SSSE3
1535 ASMJIT_INST_2x(phsubw, Phsubw, Xmm, Mem) // SSSE3
1536 ASMJIT_INST_3i(pinsrb, Pinsrb, Xmm, Gp, Imm) // SSE4_1
1537 ASMJIT_INST_3i(pinsrb, Pinsrb, Xmm, Mem, Imm) // SSE4_1
1538 ASMJIT_INST_3i(pinsrd, Pinsrd, Xmm, Gp, Imm) // SSE4_1
1539 ASMJIT_INST_3i(pinsrd, Pinsrd, Xmm, Mem, Imm) // SSE4_1
1540 ASMJIT_INST_3i(pinsrq, Pinsrq, Xmm, Gp, Imm) // SSE4_1
1541 ASMJIT_INST_3i(pinsrq, Pinsrq, Xmm, Mem, Imm) // SSE4_1
1542 ASMJIT_INST_3i(pinsrw, Pinsrw, Mm, Gp, Imm) // SSE
1543 ASMJIT_INST_3i(pinsrw, Pinsrw, Mm, Mem, Imm) // SSE
1544 ASMJIT_INST_3i(pinsrw, Pinsrw, Xmm, Gp, Imm) // SSE2
1545 ASMJIT_INST_3i(pinsrw, Pinsrw, Xmm, Mem, Imm) // SSE2
1546 ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Mm, Mm) // SSSE3
1547 ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Mm, Mem) // SSSE3
1548 ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Xmm, Xmm) // SSSE3
1549 ASMJIT_INST_2x(pmaddubsw, Pmaddubsw, Xmm, Mem) // SSSE3
1550 ASMJIT_INST_2x(pmaddwd, Pmaddwd, Mm, Mm) // MMX
1551 ASMJIT_INST_2x(pmaddwd, Pmaddwd, Mm, Mem) // MMX
1552 ASMJIT_INST_2x(pmaddwd, Pmaddwd, Xmm, Xmm) // SSE2
1553 ASMJIT_INST_2x(pmaddwd, Pmaddwd, Xmm, Mem) // SSE2
1554 ASMJIT_INST_2x(pmaxsb, Pmaxsb, Xmm, Xmm) // SSE4_1
1555 ASMJIT_INST_2x(pmaxsb, Pmaxsb, Xmm, Mem) // SSE4_1
1556 ASMJIT_INST_2x(pmaxsd, Pmaxsd, Xmm, Xmm) // SSE4_1
1557 ASMJIT_INST_2x(pmaxsd, Pmaxsd, Xmm, Mem) // SSE4_1
1558 ASMJIT_INST_2x(pmaxsw, Pmaxsw, Mm, Mm) // SSE
1559 ASMJIT_INST_2x(pmaxsw, Pmaxsw, Mm, Mem) // SSE
1560 ASMJIT_INST_2x(pmaxsw, Pmaxsw, Xmm, Xmm) // SSE2
1561 ASMJIT_INST_2x(pmaxsw, Pmaxsw, Xmm, Mem) // SSE2
1562 ASMJIT_INST_2x(pmaxub, Pmaxub, Mm, Mm) // SSE
1563 ASMJIT_INST_2x(pmaxub, Pmaxub, Mm, Mem) // SSE
1564 ASMJIT_INST_2x(pmaxub, Pmaxub, Xmm, Xmm) // SSE2
1565 ASMJIT_INST_2x(pmaxub, Pmaxub, Xmm, Mem) // SSE2
1566 ASMJIT_INST_2x(pmaxud, Pmaxud, Xmm, Xmm) // SSE4_1
1567 ASMJIT_INST_2x(pmaxud, Pmaxud, Xmm, Mem) // SSE4_1
1568 ASMJIT_INST_2x(pmaxuw, Pmaxuw, Xmm, Xmm) // SSE4_1
1569 ASMJIT_INST_2x(pmaxuw, Pmaxuw, Xmm, Mem) // SSE4_1
1570 ASMJIT_INST_2x(pminsb, Pminsb, Xmm, Xmm) // SSE4_1
1571 ASMJIT_INST_2x(pminsb, Pminsb, Xmm, Mem) // SSE4_1
1572 ASMJIT_INST_2x(pminsd, Pminsd, Xmm, Xmm) // SSE4_1
1573 ASMJIT_INST_2x(pminsd, Pminsd, Xmm, Mem) // SSE4_1
1574 ASMJIT_INST_2x(pminsw, Pminsw, Mm, Mm) // SSE
1575 ASMJIT_INST_2x(pminsw, Pminsw, Mm, Mem) // SSE
1576 ASMJIT_INST_2x(pminsw, Pminsw, Xmm, Xmm) // SSE2
1577 ASMJIT_INST_2x(pminsw, Pminsw, Xmm, Mem) // SSE2
1578 ASMJIT_INST_2x(pminub, Pminub, Mm, Mm) // SSE
1579 ASMJIT_INST_2x(pminub, Pminub, Mm, Mem) // SSE
1580 ASMJIT_INST_2x(pminub, Pminub, Xmm, Xmm) // SSE2
1581 ASMJIT_INST_2x(pminub, Pminub, Xmm, Mem) // SSE2
1582 ASMJIT_INST_2x(pminud, Pminud, Xmm, Xmm) // SSE4_1
1583 ASMJIT_INST_2x(pminud, Pminud, Xmm, Mem) // SSE4_1
1584 ASMJIT_INST_2x(pminuw, Pminuw, Xmm, Xmm) // SSE4_1
1585 ASMJIT_INST_2x(pminuw, Pminuw, Xmm, Mem) // SSE4_1
1586 ASMJIT_INST_2x(pmovmskb, Pmovmskb, Gp, Mm) // SSE
1587 ASMJIT_INST_2x(pmovmskb, Pmovmskb, Gp, Xmm) // SSE2
1588 ASMJIT_INST_2x(pmovsxbd, Pmovsxbd, Xmm, Xmm) // SSE4_1
1589 ASMJIT_INST_2x(pmovsxbd, Pmovsxbd, Xmm, Mem) // SSE4_1
1590 ASMJIT_INST_2x(pmovsxbq, Pmovsxbq, Xmm, Xmm) // SSE4_1
1591 ASMJIT_INST_2x(pmovsxbq, Pmovsxbq, Xmm, Mem) // SSE4_1
1592 ASMJIT_INST_2x(pmovsxbw, Pmovsxbw, Xmm, Xmm) // SSE4_1
1593 ASMJIT_INST_2x(pmovsxbw, Pmovsxbw, Xmm, Mem) // SSE4_1
1594 ASMJIT_INST_2x(pmovsxdq, Pmovsxdq, Xmm, Xmm) // SSE4_1
1595 ASMJIT_INST_2x(pmovsxdq, Pmovsxdq, Xmm, Mem) // SSE4_1
1596 ASMJIT_INST_2x(pmovsxwd, Pmovsxwd, Xmm, Xmm) // SSE4_1
1597 ASMJIT_INST_2x(pmovsxwd, Pmovsxwd, Xmm, Mem) // SSE4_1
1598 ASMJIT_INST_2x(pmovsxwq, Pmovsxwq, Xmm, Xmm) // SSE4_1
1599 ASMJIT_INST_2x(pmovsxwq, Pmovsxwq, Xmm, Mem) // SSE4_1
1600 ASMJIT_INST_2x(pmovzxbd, Pmovzxbd, Xmm, Xmm) // SSE4_1
1601 ASMJIT_INST_2x(pmovzxbd, Pmovzxbd, Xmm, Mem) // SSE4_1
1602 ASMJIT_INST_2x(pmovzxbq, Pmovzxbq, Xmm, Xmm) // SSE4_1
1603 ASMJIT_INST_2x(pmovzxbq, Pmovzxbq, Xmm, Mem) // SSE4_1
1604 ASMJIT_INST_2x(pmovzxbw, Pmovzxbw, Xmm, Xmm) // SSE4_1
1605 ASMJIT_INST_2x(pmovzxbw, Pmovzxbw, Xmm, Mem) // SSE4_1
1606 ASMJIT_INST_2x(pmovzxdq, Pmovzxdq, Xmm, Xmm) // SSE4_1
1607 ASMJIT_INST_2x(pmovzxdq, Pmovzxdq, Xmm, Mem) // SSE4_1
1608 ASMJIT_INST_2x(pmovzxwd, Pmovzxwd, Xmm, Xmm) // SSE4_1
1609 ASMJIT_INST_2x(pmovzxwd, Pmovzxwd, Xmm, Mem) // SSE4_1
1610 ASMJIT_INST_2x(pmovzxwq, Pmovzxwq, Xmm, Xmm) // SSE4_1
1611 ASMJIT_INST_2x(pmovzxwq, Pmovzxwq, Xmm, Mem) // SSE4_1
1612 ASMJIT_INST_2x(pmuldq, Pmuldq, Xmm, Xmm) // SSE4_1
1613 ASMJIT_INST_2x(pmuldq, Pmuldq, Xmm, Mem) // SSE4_1
1614 ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Mm, Mm) // SSSE3
1615 ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Mm, Mem) // SSSE3
1616 ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Xmm, Xmm) // SSSE3
1617 ASMJIT_INST_2x(pmulhrsw, Pmulhrsw, Xmm, Mem) // SSSE3
1618 ASMJIT_INST_2x(pmulhw, Pmulhw, Mm, Mm) // MMX
1619 ASMJIT_INST_2x(pmulhw, Pmulhw, Mm, Mem) // MMX
1620 ASMJIT_INST_2x(pmulhw, Pmulhw, Xmm, Xmm) // SSE2
1621 ASMJIT_INST_2x(pmulhw, Pmulhw, Xmm, Mem) // SSE2
1622 ASMJIT_INST_2x(pmulhuw, Pmulhuw, Mm, Mm) // SSE
1623 ASMJIT_INST_2x(pmulhuw, Pmulhuw, Mm, Mem) // SSE
1624 ASMJIT_INST_2x(pmulhuw, Pmulhuw, Xmm, Xmm) // SSE2
1625 ASMJIT_INST_2x(pmulhuw, Pmulhuw, Xmm, Mem) // SSE2
1626 ASMJIT_INST_2x(pmulld, Pmulld, Xmm, Xmm) // SSE4_1
1627 ASMJIT_INST_2x(pmulld, Pmulld, Xmm, Mem) // SSE4_1
1628 ASMJIT_INST_2x(pmullw, Pmullw, Mm, Mm) // MMX
1629 ASMJIT_INST_2x(pmullw, Pmullw, Mm, Mem) // MMX
1630 ASMJIT_INST_2x(pmullw, Pmullw, Xmm, Xmm) // SSE2
1631 ASMJIT_INST_2x(pmullw, Pmullw, Xmm, Mem) // SSE2
1632 ASMJIT_INST_2x(pmuludq, Pmuludq, Mm, Mm) // SSE2
1633 ASMJIT_INST_2x(pmuludq, Pmuludq, Mm, Mem) // SSE2
1634 ASMJIT_INST_2x(pmuludq, Pmuludq, Xmm, Xmm) // SSE2
1635 ASMJIT_INST_2x(pmuludq, Pmuludq, Xmm, Mem) // SSE2
1636 ASMJIT_INST_2x(por, Por, Mm, Mm) // MMX
1637 ASMJIT_INST_2x(por, Por, Mm, Mem) // MMX
1638 ASMJIT_INST_2x(por, Por, Xmm, Xmm) // SSE2
1639 ASMJIT_INST_2x(por, Por, Xmm, Mem) // SSE2
1640 ASMJIT_INST_2x(psadbw, Psadbw, Mm, Mm) // SSE
1641 ASMJIT_INST_2x(psadbw, Psadbw, Mm, Mem) // SSE
1642 ASMJIT_INST_2x(psadbw, Psadbw, Xmm, Xmm) // SSE2
1643 ASMJIT_INST_2x(psadbw, Psadbw, Xmm, Mem) // SSE2
1644 ASMJIT_INST_2x(pslld, Pslld, Mm, Mm) // MMX
1645 ASMJIT_INST_2x(pslld, Pslld, Mm, Mem) // MMX
1646 ASMJIT_INST_2i(pslld, Pslld, Mm, Imm) // MMX
1647 ASMJIT_INST_2x(pslld, Pslld, Xmm, Xmm) // SSE2
1648 ASMJIT_INST_2x(pslld, Pslld, Xmm, Mem) // SSE2
1649 ASMJIT_INST_2i(pslld, Pslld, Xmm, Imm) // SSE2
1650 ASMJIT_INST_2i(pslldq, Pslldq, Xmm, Imm) // SSE2
1651 ASMJIT_INST_2x(psllq, Psllq, Mm, Mm) // MMX
1652 ASMJIT_INST_2x(psllq, Psllq, Mm, Mem) // MMX
1653 ASMJIT_INST_2i(psllq, Psllq, Mm, Imm) // MMX
1654 ASMJIT_INST_2x(psllq, Psllq, Xmm, Xmm) // SSE2
1655 ASMJIT_INST_2x(psllq, Psllq, Xmm, Mem) // SSE2
1656 ASMJIT_INST_2i(psllq, Psllq, Xmm, Imm) // SSE2
1657 ASMJIT_INST_2x(psllw, Psllw, Mm, Mm) // MMX
1658 ASMJIT_INST_2x(psllw, Psllw, Mm, Mem) // MMX
1659 ASMJIT_INST_2i(psllw, Psllw, Mm, Imm) // MMX
1660 ASMJIT_INST_2x(psllw, Psllw, Xmm, Xmm) // SSE2
1661 ASMJIT_INST_2x(psllw, Psllw, Xmm, Mem) // SSE2
1662 ASMJIT_INST_2i(psllw, Psllw, Xmm, Imm) // SSE2
1663 ASMJIT_INST_2x(psrad, Psrad, Mm, Mm) // MMX
1664 ASMJIT_INST_2x(psrad, Psrad, Mm, Mem) // MMX
1665 ASMJIT_INST_2i(psrad, Psrad, Mm, Imm) // MMX
1666 ASMJIT_INST_2x(psrad, Psrad, Xmm, Xmm) // SSE2
1667 ASMJIT_INST_2x(psrad, Psrad, Xmm, Mem) // SSE2
1668 ASMJIT_INST_2i(psrad, Psrad, Xmm, Imm) // SSE2
1669 ASMJIT_INST_2x(psraw, Psraw, Mm, Mm) // MMX
1670 ASMJIT_INST_2x(psraw, Psraw, Mm, Mem) // MMX
1671 ASMJIT_INST_2i(psraw, Psraw, Mm, Imm) // MMX
1672 ASMJIT_INST_2x(psraw, Psraw, Xmm, Xmm) // SSE2
1673 ASMJIT_INST_2x(psraw, Psraw, Xmm, Mem) // SSE2
1674 ASMJIT_INST_2i(psraw, Psraw, Xmm, Imm) // SSE2
1675 ASMJIT_INST_2x(pshufb, Pshufb, Mm, Mm) // SSSE3
1676 ASMJIT_INST_2x(pshufb, Pshufb, Mm, Mem) // SSSE3
1677 ASMJIT_INST_2x(pshufb, Pshufb, Xmm, Xmm) // SSSE3
1678 ASMJIT_INST_2x(pshufb, Pshufb, Xmm, Mem) // SSSE3
1679 ASMJIT_INST_3i(pshufd, Pshufd, Xmm, Xmm, Imm) // SSE2
1680 ASMJIT_INST_3i(pshufd, Pshufd, Xmm, Mem, Imm) // SSE2
1681 ASMJIT_INST_3i(pshufhw, Pshufhw, Xmm, Xmm, Imm) // SSE2
1682 ASMJIT_INST_3i(pshufhw, Pshufhw, Xmm, Mem, Imm) // SSE2
1683 ASMJIT_INST_3i(pshuflw, Pshuflw, Xmm, Xmm, Imm) // SSE2
1684 ASMJIT_INST_3i(pshuflw, Pshuflw, Xmm, Mem, Imm) // SSE2
1685 ASMJIT_INST_3i(pshufw, Pshufw, Mm, Mm, Imm) // SSE
1686 ASMJIT_INST_3i(pshufw, Pshufw, Mm, Mem, Imm) // SSE
1687 ASMJIT_INST_2x(psignb, Psignb, Mm, Mm) // SSSE3
1688 ASMJIT_INST_2x(psignb, Psignb, Mm, Mem) // SSSE3
1689 ASMJIT_INST_2x(psignb, Psignb, Xmm, Xmm) // SSSE3
1690 ASMJIT_INST_2x(psignb, Psignb, Xmm, Mem) // SSSE3
1691 ASMJIT_INST_2x(psignd, Psignd, Mm, Mm) // SSSE3
1692 ASMJIT_INST_2x(psignd, Psignd, Mm, Mem) // SSSE3
1693 ASMJIT_INST_2x(psignd, Psignd, Xmm, Xmm) // SSSE3
1694 ASMJIT_INST_2x(psignd, Psignd, Xmm, Mem) // SSSE3
1695 ASMJIT_INST_2x(psignw, Psignw, Mm, Mm) // SSSE3
1696 ASMJIT_INST_2x(psignw, Psignw, Mm, Mem) // SSSE3
1697 ASMJIT_INST_2x(psignw, Psignw, Xmm, Xmm) // SSSE3
1698 ASMJIT_INST_2x(psignw, Psignw, Xmm, Mem) // SSSE3
1699 ASMJIT_INST_2x(psrld, Psrld, Mm, Mm) // MMX
1700 ASMJIT_INST_2x(psrld, Psrld, Mm, Mem) // MMX
1701 ASMJIT_INST_2i(psrld, Psrld, Mm, Imm) // MMX
1702 ASMJIT_INST_2x(psrld, Psrld, Xmm, Xmm) // SSE2
1703 ASMJIT_INST_2x(psrld, Psrld, Xmm, Mem) // SSE2
1704 ASMJIT_INST_2i(psrld, Psrld, Xmm, Imm) // SSE2
1705 ASMJIT_INST_2i(psrldq, Psrldq, Xmm, Imm) // SSE2
1706 ASMJIT_INST_2x(psrlq, Psrlq, Mm, Mm) // MMX
1707 ASMJIT_INST_2x(psrlq, Psrlq, Mm, Mem) // MMX
1708 ASMJIT_INST_2i(psrlq, Psrlq, Mm, Imm) // MMX
1709 ASMJIT_INST_2x(psrlq, Psrlq, Xmm, Xmm) // SSE2
1710 ASMJIT_INST_2x(psrlq, Psrlq, Xmm, Mem) // SSE2
1711 ASMJIT_INST_2i(psrlq, Psrlq, Xmm, Imm) // SSE2
1712 ASMJIT_INST_2x(psrlw, Psrlw, Mm, Mm) // MMX
1713 ASMJIT_INST_2x(psrlw, Psrlw, Mm, Mem) // MMX
1714 ASMJIT_INST_2i(psrlw, Psrlw, Mm, Imm) // MMX
1715 ASMJIT_INST_2x(psrlw, Psrlw, Xmm, Xmm) // SSE2
1716 ASMJIT_INST_2x(psrlw, Psrlw, Xmm, Mem) // SSE2
1717 ASMJIT_INST_2i(psrlw, Psrlw, Xmm, Imm) // SSE2
1718 ASMJIT_INST_2x(psubb, Psubb, Mm, Mm) // MMX
1719 ASMJIT_INST_2x(psubb, Psubb, Mm, Mem) // MMX
1720 ASMJIT_INST_2x(psubb, Psubb, Xmm, Xmm) // SSE2
1721 ASMJIT_INST_2x(psubb, Psubb, Xmm, Mem) // SSE2
1722 ASMJIT_INST_2x(psubd, Psubd, Mm, Mm) // MMX
1723 ASMJIT_INST_2x(psubd, Psubd, Mm, Mem) // MMX
1724 ASMJIT_INST_2x(psubd, Psubd, Xmm, Xmm) // SSE2
1725 ASMJIT_INST_2x(psubd, Psubd, Xmm, Mem) // SSE2
1726 ASMJIT_INST_2x(psubq, Psubq, Mm, Mm) // SSE2
1727 ASMJIT_INST_2x(psubq, Psubq, Mm, Mem) // SSE2
1728 ASMJIT_INST_2x(psubq, Psubq, Xmm, Xmm) // SSE2
1729 ASMJIT_INST_2x(psubq, Psubq, Xmm, Mem) // SSE2
1730 ASMJIT_INST_2x(psubsb, Psubsb, Mm, Mm) // MMX
1731 ASMJIT_INST_2x(psubsb, Psubsb, Mm, Mem) // MMX
1732 ASMJIT_INST_2x(psubsb, Psubsb, Xmm, Xmm) // SSE2
1733 ASMJIT_INST_2x(psubsb, Psubsb, Xmm, Mem) // SSE2
1734 ASMJIT_INST_2x(psubsw, Psubsw, Mm, Mm) // MMX
1735 ASMJIT_INST_2x(psubsw, Psubsw, Mm, Mem) // MMX
1736 ASMJIT_INST_2x(psubsw, Psubsw, Xmm, Xmm) // SSE2
1737 ASMJIT_INST_2x(psubsw, Psubsw, Xmm, Mem) // SSE2
1738 ASMJIT_INST_2x(psubusb, Psubusb, Mm, Mm) // MMX
1739 ASMJIT_INST_2x(psubusb, Psubusb, Mm, Mem) // MMX
1740 ASMJIT_INST_2x(psubusb, Psubusb, Xmm, Xmm) // SSE2
1741 ASMJIT_INST_2x(psubusb, Psubusb, Xmm, Mem) // SSE2
1742 ASMJIT_INST_2x(psubusw, Psubusw, Mm, Mm) // MMX
1743 ASMJIT_INST_2x(psubusw, Psubusw, Mm, Mem) // MMX
1744 ASMJIT_INST_2x(psubusw, Psubusw, Xmm, Xmm) // SSE2
1745 ASMJIT_INST_2x(psubusw, Psubusw, Xmm, Mem) // SSE2
1746 ASMJIT_INST_2x(psubw, Psubw, Mm, Mm) // MMX
1747 ASMJIT_INST_2x(psubw, Psubw, Mm, Mem) // MMX
1748 ASMJIT_INST_2x(psubw, Psubw, Xmm, Xmm) // SSE2
1749 ASMJIT_INST_2x(psubw, Psubw, Xmm, Mem) // SSE2
1750 ASMJIT_INST_2x(ptest, Ptest, Xmm, Xmm) // SSE4_1
1751 ASMJIT_INST_2x(ptest, Ptest, Xmm, Mem) // SSE4_1
1752 ASMJIT_INST_2x(punpckhbw, Punpckhbw, Mm, Mm) // MMX
1753 ASMJIT_INST_2x(punpckhbw, Punpckhbw, Mm, Mem) // MMX
1754 ASMJIT_INST_2x(punpckhbw, Punpckhbw, Xmm, Xmm) // SSE2
1755 ASMJIT_INST_2x(punpckhbw, Punpckhbw, Xmm, Mem) // SSE2
1756 ASMJIT_INST_2x(punpckhdq, Punpckhdq, Mm, Mm) // MMX
1757 ASMJIT_INST_2x(punpckhdq, Punpckhdq, Mm, Mem) // MMX
1758 ASMJIT_INST_2x(punpckhdq, Punpckhdq, Xmm, Xmm) // SSE2
1759 ASMJIT_INST_2x(punpckhdq, Punpckhdq, Xmm, Mem) // SSE2
1760 ASMJIT_INST_2x(punpckhqdq, Punpckhqdq, Xmm, Xmm) // SSE2
1761 ASMJIT_INST_2x(punpckhqdq, Punpckhqdq, Xmm, Mem) // SSE2
1762 ASMJIT_INST_2x(punpckhwd, Punpckhwd, Mm, Mm) // MMX
1763 ASMJIT_INST_2x(punpckhwd, Punpckhwd, Mm, Mem) // MMX
1764 ASMJIT_INST_2x(punpckhwd, Punpckhwd, Xmm, Xmm) // SSE2
1765 ASMJIT_INST_2x(punpckhwd, Punpckhwd, Xmm, Mem) // SSE2
1766 ASMJIT_INST_2x(punpcklbw, Punpcklbw, Mm, Mm) // MMX
1767 ASMJIT_INST_2x(punpcklbw, Punpcklbw, Mm, Mem) // MMX
1768 ASMJIT_INST_2x(punpcklbw, Punpcklbw, Xmm, Xmm) // SSE2
1769 ASMJIT_INST_2x(punpcklbw, Punpcklbw, Xmm, Mem) // SSE2
1770 ASMJIT_INST_2x(punpckldq, Punpckldq, Mm, Mm) // MMX
1771 ASMJIT_INST_2x(punpckldq, Punpckldq, Mm, Mem) // MMX
1772 ASMJIT_INST_2x(punpckldq, Punpckldq, Xmm, Xmm) // SSE2
1773 ASMJIT_INST_2x(punpckldq, Punpckldq, Xmm, Mem) // SSE2
1774 ASMJIT_INST_2x(punpcklqdq, Punpcklqdq, Xmm, Xmm) // SSE2
1775 ASMJIT_INST_2x(punpcklqdq, Punpcklqdq, Xmm, Mem) // SSE2
1776 ASMJIT_INST_2x(punpcklwd, Punpcklwd, Mm, Mm) // MMX
1777 ASMJIT_INST_2x(punpcklwd, Punpcklwd, Mm, Mem) // MMX
1778 ASMJIT_INST_2x(punpcklwd, Punpcklwd, Xmm, Xmm) // SSE2
1779 ASMJIT_INST_2x(punpcklwd, Punpcklwd, Xmm, Mem) // SSE2
1780 ASMJIT_INST_2x(pxor, Pxor, Mm, Mm) // MMX
1781 ASMJIT_INST_2x(pxor, Pxor, Mm, Mem) // MMX
1782 ASMJIT_INST_2x(pxor, Pxor, Xmm, Xmm) // SSE2
1783 ASMJIT_INST_2x(pxor, Pxor, Xmm, Mem) // SSE2
1784 ASMJIT_INST_2x(rcpps, Rcpps, Xmm, Xmm) // SSE
1785 ASMJIT_INST_2x(rcpps, Rcpps, Xmm, Mem) // SSE
1786 ASMJIT_INST_2x(rcpss, Rcpss, Xmm, Xmm) // SSE
1787 ASMJIT_INST_2x(rcpss, Rcpss, Xmm, Mem) // SSE
1788 ASMJIT_INST_3i(roundpd, Roundpd, Xmm, Xmm, Imm) // SSE4_1
1789 ASMJIT_INST_3i(roundpd, Roundpd, Xmm, Mem, Imm) // SSE4_1
1790 ASMJIT_INST_3i(roundps, Roundps, Xmm, Xmm, Imm) // SSE4_1
1791 ASMJIT_INST_3i(roundps, Roundps, Xmm, Mem, Imm) // SSE4_1
1792 ASMJIT_INST_3i(roundsd, Roundsd, Xmm, Xmm, Imm) // SSE4_1
1793 ASMJIT_INST_3i(roundsd, Roundsd, Xmm, Mem, Imm) // SSE4_1
1794 ASMJIT_INST_3i(roundss, Roundss, Xmm, Xmm, Imm) // SSE4_1
1795 ASMJIT_INST_3i(roundss, Roundss, Xmm, Mem, Imm) // SSE4_1
1796 ASMJIT_INST_2x(rsqrtps, Rsqrtps, Xmm, Xmm) // SSE
1797 ASMJIT_INST_2x(rsqrtps, Rsqrtps, Xmm, Mem) // SSE
1798 ASMJIT_INST_2x(rsqrtss, Rsqrtss, Xmm, Xmm) // SSE
1799 ASMJIT_INST_2x(rsqrtss, Rsqrtss, Xmm, Mem) // SSE
1800 ASMJIT_INST_3i(shufpd, Shufpd, Xmm, Xmm, Imm) // SSE2
1801 ASMJIT_INST_3i(shufpd, Shufpd, Xmm, Mem, Imm) // SSE2
1802 ASMJIT_INST_3i(shufps, Shufps, Xmm, Xmm, Imm) // SSE
1803 ASMJIT_INST_3i(shufps, Shufps, Xmm, Mem, Imm) // SSE
1804 ASMJIT_INST_2x(sqrtpd, Sqrtpd, Xmm, Xmm) // SSE2
1805 ASMJIT_INST_2x(sqrtpd, Sqrtpd, Xmm, Mem) // SSE2
1806 ASMJIT_INST_2x(sqrtps, Sqrtps, Xmm, Xmm) // SSE
1807 ASMJIT_INST_2x(sqrtps, Sqrtps, Xmm, Mem) // SSE
1808 ASMJIT_INST_2x(sqrtsd, Sqrtsd, Xmm, Xmm) // SSE2
1809 ASMJIT_INST_2x(sqrtsd, Sqrtsd, Xmm, Mem) // SSE2
1810 ASMJIT_INST_2x(sqrtss, Sqrtss, Xmm, Xmm) // SSE
1811 ASMJIT_INST_2x(sqrtss, Sqrtss, Xmm, Mem) // SSE
1812 ASMJIT_INST_2x(subpd, Subpd, Xmm, Xmm) // SSE2
1813 ASMJIT_INST_2x(subpd, Subpd, Xmm, Mem) // SSE2
1814 ASMJIT_INST_2x(subps, Subps, Xmm, Xmm) // SSE
1815 ASMJIT_INST_2x(subps, Subps, Xmm, Mem) // SSE
1816 ASMJIT_INST_2x(subsd, Subsd, Xmm, Xmm) // SSE2
1817 ASMJIT_INST_2x(subsd, Subsd, Xmm, Mem) // SSE2
1818 ASMJIT_INST_2x(subss, Subss, Xmm, Xmm) // SSE
1819 ASMJIT_INST_2x(subss, Subss, Xmm, Mem) // SSE
1820 ASMJIT_INST_2x(ucomisd, Ucomisd, Xmm, Xmm) // SSE2
1821 ASMJIT_INST_2x(ucomisd, Ucomisd, Xmm, Mem) // SSE2
1822 ASMJIT_INST_2x(ucomiss, Ucomiss, Xmm, Xmm) // SSE
1823 ASMJIT_INST_2x(ucomiss, Ucomiss, Xmm, Mem) // SSE
1824 ASMJIT_INST_2x(unpckhpd, Unpckhpd, Xmm, Xmm) // SSE2
1825 ASMJIT_INST_2x(unpckhpd, Unpckhpd, Xmm, Mem) // SSE2
1826 ASMJIT_INST_2x(unpckhps, Unpckhps, Xmm, Xmm) // SSE
1827 ASMJIT_INST_2x(unpckhps, Unpckhps, Xmm, Mem) // SSE
1828 ASMJIT_INST_2x(unpcklpd, Unpcklpd, Xmm, Xmm) // SSE2
1829 ASMJIT_INST_2x(unpcklpd, Unpcklpd, Xmm, Mem) // SSE2
1830 ASMJIT_INST_2x(unpcklps, Unpcklps, Xmm, Xmm) // SSE
1831 ASMJIT_INST_2x(unpcklps, Unpcklps, Xmm, Mem) // SSE
1832 ASMJIT_INST_2x(xorpd, Xorpd, Xmm, Xmm) // SSE2
1833 ASMJIT_INST_2x(xorpd, Xorpd, Xmm, Mem) // SSE2
1834 ASMJIT_INST_2x(xorps, Xorps, Xmm, Xmm) // SSE
1835 ASMJIT_INST_2x(xorps, Xorps, Xmm, Mem) // SSE
1836
1837 //! \}
1838
//! \name 3DNOW and GEODE Instructions (Deprecated)
//! \{

// All 3DNOW/GEODE forms below operate on 64-bit `Mm` registers (reg,reg and
// reg,mem encodings only); `pfrcpv`, `pfrsqrtv` are Geode-specific, and
// `femms` takes no operands. The trailing comment on each line names the
// required CPU extension.

ASMJIT_INST_2x(pavgusb, Pavgusb, Mm, Mm)                               // 3DNOW
ASMJIT_INST_2x(pavgusb, Pavgusb, Mm, Mem)                              // 3DNOW
ASMJIT_INST_2x(pf2id, Pf2id, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pf2id, Pf2id, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pf2iw, Pf2iw, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pf2iw, Pf2iw, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfacc, Pfacc, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pfacc, Pfacc, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfadd, Pfadd, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pfadd, Pfadd, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfcmpeq, Pfcmpeq, Mm, Mm)                               // 3DNOW
ASMJIT_INST_2x(pfcmpeq, Pfcmpeq, Mm, Mem)                              // 3DNOW
ASMJIT_INST_2x(pfcmpge, Pfcmpge, Mm, Mm)                               // 3DNOW
ASMJIT_INST_2x(pfcmpge, Pfcmpge, Mm, Mem)                              // 3DNOW
ASMJIT_INST_2x(pfcmpgt, Pfcmpgt, Mm, Mm)                               // 3DNOW
ASMJIT_INST_2x(pfcmpgt, Pfcmpgt, Mm, Mem)                              // 3DNOW
ASMJIT_INST_2x(pfmax, Pfmax, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pfmax, Pfmax, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfmin, Pfmin, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pfmin, Pfmin, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfmul, Pfmul, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pfmul, Pfmul, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfnacc, Pfnacc, Mm, Mm)                                 // 3DNOW
ASMJIT_INST_2x(pfnacc, Pfnacc, Mm, Mem)                                // 3DNOW
ASMJIT_INST_2x(pfpnacc, Pfpnacc, Mm, Mm)                               // 3DNOW
ASMJIT_INST_2x(pfpnacc, Pfpnacc, Mm, Mem)                              // 3DNOW
ASMJIT_INST_2x(pfrcp, Pfrcp, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pfrcp, Pfrcp, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfrcpit1, Pfrcpit1, Mm, Mm)                             // 3DNOW
ASMJIT_INST_2x(pfrcpit1, Pfrcpit1, Mm, Mem)                            // 3DNOW
ASMJIT_INST_2x(pfrcpit2, Pfrcpit2, Mm, Mm)                             // 3DNOW
ASMJIT_INST_2x(pfrcpit2, Pfrcpit2, Mm, Mem)                            // 3DNOW
ASMJIT_INST_2x(pfrcpv, Pfrcpv, Mm, Mm)                                 // GEODE
ASMJIT_INST_2x(pfrcpv, Pfrcpv, Mm, Mem)                                // GEODE
ASMJIT_INST_2x(pfrsqit1, Pfrsqit1, Mm, Mm)                             // 3DNOW
ASMJIT_INST_2x(pfrsqit1, Pfrsqit1, Mm, Mem)                            // 3DNOW
ASMJIT_INST_2x(pfrsqrt, Pfrsqrt, Mm, Mm)                               // 3DNOW
ASMJIT_INST_2x(pfrsqrt, Pfrsqrt, Mm, Mem)                              // 3DNOW
ASMJIT_INST_2x(pfrsqrtv, Pfrsqrtv, Mm, Mm)                             // GEODE
ASMJIT_INST_2x(pfrsqrtv, Pfrsqrtv, Mm, Mem)                            // GEODE
ASMJIT_INST_2x(pfsub, Pfsub, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pfsub, Pfsub, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pfsubr, Pfsubr, Mm, Mm)                                 // 3DNOW
ASMJIT_INST_2x(pfsubr, Pfsubr, Mm, Mem)                                // 3DNOW
ASMJIT_INST_2x(pi2fd, Pi2fd, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pi2fd, Pi2fd, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pi2fw, Pi2fw, Mm, Mm)                                   // 3DNOW
ASMJIT_INST_2x(pi2fw, Pi2fw, Mm, Mem)                                  // 3DNOW
ASMJIT_INST_2x(pmulhrw, Pmulhrw, Mm, Mm)                               // 3DNOW
ASMJIT_INST_2x(pmulhrw, Pmulhrw, Mm, Mem)                              // 3DNOW
ASMJIT_INST_2x(pswapd, Pswapd, Mm, Mm)                                 // 3DNOW
ASMJIT_INST_2x(pswapd, Pswapd, Mm, Mem)                                // 3DNOW
ASMJIT_INST_0x(femms, Femms)                                           // 3DNOW

//! \}
1897
//! \name AESNI Instructions
//! \{

// Legacy (non-VEX) AES-NI forms: two-operand reg,reg / reg,mem on `Xmm`
// registers; `aeskeygenassist` additionally takes an 8-bit immediate
// (round constant).

ASMJIT_INST_2x(aesdec, Aesdec, Xmm, Xmm)                               // AESNI
ASMJIT_INST_2x(aesdec, Aesdec, Xmm, Mem)                               // AESNI
ASMJIT_INST_2x(aesdeclast, Aesdeclast, Xmm, Xmm)                       // AESNI
ASMJIT_INST_2x(aesdeclast, Aesdeclast, Xmm, Mem)                       // AESNI
ASMJIT_INST_2x(aesenc, Aesenc, Xmm, Xmm)                               // AESNI
ASMJIT_INST_2x(aesenc, Aesenc, Xmm, Mem)                               // AESNI
ASMJIT_INST_2x(aesenclast, Aesenclast, Xmm, Xmm)                       // AESNI
ASMJIT_INST_2x(aesenclast, Aesenclast, Xmm, Mem)                       // AESNI
ASMJIT_INST_2x(aesimc, Aesimc, Xmm, Xmm)                               // AESNI
ASMJIT_INST_2x(aesimc, Aesimc, Xmm, Mem)                               // AESNI
ASMJIT_INST_3i(aeskeygenassist, Aeskeygenassist, Xmm, Xmm, Imm)        // AESNI
ASMJIT_INST_3i(aeskeygenassist, Aeskeygenassist, Xmm, Mem, Imm)        // AESNI

//! \}
1915
//! \name SHA Instructions
//! \{

// SHA extension forms. `sha256rnds2` is marked [EXPLICIT]: its third
// operand is the fixed XMM0 register, which must be passed explicitly
// to these overloads.

ASMJIT_INST_2x(sha1msg1, Sha1msg1, Xmm, Xmm)                           // SHA
ASMJIT_INST_2x(sha1msg1, Sha1msg1, Xmm, Mem)                           // SHA
ASMJIT_INST_2x(sha1msg2, Sha1msg2, Xmm, Xmm)                           // SHA
ASMJIT_INST_2x(sha1msg2, Sha1msg2, Xmm, Mem)                           // SHA
ASMJIT_INST_2x(sha1nexte, Sha1nexte, Xmm, Xmm)                         // SHA
ASMJIT_INST_2x(sha1nexte, Sha1nexte, Xmm, Mem)                         // SHA
ASMJIT_INST_3i(sha1rnds4, Sha1rnds4, Xmm, Xmm, Imm)                    // SHA
ASMJIT_INST_3i(sha1rnds4, Sha1rnds4, Xmm, Mem, Imm)                    // SHA
ASMJIT_INST_2x(sha256msg1, Sha256msg1, Xmm, Xmm)                       // SHA
ASMJIT_INST_2x(sha256msg1, Sha256msg1, Xmm, Mem)                       // SHA
ASMJIT_INST_2x(sha256msg2, Sha256msg2, Xmm, Xmm)                       // SHA
ASMJIT_INST_2x(sha256msg2, Sha256msg2, Xmm, Mem)                       // SHA
ASMJIT_INST_3x(sha256rnds2, Sha256rnds2, Xmm, Xmm, XMM0)               // SHA [EXPLICIT]
ASMJIT_INST_3x(sha256rnds2, Sha256rnds2, Xmm, Mem, XMM0)               // SHA [EXPLICIT]

//! \}
1935
1936 //! \name AVX, FMA, and AVX512 Instructions
1937 //! \{
1938
1939 ASMJIT_INST_3x(kaddb, Kaddb, KReg, KReg, KReg) // AVX512_DQ
1940 ASMJIT_INST_3x(kaddd, Kaddd, KReg, KReg, KReg) // AVX512_BW
1941 ASMJIT_INST_3x(kaddq, Kaddq, KReg, KReg, KReg) // AVX512_BW
1942 ASMJIT_INST_3x(kaddw, Kaddw, KReg, KReg, KReg) // AVX512_DQ
1943 ASMJIT_INST_3x(kandb, Kandb, KReg, KReg, KReg) // AVX512_DQ
1944 ASMJIT_INST_3x(kandd, Kandd, KReg, KReg, KReg) // AVX512_BW
1945 ASMJIT_INST_3x(kandnb, Kandnb, KReg, KReg, KReg) // AVX512_DQ
1946 ASMJIT_INST_3x(kandnd, Kandnd, KReg, KReg, KReg) // AVX512_BW
1947 ASMJIT_INST_3x(kandnq, Kandnq, KReg, KReg, KReg) // AVX512_BW
1948 ASMJIT_INST_3x(kandnw, Kandnw, KReg, KReg, KReg) // AVX512_F
1949 ASMJIT_INST_3x(kandq, Kandq, KReg, KReg, KReg) // AVX512_BW
1950 ASMJIT_INST_3x(kandw, Kandw, KReg, KReg, KReg) // AVX512_F
1951 ASMJIT_INST_2x(kmovb, Kmovb, KReg, KReg) // AVX512_DQ
1952 ASMJIT_INST_2x(kmovb, Kmovb, KReg, Mem) // AVX512_DQ
1953 ASMJIT_INST_2x(kmovb, Kmovb, KReg, Gp) // AVX512_DQ
1954 ASMJIT_INST_2x(kmovb, Kmovb, Mem, KReg) // AVX512_DQ
1955 ASMJIT_INST_2x(kmovb, Kmovb, Gp, KReg) // AVX512_DQ
1956 ASMJIT_INST_2x(kmovd, Kmovd, KReg, KReg) // AVX512_BW
1957 ASMJIT_INST_2x(kmovd, Kmovd, KReg, Mem) // AVX512_BW
1958 ASMJIT_INST_2x(kmovd, Kmovd, KReg, Gp) // AVX512_BW
1959 ASMJIT_INST_2x(kmovd, Kmovd, Mem, KReg) // AVX512_BW
1960 ASMJIT_INST_2x(kmovd, Kmovd, Gp, KReg) // AVX512_BW
1961 ASMJIT_INST_2x(kmovq, Kmovq, KReg, KReg) // AVX512_BW
1962 ASMJIT_INST_2x(kmovq, Kmovq, KReg, Mem) // AVX512_BW
1963 ASMJIT_INST_2x(kmovq, Kmovq, KReg, Gp) // AVX512_BW
1964 ASMJIT_INST_2x(kmovq, Kmovq, Mem, KReg) // AVX512_BW
1965 ASMJIT_INST_2x(kmovq, Kmovq, Gp, KReg) // AVX512_BW
1966 ASMJIT_INST_2x(kmovw, Kmovw, KReg, KReg) // AVX512_F
1967 ASMJIT_INST_2x(kmovw, Kmovw, KReg, Mem) // AVX512_F
1968 ASMJIT_INST_2x(kmovw, Kmovw, KReg, Gp) // AVX512_F
1969 ASMJIT_INST_2x(kmovw, Kmovw, Mem, KReg) // AVX512_F
1970 ASMJIT_INST_2x(kmovw, Kmovw, Gp, KReg) // AVX512_F
1971 ASMJIT_INST_2x(knotb, Knotb, KReg, KReg) // AVX512_DQ
1972 ASMJIT_INST_2x(knotd, Knotd, KReg, KReg) // AVX512_BW
1973 ASMJIT_INST_2x(knotq, Knotq, KReg, KReg) // AVX512_BW
1974 ASMJIT_INST_2x(knotw, Knotw, KReg, KReg) // AVX512_F
1975 ASMJIT_INST_3x(korb, Korb, KReg, KReg, KReg) // AVX512_DQ
1976 ASMJIT_INST_3x(kord, Kord, KReg, KReg, KReg) // AVX512_BW
1977 ASMJIT_INST_3x(korq, Korq, KReg, KReg, KReg) // AVX512_BW
1978 ASMJIT_INST_2x(kortestb, Kortestb, KReg, KReg) // AVX512_DQ
1979 ASMJIT_INST_2x(kortestd, Kortestd, KReg, KReg) // AVX512_BW
1980 ASMJIT_INST_2x(kortestq, Kortestq, KReg, KReg) // AVX512_BW
1981 ASMJIT_INST_2x(kortestw, Kortestw, KReg, KReg) // AVX512_F
1982 ASMJIT_INST_3x(korw, Korw, KReg, KReg, KReg) // AVX512_F
1983 ASMJIT_INST_3i(kshiftlb, Kshiftlb, KReg, KReg, Imm) // AVX512_DQ
1984 ASMJIT_INST_3i(kshiftld, Kshiftld, KReg, KReg, Imm) // AVX512_BW
1985 ASMJIT_INST_3i(kshiftlq, Kshiftlq, KReg, KReg, Imm) // AVX512_BW
1986 ASMJIT_INST_3i(kshiftlw, Kshiftlw, KReg, KReg, Imm) // AVX512_F
1987 ASMJIT_INST_3i(kshiftrb, Kshiftrb, KReg, KReg, Imm) // AVX512_DQ
1988 ASMJIT_INST_3i(kshiftrd, Kshiftrd, KReg, KReg, Imm) // AVX512_BW
1989 ASMJIT_INST_3i(kshiftrq, Kshiftrq, KReg, KReg, Imm) // AVX512_BW
1990 ASMJIT_INST_3i(kshiftrw, Kshiftrw, KReg, KReg, Imm) // AVX512_F
1991 ASMJIT_INST_2x(ktestb, Ktestb, KReg, KReg) // AVX512_DQ
1992 ASMJIT_INST_2x(ktestd, Ktestd, KReg, KReg) // AVX512_BW
1993 ASMJIT_INST_2x(ktestq, Ktestq, KReg, KReg) // AVX512_BW
1994 ASMJIT_INST_2x(ktestw, Ktestw, KReg, KReg) // AVX512_DQ
1995 ASMJIT_INST_3x(kunpckbw, Kunpckbw, KReg, KReg, KReg) // AVX512_F
1996 ASMJIT_INST_3x(kunpckdq, Kunpckdq, KReg, KReg, KReg) // AVX512_BW
1997 ASMJIT_INST_3x(kunpckwd, Kunpckwd, KReg, KReg, KReg) // AVX512_BW
1998 ASMJIT_INST_3x(kxnorb, Kxnorb, KReg, KReg, KReg) // AVX512_DQ
1999 ASMJIT_INST_3x(kxnord, Kxnord, KReg, KReg, KReg) // AVX512_BW
2000 ASMJIT_INST_3x(kxnorq, Kxnorq, KReg, KReg, KReg) // AVX512_BW
2001 ASMJIT_INST_3x(kxnorw, Kxnorw, KReg, KReg, KReg) // AVX512_F
2002 ASMJIT_INST_3x(kxorb, Kxorb, KReg, KReg, KReg) // AVX512_DQ
2003 ASMJIT_INST_3x(kxord, Kxord, KReg, KReg, KReg) // AVX512_BW
2004 ASMJIT_INST_3x(kxorq, Kxorq, KReg, KReg, KReg) // AVX512_BW
2005 ASMJIT_INST_3x(kxorw, Kxorw, KReg, KReg, KReg) // AVX512_F
2006 ASMJIT_INST_6x(v4fmaddps, V4fmaddps, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz}
2007 ASMJIT_INST_6x(v4fmaddss, V4fmaddss, Xmm, Xmm, Xmm, Xmm, Xmm, Mem) // AVX512_4FMAPS{kz}
2008 ASMJIT_INST_6x(v4fnmaddps, V4fnmaddps, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz}
2009 ASMJIT_INST_6x(v4fnmaddss, V4fnmaddss, Xmm, Xmm, Xmm, Xmm, Xmm, Mem) // AVX512_4FMAPS{kz}
2010 ASMJIT_INST_3x(vaddpd, Vaddpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
2011 ASMJIT_INST_3x(vaddpd, Vaddpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
2012 ASMJIT_INST_3x(vaddpd, Vaddpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
2013 ASMJIT_INST_3x(vaddpd, Vaddpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
2014 ASMJIT_INST_3x(vaddpd, Vaddpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64}
2015 ASMJIT_INST_3x(vaddpd, Vaddpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64}
2016 ASMJIT_INST_3x(vaddps, Vaddps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2017 ASMJIT_INST_3x(vaddps, Vaddps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2018 ASMJIT_INST_3x(vaddps, Vaddps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
2019 ASMJIT_INST_3x(vaddps, Vaddps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2020 ASMJIT_INST_3x(vaddps, Vaddps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32}
2021 ASMJIT_INST_3x(vaddps, Vaddps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32}
2022 ASMJIT_INST_3x(vaddsd, Vaddsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
2023 ASMJIT_INST_3x(vaddsd, Vaddsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
2024 ASMJIT_INST_3x(vaddss, Vaddss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
2025 ASMJIT_INST_3x(vaddss, Vaddss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
2026 ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Xmm, Xmm, Xmm) // AVX
2027 ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Xmm, Xmm, Mem) // AVX
2028 ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Ymm, Ymm, Ymm) // AVX
2029 ASMJIT_INST_3x(vaddsubpd, Vaddsubpd, Ymm, Ymm, Mem) // AVX
2030 ASMJIT_INST_3x(vaddsubps, Vaddsubps, Xmm, Xmm, Xmm) // AVX
2031 ASMJIT_INST_3x(vaddsubps, Vaddsubps, Xmm, Xmm, Mem) // AVX
2032 ASMJIT_INST_3x(vaddsubps, Vaddsubps, Ymm, Ymm, Ymm) // AVX
2033 ASMJIT_INST_3x(vaddsubps, Vaddsubps, Ymm, Ymm, Mem) // AVX
2034 ASMJIT_INST_3x(vaesdec, Vaesdec, Xmm, Xmm, Xmm) // AVX
2035 ASMJIT_INST_3x(vaesdec, Vaesdec, Xmm, Xmm, Mem) // AVX
2036 ASMJIT_INST_3x(vaesdec, Vaesdec, Ymm, Ymm, Ymm) // VAES AVX512_VL
2037 ASMJIT_INST_3x(vaesdec, Vaesdec, Ymm, Ymm, Mem) // VAES AVX512_VL
2038 ASMJIT_INST_3x(vaesdec, Vaesdec, Zmm, Zmm, Zmm) // VAES
2039 ASMJIT_INST_3x(vaesdec, Vaesdec, Zmm, Zmm, Mem) // VAES
2040 ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Xmm, Xmm, Xmm) // AVX
2041 ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Xmm, Xmm, Mem) // AVX
2042 ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Ymm, Ymm, Ymm) // VAES AVX512_VL
2043 ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Ymm, Ymm, Mem) // VAES AVX512_VL
2044 ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Zmm, Zmm, Zmm) // VAES
2045 ASMJIT_INST_3x(vaesdeclast, Vaesdeclast, Zmm, Zmm, Mem) // VAES
2046 ASMJIT_INST_3x(vaesenc, Vaesenc, Xmm, Xmm, Xmm) // AVX
2047 ASMJIT_INST_3x(vaesenc, Vaesenc, Xmm, Xmm, Mem) // AVX
2048 ASMJIT_INST_3x(vaesenc, Vaesenc, Ymm, Ymm, Ymm) // VAES AVX512_VL
2049 ASMJIT_INST_3x(vaesenc, Vaesenc, Ymm, Ymm, Mem) // VAES AVX512_VL
2050 ASMJIT_INST_3x(vaesenc, Vaesenc, Zmm, Zmm, Zmm) // VAES
2051 ASMJIT_INST_3x(vaesenc, Vaesenc, Zmm, Zmm, Mem) // VAES
2052 ASMJIT_INST_3x(vaesenclast, Vaesenclast, Xmm, Xmm, Xmm) // AVX
2053 ASMJIT_INST_3x(vaesenclast, Vaesenclast, Xmm, Xmm, Mem) // AVX
2054 ASMJIT_INST_3x(vaesenclast, Vaesenclast, Ymm, Ymm, Ymm) // VAES AVX512_VL
2055 ASMJIT_INST_3x(vaesenclast, Vaesenclast, Ymm, Ymm, Mem) // VAES AVX512_VL
2056 ASMJIT_INST_3x(vaesenclast, Vaesenclast, Zmm, Zmm, Zmm) // VAES
2057 ASMJIT_INST_3x(vaesenclast, Vaesenclast, Zmm, Zmm, Mem) // VAES
2058 ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Xmm) // AVX
2059 ASMJIT_INST_2x(vaesimc, Vaesimc, Xmm, Mem) // AVX
2060 ASMJIT_INST_3i(vaeskeygenassist, Vaeskeygenassist, Xmm, Xmm, Imm) // AVX
2061 ASMJIT_INST_3i(vaeskeygenassist, Vaeskeygenassist, Xmm, Mem, Imm) // AVX
2062 ASMJIT_INST_4i(valignd, Valignd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
2063 ASMJIT_INST_4i(valignd, Valignd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
2064 ASMJIT_INST_4i(valignd, Valignd, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
2065 ASMJIT_INST_4i(valignd, Valignd, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
2066 ASMJIT_INST_4i(valignd, Valignd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
2067 ASMJIT_INST_4i(valignd, Valignd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32}
2068 ASMJIT_INST_4i(valignq, Valignq, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
2069 ASMJIT_INST_4i(valignq, Valignq, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
2070 ASMJIT_INST_4i(valignq, Valignq, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
2071 ASMJIT_INST_4i(valignq, Valignq, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
2072 ASMJIT_INST_4i(valignq, Valignq, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
2073 ASMJIT_INST_4i(valignq, Valignq, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64}
2074 ASMJIT_INST_3x(vandnpd, Vandnpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL
2075 ASMJIT_INST_3x(vandnpd, Vandnpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL
2076 ASMJIT_INST_3x(vandnpd, Vandnpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL
2077 ASMJIT_INST_3x(vandnpd, Vandnpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL
2078 ASMJIT_INST_3x(vandnpd, Vandnpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64}
2079 ASMJIT_INST_3x(vandnpd, Vandnpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64}
2080 ASMJIT_INST_3x(vandnps, Vandnps, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b32}-VL
2081 ASMJIT_INST_3x(vandnps, Vandnps, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b32}-VL
2082 ASMJIT_INST_3x(vandnps, Vandnps, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b32}-VL
2083 ASMJIT_INST_3x(vandnps, Vandnps, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b32}-VL
2084 ASMJIT_INST_3x(vandnps, Vandnps, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b32}
2085 ASMJIT_INST_3x(vandnps, Vandnps, Zmm, Zmm, Mem) // AVX512_DQ{kz|b32}
2086 ASMJIT_INST_3x(vandpd, Vandpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL
2087 ASMJIT_INST_3x(vandpd, Vandpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL
2088 ASMJIT_INST_3x(vandpd, Vandpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL
2089 ASMJIT_INST_3x(vandpd, Vandpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL
2090 ASMJIT_INST_3x(vandpd, Vandpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64}
2091 ASMJIT_INST_3x(vandpd, Vandpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64}
2092 ASMJIT_INST_3x(vandps, Vandps, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b32}-VL
2093 ASMJIT_INST_3x(vandps, Vandps, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b32}-VL
2094 ASMJIT_INST_3x(vandps, Vandps, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b32}-VL
2095 ASMJIT_INST_3x(vandps, Vandps, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b32}-VL
2096 ASMJIT_INST_3x(vandps, Vandps, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b32}
2097 ASMJIT_INST_3x(vandps, Vandps, Zmm, Zmm, Mem) // AVX512_DQ{kz|b32}
2098 ASMJIT_INST_3x(vblendmb, Vblendmb, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
2099 ASMJIT_INST_3x(vblendmb, Vblendmb, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
2100 ASMJIT_INST_3x(vblendmb, Vblendmb, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
2101 ASMJIT_INST_3x(vblendmb, Vblendmb, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
2102 ASMJIT_INST_3x(vblendmb, Vblendmb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
2103 ASMJIT_INST_3x(vblendmb, Vblendmb, Zmm, Zmm, Mem) // AVX512_BW{kz}
2104 ASMJIT_INST_3x(vblendmd, Vblendmd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
2105 ASMJIT_INST_3x(vblendmd, Vblendmd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
2106 ASMJIT_INST_3x(vblendmd, Vblendmd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
2107 ASMJIT_INST_3x(vblendmd, Vblendmd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
2108 ASMJIT_INST_3x(vblendmd, Vblendmd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
2109 ASMJIT_INST_3x(vblendmd, Vblendmd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
2110 ASMJIT_INST_3x(vblendmpd, Vblendmpd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
2111 ASMJIT_INST_3x(vblendmpd, Vblendmpd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
2112 ASMJIT_INST_3x(vblendmpd, Vblendmpd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
2113 ASMJIT_INST_3x(vblendmpd, Vblendmpd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
2114 ASMJIT_INST_3x(vblendmpd, Vblendmpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
2115 ASMJIT_INST_3x(vblendmpd, Vblendmpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
2116 ASMJIT_INST_3x(vblendmps, Vblendmps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
2117 ASMJIT_INST_3x(vblendmps, Vblendmps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
2118 ASMJIT_INST_3x(vblendmps, Vblendmps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
2119 ASMJIT_INST_3x(vblendmps, Vblendmps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
2120 ASMJIT_INST_3x(vblendmps, Vblendmps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
2121 ASMJIT_INST_3x(vblendmps, Vblendmps, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
2122 ASMJIT_INST_3x(vblendmq, Vblendmq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
2123 ASMJIT_INST_3x(vblendmq, Vblendmq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
2124 ASMJIT_INST_3x(vblendmq, Vblendmq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
2125 ASMJIT_INST_3x(vblendmq, Vblendmq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
2126 ASMJIT_INST_3x(vblendmq, Vblendmq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
2127 ASMJIT_INST_3x(vblendmq, Vblendmq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
2128 ASMJIT_INST_3x(vblendmw, Vblendmw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
2129 ASMJIT_INST_3x(vblendmw, Vblendmw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
2130 ASMJIT_INST_3x(vblendmw, Vblendmw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
2131 ASMJIT_INST_3x(vblendmw, Vblendmw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
2132 ASMJIT_INST_3x(vblendmw, Vblendmw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
2133 ASMJIT_INST_3x(vblendmw, Vblendmw, Zmm, Zmm, Mem) // AVX512_BW{kz}
2134 ASMJIT_INST_4i(vblendpd, Vblendpd, Xmm, Xmm, Xmm, Imm) // AVX
2135 ASMJIT_INST_4i(vblendpd, Vblendpd, Xmm, Xmm, Mem, Imm) // AVX
2136 ASMJIT_INST_4i(vblendpd, Vblendpd, Ymm, Ymm, Ymm, Imm) // AVX
2137 ASMJIT_INST_4i(vblendpd, Vblendpd, Ymm, Ymm, Mem, Imm) // AVX
2138 ASMJIT_INST_4i(vblendps, Vblendps, Xmm, Xmm, Xmm, Imm) // AVX
2139 ASMJIT_INST_4i(vblendps, Vblendps, Xmm, Xmm, Mem, Imm) // AVX
2140 ASMJIT_INST_4i(vblendps, Vblendps, Ymm, Ymm, Ymm, Imm) // AVX
2141 ASMJIT_INST_4i(vblendps, Vblendps, Ymm, Ymm, Mem, Imm) // AVX
2142 ASMJIT_INST_4x(vblendvpd, Vblendvpd, Xmm, Xmm, Xmm, Xmm) // AVX
2143 ASMJIT_INST_4x(vblendvpd, Vblendvpd, Xmm, Xmm, Mem, Xmm) // AVX
2144 ASMJIT_INST_4x(vblendvpd, Vblendvpd, Ymm, Ymm, Ymm, Ymm) // AVX
2145 ASMJIT_INST_4x(vblendvpd, Vblendvpd, Ymm, Ymm, Mem, Ymm) // AVX
2146 ASMJIT_INST_4x(vblendvps, Vblendvps, Xmm, Xmm, Xmm, Xmm) // AVX
2147 ASMJIT_INST_4x(vblendvps, Vblendvps, Xmm, Xmm, Mem, Xmm) // AVX
2148 ASMJIT_INST_4x(vblendvps, Vblendvps, Ymm, Ymm, Ymm, Ymm) // AVX
2149 ASMJIT_INST_4x(vblendvps, Vblendvps, Ymm, Ymm, Mem, Ymm) // AVX
2150 ASMJIT_INST_2x(vbroadcastf128, Vbroadcastf128, Ymm, Mem) // AVX
2151 ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Ymm, Xmm) // AVX512_DQ{kz}-VL
2152 ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Ymm, Mem) // AVX512_DQ{kz}-VL
2153 ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Zmm, Xmm) // AVX512_DQ{kz}
2154 ASMJIT_INST_2x(vbroadcastf32x2, Vbroadcastf32x2, Zmm, Mem) // AVX512_DQ{kz}
// NOTE(review): the 256-bit (Ymm) form of VBROADCASTF32X4 requires AVX512VL,
// so it is tagged "-VL" to match the sibling vbroadcasti32x4 entry below.
ASMJIT_INST_2x(vbroadcastf32x4, Vbroadcastf32x4, Ymm, Mem)             // AVX512_F{kz}-VL
ASMJIT_INST_2x(vbroadcastf32x4, Vbroadcastf32x4, Zmm, Mem)             // AVX512_F{kz}
2157 ASMJIT_INST_2x(vbroadcastf32x8, Vbroadcastf32x8, Zmm, Mem) // AVX512_DQ{kz}
2158 ASMJIT_INST_2x(vbroadcastf64x2, Vbroadcastf64x2, Ymm, Mem) // AVX512_DQ{kz}-VL
2159 ASMJIT_INST_2x(vbroadcastf64x2, Vbroadcastf64x2, Zmm, Mem) // AVX512_DQ{kz}
2160 ASMJIT_INST_2x(vbroadcastf64x4, Vbroadcastf64x4, Zmm, Mem) // AVX512_F{kz}
2161 ASMJIT_INST_2x(vbroadcasti128, Vbroadcasti128, Ymm, Mem) // AVX2
2162 ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Xmm, Xmm) // AVX512_DQ{kz}-VL
2163 ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Xmm, Mem) // AVX512_DQ{kz}-VL
2164 ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Ymm, Xmm) // AVX512_DQ{kz}-VL
2165 ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Ymm, Mem) // AVX512_DQ{kz}-VL
2166 ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Zmm, Xmm) // AVX512_DQ{kz}
2167 ASMJIT_INST_2x(vbroadcasti32x2, Vbroadcasti32x2, Zmm, Mem) // AVX512_DQ{kz}
2168 ASMJIT_INST_2x(vbroadcasti32x4, Vbroadcasti32x4, Ymm, Mem) // AVX512_F{kz}-VL
2169 ASMJIT_INST_2x(vbroadcasti32x4, Vbroadcasti32x4, Zmm, Mem) // AVX512_F{kz}
2170 ASMJIT_INST_2x(vbroadcasti32x8, Vbroadcasti32x8, Zmm, Mem) // AVX512_DQ{kz}
2171 ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Ymm, Xmm) // AVX512_DQ{kz}-VL
2172 ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Ymm, Mem) // AVX512_DQ{kz}-VL
2173 ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Zmm, Xmm) // AVX512_DQ{kz}
2174 ASMJIT_INST_2x(vbroadcasti64x2, Vbroadcasti64x2, Zmm, Mem) // AVX512_DQ{kz}
2175 ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Zmm, Xmm) // AVX512_F{kz}
2176 ASMJIT_INST_2x(vbroadcasti64x4, Vbroadcasti64x4, Zmm, Mem) // AVX512_F{kz}
// NOTE(review): for vbroadcastss the "-VL" tags were swapped — AVX512VL
// qualifies the 128/256-bit (Xmm/Ymm) EVEX forms, not the 512-bit (Zmm)
// forms; fixed below to match the vbroadcastsd pattern directly above.
ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Ymm, Mem)                   // AVX AVX512_F{kz}-VL
ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Ymm, Xmm)                   // AVX2 AVX512_F{kz}-VL
ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Zmm, Xmm)                   // AVX512_F{kz}
ASMJIT_INST_2x(vbroadcastsd, Vbroadcastsd, Zmm, Mem)                   // AVX512_F{kz}
ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Xmm, Mem)                   // AVX AVX512_F{kz}-VL
ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Xmm, Xmm)                   // AVX2 AVX512_F{kz}-VL
ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Ymm, Mem)                   // AVX AVX512_F{kz}-VL
ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Ymm, Xmm)                   // AVX2 AVX512_F{kz}-VL
ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Zmm, Xmm)                   // AVX512_F{kz}
ASMJIT_INST_2x(vbroadcastss, Vbroadcastss, Zmm, Mem)                   // AVX512_F{kz}
2187 ASMJIT_INST_4i(vcmppd, Vcmppd, Xmm, Xmm, Xmm, Imm) // AVX
2188 ASMJIT_INST_4i(vcmppd, Vcmppd, Xmm, Xmm, Mem, Imm) // AVX
2189 ASMJIT_INST_4i(vcmppd, Vcmppd, Ymm, Ymm, Ymm, Imm) // AVX
2190 ASMJIT_INST_4i(vcmppd, Vcmppd, Ymm, Ymm, Mem, Imm) // AVX
2191 ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
2192 ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
2193 ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
2194 ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
2195 ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64}
2196 ASMJIT_INST_4i(vcmppd, Vcmppd, KReg, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64}
2197 ASMJIT_INST_4i(vcmpps, Vcmpps, Xmm, Xmm, Xmm, Imm) // AVX
2198 ASMJIT_INST_4i(vcmpps, Vcmpps, Xmm, Xmm, Mem, Imm) // AVX
2199 ASMJIT_INST_4i(vcmpps, Vcmpps, Ymm, Ymm, Ymm, Imm) // AVX
2200 ASMJIT_INST_4i(vcmpps, Vcmpps, Ymm, Ymm, Mem, Imm) // AVX
2201 ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
2202 ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
2203 ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
2204 ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
2205 ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32}
2206 ASMJIT_INST_4i(vcmpps, Vcmpps, KReg, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32}
2207 ASMJIT_INST_4i(vcmpsd, Vcmpsd, Xmm, Xmm, Xmm, Imm) // AVX
2208 ASMJIT_INST_4i(vcmpsd, Vcmpsd, Xmm, Xmm, Mem, Imm) // AVX
2209 ASMJIT_INST_4i(vcmpsd, Vcmpsd, KReg, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
2210 ASMJIT_INST_4i(vcmpsd, Vcmpsd, KReg, Xmm, Mem, Imm) // AVX512_F{kz|sae}
2211 ASMJIT_INST_4i(vcmpss, Vcmpss, Xmm, Xmm, Xmm, Imm) // AVX
2212 ASMJIT_INST_4i(vcmpss, Vcmpss, Xmm, Xmm, Mem, Imm) // AVX
2213 ASMJIT_INST_4i(vcmpss, Vcmpss, KReg, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
2214 ASMJIT_INST_4i(vcmpss, Vcmpss, KReg, Xmm, Mem, Imm) // AVX512_F{kz|sae}
2215 ASMJIT_INST_2x(vcomisd, Vcomisd, Xmm, Xmm) // AVX AVX512_F{sae}
2216 ASMJIT_INST_2x(vcomisd, Vcomisd, Xmm, Mem) // AVX AVX512_F{sae}
2217 ASMJIT_INST_2x(vcomiss, Vcomiss, Xmm, Xmm) // AVX AVX512_F{sae}
2218 ASMJIT_INST_2x(vcomiss, Vcomiss, Xmm, Mem) // AVX AVX512_F{sae}
2219 ASMJIT_INST_2x(vcompresspd, Vcompresspd, Xmm, Xmm) // AVX512_F{kz}-VL
2220 ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Xmm) // AVX512_F{kz}-VL
2221 ASMJIT_INST_2x(vcompresspd, Vcompresspd, Ymm, Ymm) // AVX512_F{kz}-VL
2222 ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Ymm) // AVX512_F{kz}-VL
2223 ASMJIT_INST_2x(vcompresspd, Vcompresspd, Zmm, Zmm) // AVX512_F{kz}
2224 ASMJIT_INST_2x(vcompresspd, Vcompresspd, Mem, Zmm) // AVX512_F{kz}
2225 ASMJIT_INST_2x(vcompressps, Vcompressps, Xmm, Xmm) // AVX512_F{kz}-VL
2226 ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Xmm) // AVX512_F{kz}-VL
2227 ASMJIT_INST_2x(vcompressps, Vcompressps, Ymm, Ymm) // AVX512_F{kz}-VL
2228 ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Ymm) // AVX512_F{kz}-VL
2229 ASMJIT_INST_2x(vcompressps, Vcompressps, Zmm, Zmm) // AVX512_F{kz}
2230 ASMJIT_INST_2x(vcompressps, Vcompressps, Mem, Zmm) // AVX512_F{kz}
2231 ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2232 ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2233 ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Ymm, Xmm) // AVX AVX512_F{kz|b32}-VL
2234 ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2235 ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Zmm, Ymm) // AVX512_F{kz|b32}
2236 ASMJIT_INST_2x(vcvtdq2pd, Vcvtdq2pd, Zmm, Mem) // AVX512_F{kz|b32}
2237 ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2238 ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2239 ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
2240 ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2241 ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Zmm, Zmm) // AVX512_F{kz|er|b32}
2242 ASMJIT_INST_2x(vcvtdq2ps, Vcvtdq2ps, Zmm, Mem) // AVX512_F{kz|er|b32}
2243 ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Xmm, Xmm, Xmm) // AVX512_BF16{kz|b32}-VL
2244 ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Xmm, Xmm, Mem) // AVX512_BF16{kz|b32}-VL
2245 ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Ymm, Ymm, Ymm) // AVX512_BF16{kz|b32}-VL
2246 ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Ymm, Ymm, Mem) // AVX512_BF16{kz|b32}-VL
2247 ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Zmm, Zmm, Zmm) // AVX512_BF16{kz|b32}
2248 ASMJIT_INST_3x(vcvtne2ps2bf16, Vcvtne2ps2bf16, Zmm, Zmm, Mem) // AVX512_BF16{kz|b32}
2249 ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Xmm, Xmm) // AVX512_BF16{kz|b32}-VL
2250 ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Xmm, Ymm) // AVX512_BF16{kz|b32}-VL
2251 ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Xmm, Mem) // AVX512_BF16{kz|b32}-VL
2252 ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Ymm, Zmm) // AVX512_BF16{kz|b32}
2253 ASMJIT_INST_2x(vcvtneps2bf16, Vcvtneps2bf16, Ymm, Mem) // AVX512_BF16{kz|b32}
2254 ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
2255 ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
2256 ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Xmm, Ymm) // AVX AVX512_F{kz|b64}-VL
2257 ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Ymm, Zmm) // AVX512_F{kz|er|b64}
2258 ASMJIT_INST_2x(vcvtpd2dq, Vcvtpd2dq, Ymm, Mem) // AVX512_F{kz|er|b64}
2259 ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
2260 ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
2261 ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Xmm, Ymm) // AVX AVX512_F{kz|b64}-VL
2262 ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Ymm, Zmm) // AVX512_F{kz|er|b64}
2263 ASMJIT_INST_2x(vcvtpd2ps, Vcvtpd2ps, Ymm, Mem) // AVX512_F{kz|er|b64}
2264 ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
2265 ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
2266 ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL
2267 ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Ymm, Mem) // AVX512_DQ{kz|b64}-VL
2268 ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Zmm, Zmm) // AVX512_DQ{kz|er|b64}
2269 ASMJIT_INST_2x(vcvtpd2qq, Vcvtpd2qq, Zmm, Mem) // AVX512_DQ{kz|er|b64}
2270 ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Xmm, Xmm) // AVX512_F{kz|b64}-VL
2271 ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Xmm, Mem) // AVX512_F{kz|b64}-VL
2272 ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Xmm, Ymm) // AVX512_F{kz|b64}-VL
2273 ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Ymm, Zmm) // AVX512_F{kz|er|b64}
2274 ASMJIT_INST_2x(vcvtpd2udq, Vcvtpd2udq, Ymm, Mem) // AVX512_F{kz|er|b64}
2275 ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
2276 ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
2277 ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL
2278 ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Ymm, Mem) // AVX512_DQ{kz|b64}-VL
2279 ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Zmm, Zmm) // AVX512_DQ{kz|er|b64}
2280 ASMJIT_INST_2x(vcvtpd2uqq, Vcvtpd2uqq, Zmm, Mem) // AVX512_DQ{kz|er|b64}
2281 ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Xmm, Xmm) // F16C AVX512_F{kz}-VL
2282 ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Xmm, Mem) // F16C AVX512_F{kz}-VL
2283 ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Ymm, Xmm) // F16C AVX512_F{kz}-VL
2284 ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Ymm, Mem) // F16C AVX512_F{kz}-VL
2285 ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Zmm, Ymm) // AVX512_F{kz|sae}
2286 ASMJIT_INST_2x(vcvtph2ps, Vcvtph2ps, Zmm, Mem) // AVX512_F{kz|sae}
2287 ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2288 ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2289 ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
2290 ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2291 ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Zmm, Zmm) // AVX512_F{kz|er|b32}
2292 ASMJIT_INST_2x(vcvtps2dq, Vcvtps2dq, Zmm, Mem) // AVX512_F{kz|er|b32}
2293 ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2294 ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2295 ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Ymm, Xmm) // AVX AVX512_F{kz|b32}-VL
2296 ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2297 ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Zmm, Ymm) // AVX512_F{kz|er|b32}
2298 ASMJIT_INST_2x(vcvtps2pd, Vcvtps2pd, Zmm, Mem) // AVX512_F{kz|er|b32}
2299 ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Xmm, Xmm, Imm) // F16C AVX512_F{kz}-VL
2300 ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Mem, Xmm, Imm) // F16C AVX512_F{kz}-VL
2301 ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Xmm, Ymm, Imm) // F16C AVX512_F{kz}-VL
2302 ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Mem, Ymm, Imm) // F16C AVX512_F{kz}-VL
2303 ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Ymm, Zmm, Imm) // AVX512_F{kz|sae}
2304 ASMJIT_INST_3i(vcvtps2ph, Vcvtps2ph, Mem, Zmm, Imm) // AVX512_F{kz|sae}
2305 ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL
2306 ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL
2307 ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL
2308 ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL
2309 ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Zmm, Ymm) // AVX512_DQ{kz|er|b32}
2310 ASMJIT_INST_2x(vcvtps2qq, Vcvtps2qq, Zmm, Mem) // AVX512_DQ{kz|er|b32}
2311 ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Xmm, Xmm) // AVX512_F{kz|b32}-VL
2312 ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Xmm, Mem) // AVX512_F{kz|b32}-VL
2313 ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Ymm, Ymm) // AVX512_F{kz|b32}-VL
2314 ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Ymm, Mem) // AVX512_F{kz|b32}-VL
2315 ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Zmm, Zmm) // AVX512_F{kz|er|b32}
2316 ASMJIT_INST_2x(vcvtps2udq, Vcvtps2udq, Zmm, Mem) // AVX512_F{kz|er|b32}
2317 ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL
2318 ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL
2319 ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL
2320 ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL
2321 ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Zmm, Ymm) // AVX512_DQ{kz|er|b32}
2322 ASMJIT_INST_2x(vcvtps2uqq, Vcvtps2uqq, Zmm, Mem) // AVX512_DQ{kz|er|b32}
2323 ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
2324 ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
2325 ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL
2326 ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Ymm, Mem) // AVX512_DQ{kz|b64}-VL
2327 ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Zmm, Zmm) // AVX512_DQ{kz|er|b64}
2328 ASMJIT_INST_2x(vcvtqq2pd, Vcvtqq2pd, Zmm, Mem) // AVX512_DQ{kz|er|b64}
2329 ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
2330 ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
2331 ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Xmm, Ymm) // AVX512_DQ{kz|b64}-VL
2332 ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Ymm, Zmm) // AVX512_DQ{kz|er|b64}
2333 ASMJIT_INST_2x(vcvtqq2ps, Vcvtqq2ps, Ymm, Mem) // AVX512_DQ{kz|er|b64}
2334 ASMJIT_INST_2x(vcvtsd2si, Vcvtsd2si, Gp, Xmm) // AVX AVX512_F{er}
2335 ASMJIT_INST_2x(vcvtsd2si, Vcvtsd2si, Gp, Mem) // AVX AVX512_F{er}
2336 ASMJIT_INST_3x(vcvtsd2ss, Vcvtsd2ss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
2337 ASMJIT_INST_3x(vcvtsd2ss, Vcvtsd2ss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
2338 ASMJIT_INST_2x(vcvtsd2usi, Vcvtsd2usi, Gp, Xmm) // AVX512_F{er}
2339 ASMJIT_INST_2x(vcvtsd2usi, Vcvtsd2usi, Gp, Mem) // AVX512_F{er}
2340 ASMJIT_INST_3x(vcvtsi2sd, Vcvtsi2sd, Xmm, Xmm, Gp) // AVX AVX512_F{er}
2341 ASMJIT_INST_3x(vcvtsi2sd, Vcvtsi2sd, Xmm, Xmm, Mem) // AVX AVX512_F{er}
2342 ASMJIT_INST_3x(vcvtsi2ss, Vcvtsi2ss, Xmm, Xmm, Gp) // AVX AVX512_F{er}
2343 ASMJIT_INST_3x(vcvtsi2ss, Vcvtsi2ss, Xmm, Xmm, Mem) // AVX AVX512_F{er}
2344 ASMJIT_INST_3x(vcvtss2sd, Vcvtss2sd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}
2345 ASMJIT_INST_3x(vcvtss2sd, Vcvtss2sd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}
2346 ASMJIT_INST_2x(vcvtss2si, Vcvtss2si, Gp, Xmm) // AVX AVX512_F{er}
2347 ASMJIT_INST_2x(vcvtss2si, Vcvtss2si, Gp, Mem) // AVX AVX512_F{er}
2348 ASMJIT_INST_2x(vcvtss2usi, Vcvtss2usi, Gp, Xmm) // AVX512_F{er}
2349 ASMJIT_INST_2x(vcvtss2usi, Vcvtss2usi, Gp, Mem) // AVX512_F{er}
2350 ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
2351 ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
2352 ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Xmm, Ymm) // AVX AVX512_F{kz|b64}-VL
2353 ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Ymm, Zmm) // AVX512_F{kz|sae|b64}
2354 ASMJIT_INST_2x(vcvttpd2dq, Vcvttpd2dq, Ymm, Mem) // AVX512_F{kz|sae|b64}
2355 ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Xmm, Xmm) // AVX512_F{kz|b64}-VL
2356 ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Xmm, Mem) // AVX512_F{kz|b64}-VL
2357 ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Ymm, Ymm) // AVX512_F{kz|b64}-VL
2358 ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Ymm, Mem) // AVX512_F{kz|b64}-VL
2359 ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Zmm, Zmm) // AVX512_F{kz|sae|b64}
2360 ASMJIT_INST_2x(vcvttpd2qq, Vcvttpd2qq, Zmm, Mem) // AVX512_F{kz|sae|b64}
2361 ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Xmm, Xmm) // AVX512_F{kz|b64}-VL
2362 ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Xmm, Mem) // AVX512_F{kz|b64}-VL
2363 ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Xmm, Ymm) // AVX512_F{kz|b64}-VL
2364 ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Ymm, Zmm) // AVX512_F{kz|sae|b64}
2365 ASMJIT_INST_2x(vcvttpd2udq, Vcvttpd2udq, Ymm, Mem) // AVX512_F{kz|sae|b64}
2366 ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
2367 ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
2368 ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL
2369 ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Ymm, Mem) // AVX512_DQ{kz|b64}-VL
2370 ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Zmm, Zmm) // AVX512_DQ{kz|sae|b64}
2371 ASMJIT_INST_2x(vcvttpd2uqq, Vcvttpd2uqq, Zmm, Mem) // AVX512_DQ{kz|sae|b64}
2372 ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2373 ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2374 ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
2375 ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2376 ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Zmm, Zmm) // AVX512_F{kz|sae|b32}
2377 ASMJIT_INST_2x(vcvttps2dq, Vcvttps2dq, Zmm, Mem) // AVX512_F{kz|sae|b32}
2378 ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL
2379 ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL
2380 ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL
2381 ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL
2382 ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Zmm, Ymm) // AVX512_DQ{kz|sae|b32}
2383 ASMJIT_INST_2x(vcvttps2qq, Vcvttps2qq, Zmm, Mem) // AVX512_DQ{kz|sae|b32}
2384 ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Xmm, Xmm) // AVX512_F{kz|b32}-VL
2385 ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Xmm, Mem) // AVX512_F{kz|b32}-VL
2386 ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Ymm, Ymm) // AVX512_F{kz|b32}-VL
2387 ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Ymm, Mem) // AVX512_F{kz|b32}-VL
2388 ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Zmm, Zmm) // AVX512_F{kz|sae|b32}
2389 ASMJIT_INST_2x(vcvttps2udq, Vcvttps2udq, Zmm, Mem) // AVX512_F{kz|sae|b32}
2390 ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Xmm, Xmm) // AVX512_DQ{kz|b32}-VL
2391 ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Xmm, Mem) // AVX512_DQ{kz|b32}-VL
2392 ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Ymm, Xmm) // AVX512_DQ{kz|b32}-VL
2393 ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Ymm, Mem) // AVX512_DQ{kz|b32}-VL
2394 ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Zmm, Ymm) // AVX512_DQ{kz|sae|b32}
2395 ASMJIT_INST_2x(vcvttps2uqq, Vcvttps2uqq, Zmm, Mem) // AVX512_DQ{kz|sae|b32}
2396 ASMJIT_INST_2x(vcvttsd2si, Vcvttsd2si, Gp, Xmm) // AVX AVX512_F{sae}
2397 ASMJIT_INST_2x(vcvttsd2si, Vcvttsd2si, Gp, Mem) // AVX AVX512_F{sae}
2398 ASMJIT_INST_2x(vcvttsd2usi, Vcvttsd2usi, Gp, Xmm) // AVX512_F{sae}
2399 ASMJIT_INST_2x(vcvttsd2usi, Vcvttsd2usi, Gp, Mem) // AVX512_F{sae}
2400 ASMJIT_INST_2x(vcvttss2si, Vcvttss2si, Gp, Xmm) // AVX AVX512_F{sae}
2401 ASMJIT_INST_2x(vcvttss2si, Vcvttss2si, Gp, Mem) // AVX AVX512_F{sae}
2402 ASMJIT_INST_2x(vcvttss2usi, Vcvttss2usi, Gp, Xmm) // AVX512_F{sae}
2403 ASMJIT_INST_2x(vcvttss2usi, Vcvttss2usi, Gp, Mem) // AVX512_F{sae}
2404 ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Xmm, Xmm) // AVX512_F{kz|b32}-VL
2405 ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Xmm, Mem) // AVX512_F{kz|b32}-VL
2406 ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Ymm, Xmm) // AVX512_F{kz|b32}-VL
2407 ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Ymm, Mem) // AVX512_F{kz|b32}-VL
2408 ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Zmm, Ymm) // AVX512_F{kz|b32}
2409 ASMJIT_INST_2x(vcvtudq2pd, Vcvtudq2pd, Zmm, Mem) // AVX512_F{kz|b32}
2410 ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Xmm, Xmm) // AVX512_F{kz|b32}-VL
2411 ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Xmm, Mem) // AVX512_F{kz|b32}-VL
2412 ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Ymm, Ymm) // AVX512_F{kz|b32}-VL
2413 ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Ymm, Mem) // AVX512_F{kz|b32}-VL
2414 ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Zmm, Zmm) // AVX512_F{kz|er|b32}
2415 ASMJIT_INST_2x(vcvtudq2ps, Vcvtudq2ps, Zmm, Mem) // AVX512_F{kz|er|b32}
2416 ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
2417 ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
2418 ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL
2419 ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Ymm, Mem) // AVX512_DQ{kz|b64}-VL
2420 ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Zmm, Zmm) // AVX512_DQ{kz|er|b64}
2421 ASMJIT_INST_2x(vcvtuqq2pd, Vcvtuqq2pd, Zmm, Mem) // AVX512_DQ{kz|er|b64}
2422 ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
2423 ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
2424 ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Xmm, Ymm) // AVX512_DQ{kz|b64}-VL
2425 ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Ymm, Zmm) // AVX512_DQ{kz|er|b64}
2426 ASMJIT_INST_2x(vcvtuqq2ps, Vcvtuqq2ps, Ymm, Mem) // AVX512_DQ{kz|er|b64}
2427 ASMJIT_INST_3x(vcvtusi2sd, Vcvtusi2sd, Xmm, Xmm, Gp) // AVX512_F{er}
2428 ASMJIT_INST_3x(vcvtusi2sd, Vcvtusi2sd, Xmm, Xmm, Mem) // AVX512_F{er}
2429 ASMJIT_INST_3x(vcvtusi2ss, Vcvtusi2ss, Xmm, Xmm, Gp) // AVX512_F{er}
2430 ASMJIT_INST_3x(vcvtusi2ss, Vcvtusi2ss, Xmm, Xmm, Mem) // AVX512_F{er}
2431 ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Xmm, Xmm, Xmm, Imm) // AVX512_BW{kz}-VL
2432 ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Xmm, Xmm, Mem, Imm) // AVX512_BW{kz}-VL
2433 ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Ymm, Ymm, Ymm, Imm) // AVX512_BW{kz}-VL
2434 ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Ymm, Ymm, Mem, Imm) // AVX512_BW{kz}-VL
2435 ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Zmm, Zmm, Zmm, Imm) // AVX512_BW{kz}
2436 ASMJIT_INST_4i(vdbpsadbw, Vdbpsadbw, Zmm, Zmm, Mem, Imm) // AVX512_BW{kz}
2437 ASMJIT_INST_3x(vdivpd, Vdivpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
2438 ASMJIT_INST_3x(vdivpd, Vdivpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
2439 ASMJIT_INST_3x(vdivpd, Vdivpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
2440 ASMJIT_INST_3x(vdivpd, Vdivpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
2441 ASMJIT_INST_3x(vdivpd, Vdivpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64}
2442 ASMJIT_INST_3x(vdivpd, Vdivpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64}
2443 ASMJIT_INST_3x(vdivps, Vdivps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2444 ASMJIT_INST_3x(vdivps, Vdivps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2445 ASMJIT_INST_3x(vdivps, Vdivps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
2446 ASMJIT_INST_3x(vdivps, Vdivps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2447 ASMJIT_INST_3x(vdivps, Vdivps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32}
2448 ASMJIT_INST_3x(vdivps, Vdivps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32}
2449 ASMJIT_INST_3x(vdivsd, Vdivsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
2450 ASMJIT_INST_3x(vdivsd, Vdivsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
2451 ASMJIT_INST_3x(vdivss, Vdivss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
2452 ASMJIT_INST_3x(vdivss, Vdivss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
2453 ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Xmm, Xmm, Xmm) // AVX512_BF16{kz|b32}-VL
2454 ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Xmm, Xmm, Mem) // AVX512_BF16{kz|b32}-VL
2455 ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Ymm, Ymm, Ymm) // AVX512_BF16{kz|b32}-VL
2456 ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Ymm, Ymm, Mem) // AVX512_BF16{kz|b32}-VL
2457 ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Zmm, Zmm, Zmm) // AVX512_BF16{kz|b32}
2458 ASMJIT_INST_3x(vdpbf16ps, Vdpbf16ps, Zmm, Zmm, Mem) // AVX512_BF16{kz|b32}
2459 ASMJIT_INST_4i(vdppd, Vdppd, Xmm, Xmm, Xmm, Imm) // AVX
2460 ASMJIT_INST_4i(vdppd, Vdppd, Xmm, Xmm, Mem, Imm) // AVX
2461 ASMJIT_INST_4i(vdpps, Vdpps, Xmm, Xmm, Xmm, Imm) // AVX
2462 ASMJIT_INST_4i(vdpps, Vdpps, Xmm, Xmm, Mem, Imm) // AVX
2463 ASMJIT_INST_4i(vdpps, Vdpps, Ymm, Ymm, Ymm, Imm) // AVX
2464 ASMJIT_INST_4i(vdpps, Vdpps, Ymm, Ymm, Mem, Imm) // AVX
2465 ASMJIT_INST_2x(vexp2pd, Vexp2pd, Zmm, Zmm) // AVX512_ER{kz|sae|b64}
2466 ASMJIT_INST_2x(vexp2pd, Vexp2pd, Zmm, Mem) // AVX512_ER{kz|sae|b64}
2467 ASMJIT_INST_2x(vexp2ps, Vexp2ps, Zmm, Zmm) // AVX512_ER{kz|sae|b32}
2468 ASMJIT_INST_2x(vexp2ps, Vexp2ps, Zmm, Mem) // AVX512_ER{kz|sae|b32}
2469 ASMJIT_INST_2x(vexpandpd, Vexpandpd, Xmm, Xmm) // AVX512_F{kz}-VL
2470 ASMJIT_INST_2x(vexpandpd, Vexpandpd, Xmm, Mem) // AVX512_F{kz}-VL
2471 ASMJIT_INST_2x(vexpandpd, Vexpandpd, Ymm, Ymm) // AVX512_F{kz}-VL
2472 ASMJIT_INST_2x(vexpandpd, Vexpandpd, Ymm, Mem) // AVX512_F{kz}-VL
2473 ASMJIT_INST_2x(vexpandpd, Vexpandpd, Zmm, Zmm) // AVX512_F{kz}
2474 ASMJIT_INST_2x(vexpandpd, Vexpandpd, Zmm, Mem) // AVX512_F{kz}
2475 ASMJIT_INST_2x(vexpandps, Vexpandps, Xmm, Xmm) // AVX512_F{kz}-VL
2476 ASMJIT_INST_2x(vexpandps, Vexpandps, Xmm, Mem) // AVX512_F{kz}-VL
2477 ASMJIT_INST_2x(vexpandps, Vexpandps, Ymm, Ymm) // AVX512_F{kz}-VL
2478 ASMJIT_INST_2x(vexpandps, Vexpandps, Ymm, Mem) // AVX512_F{kz}-VL
2479 ASMJIT_INST_2x(vexpandps, Vexpandps, Zmm, Zmm) // AVX512_F{kz}
2480 ASMJIT_INST_2x(vexpandps, Vexpandps, Zmm, Mem) // AVX512_F{kz}
2481 ASMJIT_INST_3i(vextractf128, Vextractf128, Xmm, Ymm, Imm) // AVX
2482 ASMJIT_INST_3i(vextractf128, Vextractf128, Mem, Ymm, Imm) // AVX
2483 ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Xmm, Ymm, Imm) // AVX512_F{kz}-VL
2484 ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Mem, Ymm, Imm) // AVX512_F{kz}-VL
2485 ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Xmm, Zmm, Imm) // AVX512_F{kz}
2486 ASMJIT_INST_3i(vextractf32x4, Vextractf32x4, Mem, Zmm, Imm) // AVX512_F{kz}
2487 ASMJIT_INST_3i(vextractf32x8, Vextractf32x8, Ymm, Zmm, Imm) // AVX512_DQ{kz}
2488 ASMJIT_INST_3i(vextractf32x8, Vextractf32x8, Mem, Zmm, Imm) // AVX512_DQ{kz}
2489 ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Xmm, Ymm, Imm) // AVX512_DQ{kz}-VL
2490 ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Mem, Ymm, Imm) // AVX512_DQ{kz}-VL
2491 ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Xmm, Zmm, Imm) // AVX512_DQ{kz}
2492 ASMJIT_INST_3i(vextractf64x2, Vextractf64x2, Mem, Zmm, Imm) // AVX512_DQ{kz}
2493 ASMJIT_INST_3i(vextractf64x4, Vextractf64x4, Ymm, Zmm, Imm) // AVX512_F{kz}
2494 ASMJIT_INST_3i(vextractf64x4, Vextractf64x4, Mem, Zmm, Imm) // AVX512_F{kz}
2495 ASMJIT_INST_3i(vextracti128, Vextracti128, Xmm, Ymm, Imm) // AVX2
2496 ASMJIT_INST_3i(vextracti128, Vextracti128, Mem, Ymm, Imm) // AVX2
2497 ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Xmm, Ymm, Imm) // AVX512_F{kz}-VL
2498 ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Mem, Ymm, Imm) // AVX512_F{kz}-VL
2499 ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Xmm, Zmm, Imm) // AVX512_F{kz}
2500 ASMJIT_INST_3i(vextracti32x4, Vextracti32x4, Mem, Zmm, Imm) // AVX512_F{kz}
2501 ASMJIT_INST_3i(vextracti32x8, Vextracti32x8, Ymm, Zmm, Imm) // AVX512_DQ{kz}
2502 ASMJIT_INST_3i(vextracti32x8, Vextracti32x8, Mem, Zmm, Imm) // AVX512_DQ{kz}
2503 ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Xmm, Ymm, Imm) // AVX512_DQ{kz}-VL
2504 ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Mem, Ymm, Imm) // AVX512_DQ{kz}-VL
2505 ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Xmm, Zmm, Imm) // AVX512_DQ{kz}
2506 ASMJIT_INST_3i(vextracti64x2, Vextracti64x2, Mem, Zmm, Imm) // AVX512_DQ{kz}
2507 ASMJIT_INST_3i(vextracti64x4, Vextracti64x4, Ymm, Zmm, Imm) // AVX512_F{kz}
2508 ASMJIT_INST_3i(vextracti64x4, Vextracti64x4, Mem, Zmm, Imm) // AVX512_F{kz}
2509 ASMJIT_INST_3i(vextractps, Vextractps, Gp, Xmm, Imm) // AVX AVX512_F
2510 ASMJIT_INST_3i(vextractps, Vextractps, Mem, Xmm, Imm) // AVX AVX512_F
2511 ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
2512 ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
2513 ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
2514 ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
2515 ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64}
2516 ASMJIT_INST_4i(vfixupimmpd, Vfixupimmpd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64}
2517 ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
2518 ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
2519 ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
2520 ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
2521 ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32}
2522 ASMJIT_INST_4i(vfixupimmps, Vfixupimmps, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32}
2523 ASMJIT_INST_4i(vfixupimmsd, Vfixupimmsd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
2524 ASMJIT_INST_4i(vfixupimmsd, Vfixupimmsd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae}
2525 ASMJIT_INST_4i(vfixupimmss, Vfixupimmss, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
2526 ASMJIT_INST_4i(vfixupimmss, Vfixupimmss, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae}
2527 ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2528 ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2529 ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2530 ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2531 ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2532 ASMJIT_INST_3x(vfmadd132pd, Vfmadd132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2533 ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2534 ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2535 ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2536 ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2537 ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2538 ASMJIT_INST_3x(vfmadd132ps, Vfmadd132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2539 ASMJIT_INST_3x(vfmadd132sd, Vfmadd132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2540 ASMJIT_INST_3x(vfmadd132sd, Vfmadd132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2541 ASMJIT_INST_3x(vfmadd132ss, Vfmadd132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2542 ASMJIT_INST_3x(vfmadd132ss, Vfmadd132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2543 ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2544 ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2545 ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2546 ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2547 ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2548 ASMJIT_INST_3x(vfmadd213pd, Vfmadd213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2549 ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2550 ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2551 ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2552 ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2553 ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2554 ASMJIT_INST_3x(vfmadd213ps, Vfmadd213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2555 ASMJIT_INST_3x(vfmadd213sd, Vfmadd213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2556 ASMJIT_INST_3x(vfmadd213sd, Vfmadd213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2557 ASMJIT_INST_3x(vfmadd213ss, Vfmadd213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2558 ASMJIT_INST_3x(vfmadd213ss, Vfmadd213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2559 ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2560 ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2561 ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2562 ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2563 ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2564 ASMJIT_INST_3x(vfmadd231pd, Vfmadd231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2565 ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2566 ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2567 ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2568 ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2569 ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2570 ASMJIT_INST_3x(vfmadd231ps, Vfmadd231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2571 ASMJIT_INST_3x(vfmadd231sd, Vfmadd231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2572 ASMJIT_INST_3x(vfmadd231sd, Vfmadd231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2573 ASMJIT_INST_3x(vfmadd231ss, Vfmadd231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2574 ASMJIT_INST_3x(vfmadd231ss, Vfmadd231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2575 ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2576 ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2577 ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2578 ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2579 ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2580 ASMJIT_INST_3x(vfmaddsub132pd, Vfmaddsub132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2581 ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2582 ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2583 ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2584 ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2585 ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2586 ASMJIT_INST_3x(vfmaddsub132ps, Vfmaddsub132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2587 ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2588 ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2589 ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2590 ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2591 ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2592 ASMJIT_INST_3x(vfmaddsub213pd, Vfmaddsub213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2593 ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2594 ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2595 ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2596 ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2597 ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2598 ASMJIT_INST_3x(vfmaddsub213ps, Vfmaddsub213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2599 ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2600 ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2601 ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2602 ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2603 ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2604 ASMJIT_INST_3x(vfmaddsub231pd, Vfmaddsub231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2605 ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2606 ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2607 ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2608 ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2609 ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2610 ASMJIT_INST_3x(vfmaddsub231ps, Vfmaddsub231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
// vfmsub{132,213,231}{pd,ps,sd,ss} — fused multiply-subtract (FMA3).
// Per the trailing annotations: packed XMM/YMM forms take AVX-512 masking and
// broadcast ({kz|b64}/{kz|b32}) under VL; ZMM forms add embedded rounding {er};
// scalar sd/ss forms are {kz|er}. The digits encode operand roles (e.g. 132 =
// dst*src3 - src2) — see Intel SDM for the exact formula per variant.
2611 ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2612 ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2613 ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2614 ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2615 ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2616 ASMJIT_INST_3x(vfmsub132pd, Vfmsub132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2617 ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2618 ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2619 ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2620 ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2621 ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2622 ASMJIT_INST_3x(vfmsub132ps, Vfmsub132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2623 ASMJIT_INST_3x(vfmsub132sd, Vfmsub132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2624 ASMJIT_INST_3x(vfmsub132sd, Vfmsub132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2625 ASMJIT_INST_3x(vfmsub132ss, Vfmsub132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2626 ASMJIT_INST_3x(vfmsub132ss, Vfmsub132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2627 ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2628 ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2629 ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2630 ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2631 ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2632 ASMJIT_INST_3x(vfmsub213pd, Vfmsub213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2633 ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2634 ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2635 ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2636 ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2637 ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2638 ASMJIT_INST_3x(vfmsub213ps, Vfmsub213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2639 ASMJIT_INST_3x(vfmsub213sd, Vfmsub213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2640 ASMJIT_INST_3x(vfmsub213sd, Vfmsub213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2641 ASMJIT_INST_3x(vfmsub213ss, Vfmsub213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2642 ASMJIT_INST_3x(vfmsub213ss, Vfmsub213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2643 ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2644 ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2645 ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2646 ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2647 ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2648 ASMJIT_INST_3x(vfmsub231pd, Vfmsub231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2649 ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2650 ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2651 ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2652 ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2653 ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2654 ASMJIT_INST_3x(vfmsub231ps, Vfmsub231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2655 ASMJIT_INST_3x(vfmsub231sd, Vfmsub231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2656 ASMJIT_INST_3x(vfmsub231sd, Vfmsub231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2657 ASMJIT_INST_3x(vfmsub231ss, Vfmsub231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2658 ASMJIT_INST_3x(vfmsub231ss, Vfmsub231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
// vfmsubadd{132,213,231}{pd,ps} — fused multiply with alternating
// subtract/add across lanes (FMA3). Packed-only; no scalar forms exist.
// XMM/YMM forms are AVX-512 VL with masking/broadcast; ZMM forms add {er}.
2659 ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2660 ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2661 ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2662 ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2663 ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2664 ASMJIT_INST_3x(vfmsubadd132pd, Vfmsubadd132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2665 ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2666 ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2667 ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2668 ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2669 ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2670 ASMJIT_INST_3x(vfmsubadd132ps, Vfmsubadd132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2671 ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2672 ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2673 ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2674 ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2675 ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2676 ASMJIT_INST_3x(vfmsubadd213pd, Vfmsubadd213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2677 ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2678 ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2679 ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2680 ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2681 ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2682 ASMJIT_INST_3x(vfmsubadd213ps, Vfmsubadd213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2683 ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2684 ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2685 ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2686 ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2687 ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2688 ASMJIT_INST_3x(vfmsubadd231pd, Vfmsubadd231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2689 ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2690 ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2691 ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2692 ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2693 ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2694 ASMJIT_INST_3x(vfmsubadd231ps, Vfmsubadd231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
// vfnmadd{132,213,231}{pd,ps,sd,ss} — fused negated multiply-add (FMA3):
// the product is negated before the add. Same operand-form layout as the
// vfmsub family above: VL packed forms, {er}-capable ZMM forms, scalar sd/ss.
2695 ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2696 ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2697 ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2698 ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2699 ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2700 ASMJIT_INST_3x(vfnmadd132pd, Vfnmadd132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2701 ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2702 ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2703 ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2704 ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2705 ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2706 ASMJIT_INST_3x(vfnmadd132ps, Vfnmadd132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2707 ASMJIT_INST_3x(vfnmadd132sd, Vfnmadd132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2708 ASMJIT_INST_3x(vfnmadd132sd, Vfnmadd132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2709 ASMJIT_INST_3x(vfnmadd132ss, Vfnmadd132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2710 ASMJIT_INST_3x(vfnmadd132ss, Vfnmadd132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2711 ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2712 ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2713 ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2714 ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2715 ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2716 ASMJIT_INST_3x(vfnmadd213pd, Vfnmadd213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2717 ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2718 ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2719 ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2720 ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2721 ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2722 ASMJIT_INST_3x(vfnmadd213ps, Vfnmadd213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2723 ASMJIT_INST_3x(vfnmadd213sd, Vfnmadd213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2724 ASMJIT_INST_3x(vfnmadd213sd, Vfnmadd213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2725 ASMJIT_INST_3x(vfnmadd213ss, Vfnmadd213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2726 ASMJIT_INST_3x(vfnmadd213ss, Vfnmadd213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2727 ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2728 ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2729 ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2730 ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2731 ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2732 ASMJIT_INST_3x(vfnmadd231pd, Vfnmadd231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2733 ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2734 ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2735 ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2736 ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2737 ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2738 ASMJIT_INST_3x(vfnmadd231ps, Vfnmadd231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2739 ASMJIT_INST_3x(vfnmadd231sd, Vfnmadd231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2740 ASMJIT_INST_3x(vfnmadd231sd, Vfnmadd231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2741 ASMJIT_INST_3x(vfnmadd231ss, Vfnmadd231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2742 ASMJIT_INST_3x(vfnmadd231ss, Vfnmadd231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
// vfnmsub{132,213,231}{pd,ps,sd,ss} — fused negated multiply-subtract
// (FMA3): negated product minus addend. Operand layout mirrors vfnmadd above.
2743 ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2744 ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2745 ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2746 ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2747 ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2748 ASMJIT_INST_3x(vfnmsub132pd, Vfnmsub132pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2749 ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2750 ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2751 ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2752 ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2753 ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2754 ASMJIT_INST_3x(vfnmsub132ps, Vfnmsub132ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2755 ASMJIT_INST_3x(vfnmsub132sd, Vfnmsub132sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2756 ASMJIT_INST_3x(vfnmsub132sd, Vfnmsub132sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2757 ASMJIT_INST_3x(vfnmsub132ss, Vfnmsub132ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2758 ASMJIT_INST_3x(vfnmsub132ss, Vfnmsub132ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2759 ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2760 ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2761 ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2762 ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2763 ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2764 ASMJIT_INST_3x(vfnmsub213pd, Vfnmsub213pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2765 ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2766 ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2767 ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2768 ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2769 ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2770 ASMJIT_INST_3x(vfnmsub213ps, Vfnmsub213ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2771 ASMJIT_INST_3x(vfnmsub213sd, Vfnmsub213sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2772 ASMJIT_INST_3x(vfnmsub213sd, Vfnmsub213sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2773 ASMJIT_INST_3x(vfnmsub213ss, Vfnmsub213ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2774 ASMJIT_INST_3x(vfnmsub213ss, Vfnmsub213ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2775 ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b64}-VL
2776 ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b64}-VL
2777 ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b64}-VL
2778 ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b64}-VL
2779 ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b64}
2780 ASMJIT_INST_3x(vfnmsub231pd, Vfnmsub231pd, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b64}
2781 ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|b32}-VL
2782 ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Xmm, Xmm, Mem) // FMA AVX512_F{kz|b32}-VL
2783 ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Ymm, Ymm, Ymm) // FMA AVX512_F{kz|b32}-VL
2784 ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Ymm, Ymm, Mem) // FMA AVX512_F{kz|b32}-VL
2785 ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Zmm, Zmm, Zmm) // FMA AVX512_F{kz|er|b32}
2786 ASMJIT_INST_3x(vfnmsub231ps, Vfnmsub231ps, Zmm, Zmm, Mem) // FMA AVX512_F{kz|er|b32}
2787 ASMJIT_INST_3x(vfnmsub231sd, Vfnmsub231sd, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2788 ASMJIT_INST_3x(vfnmsub231sd, Vfnmsub231sd, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
2789 ASMJIT_INST_3x(vfnmsub231ss, Vfnmsub231ss, Xmm, Xmm, Xmm) // FMA AVX512_F{kz|er}
2790 ASMJIT_INST_3x(vfnmsub231ss, Vfnmsub231ss, Xmm, Xmm, Mem) // FMA AVX512_F{kz|er}
// vfpclass{pd,ps,sd,ss} — classify FP elements against the category set in
// the Imm operand, writing a mask register (KReg). AVX512-DQ only. Note the
// Mem forms carry two annotations: the memory operand size alone selects the
// vector length, so one Mem overload serves both the VL and 512-bit encodings.
2791 ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Xmm, Imm) // AVX512_DQ{k|b64}-VL
2792 ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Mem, Imm) // AVX512_DQ{k|b64} AVX512_DQ{k|b64}-VL
2793 ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Ymm, Imm) // AVX512_DQ{k|b64}-VL
2794 ASMJIT_INST_3i(vfpclasspd, Vfpclasspd, KReg, Zmm, Imm) // AVX512_DQ{k|b64}
2795 ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Xmm, Imm) // AVX512_DQ{k|b32}-VL
2796 ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Mem, Imm) // AVX512_DQ{k|b32} AVX512_DQ{k|b32}-VL
2797 ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Ymm, Imm) // AVX512_DQ{k|b32}-VL
2798 ASMJIT_INST_3i(vfpclassps, Vfpclassps, KReg, Zmm, Imm) // AVX512_DQ{k|b32}
2799 ASMJIT_INST_3i(vfpclasssd, Vfpclasssd, KReg, Xmm, Imm) // AVX512_DQ{k}
2800 ASMJIT_INST_3i(vfpclasssd, Vfpclasssd, KReg, Mem, Imm) // AVX512_DQ{k}
2801 ASMJIT_INST_3i(vfpclassss, Vfpclassss, KReg, Xmm, Imm) // AVX512_DQ{k}
2802 ASMJIT_INST_3i(vfpclassss, Vfpclassss, KReg, Mem, Imm) // AVX512_DQ{k}
// Gathers. The AVX2 3-operand forms take an explicit vector mask as the third
// operand (clobbered by the instruction); the AVX-512 2-operand forms use a
// {k} opmask selected via the memory operand's VSIB addressing instead.
// vgatherpf* are AVX512-PF sparse-prefetch hints (hint 0 = T0, hint 1 = T1).
2803 ASMJIT_INST_3x(vgatherdpd, Vgatherdpd, Xmm, Mem, Xmm) // AVX2
2804 ASMJIT_INST_3x(vgatherdpd, Vgatherdpd, Ymm, Mem, Ymm) // AVX2
2805 ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Xmm, Mem) // AVX512_F{k}-VL
2806 ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Ymm, Mem) // AVX512_F{k}-VL
2807 ASMJIT_INST_2x(vgatherdpd, Vgatherdpd, Zmm, Mem) // AVX512_F{k}
2808 ASMJIT_INST_3x(vgatherdps, Vgatherdps, Xmm, Mem, Xmm) // AVX2
2809 ASMJIT_INST_3x(vgatherdps, Vgatherdps, Ymm, Mem, Ymm) // AVX2
2810 ASMJIT_INST_2x(vgatherdps, Vgatherdps, Xmm, Mem) // AVX512_F{k}-VL
2811 ASMJIT_INST_2x(vgatherdps, Vgatherdps, Ymm, Mem) // AVX512_F{k}-VL
2812 ASMJIT_INST_2x(vgatherdps, Vgatherdps, Zmm, Mem) // AVX512_F{k}
2813 ASMJIT_INST_1x(vgatherpf0dpd, Vgatherpf0dpd, Mem) // AVX512_PF{k}
2814 ASMJIT_INST_1x(vgatherpf0dps, Vgatherpf0dps, Mem) // AVX512_PF{k}
2815 ASMJIT_INST_1x(vgatherpf0qpd, Vgatherpf0qpd, Mem) // AVX512_PF{k}
2816 ASMJIT_INST_1x(vgatherpf0qps, Vgatherpf0qps, Mem) // AVX512_PF{k}
2817 ASMJIT_INST_1x(vgatherpf1dpd, Vgatherpf1dpd, Mem) // AVX512_PF{k}
2818 ASMJIT_INST_1x(vgatherpf1dps, Vgatherpf1dps, Mem) // AVX512_PF{k}
2819 ASMJIT_INST_1x(vgatherpf1qpd, Vgatherpf1qpd, Mem) // AVX512_PF{k}
2820 ASMJIT_INST_1x(vgatherpf1qps, Vgatherpf1qps, Mem) // AVX512_PF{k}
2821 ASMJIT_INST_3x(vgatherqpd, Vgatherqpd, Xmm, Mem, Xmm) // AVX2
2822 ASMJIT_INST_3x(vgatherqpd, Vgatherqpd, Ymm, Mem, Ymm) // AVX2
2823 ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Xmm, Mem) // AVX512_F{k}-VL
2824 ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Ymm, Mem) // AVX512_F{k}-VL
2825 ASMJIT_INST_2x(vgatherqpd, Vgatherqpd, Zmm, Mem) // AVX512_F{k}
2826 ASMJIT_INST_3x(vgatherqps, Vgatherqps, Xmm, Mem, Xmm) // AVX2
2827 ASMJIT_INST_2x(vgatherqps, Vgatherqps, Xmm, Mem) // AVX512_F{k}-VL
2828 ASMJIT_INST_2x(vgatherqps, Vgatherqps, Ymm, Mem) // AVX512_F{k}-VL
2829 ASMJIT_INST_2x(vgatherqps, Vgatherqps, Zmm, Mem) // AVX512_F{k}
// vgetexp* extracts the biased exponent of each FP element; vgetmant*
// extracts the mantissa normalized per the Imm control. AVX512-F; full-width
// forms take {sae} (suppress-all-exceptions) instead of embedded rounding.
2830 ASMJIT_INST_2x(vgetexppd, Vgetexppd, Xmm, Xmm) // AVX512_F{kz|b64}-VL
2831 ASMJIT_INST_2x(vgetexppd, Vgetexppd, Xmm, Mem) // AVX512_F{kz|b64}-VL
2832 ASMJIT_INST_2x(vgetexppd, Vgetexppd, Ymm, Ymm) // AVX512_F{kz|b64}-VL
2833 ASMJIT_INST_2x(vgetexppd, Vgetexppd, Ymm, Mem) // AVX512_F{kz|b64}-VL
2834 ASMJIT_INST_2x(vgetexppd, Vgetexppd, Zmm, Zmm) // AVX512_F{kz|sae|b64}
2835 ASMJIT_INST_2x(vgetexppd, Vgetexppd, Zmm, Mem) // AVX512_F{kz|sae|b64}
2836 ASMJIT_INST_2x(vgetexpps, Vgetexpps, Xmm, Xmm) // AVX512_F{kz|b32}-VL
2837 ASMJIT_INST_2x(vgetexpps, Vgetexpps, Xmm, Mem) // AVX512_F{kz|b32}-VL
2838 ASMJIT_INST_2x(vgetexpps, Vgetexpps, Ymm, Ymm) // AVX512_F{kz|b32}-VL
2839 ASMJIT_INST_2x(vgetexpps, Vgetexpps, Ymm, Mem) // AVX512_F{kz|b32}-VL
2840 ASMJIT_INST_2x(vgetexpps, Vgetexpps, Zmm, Zmm) // AVX512_F{kz|sae|b32}
2841 ASMJIT_INST_2x(vgetexpps, Vgetexpps, Zmm, Mem) // AVX512_F{kz|sae|b32}
2842 ASMJIT_INST_3x(vgetexpsd, Vgetexpsd, Xmm, Xmm, Xmm) // AVX512_F{kz|sae}
2843 ASMJIT_INST_3x(vgetexpsd, Vgetexpsd, Xmm, Xmm, Mem) // AVX512_F{kz|sae}
2844 ASMJIT_INST_3x(vgetexpss, Vgetexpss, Xmm, Xmm, Xmm) // AVX512_F{kz|sae}
2845 ASMJIT_INST_3x(vgetexpss, Vgetexpss, Xmm, Xmm, Mem) // AVX512_F{kz|sae}
2846 ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
2847 ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
2848 ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
2849 ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
2850 ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64}
2851 ASMJIT_INST_3i(vgetmantpd, Vgetmantpd, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64}
2852 ASMJIT_INST_3i(vgetmantps, Vgetmantps, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
2853 ASMJIT_INST_3i(vgetmantps, Vgetmantps, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
2854 ASMJIT_INST_3i(vgetmantps, Vgetmantps, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
2855 ASMJIT_INST_3i(vgetmantps, Vgetmantps, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
2856 ASMJIT_INST_3i(vgetmantps, Vgetmantps, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32}
2857 ASMJIT_INST_3i(vgetmantps, Vgetmantps, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32}
2858 ASMJIT_INST_4i(vgetmantsd, Vgetmantsd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
2859 ASMJIT_INST_4i(vgetmantsd, Vgetmantsd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae}
2860 ASMJIT_INST_4i(vgetmantss, Vgetmantss, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
2861 ASMJIT_INST_4i(vgetmantss, Vgetmantss, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae}
// GFNI (Galois Field New Instructions): affine transform (optionally with
// inverse) and GF(2^8) multiply. XMM/YMM forms are AVX-encoded, optionally
// EVEX {kz} under AVX512-VL; ZMM forms require AVX512-VL + GFNI.
2862 ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Xmm,Xmm,Xmm,Imm) // AVX AVX512_VL{kz} GFNI
2863 ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Xmm,Xmm,Mem,Imm) // AVX AVX512_VL{kz} GFNI
2864 ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Ymm,Ymm,Ymm,Imm) // AVX AVX512_VL{kz} GFNI
2865 ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Ymm,Ymm,Mem,Imm) // AVX AVX512_VL{kz} GFNI
2866 ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Zmm,Zmm,Zmm,Imm) // AVX512_VL{kz} GFNI
2867 ASMJIT_INST_4i(vgf2p8affineinvqb, Vgf2p8affineinvqb,Zmm,Zmm,Mem,Imm) // AVX512_VL{kz} GFNI
2868 ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Xmm, Xmm, Xmm, Imm) // AVX AVX512_VL{kz} GFNI
2869 ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Xmm, Xmm, Mem, Imm) // AVX AVX512_VL{kz} GFNI
2870 ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Ymm, Ymm, Ymm, Imm) // AVX AVX512_VL{kz} GFNI
2871 ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Ymm, Ymm, Mem, Imm) // AVX AVX512_VL{kz} GFNI
2872 ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Zmm, Zmm, Zmm, Imm) // AVX512_VL{kz} GFNI
2873 ASMJIT_INST_4i(vgf2p8affineqb, Vgf2p8affineqb, Zmm, Zmm, Mem, Imm) // AVX512_VL{kz} GFNI
2874 ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Xmm, Xmm, Xmm) // AVX AVX512_VL{kz} GFNI
2875 ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Xmm, Xmm, Mem) // AVX AVX512_VL{kz} GFNI
2876 ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Ymm, Ymm, Ymm) // AVX AVX512_VL{kz} GFNI
2877 ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Ymm, Ymm, Mem) // AVX AVX512_VL{kz} GFNI
2878 ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Zmm, Zmm, Zmm) // AVX512_VL{kz} GFNI
2879 ASMJIT_INST_3x(vgf2p8mulb, Vgf2p8mulb, Zmm, Zmm, Mem) // AVX512_VL{kz} GFNI
// Horizontal add/subtract of adjacent element pairs. AVX-only — these have
// no EVEX/AVX-512 encodings, hence no XMM/YMM masking or ZMM forms.
2880 ASMJIT_INST_3x(vhaddpd, Vhaddpd, Xmm, Xmm, Xmm) // AVX
2881 ASMJIT_INST_3x(vhaddpd, Vhaddpd, Xmm, Xmm, Mem) // AVX
2882 ASMJIT_INST_3x(vhaddpd, Vhaddpd, Ymm, Ymm, Ymm) // AVX
2883 ASMJIT_INST_3x(vhaddpd, Vhaddpd, Ymm, Ymm, Mem) // AVX
2884 ASMJIT_INST_3x(vhaddps, Vhaddps, Xmm, Xmm, Xmm) // AVX
2885 ASMJIT_INST_3x(vhaddps, Vhaddps, Xmm, Xmm, Mem) // AVX
2886 ASMJIT_INST_3x(vhaddps, Vhaddps, Ymm, Ymm, Ymm) // AVX
2887 ASMJIT_INST_3x(vhaddps, Vhaddps, Ymm, Ymm, Mem) // AVX
2888 ASMJIT_INST_3x(vhsubpd, Vhsubpd, Xmm, Xmm, Xmm) // AVX
2889 ASMJIT_INST_3x(vhsubpd, Vhsubpd, Xmm, Xmm, Mem) // AVX
2890 ASMJIT_INST_3x(vhsubpd, Vhsubpd, Ymm, Ymm, Ymm) // AVX
2891 ASMJIT_INST_3x(vhsubpd, Vhsubpd, Ymm, Ymm, Mem) // AVX
2892 ASMJIT_INST_3x(vhsubps, Vhsubps, Xmm, Xmm, Xmm) // AVX
2893 ASMJIT_INST_3x(vhsubps, Vhsubps, Xmm, Xmm, Mem) // AVX
2894 ASMJIT_INST_3x(vhsubps, Vhsubps, Ymm, Ymm, Ymm) // AVX
2895 ASMJIT_INST_3x(vhsubps, Vhsubps, Ymm, Ymm, Mem) // AVX
// Lane inserts: place a 128/256-bit sub-vector (f = FP data, i = integer
// data) into a wider register at the lane selected by Imm. The NxM suffix
// (32x4, 64x2, ...) gives element width x element count of the inserted chunk
// and selects the masking granularity. vinsertps inserts a single float.
2896 ASMJIT_INST_4i(vinsertf128, Vinsertf128, Ymm, Ymm, Xmm, Imm) // AVX
2897 ASMJIT_INST_4i(vinsertf128, Vinsertf128, Ymm, Ymm, Mem, Imm) // AVX
2898 ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Ymm, Ymm, Xmm, Imm) // AVX512_F{kz}-VL
2899 ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz}-VL
2900 ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Zmm, Zmm, Xmm, Imm) // AVX512_F{kz}
2901 ASMJIT_INST_4i(vinsertf32x4, Vinsertf32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz}
2902 ASMJIT_INST_4i(vinsertf32x8, Vinsertf32x8, Zmm, Zmm, Ymm, Imm) // AVX512_DQ{kz}
2903 ASMJIT_INST_4i(vinsertf32x8, Vinsertf32x8, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz}
2904 ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Ymm, Ymm, Xmm, Imm) // AVX512_DQ{kz}-VL
2905 ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz}-VL
2906 ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Zmm, Zmm, Xmm, Imm) // AVX512_DQ{kz}
2907 ASMJIT_INST_4i(vinsertf64x2, Vinsertf64x2, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz}
2908 ASMJIT_INST_4i(vinsertf64x4, Vinsertf64x4, Zmm, Zmm, Ymm, Imm) // AVX512_F{kz}
2909 ASMJIT_INST_4i(vinsertf64x4, Vinsertf64x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz}
2910 ASMJIT_INST_4i(vinserti128, Vinserti128, Ymm, Ymm, Xmm, Imm) // AVX2
2911 ASMJIT_INST_4i(vinserti128, Vinserti128, Ymm, Ymm, Mem, Imm) // AVX2
2912 ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Ymm, Ymm, Xmm, Imm) // AVX512_F{kz}-VL
2913 ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz}-VL
2914 ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Zmm, Zmm, Xmm, Imm) // AVX512_F{kz}
2915 ASMJIT_INST_4i(vinserti32x4, Vinserti32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz}
2916 ASMJIT_INST_4i(vinserti32x8, Vinserti32x8, Zmm, Zmm, Ymm, Imm) // AVX512_DQ{kz}
2917 ASMJIT_INST_4i(vinserti32x8, Vinserti32x8, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz}
2918 ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Ymm, Ymm, Xmm, Imm) // AVX512_DQ{kz}-VL
2919 ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz}-VL
2920 ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Zmm, Zmm, Xmm, Imm) // AVX512_DQ{kz}
2921 ASMJIT_INST_4i(vinserti64x2, Vinserti64x2, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz}
2922 ASMJIT_INST_4i(vinserti64x4, Vinserti64x4, Zmm, Zmm, Ymm, Imm) // AVX512_F{kz}
2923 ASMJIT_INST_4i(vinserti64x4, Vinserti64x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz}
2924 ASMJIT_INST_4i(vinsertps, Vinsertps, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F
2925 ASMJIT_INST_4i(vinsertps, Vinsertps, Xmm, Xmm, Mem, Imm) // AVX AVX512_F
// Unaligned integer load (vlddqu), MXCSR load (vldmxcsr), and conditional
// masked moves. vmaskmovdqu's DS_ZDI operand is the implicit [ds:(e/r)di]
// destination — marked [EXPLICIT] because the caller must pass it explicitly
// even though the instruction encodes it implicitly.
2926 ASMJIT_INST_2x(vlddqu, Vlddqu, Xmm, Mem) // AVX
2927 ASMJIT_INST_2x(vlddqu, Vlddqu, Ymm, Mem) // AVX
2928 ASMJIT_INST_1x(vldmxcsr, Vldmxcsr, Mem) // AVX
2929 ASMJIT_INST_3x(vmaskmovdqu, Vmaskmovdqu, Xmm, Xmm, DS_ZDI) // AVX [EXPLICIT]
2930 ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Mem, Xmm, Xmm) // AVX
2931 ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Mem, Ymm, Ymm) // AVX
2932 ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Xmm, Xmm, Mem) // AVX
2933 ASMJIT_INST_3x(vmaskmovpd, Vmaskmovpd, Ymm, Ymm, Mem) // AVX
2934 ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Mem, Xmm, Xmm) // AVX
2935 ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Mem, Ymm, Ymm) // AVX
2936 ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Xmm, Xmm, Mem) // AVX
2937 ASMJIT_INST_3x(vmaskmovps, Vmaskmovps, Ymm, Ymm, Mem) // AVX
// Packed/scalar FP maximum and minimum. ZMM forms take {sae}; XMM/YMM packed
// forms are AVX-512 VL with masking/broadcast; sd/ss are the scalar forms.
2938 ASMJIT_INST_3x(vmaxpd, Vmaxpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
2939 ASMJIT_INST_3x(vmaxpd, Vmaxpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
2940 ASMJIT_INST_3x(vmaxpd, Vmaxpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
2941 ASMJIT_INST_3x(vmaxpd, Vmaxpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
2942 ASMJIT_INST_3x(vmaxpd, Vmaxpd, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b64}
2943 ASMJIT_INST_3x(vmaxpd, Vmaxpd, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b64}
2944 ASMJIT_INST_3x(vmaxps, Vmaxps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2945 ASMJIT_INST_3x(vmaxps, Vmaxps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2946 ASMJIT_INST_3x(vmaxps, Vmaxps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
2947 ASMJIT_INST_3x(vmaxps, Vmaxps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2948 ASMJIT_INST_3x(vmaxps, Vmaxps, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b32}
2949 ASMJIT_INST_3x(vmaxps, Vmaxps, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b32}
2950 ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL
2951 ASMJIT_INST_3x(vmaxsd, Vmaxsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL
2952 ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL
2953 ASMJIT_INST_3x(vmaxss, Vmaxss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL
2954 ASMJIT_INST_3x(vminpd, Vminpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
2955 ASMJIT_INST_3x(vminpd, Vminpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
2956 ASMJIT_INST_3x(vminpd, Vminpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
2957 ASMJIT_INST_3x(vminpd, Vminpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
2958 ASMJIT_INST_3x(vminpd, Vminpd, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b64}
2959 ASMJIT_INST_3x(vminpd, Vminpd, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b64}
2960 ASMJIT_INST_3x(vminps, Vminps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
2961 ASMJIT_INST_3x(vminps, Vminps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
2962 ASMJIT_INST_3x(vminps, Vminps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
2963 ASMJIT_INST_3x(vminps, Vminps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
2964 ASMJIT_INST_3x(vminps, Vminps, Zmm, Zmm, Zmm) // AVX512_F{kz|sae|b32}
2965 ASMJIT_INST_3x(vminps, Vminps, Zmm, Zmm, Mem) // AVX512_F{kz|sae|b32}
2966 ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL
2967 ASMJIT_INST_3x(vminsd, Vminsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL
2968 ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|sae}-VL
2969 ASMJIT_INST_3x(vminss, Vminss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|sae}-VL
// Vector moves. vmovapd/vmovaps require aligned memory; vmovdqu* are
// unaligned. Plain vmovdqa/vmovdqu are AVX-only; the element-sized variants
// (dqa32/dqa64, dqu8/16/32/64) are their EVEX counterparts, where the element
// width selects the {kz} masking granularity (8/16-bit widths need AVX512-BW).
// Reg<-Reg, Reg<-Mem (load) and Mem<-Reg (store) forms are declared per width.
2970 ASMJIT_INST_2x(vmovapd, Vmovapd, Xmm, Xmm) // AVX AVX512_F{kz}-VL
2971 ASMJIT_INST_2x(vmovapd, Vmovapd, Xmm, Mem) // AVX AVX512_F{kz}-VL
2972 ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Xmm) // AVX AVX512_F{kz}-VL
2973 ASMJIT_INST_2x(vmovapd, Vmovapd, Ymm, Ymm) // AVX AVX512_F{kz}-VL
2974 ASMJIT_INST_2x(vmovapd, Vmovapd, Ymm, Mem) // AVX AVX512_F{kz}-VL
2975 ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Ymm) // AVX AVX512_F{kz}-VL
2976 ASMJIT_INST_2x(vmovapd, Vmovapd, Zmm, Zmm) // AVX512_F{kz}
2977 ASMJIT_INST_2x(vmovapd, Vmovapd, Zmm, Mem) // AVX512_F{kz}
2978 ASMJIT_INST_2x(vmovapd, Vmovapd, Mem, Zmm) // AVX512_F{kz}
2979 ASMJIT_INST_2x(vmovaps, Vmovaps, Xmm, Xmm) // AVX AVX512_F{kz}-VL
2980 ASMJIT_INST_2x(vmovaps, Vmovaps, Xmm, Mem) // AVX AVX512_F{kz}-VL
2981 ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Xmm) // AVX AVX512_F{kz}-VL
2982 ASMJIT_INST_2x(vmovaps, Vmovaps, Ymm, Ymm) // AVX AVX512_F{kz}-VL
2983 ASMJIT_INST_2x(vmovaps, Vmovaps, Ymm, Mem) // AVX AVX512_F{kz}-VL
2984 ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Ymm) // AVX AVX512_F{kz}-VL
2985 ASMJIT_INST_2x(vmovaps, Vmovaps, Zmm, Zmm) // AVX512_F{kz}
2986 ASMJIT_INST_2x(vmovaps, Vmovaps, Zmm, Mem) // AVX512_F{kz}
2987 ASMJIT_INST_2x(vmovaps, Vmovaps, Mem, Zmm) // AVX512_F{kz}
2988 ASMJIT_INST_2x(vmovd, Vmovd, Gp, Xmm) // AVX AVX512_F
2989 ASMJIT_INST_2x(vmovd, Vmovd, Mem, Xmm) // AVX AVX512_F
2990 ASMJIT_INST_2x(vmovd, Vmovd, Xmm, Gp) // AVX AVX512_F
2991 ASMJIT_INST_2x(vmovd, Vmovd, Xmm, Mem) // AVX AVX512_F
2992 ASMJIT_INST_2x(vmovddup, Vmovddup, Xmm, Xmm) // AVX AVX512_F{kz}-VL
2993 ASMJIT_INST_2x(vmovddup, Vmovddup, Xmm, Mem) // AVX AVX512_F{kz}-VL
2994 ASMJIT_INST_2x(vmovddup, Vmovddup, Ymm, Ymm) // AVX AVX512_F{kz}-VL
2995 ASMJIT_INST_2x(vmovddup, Vmovddup, Ymm, Mem) // AVX AVX512_F{kz}-VL
2996 ASMJIT_INST_2x(vmovddup, Vmovddup, Zmm, Zmm) // AVX512_F{kz}
2997 ASMJIT_INST_2x(vmovddup, Vmovddup, Zmm, Mem) // AVX512_F{kz}
2998 ASMJIT_INST_2x(vmovdqa, Vmovdqa, Xmm, Xmm) // AVX
2999 ASMJIT_INST_2x(vmovdqa, Vmovdqa, Xmm, Mem) // AVX
3000 ASMJIT_INST_2x(vmovdqa, Vmovdqa, Mem, Xmm) // AVX
3001 ASMJIT_INST_2x(vmovdqa, Vmovdqa, Ymm, Ymm) // AVX
3002 ASMJIT_INST_2x(vmovdqa, Vmovdqa, Ymm, Mem) // AVX
3003 ASMJIT_INST_2x(vmovdqa, Vmovdqa, Mem, Ymm) // AVX
3004 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Xmm, Xmm) // AVX512_F{kz}-VL
3005 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Xmm, Mem) // AVX512_F{kz}-VL
3006 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Xmm) // AVX512_F{kz}-VL
3007 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Ymm, Ymm) // AVX512_F{kz}-VL
3008 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Ymm, Mem) // AVX512_F{kz}-VL
3009 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Ymm) // AVX512_F{kz}-VL
3010 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Zmm, Zmm) // AVX512_F{kz}
3011 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Zmm, Mem) // AVX512_F{kz}
3012 ASMJIT_INST_2x(vmovdqa32, Vmovdqa32, Mem, Zmm) // AVX512_F{kz}
3013 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Xmm, Xmm) // AVX512_F{kz}-VL
3014 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Xmm, Mem) // AVX512_F{kz}-VL
3015 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Xmm) // AVX512_F{kz}-VL
3016 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Ymm, Ymm) // AVX512_F{kz}-VL
3017 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Ymm, Mem) // AVX512_F{kz}-VL
3018 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Ymm) // AVX512_F{kz}-VL
3019 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Zmm, Zmm) // AVX512_F{kz}
3020 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Zmm, Mem) // AVX512_F{kz}
3021 ASMJIT_INST_2x(vmovdqa64, Vmovdqa64, Mem, Zmm) // AVX512_F{kz}
3022 ASMJIT_INST_2x(vmovdqu, Vmovdqu, Xmm, Xmm) // AVX
3023 ASMJIT_INST_2x(vmovdqu, Vmovdqu, Xmm, Mem) // AVX
3024 ASMJIT_INST_2x(vmovdqu, Vmovdqu, Mem, Xmm) // AVX
3025 ASMJIT_INST_2x(vmovdqu, Vmovdqu, Ymm, Ymm) // AVX
3026 ASMJIT_INST_2x(vmovdqu, Vmovdqu, Ymm, Mem) // AVX
3027 ASMJIT_INST_2x(vmovdqu, Vmovdqu, Mem, Ymm) // AVX
3028 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Xmm, Xmm) // AVX512_BW{kz}-VL
3029 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Xmm, Mem) // AVX512_BW{kz}-VL
3030 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Xmm) // AVX512_BW{kz}-VL
3031 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Ymm, Ymm) // AVX512_BW{kz}-VL
3032 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Ymm, Mem) // AVX512_BW{kz}-VL
3033 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Ymm) // AVX512_BW{kz}-VL
3034 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Zmm, Zmm) // AVX512_BW{kz}
3035 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Zmm, Mem) // AVX512_BW{kz}
3036 ASMJIT_INST_2x(vmovdqu16, Vmovdqu16, Mem, Zmm) // AVX512_BW{kz}
3037 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Xmm, Xmm) // AVX512_F{kz}-VL
3038 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Xmm, Mem) // AVX512_F{kz}-VL
3039 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Xmm) // AVX512_F{kz}-VL
3040 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Ymm, Ymm) // AVX512_F{kz}-VL
3041 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Ymm, Mem) // AVX512_F{kz}-VL
3042 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Ymm) // AVX512_F{kz}-VL
3043 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Zmm, Zmm) // AVX512_F{kz}
3044 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Zmm, Mem) // AVX512_F{kz}
3045 ASMJIT_INST_2x(vmovdqu32, Vmovdqu32, Mem, Zmm) // AVX512_F{kz}
3046 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Xmm, Xmm) // AVX512_F{kz}-VL
3047 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Xmm, Mem) // AVX512_F{kz}-VL
3048 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Xmm) // AVX512_F{kz}-VL
3049 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Ymm, Ymm) // AVX512_F{kz}-VL
3050 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Ymm, Mem) // AVX512_F{kz}-VL
3051 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Ymm) // AVX512_F{kz}-VL
3052 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Zmm, Zmm) // AVX512_F{kz}
3053 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Zmm, Mem) // AVX512_F{kz}
3054 ASMJIT_INST_2x(vmovdqu64, Vmovdqu64, Mem, Zmm) // AVX512_F{kz}
3055 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Xmm, Xmm) // AVX512_BW{kz}-VL
3056 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Xmm, Mem) // AVX512_BW{kz}-VL
3057 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Xmm) // AVX512_BW{kz}-VL
3058 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Ymm, Ymm) // AVX512_BW{kz}-VL
3059 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Ymm, Mem) // AVX512_BW{kz}-VL
3060 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Ymm) // AVX512_BW{kz}-VL
3061 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Zmm, Zmm) // AVX512_BW{kz}
3062 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Zmm, Mem) // AVX512_BW{kz}
3063 ASMJIT_INST_2x(vmovdqu8, Vmovdqu8, Mem, Zmm) // AVX512_BW{kz}
3064 ASMJIT_INST_3x(vmovhlps, Vmovhlps, Xmm, Xmm, Xmm) // AVX AVX512_F
3065 ASMJIT_INST_2x(vmovhpd, Vmovhpd, Mem, Xmm) // AVX AVX512_F
3066 ASMJIT_INST_3x(vmovhpd, Vmovhpd, Xmm, Xmm, Mem) // AVX AVX512_F
3067 ASMJIT_INST_2x(vmovhps, Vmovhps, Mem, Xmm) // AVX AVX512_F
3068 ASMJIT_INST_3x(vmovhps, Vmovhps, Xmm, Xmm, Mem) // AVX AVX512_F
3069 ASMJIT_INST_3x(vmovlhps, Vmovlhps, Xmm, Xmm, Xmm) // AVX AVX512_F
3070 ASMJIT_INST_2x(vmovlpd, Vmovlpd, Mem, Xmm) // AVX AVX512_F
3071 ASMJIT_INST_3x(vmovlpd, Vmovlpd, Xmm, Xmm, Mem) // AVX AVX512_F
3072 ASMJIT_INST_2x(vmovlps, Vmovlps, Mem, Xmm) // AVX AVX512_F
3073 ASMJIT_INST_3x(vmovlps, Vmovlps, Xmm, Xmm, Mem) // AVX AVX512_F
3074 ASMJIT_INST_2x(vmovmskpd, Vmovmskpd, Gp, Xmm) // AVX
3075 ASMJIT_INST_2x(vmovmskpd, Vmovmskpd, Gp, Ymm) // AVX
3076 ASMJIT_INST_2x(vmovmskps, Vmovmskps, Gp, Xmm) // AVX
3077 ASMJIT_INST_2x(vmovmskps, Vmovmskps, Gp, Ymm) // AVX
3078 ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Xmm) // AVX AVX512_F-VL
3079 ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Ymm) // AVX AVX512_F-VL
3080 ASMJIT_INST_2x(vmovntdq, Vmovntdq, Mem, Zmm) // AVX512_F
3081 ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Xmm, Mem) // AVX AVX512_F-VL
3082 ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Ymm, Mem) // AVX2 AVX512_F-VL
3083 ASMJIT_INST_2x(vmovntdqa, Vmovntdqa, Zmm, Mem) // AVX512_F
3084 ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Xmm) // AVX AVX512_F-VL
3085 ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Ymm) // AVX AVX512_F-VL
3086 ASMJIT_INST_2x(vmovntpd, Vmovntpd, Mem, Zmm) // AVX512_F
3087 ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Xmm) // AVX AVX512_F-VL
3088 ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Ymm) // AVX AVX512_F-VL
3089 ASMJIT_INST_2x(vmovntps, Vmovntps, Mem, Zmm) // AVX512_F
3090 ASMJIT_INST_2x(vmovq, Vmovq, Gp, Xmm) // AVX AVX512_F
3091 ASMJIT_INST_2x(vmovq, Vmovq, Mem, Xmm) // AVX AVX512_F
3092 ASMJIT_INST_2x(vmovq, Vmovq, Xmm, Mem) // AVX AVX512_F
3093 ASMJIT_INST_2x(vmovq, Vmovq, Xmm, Gp) // AVX AVX512_F
3094 ASMJIT_INST_2x(vmovq, Vmovq, Xmm, Xmm) // AVX AVX512_F
3095 ASMJIT_INST_2x(vmovsd, Vmovsd, Mem, Xmm) // AVX AVX512_F
3096 ASMJIT_INST_2x(vmovsd, Vmovsd, Xmm, Mem) // AVX AVX512_F{kz}
3097 ASMJIT_INST_3x(vmovsd, Vmovsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}
3098 ASMJIT_INST_2x(vmovshdup, Vmovshdup, Xmm, Xmm) // AVX AVX512_F{kz}-VL
3099 ASMJIT_INST_2x(vmovshdup, Vmovshdup, Xmm, Mem) // AVX AVX512_F{kz}-VL
3100 ASMJIT_INST_2x(vmovshdup, Vmovshdup, Ymm, Ymm) // AVX AVX512_F{kz}-VL
3101 ASMJIT_INST_2x(vmovshdup, Vmovshdup, Ymm, Mem) // AVX AVX512_F{kz}-VL
3102 ASMJIT_INST_2x(vmovshdup, Vmovshdup, Zmm, Zmm) // AVX512_F{kz}
3103 ASMJIT_INST_2x(vmovshdup, Vmovshdup, Zmm, Mem) // AVX512_F{kz}
3104 ASMJIT_INST_2x(vmovsldup, Vmovsldup, Xmm, Xmm) // AVX AVX512_F{kz}-VL
3105 ASMJIT_INST_2x(vmovsldup, Vmovsldup, Xmm, Mem) // AVX AVX512_F{kz}-VL
3106 ASMJIT_INST_2x(vmovsldup, Vmovsldup, Ymm, Ymm) // AVX AVX512_F{kz}-VL
3107 ASMJIT_INST_2x(vmovsldup, Vmovsldup, Ymm, Mem) // AVX AVX512_F{kz}-VL
3108 ASMJIT_INST_2x(vmovsldup, Vmovsldup, Zmm, Zmm) // AVX512_F{kz}
3109 ASMJIT_INST_2x(vmovsldup, Vmovsldup, Zmm, Mem) // AVX512_F{kz}
3110 ASMJIT_INST_2x(vmovss, Vmovss, Mem, Xmm) // AVX AVX512_F
3111 ASMJIT_INST_2x(vmovss, Vmovss, Xmm, Mem) // AVX AVX512_F{kz}
3112 ASMJIT_INST_3x(vmovss, Vmovss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}
3113 ASMJIT_INST_2x(vmovupd, Vmovupd, Xmm, Xmm) // AVX AVX512_F{kz}-VL
3114 ASMJIT_INST_2x(vmovupd, Vmovupd, Xmm, Mem) // AVX AVX512_F{kz}-VL
3115 ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Xmm) // AVX AVX512_F{kz}-VL
3116 ASMJIT_INST_2x(vmovupd, Vmovupd, Ymm, Ymm) // AVX AVX512_F{kz}-VL
3117 ASMJIT_INST_2x(vmovupd, Vmovupd, Ymm, Mem) // AVX AVX512_F{kz}-VL
3118 ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Ymm) // AVX AVX512_F{kz}-VL
3119 ASMJIT_INST_2x(vmovupd, Vmovupd, Zmm, Zmm) // AVX512_F{kz}
3120 ASMJIT_INST_2x(vmovupd, Vmovupd, Zmm, Mem) // AVX512_F{kz}
3121 ASMJIT_INST_2x(vmovupd, Vmovupd, Mem, Zmm) // AVX512_F{kz}
3122 ASMJIT_INST_2x(vmovups, Vmovups, Xmm, Xmm) // AVX AVX512_F{kz}-VL
3123 ASMJIT_INST_2x(vmovups, Vmovups, Xmm, Mem) // AVX AVX512_F{kz}-VL
3124 ASMJIT_INST_2x(vmovups, Vmovups, Mem, Xmm) // AVX AVX512_F{kz}-VL
3125 ASMJIT_INST_2x(vmovups, Vmovups, Ymm, Ymm) // AVX AVX512_F{kz}-VL
3126 ASMJIT_INST_2x(vmovups, Vmovups, Ymm, Mem) // AVX AVX512_F{kz}-VL
3127 ASMJIT_INST_2x(vmovups, Vmovups, Mem, Ymm) // AVX AVX512_F{kz}-VL
3128 ASMJIT_INST_2x(vmovups, Vmovups, Zmm, Zmm) // AVX512_F{kz}
3129 ASMJIT_INST_2x(vmovups, Vmovups, Zmm, Mem) // AVX512_F{kz}
3130 ASMJIT_INST_2x(vmovups, Vmovups, Mem, Zmm) // AVX512_F{kz}
3131 ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Xmm, Xmm, Xmm, Imm) // AVX
3132 ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Xmm, Xmm, Mem, Imm) // AVX
3133 ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Ymm, Ymm, Ymm, Imm) // AVX2
3134 ASMJIT_INST_4i(vmpsadbw, Vmpsadbw, Ymm, Ymm, Mem, Imm) // AVX2
// --- Packed/scalar FP multiply (vmulpd/vmulps/vmulsd/vmulss) ---
// NOTE: the 256-bit (Ymm) forms of VMULPD/VMULPS are part of base AVX, not
// AVX2 — AVX2 only extends 256-bit *integer* operations (Intel SDM Vol. 2).
3135 ASMJIT_INST_3x(vmulpd, Vmulpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
3136 ASMJIT_INST_3x(vmulpd, Vmulpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
3137 ASMJIT_INST_3x(vmulpd, Vmulpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
3138 ASMJIT_INST_3x(vmulpd, Vmulpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
3139 ASMJIT_INST_3x(vmulpd, Vmulpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64}
3140 ASMJIT_INST_3x(vmulpd, Vmulpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64}
3141 ASMJIT_INST_3x(vmulps, Vmulps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3142 ASMJIT_INST_3x(vmulps, Vmulps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3143 ASMJIT_INST_3x(vmulps, Vmulps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
3144 ASMJIT_INST_3x(vmulps, Vmulps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
3145 ASMJIT_INST_3x(vmulps, Vmulps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32}
3146 ASMJIT_INST_3x(vmulps, Vmulps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32}
3147 ASMJIT_INST_3x(vmulsd, Vmulsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
3148 ASMJIT_INST_3x(vmulsd, Vmulsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
3149 ASMJIT_INST_3x(vmulss, Vmulss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
3150 ASMJIT_INST_3x(vmulss, Vmulss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
// --- Packed FP bitwise OR (vorpd/vorps) ---
// EVEX forms of VORPD come from AVX512_DQ while VORPS is in AVX512_F, as
// the trailing tags record.
3151 ASMJIT_INST_3x(vorpd, Vorpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL
3152 ASMJIT_INST_3x(vorpd, Vorpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL
3153 ASMJIT_INST_3x(vorpd, Vorpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL
3154 ASMJIT_INST_3x(vorpd, Vorpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL
3155 ASMJIT_INST_3x(vorpd, Vorpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64}
3156 ASMJIT_INST_3x(vorpd, Vorpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64}
3157 ASMJIT_INST_3x(vorps, Vorps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3158 ASMJIT_INST_3x(vorps, Vorps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3159 ASMJIT_INST_3x(vorps, Vorps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
3160 ASMJIT_INST_3x(vorps, Vorps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
3161 ASMJIT_INST_3x(vorps, Vorps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3162 ASMJIT_INST_3x(vorps, Vorps, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
// --- Packed integer abs/pack/add/align/and (vp4dpwssd .. vpandq) ---
// Overloads follow the usual pattern: VEX forms (AVX / AVX2 for 256-bit
// integer ops) plus EVEX forms with write-mask and, for d/q-granularity
// instructions, embedded-broadcast ({b32}/{b64}) support.
3163 ASMJIT_INST_6x(vp4dpwssd, Vp4dpwssd, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz}
3164 ASMJIT_INST_6x(vp4dpwssds, Vp4dpwssds, Zmm, Zmm, Zmm, Zmm, Zmm, Mem) // AVX512_4FMAPS{kz}
3165 ASMJIT_INST_2x(vpabsb, Vpabsb, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3166 ASMJIT_INST_2x(vpabsb, Vpabsb, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3167 ASMJIT_INST_2x(vpabsb, Vpabsb, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3168 ASMJIT_INST_2x(vpabsb, Vpabsb, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3169 ASMJIT_INST_2x(vpabsb, Vpabsb, Zmm, Zmm) // AVX512_BW{kz}
3170 ASMJIT_INST_2x(vpabsb, Vpabsb, Zmm, Mem) // AVX512_BW{kz}
3171 ASMJIT_INST_2x(vpabsd, Vpabsd, Xmm, Xmm) // AVX AVX512_F{kz}-VL
3172 ASMJIT_INST_2x(vpabsd, Vpabsd, Xmm, Mem) // AVX AVX512_F{kz}-VL
3173 ASMJIT_INST_2x(vpabsd, Vpabsd, Ymm, Ymm) // AVX2 AVX512_F{kz}-VL
3174 ASMJIT_INST_2x(vpabsd, Vpabsd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
3175 ASMJIT_INST_2x(vpabsd, Vpabsd, Zmm, Zmm) // AVX512_F{kz}
3176 ASMJIT_INST_2x(vpabsd, Vpabsd, Zmm, Mem) // AVX512_F{kz}
3177 ASMJIT_INST_2x(vpabsq, Vpabsq, Xmm, Xmm) // AVX512_F{kz}-VL
3178 ASMJIT_INST_2x(vpabsq, Vpabsq, Xmm, Mem) // AVX512_F{kz}-VL
3179 ASMJIT_INST_2x(vpabsq, Vpabsq, Ymm, Ymm) // AVX512_F{kz}-VL
3180 ASMJIT_INST_2x(vpabsq, Vpabsq, Ymm, Mem) // AVX512_F{kz}-VL
3181 ASMJIT_INST_2x(vpabsq, Vpabsq, Zmm, Zmm) // AVX512_F{kz}
3182 ASMJIT_INST_2x(vpabsq, Vpabsq, Zmm, Mem) // AVX512_F{kz}
3183 ASMJIT_INST_2x(vpabsw, Vpabsw, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3184 ASMJIT_INST_2x(vpabsw, Vpabsw, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3185 ASMJIT_INST_2x(vpabsw, Vpabsw, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3186 ASMJIT_INST_2x(vpabsw, Vpabsw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3187 ASMJIT_INST_2x(vpabsw, Vpabsw, Zmm, Zmm) // AVX512_BW{kz}
3188 ASMJIT_INST_2x(vpabsw, Vpabsw, Zmm, Mem) // AVX512_BW{kz}
3189 ASMJIT_INST_3x(vpackssdw, Vpackssdw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz|b32}-VL
3190 ASMJIT_INST_3x(vpackssdw, Vpackssdw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz|b32}-VL
3191 ASMJIT_INST_3x(vpackssdw, Vpackssdw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz|b32}-VL
3192 ASMJIT_INST_3x(vpackssdw, Vpackssdw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz|b32}-VL
3193 ASMJIT_INST_3x(vpackssdw, Vpackssdw, Zmm, Zmm, Zmm) // AVX512_BW{kz|b32}
3194 ASMJIT_INST_3x(vpackssdw, Vpackssdw, Zmm, Zmm, Mem) // AVX512_BW{kz|b32}
3195 ASMJIT_INST_3x(vpacksswb, Vpacksswb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3196 ASMJIT_INST_3x(vpacksswb, Vpacksswb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3197 ASMJIT_INST_3x(vpacksswb, Vpacksswb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3198 ASMJIT_INST_3x(vpacksswb, Vpacksswb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3199 ASMJIT_INST_3x(vpacksswb, Vpacksswb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3200 ASMJIT_INST_3x(vpacksswb, Vpacksswb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3201 ASMJIT_INST_3x(vpackusdw, Vpackusdw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz|b32}-VL
3202 ASMJIT_INST_3x(vpackusdw, Vpackusdw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz|b32}-VL
3203 ASMJIT_INST_3x(vpackusdw, Vpackusdw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz|b32}-VL
3204 ASMJIT_INST_3x(vpackusdw, Vpackusdw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz|b32}-VL
3205 ASMJIT_INST_3x(vpackusdw, Vpackusdw, Zmm, Zmm, Zmm) // AVX512_BW{kz|b32}
3206 ASMJIT_INST_3x(vpackusdw, Vpackusdw, Zmm, Zmm, Mem) // AVX512_BW{kz|b32}
3207 ASMJIT_INST_3x(vpackuswb, Vpackuswb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3208 ASMJIT_INST_3x(vpackuswb, Vpackuswb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3209 ASMJIT_INST_3x(vpackuswb, Vpackuswb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3210 ASMJIT_INST_3x(vpackuswb, Vpackuswb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3211 ASMJIT_INST_3x(vpackuswb, Vpackuswb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3212 ASMJIT_INST_3x(vpackuswb, Vpackuswb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3213 ASMJIT_INST_3x(vpaddb, Vpaddb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3214 ASMJIT_INST_3x(vpaddb, Vpaddb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3215 ASMJIT_INST_3x(vpaddb, Vpaddb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3216 ASMJIT_INST_3x(vpaddb, Vpaddb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3217 ASMJIT_INST_3x(vpaddb, Vpaddb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3218 ASMJIT_INST_3x(vpaddb, Vpaddb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3219 ASMJIT_INST_3x(vpaddd, Vpaddd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3220 ASMJIT_INST_3x(vpaddd, Vpaddd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3221 ASMJIT_INST_3x(vpaddd, Vpaddd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
3222 ASMJIT_INST_3x(vpaddd, Vpaddd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
3223 ASMJIT_INST_3x(vpaddd, Vpaddd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3224 ASMJIT_INST_3x(vpaddd, Vpaddd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3225 ASMJIT_INST_3x(vpaddq, Vpaddq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
3226 ASMJIT_INST_3x(vpaddq, Vpaddq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
3227 ASMJIT_INST_3x(vpaddq, Vpaddq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
3228 ASMJIT_INST_3x(vpaddq, Vpaddq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
3229 ASMJIT_INST_3x(vpaddq, Vpaddq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3230 ASMJIT_INST_3x(vpaddq, Vpaddq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3231 ASMJIT_INST_3x(vpaddsb, Vpaddsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3232 ASMJIT_INST_3x(vpaddsb, Vpaddsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3233 ASMJIT_INST_3x(vpaddsb, Vpaddsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3234 ASMJIT_INST_3x(vpaddsb, Vpaddsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3235 ASMJIT_INST_3x(vpaddsb, Vpaddsb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3236 ASMJIT_INST_3x(vpaddsb, Vpaddsb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3237 ASMJIT_INST_3x(vpaddsw, Vpaddsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3238 ASMJIT_INST_3x(vpaddsw, Vpaddsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3239 ASMJIT_INST_3x(vpaddsw, Vpaddsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3240 ASMJIT_INST_3x(vpaddsw, Vpaddsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3241 ASMJIT_INST_3x(vpaddsw, Vpaddsw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3242 ASMJIT_INST_3x(vpaddsw, Vpaddsw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3243 ASMJIT_INST_3x(vpaddusb, Vpaddusb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3244 ASMJIT_INST_3x(vpaddusb, Vpaddusb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3245 ASMJIT_INST_3x(vpaddusb, Vpaddusb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3246 ASMJIT_INST_3x(vpaddusb, Vpaddusb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3247 ASMJIT_INST_3x(vpaddusb, Vpaddusb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3248 ASMJIT_INST_3x(vpaddusb, Vpaddusb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3249 ASMJIT_INST_3x(vpaddusw, Vpaddusw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3250 ASMJIT_INST_3x(vpaddusw, Vpaddusw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3251 ASMJIT_INST_3x(vpaddusw, Vpaddusw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3252 ASMJIT_INST_3x(vpaddusw, Vpaddusw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3253 ASMJIT_INST_3x(vpaddusw, Vpaddusw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3254 ASMJIT_INST_3x(vpaddusw, Vpaddusw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3255 ASMJIT_INST_3x(vpaddw, Vpaddw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3256 ASMJIT_INST_3x(vpaddw, Vpaddw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3257 ASMJIT_INST_3x(vpaddw, Vpaddw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3258 ASMJIT_INST_3x(vpaddw, Vpaddw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3259 ASMJIT_INST_3x(vpaddw, Vpaddw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3260 ASMJIT_INST_3x(vpaddw, Vpaddw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3261 ASMJIT_INST_4i(vpalignr, Vpalignr, Xmm, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL
3262 ASMJIT_INST_4i(vpalignr, Vpalignr, Xmm, Xmm, Mem, Imm) // AVX AVX512_BW{kz}-VL
3263 ASMJIT_INST_4i(vpalignr, Vpalignr, Ymm, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL
3264 ASMJIT_INST_4i(vpalignr, Vpalignr, Ymm, Ymm, Mem, Imm) // AVX2 AVX512_BW{kz}-VL
3265 ASMJIT_INST_4i(vpalignr, Vpalignr, Zmm, Zmm, Zmm, Imm) // AVX512_BW{kz}
3266 ASMJIT_INST_4i(vpalignr, Vpalignr, Zmm, Zmm, Mem, Imm) // AVX512_BW{kz}
3267 ASMJIT_INST_3x(vpand, Vpand, Xmm, Xmm, Xmm) // AVX
3268 ASMJIT_INST_3x(vpand, Vpand, Xmm, Xmm, Mem) // AVX
3269 ASMJIT_INST_3x(vpand, Vpand, Ymm, Ymm, Ymm) // AVX2
3270 ASMJIT_INST_3x(vpand, Vpand, Ymm, Ymm, Mem) // AVX2
3271 ASMJIT_INST_3x(vpandd, Vpandd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
3272 ASMJIT_INST_3x(vpandd, Vpandd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
3273 ASMJIT_INST_3x(vpandd, Vpandd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
3274 ASMJIT_INST_3x(vpandd, Vpandd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
3275 ASMJIT_INST_3x(vpandd, Vpandd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3276 ASMJIT_INST_3x(vpandd, Vpandd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3277 ASMJIT_INST_3x(vpandn, Vpandn, Xmm, Xmm, Xmm) // AVX
3278 ASMJIT_INST_3x(vpandn, Vpandn, Xmm, Xmm, Mem) // AVX
3279 ASMJIT_INST_3x(vpandn, Vpandn, Ymm, Ymm, Ymm) // AVX2
3280 ASMJIT_INST_3x(vpandn, Vpandn, Ymm, Ymm, Mem) // AVX2
3281 ASMJIT_INST_3x(vpandnd, Vpandnd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
3282 ASMJIT_INST_3x(vpandnd, Vpandnd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
3283 ASMJIT_INST_3x(vpandnd, Vpandnd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
3284 ASMJIT_INST_3x(vpandnd, Vpandnd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
3285 ASMJIT_INST_3x(vpandnd, Vpandnd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3286 ASMJIT_INST_3x(vpandnd, Vpandnd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3287 ASMJIT_INST_3x(vpandnq, Vpandnq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3288 ASMJIT_INST_3x(vpandnq, Vpandnq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3289 ASMJIT_INST_3x(vpandnq, Vpandnq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3290 ASMJIT_INST_3x(vpandnq, Vpandnq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3291 ASMJIT_INST_3x(vpandnq, Vpandnq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3292 ASMJIT_INST_3x(vpandnq, Vpandnq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3293 ASMJIT_INST_3x(vpandq, Vpandq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3294 ASMJIT_INST_3x(vpandq, Vpandq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3295 ASMJIT_INST_3x(vpandq, Vpandq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3296 ASMJIT_INST_3x(vpandq, Vpandq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3297 ASMJIT_INST_3x(vpandq, Vpandq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3298 ASMJIT_INST_3x(vpandq, Vpandq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
// --- Packed average (vpavgb/vpavgw) ---
// NOTE: for packed *integer* instructions the 128-bit VEX form is base AVX
// and the 256-bit form requires AVX2 (Intel SDM Vol. 2); the tags below are
// corrected accordingly (Ymm forms of vpavgb were mislabeled AVX, and Xmm
// forms of vpavgw were mislabeled AVX2).
3299 ASMJIT_INST_3x(vpavgb, Vpavgb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3300 ASMJIT_INST_3x(vpavgb, Vpavgb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3301 ASMJIT_INST_3x(vpavgb, Vpavgb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3302 ASMJIT_INST_3x(vpavgb, Vpavgb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3303 ASMJIT_INST_3x(vpavgb, Vpavgb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3304 ASMJIT_INST_3x(vpavgb, Vpavgb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3305 ASMJIT_INST_3x(vpavgw, Vpavgw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3306 ASMJIT_INST_3x(vpavgw, Vpavgw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3307 ASMJIT_INST_3x(vpavgw, Vpavgw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3308 ASMJIT_INST_3x(vpavgw, Vpavgw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3309 ASMJIT_INST_3x(vpavgw, Vpavgw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3310 ASMJIT_INST_3x(vpavgw, Vpavgw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3311 ASMJIT_INST_4i(vpblendd, Vpblendd, Xmm, Xmm, Xmm, Imm) // AVX2
3312 ASMJIT_INST_4i(vpblendd, Vpblendd, Xmm, Xmm, Mem, Imm) // AVX2
3313 ASMJIT_INST_4i(vpblendd, Vpblendd, Ymm, Ymm, Ymm, Imm) // AVX2
3314 ASMJIT_INST_4i(vpblendd, Vpblendd, Ymm, Ymm, Mem, Imm) // AVX2
3315 ASMJIT_INST_4x(vpblendvb, Vpblendvb, Xmm, Xmm, Xmm, Xmm) // AVX
3316 ASMJIT_INST_4x(vpblendvb, Vpblendvb, Xmm, Xmm, Mem, Xmm) // AVX
3317 ASMJIT_INST_4x(vpblendvb, Vpblendvb, Ymm, Ymm, Ymm, Ymm) // AVX2
3318 ASMJIT_INST_4x(vpblendvb, Vpblendvb, Ymm, Ymm, Mem, Ymm) // AVX2
3319 ASMJIT_INST_4i(vpblendw, Vpblendw, Xmm, Xmm, Xmm, Imm) // AVX
3320 ASMJIT_INST_4i(vpblendw, Vpblendw, Xmm, Xmm, Mem, Imm) // AVX
3321 ASMJIT_INST_4i(vpblendw, Vpblendw, Ymm, Ymm, Ymm, Imm) // AVX2
3322 ASMJIT_INST_4i(vpblendw, Vpblendw, Ymm, Ymm, Mem, Imm) // AVX2
3323 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Xmm, Xmm) // AVX2 AVX512_BW{kz}-VL
3324 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Xmm, Mem) // AVX2 AVX512_BW{kz}-VL
3325 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL
3326 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3327 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Xmm, Gp) // AVX512_BW{kz}-VL
3328 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Ymm, Gp) // AVX512_BW{kz}-VL
3329 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Zmm, Gp) // AVX512_BW{kz}
3330 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Zmm, Xmm) // AVX512_BW{kz}
3331 ASMJIT_INST_2x(vpbroadcastb, Vpbroadcastb, Zmm, Mem) // AVX512_BW{kz}
3332 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Xmm, Xmm) // AVX2 AVX512_F{kz}-VL
3333 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Xmm, Mem) // AVX2 AVX512_F{kz}-VL
3334 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
3335 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
3336 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Xmm, Gp) // AVX512_F{kz}-VL
3337 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Ymm, Gp) // AVX512_F{kz}-VL
3338 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Zmm, Gp) // AVX512_F{kz}
3339 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Zmm, Xmm) // AVX512_F{kz}
3340 ASMJIT_INST_2x(vpbroadcastd, Vpbroadcastd, Zmm, Mem) // AVX512_F{kz}
3341 ASMJIT_INST_2x(vpbroadcastmb2d, Vpbroadcastmb2d, Xmm, KReg) // AVX512_CD-VL
3342 ASMJIT_INST_2x(vpbroadcastmb2d, Vpbroadcastmb2d, Ymm, KReg) // AVX512_CD-VL
3343 ASMJIT_INST_2x(vpbroadcastmb2d, Vpbroadcastmb2d, Zmm, KReg) // AVX512_CD
3344 ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Xmm, KReg) // AVX512_CD-VL
3345 ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Ymm, KReg) // AVX512_CD-VL
3346 ASMJIT_INST_2x(vpbroadcastmb2q, Vpbroadcastmb2q, Zmm, KReg) // AVX512_CD
3347 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Xmm, Xmm) // AVX2 AVX512_F{kz}-VL
3348 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Xmm, Mem) // AVX2 AVX512_F{kz}-VL
3349 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
3350 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
3351 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Xmm, Gp) // AVX512_F{kz}-VL
3352 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Ymm, Gp) // AVX512_F{kz}-VL
3353 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Zmm, Gp) // AVX512_F{kz}
3354 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Zmm, Xmm) // AVX512_F{kz}
3355 ASMJIT_INST_2x(vpbroadcastq, Vpbroadcastq, Zmm, Mem) // AVX512_F{kz}
3356 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Xmm, Xmm) // AVX2 AVX512_BW{kz}-VL
3357 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Xmm, Mem) // AVX2 AVX512_BW{kz}-VL
3358 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL
3359 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3360 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Xmm, Gp) // AVX512_BW{kz}-VL
3361 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Ymm, Gp) // AVX512_BW{kz}-VL
3362 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Zmm, Gp) // AVX512_BW{kz}
3363 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Zmm, Xmm) // AVX512_BW{kz}
3364 ASMJIT_INST_2x(vpbroadcastw, Vpbroadcastw, Zmm, Mem) // AVX512_BW{kz}
3365 ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F-VL
3366 ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Xmm, Xmm, Mem, Imm) // AVX AVX512_F-VL
3367 ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Ymm, Ymm, Ymm, Imm) // AVX512_F-VL VPCLMULQDQ
3368 ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Ymm, Ymm, Mem, Imm) // AVX512_F-VL VPCLMULQDQ
3369 ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Zmm, Zmm, Zmm, Imm) // AVX512_F VPCLMULQDQ
3370 ASMJIT_INST_4i(vpclmulqdq, Vpclmulqdq, Zmm, Zmm, Mem, Imm) // AVX512_F VPCLMULQDQ
3371 ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Xmm, Xmm, Imm) // AVX512_BW{k}-VL
3372 ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Xmm, Mem, Imm) // AVX512_BW{k}-VL
3373 ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Ymm, Ymm, Imm) // AVX512_BW{k}-VL
3374 ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Ymm, Mem, Imm) // AVX512_BW{k}-VL
3375 ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Zmm, Zmm, Imm) // AVX512_BW{k}
3376 ASMJIT_INST_4i(vpcmpb, Vpcmpb, KReg, Zmm, Mem, Imm) // AVX512_BW{k}
3377 ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b32}-VL
3378 ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Xmm, Mem, Imm) // AVX512_F{k|b32}-VL
3379 ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b32}-VL
3380 ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Ymm, Mem, Imm) // AVX512_F{k|b32}-VL
3381 ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b32}
3382 ASMJIT_INST_4i(vpcmpd, Vpcmpd, KReg, Zmm, Mem, Imm) // AVX512_F{k|b32}
3383 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Xmm, Xmm, Xmm) // AVX
3384 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Xmm, Xmm, Mem) // AVX
3385 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Ymm, Ymm, Ymm) // AVX2
3386 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, Ymm, Ymm, Mem) // AVX2
3387 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
3388 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Xmm, Mem) // AVX512_BW{k}-VL
3389 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
3390 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Ymm, Mem) // AVX512_BW{k}-VL
3391 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Zmm, Zmm) // AVX512_BW{k}
3392 ASMJIT_INST_3x(vpcmpeqb, Vpcmpeqb, KReg, Zmm, Mem) // AVX512_BW{k}
3393 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Xmm, Xmm, Xmm) // AVX
3394 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Xmm, Xmm, Mem) // AVX
3395 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Ymm, Ymm, Ymm) // AVX2
3396 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, Ymm, Ymm, Mem) // AVX2
3397 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL
3398 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL
3399 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL
3400 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL
3401 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Zmm, Zmm) // AVX512_F{k|b32}
3402 ASMJIT_INST_3x(vpcmpeqd, Vpcmpeqd, KReg, Zmm, Mem) // AVX512_F{k|b32}
3403 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Xmm, Xmm, Xmm) // AVX
3404 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Xmm, Xmm, Mem) // AVX
3405 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Ymm, Ymm, Ymm) // AVX2
3406 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, Ymm, Ymm, Mem) // AVX2
3407 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL
3408 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Xmm, Mem) // AVX512_F{k|b64}-VL
3409 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL
3410 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL
3411 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Zmm, Zmm) // AVX512_F{k|b64}
3412 ASMJIT_INST_3x(vpcmpeqq, Vpcmpeqq, KReg, Zmm, Mem) // AVX512_F{k|b64}
3413 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Xmm, Xmm, Xmm) // AVX
3414 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Xmm, Xmm, Mem) // AVX
3415 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Ymm, Ymm, Ymm) // AVX2
3416 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, Ymm, Ymm, Mem) // AVX2
3417 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
3418 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Xmm, Mem) // AVX512_BW{k}-VL
3419 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
3420 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Ymm, Mem) // AVX512_BW{k}-VL
3421 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Zmm, Zmm) // AVX512_BW{k}
3422 ASMJIT_INST_3x(vpcmpeqw, Vpcmpeqw, KReg, Zmm, Mem) // AVX512_BW{k}
3423 ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Xmm, Xmm, Imm, ECX, EAX, EDX) // AVX [EXPLICIT]
3424 ASMJIT_INST_6x(vpcmpestri, Vpcmpestri, Xmm, Mem, Imm, ECX, EAX, EDX) // AVX [EXPLICIT]
3425 ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Xmm, Xmm, Imm, XMM0, EAX, EDX)// AVX [EXPLICIT]
3426 ASMJIT_INST_6x(vpcmpestrm, Vpcmpestrm, Xmm, Mem, Imm, XMM0, EAX, EDX)// AVX [EXPLICIT]
3427 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Xmm, Xmm, Xmm) // AVX
3428 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Xmm, Xmm, Mem) // AVX
3429 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Ymm, Ymm, Ymm) // AVX2
3430 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, Ymm, Ymm, Mem) // AVX2
3431 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
3432 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Xmm, Mem) // AVX512_BW{k}-VL
3433 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
3434 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Ymm, Mem) // AVX512_BW{k}-VL
3435 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Zmm, Zmm) // AVX512_BW{k}
3436 ASMJIT_INST_3x(vpcmpgtb, Vpcmpgtb, KReg, Zmm, Mem) // AVX512_BW{k}
3437 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Xmm, Xmm, Xmm) // AVX
3438 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Xmm, Xmm, Mem) // AVX
3439 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Ymm, Ymm, Ymm) // AVX2
3440 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, Ymm, Ymm, Mem) // AVX2
3441 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL
3442 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL
3443 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL
3444 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL
3445 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Zmm, Zmm) // AVX512_F{k|b32}
3446 ASMJIT_INST_3x(vpcmpgtd, Vpcmpgtd, KReg, Zmm, Mem) // AVX512_F{k|b32}
3447 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Xmm, Xmm, Xmm) // AVX
3448 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Xmm, Xmm, Mem) // AVX
3449 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Ymm, Ymm, Ymm) // AVX2
3450 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, Ymm, Ymm, Mem) // AVX2
3451 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL
3452 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Xmm, Mem) // AVX512_F{k|b64}-VL
3453 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL
3454 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL
3455 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Zmm, Zmm) // AVX512_F{k|b64}
3456 ASMJIT_INST_3x(vpcmpgtq, Vpcmpgtq, KReg, Zmm, Mem) // AVX512_F{k|b64}
3457 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Xmm, Xmm, Xmm) // AVX
3458 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Xmm, Xmm, Mem) // AVX
3459 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Ymm, Ymm, Ymm) // AVX2
3460 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, Ymm, Ymm, Mem) // AVX2
3461 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
3462 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Xmm, Mem) // AVX512_BW{k}-VL
3463 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
3464 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Ymm, Mem) // AVX512_BW{k}-VL
3465 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Zmm, Zmm) // AVX512_BW{k}
3466 ASMJIT_INST_3x(vpcmpgtw, Vpcmpgtw, KReg, Zmm, Mem) // AVX512_BW{k}
3467 ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Xmm, Xmm, Imm, ECX) // AVX [EXPLICIT]
3468 ASMJIT_INST_4x(vpcmpistri, Vpcmpistri, Xmm, Mem, Imm, ECX) // AVX [EXPLICIT]
3469 ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Xmm, Xmm, Imm, XMM0) // AVX [EXPLICIT]
3470 ASMJIT_INST_4x(vpcmpistrm, Vpcmpistrm, Xmm, Mem, Imm, XMM0) // AVX [EXPLICIT]
3471 ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b64}-VL
3472 ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Xmm, Mem, Imm) // AVX512_F{k|b64}-VL
3473 ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b64}-VL
3474 ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Ymm, Mem, Imm) // AVX512_F{k|b64}-VL
3475 ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b64}
3476 ASMJIT_INST_4i(vpcmpq, Vpcmpq, KReg, Zmm, Mem, Imm) // AVX512_F{k|b64}
3477 ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Xmm, Xmm, Imm) // AVX512_BW{k}-VL
3478 ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Xmm, Mem, Imm) // AVX512_BW{k}-VL
3479 ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Ymm, Ymm, Imm) // AVX512_BW{k}-VL
3480 ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Ymm, Mem, Imm) // AVX512_BW{k}-VL
3481 ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Zmm, Zmm, Imm) // AVX512_BW{k}
3482 ASMJIT_INST_4i(vpcmpub, Vpcmpub, KReg, Zmm, Mem, Imm) // AVX512_BW{k}
3483 ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b32}-VL
3484 ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Xmm, Mem, Imm) // AVX512_F{k|b32}-VL
3485 ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b32}-VL
3486 ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Ymm, Mem, Imm) // AVX512_F{k|b32}-VL
3487 ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b32}
3488 ASMJIT_INST_4i(vpcmpud, Vpcmpud, KReg, Zmm, Mem, Imm) // AVX512_F{k|b32}
3489 ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Xmm, Xmm, Imm) // AVX512_F{k|b64}-VL
3490 ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Xmm, Mem, Imm) // AVX512_F{k|b64}-VL
3491 ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Ymm, Ymm, Imm) // AVX512_F{k|b64}-VL
3492 ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Ymm, Mem, Imm) // AVX512_F{k|b64}-VL
3493 ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Zmm, Zmm, Imm) // AVX512_F{k|b64}
3494 ASMJIT_INST_4i(vpcmpuq, Vpcmpuq, KReg, Zmm, Mem, Imm) // AVX512_F{k|b64}
3495 ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Xmm, Xmm, Imm) // AVX512_BW{k}-VL
3496 ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Xmm, Mem, Imm) // AVX512_BW{k}-VL
3497 ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Ymm, Ymm, Imm) // AVX512_BW{k}-VL
3498 ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Ymm, Mem, Imm) // AVX512_BW{k}-VL
3499 ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Zmm, Zmm, Imm) // AVX512_BW{k}
3500 ASMJIT_INST_4i(vpcmpuw, Vpcmpuw, KReg, Zmm, Mem, Imm) // AVX512_BW{k}
3501 ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Xmm, Xmm, Imm) // AVX512_BW{k}-VL
3502 ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Xmm, Mem, Imm) // AVX512_BW{k}-VL
3503 ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Ymm, Ymm, Imm) // AVX512_BW{k}-VL
3504 ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Ymm, Mem, Imm) // AVX512_BW{k}-VL
3505 ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Zmm, Zmm, Imm) // AVX512_BW{k}
3506 ASMJIT_INST_4i(vpcmpw, Vpcmpw, KReg, Zmm, Mem, Imm) // AVX512_BW{k}
3507 ASMJIT_INST_2x(vpcompressb, Vpcompressb, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
3508 ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Xmm) // AVX512_VBMI2{kz}-VL
3509 ASMJIT_INST_2x(vpcompressb, Vpcompressb, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
3510 ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Ymm) // AVX512_VBMI2{kz}-VL
3511 ASMJIT_INST_2x(vpcompressb, Vpcompressb, Zmm, Zmm) // AVX512_VBMI2{kz}
3512 ASMJIT_INST_2x(vpcompressb, Vpcompressb, Mem, Zmm) // AVX512_VBMI2{kz}
3513 ASMJIT_INST_2x(vpcompressd, Vpcompressd, Xmm, Xmm) // AVX512_F{kz}-VL
3514 ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Xmm) // AVX512_F{kz}-VL
3515 ASMJIT_INST_2x(vpcompressd, Vpcompressd, Ymm, Ymm) // AVX512_F{kz}-VL
3516 ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Ymm) // AVX512_F{kz}-VL
3517 ASMJIT_INST_2x(vpcompressd, Vpcompressd, Zmm, Zmm) // AVX512_F{kz}
3518 ASMJIT_INST_2x(vpcompressd, Vpcompressd, Mem, Zmm) // AVX512_F{kz}
3519 ASMJIT_INST_2x(vpcompressq, Vpcompressq, Xmm, Xmm) // AVX512_F{kz}-VL
3520 ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Xmm) // AVX512_F{kz}-VL
3521 ASMJIT_INST_2x(vpcompressq, Vpcompressq, Ymm, Ymm) // AVX512_F{kz}-VL
3522 ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Ymm) // AVX512_F{kz}-VL
3523 ASMJIT_INST_2x(vpcompressq, Vpcompressq, Zmm, Zmm) // AVX512_F{kz}
3524 ASMJIT_INST_2x(vpcompressq, Vpcompressq, Mem, Zmm) // AVX512_F{kz}
3525 ASMJIT_INST_2x(vpcompressw, Vpcompressw, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
3526 ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Xmm) // AVX512_VBMI2{kz}-VL
3527 ASMJIT_INST_2x(vpcompressw, Vpcompressw, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
3528 ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Ymm) // AVX512_VBMI2{kz}-VL
3529 ASMJIT_INST_2x(vpcompressw, Vpcompressw, Zmm, Zmm) // AVX512_VBMI2{kz}
3530 ASMJIT_INST_2x(vpcompressw, Vpcompressw, Mem, Zmm) // AVX512_VBMI2{kz}
3531 ASMJIT_INST_2x(vpconflictd, Vpconflictd, Xmm, Xmm) // AVX512_CD{kz|b32}-VL
3532 ASMJIT_INST_2x(vpconflictd, Vpconflictd, Xmm, Mem) // AVX512_CD{kz|b32}-VL
3533 ASMJIT_INST_2x(vpconflictd, Vpconflictd, Ymm, Ymm) // AVX512_CD{kz|b32}-VL
3534 ASMJIT_INST_2x(vpconflictd, Vpconflictd, Ymm, Mem) // AVX512_CD{kz|b32}-VL
3535 ASMJIT_INST_2x(vpconflictd, Vpconflictd, Zmm, Zmm) // AVX512_CD{kz|b32}
3536 ASMJIT_INST_2x(vpconflictd, Vpconflictd, Zmm, Mem) // AVX512_CD{kz|b32}
3537 ASMJIT_INST_2x(vpconflictq, Vpconflictq, Xmm, Xmm) // AVX512_CD{kz|b64}-VL
3538 ASMJIT_INST_2x(vpconflictq, Vpconflictq, Xmm, Mem) // AVX512_CD{kz|b64}-VL
3539 ASMJIT_INST_2x(vpconflictq, Vpconflictq, Ymm, Ymm) // AVX512_CD{kz|b64}-VL
3540 ASMJIT_INST_2x(vpconflictq, Vpconflictq, Ymm, Mem) // AVX512_CD{kz|b64}-VL
3541 ASMJIT_INST_2x(vpconflictq, Vpconflictq, Zmm, Zmm) // AVX512_CD{kz|b64}
3542 ASMJIT_INST_2x(vpconflictq, Vpconflictq, Zmm, Mem) // AVX512_CD{kz|b64}
3543 ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL
3544 ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL
3545 ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL
3546 ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL
3547 ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32}
3548 ASMJIT_INST_3x(vpdpbusd, Vpdpbusd, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32}
3549 ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL
3550 ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL
3551 ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL
3552 ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL
3553 ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32}
3554 ASMJIT_INST_3x(vpdpbusds, Vpdpbusds, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32}
3555 ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL
3556 ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL
3557 ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL
3558 ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL
3559 ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32}
3560 ASMJIT_INST_3x(vpdpwssd, Vpdpwssd, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32}
3561 ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Xmm, Xmm, Xmm) // AVX512_VNNI{kz|b32}-VL
3562 ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Xmm, Xmm, Mem) // AVX512_VNNI{kz|b32}-VL
3563 ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Ymm, Ymm, Ymm) // AVX512_VNNI{kz|b32}-VL
3564 ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Ymm, Ymm, Mem) // AVX512_VNNI{kz|b32}-VL
3565 ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Zmm, Zmm, Zmm) // AVX512_VNNI{kz|b32}
3566 ASMJIT_INST_3x(vpdpwssds, Vpdpwssds, Zmm, Zmm, Mem) // AVX512_VNNI{kz|b32}
3567 ASMJIT_INST_4i(vperm2f128, Vperm2f128, Ymm, Ymm, Ymm, Imm) // AVX
3568 ASMJIT_INST_4i(vperm2f128, Vperm2f128, Ymm, Ymm, Mem, Imm) // AVX
3569 ASMJIT_INST_4i(vperm2i128, Vperm2i128, Ymm, Ymm, Ymm, Imm) // AVX2
3570 ASMJIT_INST_4i(vperm2i128, Vperm2i128, Ymm, Ymm, Mem, Imm) // AVX2
3571 ASMJIT_INST_3x(vpermb, Vpermb, Xmm, Xmm, Xmm) // AVX512_VBMI{kz}-VL
3572 ASMJIT_INST_3x(vpermb, Vpermb, Xmm, Xmm, Mem) // AVX512_VBMI{kz}-VL
3573 ASMJIT_INST_3x(vpermb, Vpermb, Ymm, Ymm, Ymm) // AVX512_VBMI{kz}-VL
3574 ASMJIT_INST_3x(vpermb, Vpermb, Ymm, Ymm, Mem) // AVX512_VBMI{kz}-VL
3575 ASMJIT_INST_3x(vpermb, Vpermb, Zmm, Zmm, Zmm) // AVX512_VBMI{kz}
3576 ASMJIT_INST_3x(vpermb, Vpermb, Zmm, Zmm, Mem) // AVX512_VBMI{kz}
3577 ASMJIT_INST_3x(vpermd, Vpermd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
3578 ASMJIT_INST_3x(vpermd, Vpermd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
3579 ASMJIT_INST_3x(vpermd, Vpermd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3580 ASMJIT_INST_3x(vpermd, Vpermd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3581 ASMJIT_INST_3x(vpermi2b, Vpermi2b, Xmm, Xmm, Xmm) // AVX512_VBMI{kz}-VL
3582 ASMJIT_INST_3x(vpermi2b, Vpermi2b, Xmm, Xmm, Mem) // AVX512_VBMI{kz}-VL
3583 ASMJIT_INST_3x(vpermi2b, Vpermi2b, Ymm, Ymm, Ymm) // AVX512_VBMI{kz}-VL
3584 ASMJIT_INST_3x(vpermi2b, Vpermi2b, Ymm, Ymm, Mem) // AVX512_VBMI{kz}-VL
3585 ASMJIT_INST_3x(vpermi2b, Vpermi2b, Zmm, Zmm, Zmm) // AVX512_VBMI{kz}
3586 ASMJIT_INST_3x(vpermi2b, Vpermi2b, Zmm, Zmm, Mem) // AVX512_VBMI{kz}
3587 ASMJIT_INST_3x(vpermi2d, Vpermi2d, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
3588 ASMJIT_INST_3x(vpermi2d, Vpermi2d, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
3589 ASMJIT_INST_3x(vpermi2d, Vpermi2d, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
3590 ASMJIT_INST_3x(vpermi2d, Vpermi2d, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
3591 ASMJIT_INST_3x(vpermi2d, Vpermi2d, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3592 ASMJIT_INST_3x(vpermi2d, Vpermi2d, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3593 ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3594 ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3595 ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3596 ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3597 ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3598 ASMJIT_INST_3x(vpermi2pd, Vpermi2pd, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3599 ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
3600 ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
3601 ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
3602 ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
3603 ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3604 ASMJIT_INST_3x(vpermi2ps, Vpermi2ps, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3605 ASMJIT_INST_3x(vpermi2q, Vpermi2q, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3606 ASMJIT_INST_3x(vpermi2q, Vpermi2q, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3607 ASMJIT_INST_3x(vpermi2q, Vpermi2q, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3608 ASMJIT_INST_3x(vpermi2q, Vpermi2q, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3609 ASMJIT_INST_3x(vpermi2q, Vpermi2q, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3610 ASMJIT_INST_3x(vpermi2q, Vpermi2q, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3611 ASMJIT_INST_3x(vpermi2w, Vpermi2w, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
3612 ASMJIT_INST_3x(vpermi2w, Vpermi2w, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
3613 ASMJIT_INST_3x(vpermi2w, Vpermi2w, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
3614 ASMJIT_INST_3x(vpermi2w, Vpermi2w, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
3615 ASMJIT_INST_3x(vpermi2w, Vpermi2w, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3616 ASMJIT_INST_3x(vpermi2w, Vpermi2w, Zmm, Zmm, Mem) // AVX512_BW{kz}
3617 ASMJIT_INST_3x(vpermilpd, Vpermilpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
3618 ASMJIT_INST_3x(vpermilpd, Vpermilpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
3619 ASMJIT_INST_3i(vpermilpd, Vpermilpd, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL
3620 ASMJIT_INST_3i(vpermilpd, Vpermilpd, Xmm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL
3621 ASMJIT_INST_3x(vpermilpd, Vpermilpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
3622 ASMJIT_INST_3x(vpermilpd, Vpermilpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
3623 ASMJIT_INST_3i(vpermilpd, Vpermilpd, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b64}-VL
3624 ASMJIT_INST_3i(vpermilpd, Vpermilpd, Ymm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL
3625 ASMJIT_INST_3x(vpermilpd, Vpermilpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3626 ASMJIT_INST_3x(vpermilpd, Vpermilpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3627 ASMJIT_INST_3i(vpermilpd, Vpermilpd, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
3628 ASMJIT_INST_3i(vpermilpd, Vpermilpd, Zmm, Mem, Imm) // AVX512_F{kz|b64}
3629 ASMJIT_INST_3x(vpermilps, Vpermilps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3630 ASMJIT_INST_3x(vpermilps, Vpermilps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3631 ASMJIT_INST_3i(vpermilps, Vpermilps, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL
3632 ASMJIT_INST_3i(vpermilps, Vpermilps, Xmm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL
3633 ASMJIT_INST_3x(vpermilps, Vpermilps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
3634 ASMJIT_INST_3x(vpermilps, Vpermilps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
3635 ASMJIT_INST_3i(vpermilps, Vpermilps, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b32}-VL
3636 ASMJIT_INST_3i(vpermilps, Vpermilps, Ymm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL
3637 ASMJIT_INST_3x(vpermilps, Vpermilps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3638 ASMJIT_INST_3x(vpermilps, Vpermilps, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3639 ASMJIT_INST_3i(vpermilps, Vpermilps, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
3640 ASMJIT_INST_3i(vpermilps, Vpermilps, Zmm, Mem, Imm) // AVX512_F{kz|b32}
3641 ASMJIT_INST_3i(vpermpd, Vpermpd, Ymm, Ymm, Imm) // AVX2
3642 ASMJIT_INST_3i(vpermpd, Vpermpd, Ymm, Mem, Imm) // AVX2
3643 ASMJIT_INST_3x(vpermps, Vpermps, Ymm, Ymm, Ymm) // AVX2
3644 ASMJIT_INST_3x(vpermps, Vpermps, Ymm, Ymm, Mem) // AVX2
3645 ASMJIT_INST_3i(vpermq, Vpermq, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b64}-VL
3646 ASMJIT_INST_3i(vpermq, Vpermq, Ymm, Mem, Imm) // AVX2 AVX512_F{kz|b64}-VL
3647 ASMJIT_INST_3x(vpermq, Vpermq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3648 ASMJIT_INST_3x(vpermq, Vpermq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3649 ASMJIT_INST_3x(vpermq, Vpermq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3650 ASMJIT_INST_3x(vpermq, Vpermq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3651 ASMJIT_INST_3i(vpermq, Vpermq, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
3652 ASMJIT_INST_3i(vpermq, Vpermq, Zmm, Mem, Imm) // AVX512_F{kz|b64}
3653 ASMJIT_INST_3x(vpermt2b, Vpermt2b, Xmm, Xmm, Xmm) // AVX512_VBMI{kz}-VL
3654 ASMJIT_INST_3x(vpermt2b, Vpermt2b, Xmm, Xmm, Mem) // AVX512_VBMI{kz}-VL
3655 ASMJIT_INST_3x(vpermt2b, Vpermt2b, Ymm, Ymm, Ymm) // AVX512_VBMI{kz}-VL
3656 ASMJIT_INST_3x(vpermt2b, Vpermt2b, Ymm, Ymm, Mem) // AVX512_VBMI{kz}-VL
3657 ASMJIT_INST_3x(vpermt2b, Vpermt2b, Zmm, Zmm, Zmm) // AVX512_VBMI{kz}
3658 ASMJIT_INST_3x(vpermt2b, Vpermt2b, Zmm, Zmm, Mem) // AVX512_VBMI{kz}
3659 ASMJIT_INST_3x(vpermt2d, Vpermt2d, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
3660 ASMJIT_INST_3x(vpermt2d, Vpermt2d, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
3661 ASMJIT_INST_3x(vpermt2d, Vpermt2d, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
3662 ASMJIT_INST_3x(vpermt2d, Vpermt2d, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
3663 ASMJIT_INST_3x(vpermt2d, Vpermt2d, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3664 ASMJIT_INST_3x(vpermt2d, Vpermt2d, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3665 ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3666 ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3667 ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3668 ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3669 ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3670 ASMJIT_INST_3x(vpermt2pd, Vpermt2pd, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3671 ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
3672 ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
3673 ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
3674 ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
3675 ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3676 ASMJIT_INST_3x(vpermt2ps, Vpermt2ps, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3677 ASMJIT_INST_3x(vpermt2q, Vpermt2q, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3678 ASMJIT_INST_3x(vpermt2q, Vpermt2q, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3679 ASMJIT_INST_3x(vpermt2q, Vpermt2q, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3680 ASMJIT_INST_3x(vpermt2q, Vpermt2q, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3681 ASMJIT_INST_3x(vpermt2q, Vpermt2q, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3682 ASMJIT_INST_3x(vpermt2q, Vpermt2q, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3683 ASMJIT_INST_3x(vpermt2w, Vpermt2w, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
3684 ASMJIT_INST_3x(vpermt2w, Vpermt2w, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
3685 ASMJIT_INST_3x(vpermt2w, Vpermt2w, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
3686 ASMJIT_INST_3x(vpermt2w, Vpermt2w, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
3687 ASMJIT_INST_3x(vpermt2w, Vpermt2w, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3688 ASMJIT_INST_3x(vpermt2w, Vpermt2w, Zmm, Zmm, Mem) // AVX512_BW{kz}
3689 ASMJIT_INST_3x(vpermw, Vpermw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
3690 ASMJIT_INST_3x(vpermw, Vpermw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
3691 ASMJIT_INST_3x(vpermw, Vpermw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
3692 ASMJIT_INST_3x(vpermw, Vpermw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
3693 ASMJIT_INST_3x(vpermw, Vpermw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3694 ASMJIT_INST_3x(vpermw, Vpermw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3695 ASMJIT_INST_2x(vpexpandb, Vpexpandb, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
3696 ASMJIT_INST_2x(vpexpandb, Vpexpandb, Xmm, Mem) // AVX512_VBMI2{kz}-VL
3697 ASMJIT_INST_2x(vpexpandb, Vpexpandb, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
3698 ASMJIT_INST_2x(vpexpandb, Vpexpandb, Ymm, Mem) // AVX512_VBMI2{kz}-VL
3699 ASMJIT_INST_2x(vpexpandb, Vpexpandb, Zmm, Zmm) // AVX512_VBMI2{kz}
3700 ASMJIT_INST_2x(vpexpandb, Vpexpandb, Zmm, Mem) // AVX512_VBMI2{kz}
3701 ASMJIT_INST_2x(vpexpandd, Vpexpandd, Xmm, Xmm) // AVX512_F{kz}-VL
3702 ASMJIT_INST_2x(vpexpandd, Vpexpandd, Xmm, Mem) // AVX512_F{kz}-VL
3703 ASMJIT_INST_2x(vpexpandd, Vpexpandd, Ymm, Ymm) // AVX512_F{kz}-VL
3704 ASMJIT_INST_2x(vpexpandd, Vpexpandd, Ymm, Mem) // AVX512_F{kz}-VL
3705 ASMJIT_INST_2x(vpexpandd, Vpexpandd, Zmm, Zmm) // AVX512_F{kz}
3706 ASMJIT_INST_2x(vpexpandd, Vpexpandd, Zmm, Mem) // AVX512_F{kz}
3707 ASMJIT_INST_2x(vpexpandq, Vpexpandq, Xmm, Xmm) // AVX512_F{kz}-VL
3708 ASMJIT_INST_2x(vpexpandq, Vpexpandq, Xmm, Mem) // AVX512_F{kz}-VL
3709 ASMJIT_INST_2x(vpexpandq, Vpexpandq, Ymm, Ymm) // AVX512_F{kz}-VL
3710 ASMJIT_INST_2x(vpexpandq, Vpexpandq, Ymm, Mem) // AVX512_F{kz}-VL
3711 ASMJIT_INST_2x(vpexpandq, Vpexpandq, Zmm, Zmm) // AVX512_F{kz}
3712 ASMJIT_INST_2x(vpexpandq, Vpexpandq, Zmm, Mem) // AVX512_F{kz}
3713 ASMJIT_INST_2x(vpexpandw, Vpexpandw, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
3714 ASMJIT_INST_2x(vpexpandw, Vpexpandw, Xmm, Mem) // AVX512_VBMI2{kz}-VL
3715 ASMJIT_INST_2x(vpexpandw, Vpexpandw, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
3716 ASMJIT_INST_2x(vpexpandw, Vpexpandw, Ymm, Mem) // AVX512_VBMI2{kz}-VL
3717 ASMJIT_INST_2x(vpexpandw, Vpexpandw, Zmm, Zmm) // AVX512_VBMI2{kz}
3718 ASMJIT_INST_2x(vpexpandw, Vpexpandw, Zmm, Mem) // AVX512_VBMI2{kz}
3719 ASMJIT_INST_3i(vpextrb, Vpextrb, Gp, Xmm, Imm) // AVX AVX512_BW
3720 ASMJIT_INST_3i(vpextrb, Vpextrb, Mem, Xmm, Imm) // AVX AVX512_BW
3721 ASMJIT_INST_3i(vpextrd, Vpextrd, Gp, Xmm, Imm) // AVX AVX512_DQ
3722 ASMJIT_INST_3i(vpextrd, Vpextrd, Mem, Xmm, Imm) // AVX AVX512_DQ
3723 ASMJIT_INST_3i(vpextrq, Vpextrq, Gp, Xmm, Imm) // AVX AVX512_DQ
3724 ASMJIT_INST_3i(vpextrq, Vpextrq, Mem, Xmm, Imm) // AVX AVX512_DQ
3725 ASMJIT_INST_3i(vpextrw, Vpextrw, Gp, Xmm, Imm) // AVX AVX512_BW
3726 ASMJIT_INST_3i(vpextrw, Vpextrw, Mem, Xmm, Imm) // AVX AVX512_BW
3727 ASMJIT_INST_3x(vpgatherdd, Vpgatherdd, Xmm, Mem, Xmm) // AVX2
3728 ASMJIT_INST_3x(vpgatherdd, Vpgatherdd, Ymm, Mem, Ymm) // AVX2
3729 ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Xmm, Mem) // AVX512_F{k}-VL
3730 ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Ymm, Mem) // AVX512_F{k}-VL
3731 ASMJIT_INST_2x(vpgatherdd, Vpgatherdd, Zmm, Mem) // AVX512_F{k}
3732 ASMJIT_INST_3x(vpgatherdq, Vpgatherdq, Xmm, Mem, Xmm) // AVX2
3733 ASMJIT_INST_3x(vpgatherdq, Vpgatherdq, Ymm, Mem, Ymm) // AVX2
3734 ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Xmm, Mem) // AVX512_F{k}-VL
3735 ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Ymm, Mem) // AVX512_F{k}-VL
3736 ASMJIT_INST_2x(vpgatherdq, Vpgatherdq, Zmm, Mem) // AVX512_F{k}
3737 ASMJIT_INST_3x(vpgatherqd, Vpgatherqd, Xmm, Mem, Xmm) // AVX2
3738 ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Xmm, Mem) // AVX512_F{k}-VL
3739 ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Ymm, Mem) // AVX512_F{k}-VL
3740 ASMJIT_INST_2x(vpgatherqd, Vpgatherqd, Zmm, Mem) // AVX512_F{k}
3741 ASMJIT_INST_3x(vpgatherqq, Vpgatherqq, Xmm, Mem, Xmm) // AVX2
3742 ASMJIT_INST_3x(vpgatherqq, Vpgatherqq, Ymm, Mem, Ymm) // AVX2
3743 ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Xmm, Mem) // AVX512_F{k}-VL
3744 ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Ymm, Mem) // AVX512_F{k}-VL
3745 ASMJIT_INST_2x(vpgatherqq, Vpgatherqq, Zmm, Mem) // AVX512_F{k}
3746 ASMJIT_INST_3x(vphaddd, Vphaddd, Xmm, Xmm, Xmm) // AVX
3747 ASMJIT_INST_3x(vphaddd, Vphaddd, Xmm, Xmm, Mem) // AVX
3748 ASMJIT_INST_3x(vphaddd, Vphaddd, Ymm, Ymm, Ymm) // AVX2
3749 ASMJIT_INST_3x(vphaddd, Vphaddd, Ymm, Ymm, Mem) // AVX2
3750 ASMJIT_INST_3x(vphaddsw, Vphaddsw, Xmm, Xmm, Xmm) // AVX
3751 ASMJIT_INST_3x(vphaddsw, Vphaddsw, Xmm, Xmm, Mem) // AVX
3752 ASMJIT_INST_3x(vphaddsw, Vphaddsw, Ymm, Ymm, Ymm) // AVX2
3753 ASMJIT_INST_3x(vphaddsw, Vphaddsw, Ymm, Ymm, Mem) // AVX2
3754 ASMJIT_INST_3x(vphaddw, Vphaddw, Xmm, Xmm, Xmm) // AVX
3755 ASMJIT_INST_3x(vphaddw, Vphaddw, Xmm, Xmm, Mem) // AVX
3756 ASMJIT_INST_3x(vphaddw, Vphaddw, Ymm, Ymm, Ymm) // AVX2
3757 ASMJIT_INST_3x(vphaddw, Vphaddw, Ymm, Ymm, Mem) // AVX2
3758 ASMJIT_INST_2x(vphminposuw, Vphminposuw, Xmm, Xmm) // AVX
3759 ASMJIT_INST_2x(vphminposuw, Vphminposuw, Xmm, Mem) // AVX
3760 ASMJIT_INST_3x(vphsubd, Vphsubd, Xmm, Xmm, Xmm) // AVX
3761 ASMJIT_INST_3x(vphsubd, Vphsubd, Xmm, Xmm, Mem) // AVX
3762 ASMJIT_INST_3x(vphsubd, Vphsubd, Ymm, Ymm, Ymm) // AVX2
3763 ASMJIT_INST_3x(vphsubd, Vphsubd, Ymm, Ymm, Mem) // AVX2
3764 ASMJIT_INST_3x(vphsubsw, Vphsubsw, Xmm, Xmm, Xmm) // AVX
3765 ASMJIT_INST_3x(vphsubsw, Vphsubsw, Xmm, Xmm, Mem) // AVX
3766 ASMJIT_INST_3x(vphsubsw, Vphsubsw, Ymm, Ymm, Ymm) // AVX2
3767 ASMJIT_INST_3x(vphsubsw, Vphsubsw, Ymm, Ymm, Mem) // AVX2
3768 ASMJIT_INST_3x(vphsubw, Vphsubw, Xmm, Xmm, Xmm) // AVX
3769 ASMJIT_INST_3x(vphsubw, Vphsubw, Xmm, Xmm, Mem) // AVX
3770 ASMJIT_INST_3x(vphsubw, Vphsubw, Ymm, Ymm, Ymm) // AVX2
3771 ASMJIT_INST_3x(vphsubw, Vphsubw, Ymm, Ymm, Mem) // AVX2
3772 ASMJIT_INST_4i(vpinsrb, Vpinsrb, Xmm, Xmm, Gp, Imm) // AVX AVX512_BW{kz}
3773 ASMJIT_INST_4i(vpinsrb, Vpinsrb, Xmm, Xmm, Mem, Imm) // AVX AVX512_BW{kz}
3774 ASMJIT_INST_4i(vpinsrd, Vpinsrd, Xmm, Xmm, Gp, Imm) // AVX AVX512_DQ{kz}
3775 ASMJIT_INST_4i(vpinsrd, Vpinsrd, Xmm, Xmm, Mem, Imm) // AVX AVX512_DQ{kz}
3776 ASMJIT_INST_4i(vpinsrq, Vpinsrq, Xmm, Xmm, Gp, Imm) // AVX AVX512_DQ{kz}
3777 ASMJIT_INST_4i(vpinsrq, Vpinsrq, Xmm, Xmm, Mem, Imm) // AVX AVX512_DQ{kz}
3778 ASMJIT_INST_4i(vpinsrw, Vpinsrw, Xmm, Xmm, Gp, Imm) // AVX AVX512_BW{kz}
3779 ASMJIT_INST_4i(vpinsrw, Vpinsrw, Xmm, Xmm, Mem, Imm) // AVX AVX512_BW{kz}
3780 ASMJIT_INST_2x(vplzcntd, Vplzcntd, Xmm, Xmm) // AVX512_CD{kz|b32}-VL
3781 ASMJIT_INST_2x(vplzcntd, Vplzcntd, Xmm, Mem) // AVX512_CD{kz|b32}-VL
3782 ASMJIT_INST_2x(vplzcntd, Vplzcntd, Ymm, Ymm) // AVX512_CD{kz|b32}-VL
3783 ASMJIT_INST_2x(vplzcntd, Vplzcntd, Ymm, Mem) // AVX512_CD{kz|b32}-VL
3784 ASMJIT_INST_2x(vplzcntd, Vplzcntd, Zmm, Zmm) // AVX512_CD{kz|b32}
3785 ASMJIT_INST_2x(vplzcntd, Vplzcntd, Zmm, Mem) // AVX512_CD{kz|b32}
3786 ASMJIT_INST_2x(vplzcntq, Vplzcntq, Xmm, Xmm) // AVX512_CD{kz|b64}-VL
3787 ASMJIT_INST_2x(vplzcntq, Vplzcntq, Xmm, Mem) // AVX512_CD{kz|b64}-VL
3788 ASMJIT_INST_2x(vplzcntq, Vplzcntq, Ymm, Ymm) // AVX512_CD{kz|b64}-VL
3789 ASMJIT_INST_2x(vplzcntq, Vplzcntq, Ymm, Mem) // AVX512_CD{kz|b64}-VL
3790 ASMJIT_INST_2x(vplzcntq, Vplzcntq, Zmm, Zmm) // AVX512_CD{kz|b64}
3791 ASMJIT_INST_2x(vplzcntq, Vplzcntq, Zmm, Mem) // AVX512_CD{kz|b64}
3792 ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Xmm, Xmm, Xmm) // AVX512_IFMA{kz|b64}-VL
3793 ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Xmm, Xmm, Mem) // AVX512_IFMA{kz|b64}-VL
3794 ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Ymm, Ymm, Ymm) // AVX512_IFMA{kz|b64}-VL
3795 ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Ymm, Ymm, Mem) // AVX512_IFMA{kz|b64}-VL
3796 ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Zmm, Zmm, Zmm) // AVX512_IFMA{kz|b64}
3797 ASMJIT_INST_3x(vpmadd52huq, Vpmadd52huq, Zmm, Zmm, Mem) // AVX512_IFMA{kz|b64}
3798 ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Xmm, Xmm, Xmm) // AVX512_IFMA{kz|b64}-VL
3799 ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Xmm, Xmm, Mem) // AVX512_IFMA{kz|b64}-VL
3800 ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Ymm, Ymm, Ymm) // AVX512_IFMA{kz|b64}-VL
3801 ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Ymm, Ymm, Mem) // AVX512_IFMA{kz|b64}-VL
3802 ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Zmm, Zmm, Zmm) // AVX512_IFMA{kz|b64}
3803 ASMJIT_INST_3x(vpmadd52luq, Vpmadd52luq, Zmm, Zmm, Mem) // AVX512_IFMA{kz|b64}
3804 ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3805 ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3806 ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3807 ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3808 ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3809 ASMJIT_INST_3x(vpmaddubsw, Vpmaddubsw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3810 ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3811 ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3812 ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3813 ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3814 ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3815 ASMJIT_INST_3x(vpmaddwd, Vpmaddwd, Zmm, Zmm, Mem) // AVX512_BW{kz}
3816 ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Mem, Xmm, Xmm) // AVX2
3817 ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Mem, Ymm, Ymm) // AVX2
3818 ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Xmm, Xmm, Mem) // AVX2
3819 ASMJIT_INST_3x(vpmaskmovd, Vpmaskmovd, Ymm, Ymm, Mem) // AVX2
3820 ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Mem, Xmm, Xmm) // AVX2
3821 ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Mem, Ymm, Ymm) // AVX2
3822 ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Xmm, Xmm, Mem) // AVX2
3823 ASMJIT_INST_3x(vpmaskmovq, Vpmaskmovq, Ymm, Ymm, Mem) // AVX2
3824 ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3825 ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3826 ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3827 ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3828 ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3829 ASMJIT_INST_3x(vpmaxsb, Vpmaxsb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3830 ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3831 ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3832 ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
3833 ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
3834 ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3835 ASMJIT_INST_3x(vpmaxsd, Vpmaxsd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3836 ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3837 ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3838 ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3839 ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3840 ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3841 ASMJIT_INST_3x(vpmaxsq, Vpmaxsq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3842 ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3843 ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3844 ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3845 ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3846 ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3847 ASMJIT_INST_3x(vpmaxsw, Vpmaxsw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3848 ASMJIT_INST_3x(vpmaxub, Vpmaxub, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3849 ASMJIT_INST_3x(vpmaxub, Vpmaxub, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3850 ASMJIT_INST_3x(vpmaxub, Vpmaxub, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3851 ASMJIT_INST_3x(vpmaxub, Vpmaxub, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3852 ASMJIT_INST_3x(vpmaxub, Vpmaxub, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3853 ASMJIT_INST_3x(vpmaxub, Vpmaxub, Zmm, Zmm, Mem) // AVX512_BW{kz}
3854 ASMJIT_INST_3x(vpmaxud, Vpmaxud, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3855 ASMJIT_INST_3x(vpmaxud, Vpmaxud, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3856 ASMJIT_INST_3x(vpmaxud, Vpmaxud, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
3857 ASMJIT_INST_3x(vpmaxud, Vpmaxud, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
3858 ASMJIT_INST_3x(vpmaxud, Vpmaxud, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3859 ASMJIT_INST_3x(vpmaxud, Vpmaxud, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3860 ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3861 ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3862 ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3863 ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3864 ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3865 ASMJIT_INST_3x(vpmaxuq, Vpmaxuq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3866 ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3867 ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3868 ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3869 ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3870 ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3871 ASMJIT_INST_3x(vpmaxuw, Vpmaxuw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3872 ASMJIT_INST_3x(vpminsb, Vpminsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3873 ASMJIT_INST_3x(vpminsb, Vpminsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3874 ASMJIT_INST_3x(vpminsb, Vpminsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3875 ASMJIT_INST_3x(vpminsb, Vpminsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3876 ASMJIT_INST_3x(vpminsb, Vpminsb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3877 ASMJIT_INST_3x(vpminsb, Vpminsb, Zmm, Zmm, Mem) // AVX512_BW{kz}
3878 ASMJIT_INST_3x(vpminsd, Vpminsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3879 ASMJIT_INST_3x(vpminsd, Vpminsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3880 ASMJIT_INST_3x(vpminsd, Vpminsd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
3881 ASMJIT_INST_3x(vpminsd, Vpminsd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
3882 ASMJIT_INST_3x(vpminsd, Vpminsd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3883 ASMJIT_INST_3x(vpminsd, Vpminsd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3884 ASMJIT_INST_3x(vpminsq, Vpminsq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3885 ASMJIT_INST_3x(vpminsq, Vpminsq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3886 ASMJIT_INST_3x(vpminsq, Vpminsq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3887 ASMJIT_INST_3x(vpminsq, Vpminsq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3888 ASMJIT_INST_3x(vpminsq, Vpminsq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3889 ASMJIT_INST_3x(vpminsq, Vpminsq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3890 ASMJIT_INST_3x(vpminsw, Vpminsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3891 ASMJIT_INST_3x(vpminsw, Vpminsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3892 ASMJIT_INST_3x(vpminsw, Vpminsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3893 ASMJIT_INST_3x(vpminsw, Vpminsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3894 ASMJIT_INST_3x(vpminsw, Vpminsw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3895 ASMJIT_INST_3x(vpminsw, Vpminsw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3896 ASMJIT_INST_3x(vpminub, Vpminub, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3897 ASMJIT_INST_3x(vpminub, Vpminub, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3898 ASMJIT_INST_3x(vpminub, Vpminub, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3899 ASMJIT_INST_3x(vpminub, Vpminub, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3900 ASMJIT_INST_3x(vpminub, Vpminub, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3901 ASMJIT_INST_3x(vpminub, Vpminub, Zmm, Zmm, Mem) // AVX512_BW{kz}
3902 ASMJIT_INST_3x(vpminud, Vpminud, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
3903 ASMJIT_INST_3x(vpminud, Vpminud, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
3904 ASMJIT_INST_3x(vpminud, Vpminud, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
3905 ASMJIT_INST_3x(vpminud, Vpminud, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
3906 ASMJIT_INST_3x(vpminud, Vpminud, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
3907 ASMJIT_INST_3x(vpminud, Vpminud, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
3908 ASMJIT_INST_3x(vpminuq, Vpminuq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
3909 ASMJIT_INST_3x(vpminuq, Vpminuq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
3910 ASMJIT_INST_3x(vpminuq, Vpminuq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
3911 ASMJIT_INST_3x(vpminuq, Vpminuq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
3912 ASMJIT_INST_3x(vpminuq, Vpminuq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
3913 ASMJIT_INST_3x(vpminuq, Vpminuq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
3914 ASMJIT_INST_3x(vpminuw, Vpminuw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
3915 ASMJIT_INST_3x(vpminuw, Vpminuw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
3916 ASMJIT_INST_3x(vpminuw, Vpminuw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
3917 ASMJIT_INST_3x(vpminuw, Vpminuw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
3918 ASMJIT_INST_3x(vpminuw, Vpminuw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
3919 ASMJIT_INST_3x(vpminuw, Vpminuw, Zmm, Zmm, Mem) // AVX512_BW{kz}
3920 ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Xmm) // AVX512_BW-VL
3921 ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Ymm) // AVX512_BW-VL
3922 ASMJIT_INST_2x(vpmovb2m, Vpmovb2m, KReg, Zmm) // AVX512_BW
3923 ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Xmm) // AVX512_DQ-VL
3924 ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Ymm) // AVX512_DQ-VL
3925 ASMJIT_INST_2x(vpmovd2m, Vpmovd2m, KReg, Zmm) // AVX512_DQ
3926 ASMJIT_INST_2x(vpmovdb, Vpmovdb, Xmm, Xmm) // AVX512_F{kz}-VL
3927 ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Xmm) // AVX512_F{kz}-VL
3928 ASMJIT_INST_2x(vpmovdb, Vpmovdb, Xmm, Ymm) // AVX512_F{kz}-VL
3929 ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Ymm) // AVX512_F{kz}-VL
3930 ASMJIT_INST_2x(vpmovdb, Vpmovdb, Xmm, Zmm) // AVX512_F{kz}
3931 ASMJIT_INST_2x(vpmovdb, Vpmovdb, Mem, Zmm) // AVX512_F{kz}
3932 ASMJIT_INST_2x(vpmovdw, Vpmovdw, Xmm, Xmm) // AVX512_F{kz}-VL
3933 ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Xmm) // AVX512_F{kz}-VL
3934 ASMJIT_INST_2x(vpmovdw, Vpmovdw, Xmm, Ymm) // AVX512_F{kz}-VL
3935 ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Ymm) // AVX512_F{kz}-VL
3936 ASMJIT_INST_2x(vpmovdw, Vpmovdw, Ymm, Zmm) // AVX512_F{kz}
3937 ASMJIT_INST_2x(vpmovdw, Vpmovdw, Mem, Zmm) // AVX512_F{kz}
3938 ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Xmm, KReg) // AVX512_BW-VL
3939 ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Ymm, KReg) // AVX512_BW-VL
3940 ASMJIT_INST_2x(vpmovm2b, Vpmovm2b, Zmm, KReg) // AVX512_BW
3941 ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Xmm, KReg) // AVX512_DQ-VL
3942 ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Ymm, KReg) // AVX512_DQ-VL
3943 ASMJIT_INST_2x(vpmovm2d, Vpmovm2d, Zmm, KReg) // AVX512_DQ
3944 ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Xmm, KReg) // AVX512_DQ-VL
3945 ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Ymm, KReg) // AVX512_DQ-VL
3946 ASMJIT_INST_2x(vpmovm2q, Vpmovm2q, Zmm, KReg) // AVX512_DQ
3947 ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Xmm, KReg) // AVX512_BW-VL
3948 ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Ymm, KReg) // AVX512_BW-VL
3949 ASMJIT_INST_2x(vpmovm2w, Vpmovm2w, Zmm, KReg) // AVX512_BW
3950 ASMJIT_INST_2x(vpmovmskb, Vpmovmskb, Gp, Xmm) // AVX
3951 ASMJIT_INST_2x(vpmovmskb, Vpmovmskb, Gp, Ymm) // AVX2
3952 ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Xmm) // AVX512_DQ-VL
3953 ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Ymm) // AVX512_DQ-VL
3954 ASMJIT_INST_2x(vpmovq2m, Vpmovq2m, KReg, Zmm) // AVX512_DQ
3955 ASMJIT_INST_2x(vpmovqb, Vpmovqb, Xmm, Xmm) // AVX512_F{kz}-VL
3956 ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Xmm) // AVX512_F{kz}-VL
3957 ASMJIT_INST_2x(vpmovqb, Vpmovqb, Xmm, Ymm) // AVX512_F{kz}-VL
3958 ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Ymm) // AVX512_F{kz}-VL
3959 ASMJIT_INST_2x(vpmovqb, Vpmovqb, Xmm, Zmm) // AVX512_F{kz}
3960 ASMJIT_INST_2x(vpmovqb, Vpmovqb, Mem, Zmm) // AVX512_F{kz}
3961 ASMJIT_INST_2x(vpmovqd, Vpmovqd, Xmm, Xmm) // AVX512_F{kz}-VL
3962 ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Xmm) // AVX512_F{kz}-VL
3963 ASMJIT_INST_2x(vpmovqd, Vpmovqd, Xmm, Ymm) // AVX512_F{kz}-VL
3964 ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Ymm) // AVX512_F{kz}-VL
3965 ASMJIT_INST_2x(vpmovqd, Vpmovqd, Ymm, Zmm) // AVX512_F{kz}
3966 ASMJIT_INST_2x(vpmovqd, Vpmovqd, Mem, Zmm) // AVX512_F{kz}
3967 ASMJIT_INST_2x(vpmovqw, Vpmovqw, Xmm, Xmm) // AVX512_F{kz}-VL
3968 ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Xmm) // AVX512_F{kz}-VL
3969 ASMJIT_INST_2x(vpmovqw, Vpmovqw, Xmm, Ymm) // AVX512_F{kz}-VL
3970 ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Ymm) // AVX512_F{kz}-VL
3971 ASMJIT_INST_2x(vpmovqw, Vpmovqw, Xmm, Zmm) // AVX512_F{kz}
3972 ASMJIT_INST_2x(vpmovqw, Vpmovqw, Mem, Zmm) // AVX512_F{kz}
3973 ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Xmm, Xmm) // AVX512_F{kz}-VL
3974 ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Xmm) // AVX512_F{kz}-VL
3975 ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Xmm, Ymm) // AVX512_F{kz}-VL
3976 ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Ymm) // AVX512_F{kz}-VL
3977 ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Xmm, Zmm) // AVX512_F{kz}
3978 ASMJIT_INST_2x(vpmovsdb, Vpmovsdb, Mem, Zmm) // AVX512_F{kz}
3979 ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Xmm, Xmm) // AVX512_F{kz}-VL
3980 ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Xmm) // AVX512_F{kz}-VL
3981 ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Xmm, Ymm) // AVX512_F{kz}-VL
3982 ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Ymm) // AVX512_F{kz}-VL
3983 ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Ymm, Zmm) // AVX512_F{kz}
3984 ASMJIT_INST_2x(vpmovsdw, Vpmovsdw, Mem, Zmm) // AVX512_F{kz}
3985 ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Xmm, Xmm) // AVX512_F{kz}-VL
3986 ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Xmm) // AVX512_F{kz}-VL
3987 ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Xmm, Ymm) // AVX512_F{kz}-VL
3988 ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Ymm) // AVX512_F{kz}-VL
3989 ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Xmm, Zmm) // AVX512_F{kz}
3990 ASMJIT_INST_2x(vpmovsqb, Vpmovsqb, Mem, Zmm) // AVX512_F{kz}
3991 ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Xmm, Xmm) // AVX512_F{kz}-VL
3992 ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Xmm) // AVX512_F{kz}-VL
3993 ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Xmm, Ymm) // AVX512_F{kz}-VL
3994 ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Ymm) // AVX512_F{kz}-VL
3995 ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Ymm, Zmm) // AVX512_F{kz}
3996 ASMJIT_INST_2x(vpmovsqd, Vpmovsqd, Mem, Zmm) // AVX512_F{kz}
3997 ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Xmm, Xmm) // AVX512_F{kz}-VL
3998 ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Xmm) // AVX512_F{kz}-VL
3999 ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Xmm, Ymm) // AVX512_F{kz}-VL
4000 ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Ymm) // AVX512_F{kz}-VL
4001 ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Xmm, Zmm) // AVX512_F{kz}
4002 ASMJIT_INST_2x(vpmovsqw, Vpmovsqw, Mem, Zmm) // AVX512_F{kz}
4003 ASMJIT_INST_2x(vpmovswb, Vpmovswb, Xmm, Xmm) // AVX512_BW{kz}-VL
4004 ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Xmm) // AVX512_BW{kz}-VL
4005 ASMJIT_INST_2x(vpmovswb, Vpmovswb, Xmm, Ymm) // AVX512_BW{kz}-VL
4006 ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Ymm) // AVX512_BW{kz}-VL
4007 ASMJIT_INST_2x(vpmovswb, Vpmovswb, Ymm, Zmm) // AVX512_BW{kz}
4008 ASMJIT_INST_2x(vpmovswb, Vpmovswb, Mem, Zmm) // AVX512_BW{kz}
4009 ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4010 ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Xmm, Mem) // AVX AVX512_F{kz}-VL
4011 ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4012 ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4013 ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Zmm, Xmm) // AVX512_F{kz}
4014 ASMJIT_INST_2x(vpmovsxbd, Vpmovsxbd, Zmm, Mem) // AVX512_F{kz}
4015 ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4016 ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Xmm, Mem) // AVX AVX512_F{kz}-VL
4017 ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4018 ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4019 ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Zmm, Xmm) // AVX512_F{kz}
4020 ASMJIT_INST_2x(vpmovsxbq, Vpmovsxbq, Zmm, Mem) // AVX512_F{kz}
4021 ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4022 ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4023 ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL
4024 ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4025 ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Zmm, Ymm) // AVX512_BW{kz}
4026 ASMJIT_INST_2x(vpmovsxbw, Vpmovsxbw, Zmm, Mem) // AVX512_BW{kz}
4027 ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4028 ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Xmm, Mem) // AVX AVX512_F{kz}-VL
4029 ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4030 ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4031 ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Zmm, Ymm) // AVX512_F{kz}
4032 ASMJIT_INST_2x(vpmovsxdq, Vpmovsxdq, Zmm, Mem) // AVX512_F{kz}
4033 ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4034 ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Xmm, Mem) // AVX AVX512_F{kz}-VL
4035 ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4036 ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4037 ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Zmm, Ymm) // AVX512_F{kz}
4038 ASMJIT_INST_2x(vpmovsxwd, Vpmovsxwd, Zmm, Mem) // AVX512_F{kz}
4039 ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4040 ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Xmm, Mem) // AVX AVX512_F{kz}-VL
4041 ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4042 ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4043 ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Zmm, Xmm) // AVX512_F{kz}
4044 ASMJIT_INST_2x(vpmovsxwq, Vpmovsxwq, Zmm, Mem) // AVX512_F{kz}
4045 ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Xmm, Xmm) // AVX512_F{kz}-VL
4046 ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Xmm) // AVX512_F{kz}-VL
4047 ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Xmm, Ymm) // AVX512_F{kz}-VL
4048 ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Ymm) // AVX512_F{kz}-VL
4049 ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Xmm, Zmm) // AVX512_F{kz}
4050 ASMJIT_INST_2x(vpmovusdb, Vpmovusdb, Mem, Zmm) // AVX512_F{kz}
4051 ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Xmm, Xmm) // AVX512_F{kz}-VL
4052 ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Xmm) // AVX512_F{kz}-VL
4053 ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Xmm, Ymm) // AVX512_F{kz}-VL
4054 ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Ymm) // AVX512_F{kz}-VL
4055 ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Ymm, Zmm) // AVX512_F{kz}
4056 ASMJIT_INST_2x(vpmovusdw, Vpmovusdw, Mem, Zmm) // AVX512_F{kz}
4057 ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Xmm, Xmm) // AVX512_F{kz}-VL
4058 ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Xmm) // AVX512_F{kz}-VL
4059 ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Xmm, Ymm) // AVX512_F{kz}-VL
4060 ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Ymm) // AVX512_F{kz}-VL
4061 ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Xmm, Zmm) // AVX512_F{kz}
4062 ASMJIT_INST_2x(vpmovusqb, Vpmovusqb, Mem, Zmm) // AVX512_F{kz}
4063 ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Xmm, Xmm) // AVX512_F{kz}-VL
4064 ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Xmm) // AVX512_F{kz}-VL
4065 ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Xmm, Ymm) // AVX512_F{kz}-VL
4066 ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Ymm) // AVX512_F{kz}-VL
4067 ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Ymm, Zmm) // AVX512_F{kz}
4068 ASMJIT_INST_2x(vpmovusqd, Vpmovusqd, Mem, Zmm) // AVX512_F{kz}
4069 ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Xmm, Xmm) // AVX512_F{kz}-VL
4070 ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Xmm) // AVX512_F{kz}-VL
4071 ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Xmm, Ymm) // AVX512_F{kz}-VL
4072 ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Ymm) // AVX512_F{kz}-VL
4073 ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Xmm, Zmm) // AVX512_F{kz}
4074 ASMJIT_INST_2x(vpmovusqw, Vpmovusqw, Mem, Zmm) // AVX512_F{kz}
4075 ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Xmm, Xmm) // AVX512_BW{kz}-VL
4076 ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Xmm) // AVX512_BW{kz}-VL
4077 ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Xmm, Ymm) // AVX512_BW{kz}-VL
4078 ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Ymm) // AVX512_BW{kz}-VL
4079 ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Ymm, Zmm) // AVX512_BW{kz}
4080 ASMJIT_INST_2x(vpmovuswb, Vpmovuswb, Mem, Zmm) // AVX512_BW{kz}
4081 ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Xmm) // AVX512_BW-VL
4082 ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Ymm) // AVX512_BW-VL
4083 ASMJIT_INST_2x(vpmovw2m, Vpmovw2m, KReg, Zmm) // AVX512_BW
4084 ASMJIT_INST_2x(vpmovwb, Vpmovwb, Xmm, Xmm) // AVX512_BW{kz}-VL
4085 ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Xmm) // AVX512_BW{kz}-VL
4086 ASMJIT_INST_2x(vpmovwb, Vpmovwb, Xmm, Ymm) // AVX512_BW{kz}-VL
4087 ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Ymm) // AVX512_BW{kz}-VL
4088 ASMJIT_INST_2x(vpmovwb, Vpmovwb, Ymm, Zmm) // AVX512_BW{kz}
4089 ASMJIT_INST_2x(vpmovwb, Vpmovwb, Mem, Zmm) // AVX512_BW{kz}
4090 ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4091 ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Xmm, Mem) // AVX AVX512_F{kz}-VL
4092 ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4093 ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4094 ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Zmm, Xmm) // AVX512_F{kz}
4095 ASMJIT_INST_2x(vpmovzxbd, Vpmovzxbd, Zmm, Mem) // AVX512_F{kz}
4096 ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4097 ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Xmm, Mem) // AVX AVX512_F{kz}-VL
4098 ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4099 ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4100 ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Zmm, Xmm) // AVX512_F{kz}
4101 ASMJIT_INST_2x(vpmovzxbq, Vpmovzxbq, Zmm, Mem) // AVX512_F{kz}
4102 ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4103 ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4104 ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL
4105 ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4106 ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Zmm, Ymm) // AVX512_BW{kz}
4107 ASMJIT_INST_2x(vpmovzxbw, Vpmovzxbw, Zmm, Mem) // AVX512_BW{kz}
4108 ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4109 ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Xmm, Mem) // AVX AVX512_F{kz}-VL
4110 ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4111 ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4112 ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Zmm, Ymm) // AVX512_F{kz}
4113 ASMJIT_INST_2x(vpmovzxdq, Vpmovzxdq, Zmm, Mem) // AVX512_F{kz}
4114 ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4115 ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Xmm, Mem) // AVX AVX512_F{kz}-VL
4116 ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4117 ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4118 ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Zmm, Ymm) // AVX512_F{kz}
4119 ASMJIT_INST_2x(vpmovzxwd, Vpmovzxwd, Zmm, Mem) // AVX512_F{kz}
4120 ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4121 ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Xmm, Mem) // AVX AVX512_F{kz}-VL
4122 ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4123 ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4124 ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Zmm, Xmm) // AVX512_F{kz}
4125 ASMJIT_INST_2x(vpmovzxwq, Vpmovzxwq, Zmm, Mem) // AVX512_F{kz}
4126 ASMJIT_INST_3x(vpmuldq, Vpmuldq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4127 ASMJIT_INST_3x(vpmuldq, Vpmuldq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4128 ASMJIT_INST_3x(vpmuldq, Vpmuldq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
4129 ASMJIT_INST_3x(vpmuldq, Vpmuldq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4130 ASMJIT_INST_3x(vpmuldq, Vpmuldq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4131 ASMJIT_INST_3x(vpmuldq, Vpmuldq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4132 ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4133 ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4134 ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4135 ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4136 ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4137 ASMJIT_INST_3x(vpmulhrsw, Vpmulhrsw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4138 ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4139 ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4140 ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4141 ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4142 ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4143 ASMJIT_INST_3x(vpmulhuw, Vpmulhuw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4144 ASMJIT_INST_3x(vpmulhw, Vpmulhw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4145 ASMJIT_INST_3x(vpmulhw, Vpmulhw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4146 ASMJIT_INST_3x(vpmulhw, Vpmulhw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4147 ASMJIT_INST_3x(vpmulhw, Vpmulhw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4148 ASMJIT_INST_3x(vpmulhw, Vpmulhw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4149 ASMJIT_INST_3x(vpmulhw, Vpmulhw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4150 ASMJIT_INST_3x(vpmulld, Vpmulld, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
4151 ASMJIT_INST_3x(vpmulld, Vpmulld, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
4152 ASMJIT_INST_3x(vpmulld, Vpmulld, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
4153 ASMJIT_INST_3x(vpmulld, Vpmulld, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4154 ASMJIT_INST_3x(vpmulld, Vpmulld, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4155 ASMJIT_INST_3x(vpmulld, Vpmulld, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4156 ASMJIT_INST_3x(vpmullq, Vpmullq, Xmm, Xmm, Xmm) // AVX512_DQ{kz|b64}-VL
4157 ASMJIT_INST_3x(vpmullq, Vpmullq, Xmm, Xmm, Mem) // AVX512_DQ{kz|b64}-VL
4158 ASMJIT_INST_3x(vpmullq, Vpmullq, Ymm, Ymm, Ymm) // AVX512_DQ{kz|b64}-VL
4159 ASMJIT_INST_3x(vpmullq, Vpmullq, Ymm, Ymm, Mem) // AVX512_DQ{kz|b64}-VL
4160 ASMJIT_INST_3x(vpmullq, Vpmullq, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64}
4161 ASMJIT_INST_3x(vpmullq, Vpmullq, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64}
4162 ASMJIT_INST_3x(vpmullw, Vpmullw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4163 ASMJIT_INST_3x(vpmullw, Vpmullw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4164 ASMJIT_INST_3x(vpmullw, Vpmullw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4165 ASMJIT_INST_3x(vpmullw, Vpmullw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4166 ASMJIT_INST_3x(vpmullw, Vpmullw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4167 ASMJIT_INST_3x(vpmullw, Vpmullw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4168 ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Xmm, Xmm, Xmm) // AVX512_VBMI{kz|b64}-VL
4169 ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Xmm, Xmm, Mem) // AVX512_VBMI{kz|b64}-VL
4170 ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Ymm, Ymm, Ymm) // AVX512_VBMI{kz|b64}-VL
4171 ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Ymm, Ymm, Mem) // AVX512_VBMI{kz|b64}-VL
4172 ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Zmm, Zmm, Zmm) // AVX512_VBMI{kz|b64}
4173 ASMJIT_INST_3x(vpmultishiftqb, Vpmultishiftqb, Zmm, Zmm, Mem) // AVX512_VBMI{kz|b64}
4174 ASMJIT_INST_3x(vpmuludq, Vpmuludq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4175 ASMJIT_INST_3x(vpmuludq, Vpmuludq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4176 ASMJIT_INST_3x(vpmuludq, Vpmuludq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
4177 ASMJIT_INST_3x(vpmuludq, Vpmuludq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4178 ASMJIT_INST_3x(vpmuludq, Vpmuludq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4179 ASMJIT_INST_3x(vpmuludq, Vpmuludq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4180 ASMJIT_INST_2x(vpopcntb, Vpopcntb, Xmm, Xmm) // AVX512_BITALG{kz|b32}-VL
4181 ASMJIT_INST_2x(vpopcntb, Vpopcntb, Xmm, Mem) // AVX512_BITALG{kz|b32}-VL
4182 ASMJIT_INST_2x(vpopcntb, Vpopcntb, Ymm, Ymm) // AVX512_BITALG{kz|b32}-VL
4183 ASMJIT_INST_2x(vpopcntb, Vpopcntb, Ymm, Mem) // AVX512_BITALG{kz|b32}-VL
4184 ASMJIT_INST_2x(vpopcntb, Vpopcntb, Zmm, Zmm) // AVX512_BITALG{kz|b32}
4185 ASMJIT_INST_2x(vpopcntb, Vpopcntb, Zmm, Mem) // AVX512_BITALG{kz|b32}
4186 ASMJIT_INST_2x(vpopcntd, Vpopcntd, Xmm, Xmm) // AVX512_VPOPCNTDQ{kz|b32}-VL
4187 ASMJIT_INST_2x(vpopcntd, Vpopcntd, Xmm, Mem) // AVX512_VPOPCNTDQ{kz|b32}-VL
4188 ASMJIT_INST_2x(vpopcntd, Vpopcntd, Ymm, Ymm) // AVX512_VPOPCNTDQ{kz|b32}-VL
4189 ASMJIT_INST_2x(vpopcntd, Vpopcntd, Ymm, Mem) // AVX512_VPOPCNTDQ{kz|b32}-VL
4190 ASMJIT_INST_2x(vpopcntd, Vpopcntd, Zmm, Zmm) // AVX512_VPOPCNTDQ{kz|b32}
4191 ASMJIT_INST_2x(vpopcntd, Vpopcntd, Zmm, Mem) // AVX512_VPOPCNTDQ{kz|b32}
4192 ASMJIT_INST_2x(vpopcntq, Vpopcntq, Xmm, Xmm) // AVX512_VPOPCNTDQ{kz|b64}-VL
4193 ASMJIT_INST_2x(vpopcntq, Vpopcntq, Xmm, Mem) // AVX512_VPOPCNTDQ{kz|b64}-VL
4194 ASMJIT_INST_2x(vpopcntq, Vpopcntq, Ymm, Ymm) // AVX512_VPOPCNTDQ{kz|b64}-VL
4195 ASMJIT_INST_2x(vpopcntq, Vpopcntq, Ymm, Mem) // AVX512_VPOPCNTDQ{kz|b64}-VL
4196 ASMJIT_INST_2x(vpopcntq, Vpopcntq, Zmm, Zmm) // AVX512_VPOPCNTDQ{kz|b64}
4197 ASMJIT_INST_2x(vpopcntq, Vpopcntq, Zmm, Mem) // AVX512_VPOPCNTDQ{kz|b64}
4198 ASMJIT_INST_2x(vpopcntw, Vpopcntw, Xmm, Xmm) // AVX512_BITALG{kz|b32}-VL
4199 ASMJIT_INST_2x(vpopcntw, Vpopcntw, Xmm, Mem) // AVX512_BITALG{kz|b32}-VL
4200 ASMJIT_INST_2x(vpopcntw, Vpopcntw, Ymm, Ymm) // AVX512_BITALG{kz|b32}-VL
4201 ASMJIT_INST_2x(vpopcntw, Vpopcntw, Ymm, Mem) // AVX512_BITALG{kz|b32}-VL
4202 ASMJIT_INST_2x(vpopcntw, Vpopcntw, Zmm, Zmm) // AVX512_BITALG{kz|b32}
4203 ASMJIT_INST_2x(vpopcntw, Vpopcntw, Zmm, Mem) // AVX512_BITALG{kz|b32}
4204 ASMJIT_INST_3x(vpor, Vpor, Xmm, Xmm, Xmm) // AVX
4205 ASMJIT_INST_3x(vpor, Vpor, Xmm, Xmm, Mem) // AVX
4206 ASMJIT_INST_3x(vpor, Vpor, Ymm, Ymm, Ymm) // AVX2
4207 ASMJIT_INST_3x(vpor, Vpor, Ymm, Ymm, Mem) // AVX2
4208 ASMJIT_INST_3x(vpord, Vpord, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
4209 ASMJIT_INST_3x(vpord, Vpord, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
4210 ASMJIT_INST_3x(vpord, Vpord, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
4211 ASMJIT_INST_3x(vpord, Vpord, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
4212 ASMJIT_INST_3x(vpord, Vpord, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4213 ASMJIT_INST_3x(vpord, Vpord, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4214 ASMJIT_INST_3x(vporq, Vporq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4215 ASMJIT_INST_3x(vporq, Vporq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
4216 ASMJIT_INST_3x(vporq, Vporq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4217 ASMJIT_INST_3x(vporq, Vporq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
4218 ASMJIT_INST_3x(vporq, Vporq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4219 ASMJIT_INST_3x(vporq, Vporq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4220 ASMJIT_INST_3i(vprold, Vprold, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
4221 ASMJIT_INST_3i(vprold, Vprold, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
4222 ASMJIT_INST_3i(vprold, Vprold, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
4223 ASMJIT_INST_3i(vprold, Vprold, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4224 ASMJIT_INST_3i(vprold, Vprold, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4225 ASMJIT_INST_3i(vprold, Vprold, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4226 ASMJIT_INST_3i(vprolq, Vprolq, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
4227 ASMJIT_INST_3i(vprolq, Vprolq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
4228 ASMJIT_INST_3i(vprolq, Vprolq, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
4229 ASMJIT_INST_3i(vprolq, Vprolq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4230 ASMJIT_INST_3i(vprolq, Vprolq, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4231 ASMJIT_INST_3i(vprolq, Vprolq, Zmm, Mem, Imm) // AVX512_F{kz|b64}
4232 ASMJIT_INST_3x(vprolvd, Vprolvd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
4233 ASMJIT_INST_3x(vprolvd, Vprolvd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
4234 ASMJIT_INST_3x(vprolvd, Vprolvd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
4235 ASMJIT_INST_3x(vprolvd, Vprolvd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
4236 ASMJIT_INST_3x(vprolvd, Vprolvd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4237 ASMJIT_INST_3x(vprolvd, Vprolvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4238 ASMJIT_INST_3x(vprolvq, Vprolvq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4239 ASMJIT_INST_3x(vprolvq, Vprolvq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
4240 ASMJIT_INST_3x(vprolvq, Vprolvq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4241 ASMJIT_INST_3x(vprolvq, Vprolvq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
4242 ASMJIT_INST_3x(vprolvq, Vprolvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4243 ASMJIT_INST_3x(vprolvq, Vprolvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4244 ASMJIT_INST_3i(vprord, Vprord, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
4245 ASMJIT_INST_3i(vprord, Vprord, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
4246 ASMJIT_INST_3i(vprord, Vprord, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
4247 ASMJIT_INST_3i(vprord, Vprord, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4248 ASMJIT_INST_3i(vprord, Vprord, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4249 ASMJIT_INST_3i(vprord, Vprord, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4250 ASMJIT_INST_3i(vprorq, Vprorq, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
4251 ASMJIT_INST_3i(vprorq, Vprorq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
4252 ASMJIT_INST_3i(vprorq, Vprorq, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
4253 ASMJIT_INST_3i(vprorq, Vprorq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4254 ASMJIT_INST_3i(vprorq, Vprorq, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4255 ASMJIT_INST_3i(vprorq, Vprorq, Zmm, Mem, Imm) // AVX512_F{kz|b64}
4256 ASMJIT_INST_3x(vprorvd, Vprorvd, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
4257 ASMJIT_INST_3x(vprorvd, Vprorvd, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
4258 ASMJIT_INST_3x(vprorvd, Vprorvd, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
4259 ASMJIT_INST_3x(vprorvd, Vprorvd, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
4260 ASMJIT_INST_3x(vprorvd, Vprorvd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4261 ASMJIT_INST_3x(vprorvd, Vprorvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4262 ASMJIT_INST_3x(vprorvq, Vprorvq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4263 ASMJIT_INST_3x(vprorvq, Vprorvq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
4264 ASMJIT_INST_3x(vprorvq, Vprorvq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4265 ASMJIT_INST_3x(vprorvq, Vprorvq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
4266 ASMJIT_INST_3x(vprorvq, Vprorvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4267 ASMJIT_INST_3x(vprorvq, Vprorvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4268 ASMJIT_INST_3x(vpsadbw, Vpsadbw, Xmm, Xmm, Xmm) // AVX AVX512_BW-VL
4269 ASMJIT_INST_3x(vpsadbw, Vpsadbw, Xmm, Xmm, Mem) // AVX AVX512_BW-VL
4270 ASMJIT_INST_3x(vpsadbw, Vpsadbw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW-VL
4271 ASMJIT_INST_3x(vpsadbw, Vpsadbw, Ymm, Ymm, Mem) // AVX2 AVX512_BW-VL
4272 ASMJIT_INST_3x(vpsadbw, Vpsadbw, Zmm, Zmm, Zmm) // AVX512_BW
4273 ASMJIT_INST_3x(vpsadbw, Vpsadbw, Zmm, Zmm, Mem) // AVX512_BW
4274 ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Xmm) // AVX512_F{k}-VL
4275 ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Ymm) // AVX512_F{k}-VL
4276 ASMJIT_INST_2x(vpscatterdd, Vpscatterdd, Mem, Zmm) // AVX512_F{k}
4277 ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Xmm) // AVX512_F{k}-VL
4278 ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Ymm) // AVX512_F{k}-VL
4279 ASMJIT_INST_2x(vpscatterdq, Vpscatterdq, Mem, Zmm) // AVX512_F{k}
4280 ASMJIT_INST_2x(vpscatterqd, Vpscatterqd, Mem, Xmm) // AVX512_F{k}-VL
4281 ASMJIT_INST_2x(vpscatterqd, Vpscatterqd, Mem, Ymm) // AVX512_F{k}
4282 ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Xmm) // AVX512_F{k}-VL
4283 ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Ymm) // AVX512_F{k}-VL
4284 ASMJIT_INST_2x(vpscatterqq, Vpscatterqq, Mem, Zmm) // AVX512_F{k}
4285 ASMJIT_INST_4i(vpshldd, Vpshldd, Xmm, Xmm, Xmm, Imm) // AVX512_VBMI2{kz}-VL
4286 ASMJIT_INST_4i(vpshldd, Vpshldd, Xmm, Xmm, Mem, Imm) // AVX512_VBMI2{kz}-VL
4287 ASMJIT_INST_4i(vpshldd, Vpshldd, Ymm, Ymm, Ymm, Imm) // AVX512_VBMI2{kz}-VL
4288 ASMJIT_INST_4i(vpshldd, Vpshldd, Ymm, Ymm, Mem, Imm) // AVX512_VBMI2{kz}-VL
4289 ASMJIT_INST_4i(vpshldd, Vpshldd, Zmm, Zmm, Zmm, Imm) // AVX512_VBMI2{kz}
4290 ASMJIT_INST_4i(vpshldd, Vpshldd, Zmm, Zmm, Mem, Imm) // AVX512_VBMI2{kz}
4291 ASMJIT_INST_3x(vpshldvd, Vpshldvd, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
4292 ASMJIT_INST_3x(vpshldvd, Vpshldvd, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL
4293 ASMJIT_INST_3x(vpshldvd, Vpshldvd, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
4294 ASMJIT_INST_3x(vpshldvd, Vpshldvd, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL
4295 ASMJIT_INST_3x(vpshldvd, Vpshldvd, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz}
4296 ASMJIT_INST_3x(vpshldvd, Vpshldvd, Zmm, Zmm, Mem) // AVX512_VBMI2{kz}
4297 ASMJIT_INST_3x(vpshldvq, Vpshldvq, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
4298 ASMJIT_INST_3x(vpshldvq, Vpshldvq, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL
4299 ASMJIT_INST_3x(vpshldvq, Vpshldvq, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
4300 ASMJIT_INST_3x(vpshldvq, Vpshldvq, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL
4301 ASMJIT_INST_3x(vpshldvq, Vpshldvq, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz}
4302 ASMJIT_INST_3x(vpshldvq, Vpshldvq, Zmm, Zmm, Mem) // AVX512_VBMI2{kz}
4303 ASMJIT_INST_3x(vpshldvw, Vpshldvw, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
4304 ASMJIT_INST_3x(vpshldvw, Vpshldvw, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL
4305 ASMJIT_INST_3x(vpshldvw, Vpshldvw, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
4306 ASMJIT_INST_3x(vpshldvw, Vpshldvw, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL
4307 ASMJIT_INST_3x(vpshldvw, Vpshldvw, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz}
4308 ASMJIT_INST_3x(vpshldvw, Vpshldvw, Zmm, Zmm, Mem) // AVX512_VBMI2{kz}
4309 ASMJIT_INST_4i(vpshrdd, Vpshrdd, Xmm, Xmm, Xmm, Imm) // AVX512_VBMI2{kz}-VL
4310 ASMJIT_INST_4i(vpshrdd, Vpshrdd, Xmm, Xmm, Mem, Imm) // AVX512_VBMI2{kz}-VL
4311 ASMJIT_INST_4i(vpshrdd, Vpshrdd, Ymm, Ymm, Ymm, Imm) // AVX512_VBMI2{kz}-VL
4312 ASMJIT_INST_4i(vpshrdd, Vpshrdd, Ymm, Ymm, Mem, Imm) // AVX512_VBMI2{kz}-VL
4313 ASMJIT_INST_4i(vpshrdd, Vpshrdd, Zmm, Zmm, Zmm, Imm) // AVX512_VBMI2{kz}
4314 ASMJIT_INST_4i(vpshrdd, Vpshrdd, Zmm, Zmm, Mem, Imm) // AVX512_VBMI2{kz}
4315 ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
4316 ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL
4317 ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
4318 ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL
4319 ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz}
4320 ASMJIT_INST_3x(vpshrdvd, Vpshrdvd, Zmm, Zmm, Mem) // AVX512_VBMI2{kz}
4321 ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
4322 ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL
4323 ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
4324 ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL
4325 ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz}
4326 ASMJIT_INST_3x(vpshrdvq, Vpshrdvq, Zmm, Zmm, Mem) // AVX512_VBMI2{kz}
4327 ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Xmm, Xmm, Xmm) // AVX512_VBMI2{kz}-VL
4328 ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Xmm, Xmm, Mem) // AVX512_VBMI2{kz}-VL
4329 ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Ymm, Ymm, Ymm) // AVX512_VBMI2{kz}-VL
4330 ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Ymm, Ymm, Mem) // AVX512_VBMI2{kz}-VL
4331 ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Zmm, Zmm, Zmm) // AVX512_VBMI2{kz}
4332 ASMJIT_INST_3x(vpshrdvw, Vpshrdvw, Zmm, Zmm, Mem) // AVX512_VBMI2{kz}
4333 ASMJIT_INST_4i(vpshrdw, Vpshrdw, Xmm, Xmm, Xmm, Imm) // AVX512_VBMI2{kz}-VL
4334 ASMJIT_INST_4i(vpshrdw, Vpshrdw, Xmm, Xmm, Mem, Imm) // AVX512_VBMI2{kz}-VL
4335 ASMJIT_INST_4i(vpshrdw, Vpshrdw, Ymm, Ymm, Ymm, Imm) // AVX512_VBMI2{kz}-VL
4336 ASMJIT_INST_4i(vpshrdw, Vpshrdw, Ymm, Ymm, Mem, Imm) // AVX512_VBMI2{kz}-VL
4337 ASMJIT_INST_4i(vpshrdw, Vpshrdw, Zmm, Zmm, Zmm, Imm) // AVX512_VBMI2{kz}
4338 ASMJIT_INST_4i(vpshrdw, Vpshrdw, Zmm, Zmm, Mem, Imm) // AVX512_VBMI2{kz}
4339 ASMJIT_INST_3x(vpshufb, Vpshufb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4340 ASMJIT_INST_3x(vpshufb, Vpshufb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4341 ASMJIT_INST_3x(vpshufb, Vpshufb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4342 ASMJIT_INST_3x(vpshufb, Vpshufb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4343 ASMJIT_INST_3x(vpshufb, Vpshufb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4344 ASMJIT_INST_3x(vpshufb, Vpshufb, Zmm, Zmm, Mem) // AVX512_BW{kz}
4345 ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Xmm, Xmm) // AVX512_BITALG{k}-VL
4346 ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Xmm, Mem) // AVX512_BITALG{k}-VL
4347 ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Ymm, Ymm) // AVX512_BITALG{k}-VL
4348 ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Ymm, Mem) // AVX512_BITALG{k}-VL
4349 ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Zmm, Zmm) // AVX512_BITALG{k}
4350 ASMJIT_INST_3x(vpshufbitqmb, Vpshufbitqmb, KReg, Zmm, Mem) // AVX512_BITALG{k}
4351 ASMJIT_INST_3i(vpshufd, Vpshufd, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL
4352 ASMJIT_INST_3i(vpshufd, Vpshufd, Xmm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL
4353 ASMJIT_INST_3i(vpshufd, Vpshufd, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL
4354 ASMJIT_INST_3i(vpshufd, Vpshufd, Ymm, Mem, Imm) // AVX2 AVX512_F{kz|b32}-VL
4355 ASMJIT_INST_3i(vpshufd, Vpshufd, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4356 ASMJIT_INST_3i(vpshufd, Vpshufd, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4357 ASMJIT_INST_3i(vpshufhw, Vpshufhw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL
4358 ASMJIT_INST_3i(vpshufhw, Vpshufhw, Xmm, Mem, Imm) // AVX AVX512_BW{kz}-VL
4359 ASMJIT_INST_3i(vpshufhw, Vpshufhw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL
4360 ASMJIT_INST_3i(vpshufhw, Vpshufhw, Ymm, Mem, Imm) // AVX2 AVX512_BW{kz}-VL
4361 ASMJIT_INST_3i(vpshufhw, Vpshufhw, Zmm, Zmm, Imm) // AVX512_BW{kz}
4362 ASMJIT_INST_3i(vpshufhw, Vpshufhw, Zmm, Mem, Imm) // AVX512_BW{kz}
4363 ASMJIT_INST_3i(vpshuflw, Vpshuflw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL
4364 ASMJIT_INST_3i(vpshuflw, Vpshuflw, Xmm, Mem, Imm) // AVX AVX512_BW{kz}-VL
4365 ASMJIT_INST_3i(vpshuflw, Vpshuflw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL
4366 ASMJIT_INST_3i(vpshuflw, Vpshuflw, Ymm, Mem, Imm) // AVX2 AVX512_BW{kz}-VL
4367 ASMJIT_INST_3i(vpshuflw, Vpshuflw, Zmm, Zmm, Imm) // AVX512_BW{kz}
4368 ASMJIT_INST_3i(vpshuflw, Vpshuflw, Zmm, Mem, Imm) // AVX512_BW{kz}
4369 ASMJIT_INST_3x(vpsignb, Vpsignb, Xmm, Xmm, Xmm) // AVX
4370 ASMJIT_INST_3x(vpsignb, Vpsignb, Xmm, Xmm, Mem) // AVX
4371 ASMJIT_INST_3x(vpsignb, Vpsignb, Ymm, Ymm, Ymm) // AVX2
4372 ASMJIT_INST_3x(vpsignb, Vpsignb, Ymm, Ymm, Mem) // AVX2
4373 ASMJIT_INST_3x(vpsignd, Vpsignd, Xmm, Xmm, Xmm) // AVX
4374 ASMJIT_INST_3x(vpsignd, Vpsignd, Xmm, Xmm, Mem) // AVX
4375 ASMJIT_INST_3x(vpsignd, Vpsignd, Ymm, Ymm, Ymm) // AVX2
4376 ASMJIT_INST_3x(vpsignd, Vpsignd, Ymm, Ymm, Mem) // AVX2
4377 ASMJIT_INST_3x(vpsignw, Vpsignw, Xmm, Xmm, Xmm) // AVX
4378 ASMJIT_INST_3x(vpsignw, Vpsignw, Xmm, Xmm, Mem) // AVX
4379 ASMJIT_INST_3x(vpsignw, Vpsignw, Ymm, Ymm, Ymm) // AVX2
4380 ASMJIT_INST_3x(vpsignw, Vpsignw, Ymm, Ymm, Mem) // AVX2
4381 ASMJIT_INST_3i(vpslld, Vpslld, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL
4382 ASMJIT_INST_3x(vpslld, Vpslld, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4383 ASMJIT_INST_3x(vpslld, Vpslld, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL
4384 ASMJIT_INST_3i(vpslld, Vpslld, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL
4385 ASMJIT_INST_3x(vpslld, Vpslld, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4386 ASMJIT_INST_3x(vpslld, Vpslld, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4387 ASMJIT_INST_3i(vpslld, Vpslld, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
4388 ASMJIT_INST_3i(vpslld, Vpslld, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4389 ASMJIT_INST_3x(vpslld, Vpslld, Zmm, Zmm, Xmm) // AVX512_F{kz}
4390 ASMJIT_INST_3x(vpslld, Vpslld, Zmm, Zmm, Mem) // AVX512_F{kz}
4391 ASMJIT_INST_3i(vpslld, Vpslld, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4392 ASMJIT_INST_3i(vpslld, Vpslld, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4393 ASMJIT_INST_3i(vpslldq, Vpslldq, Xmm, Xmm, Imm) // AVX AVX512_BW-VL
4394 ASMJIT_INST_3i(vpslldq, Vpslldq, Ymm, Ymm, Imm) // AVX2 AVX512_BW-VL
4395 ASMJIT_INST_3i(vpslldq, Vpslldq, Xmm, Mem, Imm) // AVX512_BW-VL
4396 ASMJIT_INST_3i(vpslldq, Vpslldq, Ymm, Mem, Imm) // AVX512_BW-VL
4397 ASMJIT_INST_3i(vpslldq, Vpslldq, Zmm, Zmm, Imm) // AVX512_BW
4398 ASMJIT_INST_3i(vpslldq, Vpslldq, Zmm, Mem, Imm) // AVX512_BW
4399 ASMJIT_INST_3i(vpsllq, Vpsllq, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL
4400 ASMJIT_INST_3x(vpsllq, Vpsllq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4401 ASMJIT_INST_3x(vpsllq, Vpsllq, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL
4402 ASMJIT_INST_3i(vpsllq, Vpsllq, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b64}-VL
4403 ASMJIT_INST_3x(vpsllq, Vpsllq, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4404 ASMJIT_INST_3x(vpsllq, Vpsllq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4405 ASMJIT_INST_3i(vpsllq, Vpsllq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
4406 ASMJIT_INST_3i(vpsllq, Vpsllq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4407 ASMJIT_INST_3x(vpsllq, Vpsllq, Zmm, Zmm, Xmm) // AVX512_F{kz}
4408 ASMJIT_INST_3x(vpsllq, Vpsllq, Zmm, Zmm, Mem) // AVX512_F{kz}
4409 ASMJIT_INST_3i(vpsllq, Vpsllq, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4410 ASMJIT_INST_3i(vpsllq, Vpsllq, Zmm, Mem, Imm) // AVX512_F{kz|b64}
4411 ASMJIT_INST_3x(vpsllvd, Vpsllvd, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b32}-VL
4412 ASMJIT_INST_3x(vpsllvd, Vpsllvd, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4413 ASMJIT_INST_3x(vpsllvd, Vpsllvd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
4414 ASMJIT_INST_3x(vpsllvd, Vpsllvd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4415 ASMJIT_INST_3x(vpsllvd, Vpsllvd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4416 ASMJIT_INST_3x(vpsllvd, Vpsllvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4417 ASMJIT_INST_3x(vpsllvq, Vpsllvq, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b64}-VL
4418 ASMJIT_INST_3x(vpsllvq, Vpsllvq, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4419 ASMJIT_INST_3x(vpsllvq, Vpsllvq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
4420 ASMJIT_INST_3x(vpsllvq, Vpsllvq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4421 ASMJIT_INST_3x(vpsllvq, Vpsllvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4422 ASMJIT_INST_3x(vpsllvq, Vpsllvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4423 ASMJIT_INST_3x(vpsllvw, Vpsllvw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
4424 ASMJIT_INST_3x(vpsllvw, Vpsllvw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
4425 ASMJIT_INST_3x(vpsllvw, Vpsllvw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
4426 ASMJIT_INST_3x(vpsllvw, Vpsllvw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
4427 ASMJIT_INST_3x(vpsllvw, Vpsllvw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4428 ASMJIT_INST_3x(vpsllvw, Vpsllvw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4429 ASMJIT_INST_3i(vpsllw, Vpsllw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL
4430 ASMJIT_INST_3x(vpsllw, Vpsllw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4431 ASMJIT_INST_3x(vpsllw, Vpsllw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4432 ASMJIT_INST_3i(vpsllw, Vpsllw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL
4433 ASMJIT_INST_3x(vpsllw, Vpsllw, Ymm, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL
4434 ASMJIT_INST_3x(vpsllw, Vpsllw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4435 ASMJIT_INST_3i(vpsllw, Vpsllw, Xmm, Mem, Imm) // AVX512_BW{kz}-VL
4436 ASMJIT_INST_3i(vpsllw, Vpsllw, Ymm, Mem, Imm) // AVX512_BW{kz}-VL
4437 ASMJIT_INST_3x(vpsllw, Vpsllw, Zmm, Zmm, Xmm) // AVX512_BW{kz}
4438 ASMJIT_INST_3x(vpsllw, Vpsllw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4439 ASMJIT_INST_3i(vpsllw, Vpsllw, Zmm, Zmm, Imm) // AVX512_BW{kz}
4440 ASMJIT_INST_3i(vpsllw, Vpsllw, Zmm, Mem, Imm) // AVX512_BW{kz}
4441 ASMJIT_INST_3i(vpsrad, Vpsrad, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL
4442 ASMJIT_INST_3x(vpsrad, Vpsrad, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4443 ASMJIT_INST_3x(vpsrad, Vpsrad, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL
4444 ASMJIT_INST_3i(vpsrad, Vpsrad, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL
4445 ASMJIT_INST_3x(vpsrad, Vpsrad, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4446 ASMJIT_INST_3x(vpsrad, Vpsrad, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4447 ASMJIT_INST_3i(vpsrad, Vpsrad, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
4448 ASMJIT_INST_3i(vpsrad, Vpsrad, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4449 ASMJIT_INST_3x(vpsrad, Vpsrad, Zmm, Zmm, Xmm) // AVX512_F{kz}
4450 ASMJIT_INST_3x(vpsrad, Vpsrad, Zmm, Zmm, Mem) // AVX512_F{kz}
4451 ASMJIT_INST_3i(vpsrad, Vpsrad, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4452 ASMJIT_INST_3i(vpsrad, Vpsrad, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4453 ASMJIT_INST_3x(vpsraq, Vpsraq, Xmm, Xmm, Xmm) // AVX512_F{kz}-VL
4454 ASMJIT_INST_3x(vpsraq, Vpsraq, Xmm, Xmm, Mem) // AVX512_F{kz}-VL
4455 ASMJIT_INST_3i(vpsraq, Vpsraq, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
4456 ASMJIT_INST_3i(vpsraq, Vpsraq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
4457 ASMJIT_INST_3x(vpsraq, Vpsraq, Ymm, Ymm, Xmm) // AVX512_F{kz}-VL
4458 ASMJIT_INST_3x(vpsraq, Vpsraq, Ymm, Ymm, Mem) // AVX512_F{kz}-VL
4459 ASMJIT_INST_3i(vpsraq, Vpsraq, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
4460 ASMJIT_INST_3i(vpsraq, Vpsraq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4461 ASMJIT_INST_3x(vpsraq, Vpsraq, Zmm, Zmm, Xmm) // AVX512_F{kz}
4462 ASMJIT_INST_3x(vpsraq, Vpsraq, Zmm, Zmm, Mem) // AVX512_F{kz}
4463 ASMJIT_INST_3i(vpsraq, Vpsraq, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4464 ASMJIT_INST_3i(vpsraq, Vpsraq, Zmm, Mem, Imm) // AVX512_F{kz|b64}
4465 ASMJIT_INST_3x(vpsravd, Vpsravd, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b32}-VL
4466 ASMJIT_INST_3x(vpsravd, Vpsravd, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4467 ASMJIT_INST_3x(vpsravd, Vpsravd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
4468 ASMJIT_INST_3x(vpsravd, Vpsravd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4469 ASMJIT_INST_3x(vpsravd, Vpsravd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4470 ASMJIT_INST_3x(vpsravd, Vpsravd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4471 ASMJIT_INST_3x(vpsravq, Vpsravq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4472 ASMJIT_INST_3x(vpsravq, Vpsravq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
4473 ASMJIT_INST_3x(vpsravq, Vpsravq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4474 ASMJIT_INST_3x(vpsravq, Vpsravq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
4475 ASMJIT_INST_3x(vpsravq, Vpsravq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4476 ASMJIT_INST_3x(vpsravq, Vpsravq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4477 ASMJIT_INST_3x(vpsravw, Vpsravw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
4478 ASMJIT_INST_3x(vpsravw, Vpsravw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
4479 ASMJIT_INST_3x(vpsravw, Vpsravw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
4480 ASMJIT_INST_3x(vpsravw, Vpsravw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
4481 ASMJIT_INST_3x(vpsravw, Vpsravw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4482 ASMJIT_INST_3x(vpsravw, Vpsravw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4483 ASMJIT_INST_3i(vpsraw, Vpsraw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL
4484 ASMJIT_INST_3x(vpsraw, Vpsraw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4485 ASMJIT_INST_3x(vpsraw, Vpsraw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4486 ASMJIT_INST_3i(vpsraw, Vpsraw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL
4487 ASMJIT_INST_3x(vpsraw, Vpsraw, Ymm, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL
4488 ASMJIT_INST_3x(vpsraw, Vpsraw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4489 ASMJIT_INST_3i(vpsraw, Vpsraw, Xmm, Mem, Imm) // AVX512_BW{kz}-VL
4490 ASMJIT_INST_3i(vpsraw, Vpsraw, Ymm, Mem, Imm) // AVX512_BW{kz}-VL
4491 ASMJIT_INST_3x(vpsraw, Vpsraw, Zmm, Zmm, Xmm) // AVX512_BW{kz}
4492 ASMJIT_INST_3x(vpsraw, Vpsraw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4493 ASMJIT_INST_3i(vpsraw, Vpsraw, Zmm, Zmm, Imm) // AVX512_BW{kz}
4494 ASMJIT_INST_3i(vpsraw, Vpsraw, Zmm, Mem, Imm) // AVX512_BW{kz}
4495 ASMJIT_INST_3i(vpsrld, Vpsrld, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL
4496 ASMJIT_INST_3x(vpsrld, Vpsrld, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4497 ASMJIT_INST_3x(vpsrld, Vpsrld, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL
4498 ASMJIT_INST_3i(vpsrld, Vpsrld, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b32}-VL
4499 ASMJIT_INST_3x(vpsrld, Vpsrld, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4500 ASMJIT_INST_3x(vpsrld, Vpsrld, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4501 ASMJIT_INST_3i(vpsrld, Vpsrld, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
4502 ASMJIT_INST_3i(vpsrld, Vpsrld, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4503 ASMJIT_INST_3x(vpsrld, Vpsrld, Zmm, Zmm, Xmm) // AVX512_F{kz}
4504 ASMJIT_INST_3x(vpsrld, Vpsrld, Zmm, Zmm, Mem) // AVX512_F{kz}
4505 ASMJIT_INST_3i(vpsrld, Vpsrld, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4506 ASMJIT_INST_3i(vpsrld, Vpsrld, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4507 ASMJIT_INST_3i(vpsrldq, Vpsrldq, Xmm, Xmm, Imm) // AVX AVX512_BW-VL
4508 ASMJIT_INST_3i(vpsrldq, Vpsrldq, Ymm, Ymm, Imm) // AVX2 AVX512_BW-VL
4509 ASMJIT_INST_3i(vpsrldq, Vpsrldq, Xmm, Mem, Imm) // AVX512_BW-VL
4510 ASMJIT_INST_3i(vpsrldq, Vpsrldq, Ymm, Mem, Imm) // AVX512_BW-VL
4511 ASMJIT_INST_3i(vpsrldq, Vpsrldq, Zmm, Zmm, Imm) // AVX512_BW
4512 ASMJIT_INST_3i(vpsrldq, Vpsrldq, Zmm, Mem, Imm) // AVX512_BW
4513 ASMJIT_INST_3i(vpsrlq, Vpsrlq, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL
4514 ASMJIT_INST_3x(vpsrlq, Vpsrlq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz}-VL
4515 ASMJIT_INST_3x(vpsrlq, Vpsrlq, Xmm, Xmm, Mem) // AVX AVX512_F{kz}-VL
4516 ASMJIT_INST_3i(vpsrlq, Vpsrlq, Ymm, Ymm, Imm) // AVX2 AVX512_F{kz|b64}-VL
4517 ASMJIT_INST_3x(vpsrlq, Vpsrlq, Ymm, Ymm, Xmm) // AVX2 AVX512_F{kz}-VL
4518 ASMJIT_INST_3x(vpsrlq, Vpsrlq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz}-VL
4519 ASMJIT_INST_3i(vpsrlq, Vpsrlq, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
4520 ASMJIT_INST_3i(vpsrlq, Vpsrlq, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4521 ASMJIT_INST_3x(vpsrlq, Vpsrlq, Zmm, Zmm, Xmm) // AVX512_F{kz}
4522 ASMJIT_INST_3x(vpsrlq, Vpsrlq, Zmm, Zmm, Mem) // AVX512_F{kz}
4523 ASMJIT_INST_3i(vpsrlq, Vpsrlq, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4524 ASMJIT_INST_3i(vpsrlq, Vpsrlq, Zmm, Mem, Imm) // AVX512_F{kz|b64}
4525 ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b32}-VL
4526 ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4527 ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
4528 ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4529 ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4530 ASMJIT_INST_3x(vpsrlvd, Vpsrlvd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4531 ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Xmm, Xmm, Xmm) // AVX2 AVX512_F{kz|b64}-VL
4532 ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Xmm, Xmm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4533 ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
4534 ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4535 ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4536 ASMJIT_INST_3x(vpsrlvq, Vpsrlvq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4537 ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Xmm, Xmm, Xmm) // AVX512_BW{kz}-VL
4538 ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Xmm, Xmm, Mem) // AVX512_BW{kz}-VL
4539 ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Ymm, Ymm, Ymm) // AVX512_BW{kz}-VL
4540 ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Ymm, Ymm, Mem) // AVX512_BW{kz}-VL
4541 ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4542 ASMJIT_INST_3x(vpsrlvw, Vpsrlvw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4543 ASMJIT_INST_3i(vpsrlw, Vpsrlw, Xmm, Xmm, Imm) // AVX AVX512_BW{kz}-VL
4544 ASMJIT_INST_3x(vpsrlw, Vpsrlw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4545 ASMJIT_INST_3x(vpsrlw, Vpsrlw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4546 ASMJIT_INST_3i(vpsrlw, Vpsrlw, Ymm, Ymm, Imm) // AVX2 AVX512_BW{kz}-VL
4547 ASMJIT_INST_3x(vpsrlw, Vpsrlw, Ymm, Ymm, Xmm) // AVX2 AVX512_BW{kz}-VL
4548 ASMJIT_INST_3x(vpsrlw, Vpsrlw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4549 ASMJIT_INST_3i(vpsrlw, Vpsrlw, Xmm, Mem, Imm) // AVX512_BW{kz}-VL
4550 ASMJIT_INST_3i(vpsrlw, Vpsrlw, Ymm, Mem, Imm) // AVX512_BW{kz}-VL
4551 ASMJIT_INST_3x(vpsrlw, Vpsrlw, Zmm, Zmm, Xmm) // AVX512_BW{kz}
4552 ASMJIT_INST_3x(vpsrlw, Vpsrlw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4553 ASMJIT_INST_3i(vpsrlw, Vpsrlw, Zmm, Zmm, Imm) // AVX512_BW{kz}
4554 ASMJIT_INST_3i(vpsrlw, Vpsrlw, Zmm, Mem, Imm) // AVX512_BW{kz}
4555 ASMJIT_INST_3x(vpsubb, Vpsubb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4556 ASMJIT_INST_3x(vpsubb, Vpsubb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4557 ASMJIT_INST_3x(vpsubb, Vpsubb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4558 ASMJIT_INST_3x(vpsubb, Vpsubb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4559 ASMJIT_INST_3x(vpsubb, Vpsubb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4560 ASMJIT_INST_3x(vpsubb, Vpsubb, Zmm, Zmm, Mem) // AVX512_BW{kz}
4561 ASMJIT_INST_3x(vpsubd, Vpsubd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
4562 ASMJIT_INST_3x(vpsubd, Vpsubd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
4563 ASMJIT_INST_3x(vpsubd, Vpsubd, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
4564 ASMJIT_INST_3x(vpsubd, Vpsubd, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4565 ASMJIT_INST_3x(vpsubd, Vpsubd, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4566 ASMJIT_INST_3x(vpsubd, Vpsubd, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4567 ASMJIT_INST_3x(vpsubq, Vpsubq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4568 ASMJIT_INST_3x(vpsubq, Vpsubq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4569 ASMJIT_INST_3x(vpsubq, Vpsubq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
4570 ASMJIT_INST_3x(vpsubq, Vpsubq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4571 ASMJIT_INST_3x(vpsubq, Vpsubq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4572 ASMJIT_INST_3x(vpsubq, Vpsubq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4573 ASMJIT_INST_3x(vpsubsb, Vpsubsb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4574 ASMJIT_INST_3x(vpsubsb, Vpsubsb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4575 ASMJIT_INST_3x(vpsubsb, Vpsubsb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4576 ASMJIT_INST_3x(vpsubsb, Vpsubsb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4577 ASMJIT_INST_3x(vpsubsb, Vpsubsb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4578 ASMJIT_INST_3x(vpsubsb, Vpsubsb, Zmm, Zmm, Mem) // AVX512_BW{kz}
4579 ASMJIT_INST_3x(vpsubsw, Vpsubsw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4580 ASMJIT_INST_3x(vpsubsw, Vpsubsw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4581 ASMJIT_INST_3x(vpsubsw, Vpsubsw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4582 ASMJIT_INST_3x(vpsubsw, Vpsubsw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4583 ASMJIT_INST_3x(vpsubsw, Vpsubsw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4584 ASMJIT_INST_3x(vpsubsw, Vpsubsw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4585 ASMJIT_INST_3x(vpsubusb, Vpsubusb, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4586 ASMJIT_INST_3x(vpsubusb, Vpsubusb, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4587 ASMJIT_INST_3x(vpsubusb, Vpsubusb, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4588 ASMJIT_INST_3x(vpsubusb, Vpsubusb, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4589 ASMJIT_INST_3x(vpsubusb, Vpsubusb, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4590 ASMJIT_INST_3x(vpsubusb, Vpsubusb, Zmm, Zmm, Mem) // AVX512_BW{kz}
4591 ASMJIT_INST_3x(vpsubusw, Vpsubusw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4592 ASMJIT_INST_3x(vpsubusw, Vpsubusw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4593 ASMJIT_INST_3x(vpsubusw, Vpsubusw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4594 ASMJIT_INST_3x(vpsubusw, Vpsubusw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4595 ASMJIT_INST_3x(vpsubusw, Vpsubusw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4596 ASMJIT_INST_3x(vpsubusw, Vpsubusw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4597 ASMJIT_INST_3x(vpsubw, Vpsubw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4598 ASMJIT_INST_3x(vpsubw, Vpsubw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4599 ASMJIT_INST_3x(vpsubw, Vpsubw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4600 ASMJIT_INST_3x(vpsubw, Vpsubw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4601 ASMJIT_INST_3x(vpsubw, Vpsubw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4602 ASMJIT_INST_3x(vpsubw, Vpsubw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4603 ASMJIT_INST_4i(vpternlogd, Vpternlogd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
4604 ASMJIT_INST_4i(vpternlogd, Vpternlogd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
4605 ASMJIT_INST_4i(vpternlogd, Vpternlogd, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
4606 ASMJIT_INST_4i(vpternlogd, Vpternlogd, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4607 ASMJIT_INST_4i(vpternlogd, Vpternlogd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4608 ASMJIT_INST_4i(vpternlogd, Vpternlogd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4609 ASMJIT_INST_4i(vpternlogq, Vpternlogq, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
4610 ASMJIT_INST_4i(vpternlogq, Vpternlogq, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
4611 ASMJIT_INST_4i(vpternlogq, Vpternlogq, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
4612 ASMJIT_INST_4i(vpternlogq, Vpternlogq, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4613 ASMJIT_INST_4i(vpternlogq, Vpternlogq, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4614 ASMJIT_INST_4i(vpternlogq, Vpternlogq, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64}
4615 ASMJIT_INST_2x(vptest, Vptest, Xmm, Xmm) // AVX
4616 ASMJIT_INST_2x(vptest, Vptest, Xmm, Mem) // AVX
4617 ASMJIT_INST_2x(vptest, Vptest, Ymm, Ymm) // AVX
4618 ASMJIT_INST_2x(vptest, Vptest, Ymm, Mem) // AVX
4619 ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
4620 ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Xmm, Mem) // AVX512_BW{k}-VL
4621 ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
4622 ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Ymm, Mem) // AVX512_BW{k}-VL
4623 ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Zmm, Zmm) // AVX512_BW{k}
4624 ASMJIT_INST_3x(vptestmb, Vptestmb, KReg, Zmm, Mem) // AVX512_BW{k}
4625 ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL
4626 ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL
4627 ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL
4628 ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL
4629 ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Zmm, Zmm) // AVX512_F{k|b32}
4630 ASMJIT_INST_3x(vptestmd, Vptestmd, KReg, Zmm, Mem) // AVX512_F{k|b32}
4631 ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL
4632 ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Xmm, Mem) // AVX512_F{k|b64}-VL
4633 ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL
4634 ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL
4635 ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Zmm, Zmm) // AVX512_F{k|b64}
4636 ASMJIT_INST_3x(vptestmq, Vptestmq, KReg, Zmm, Mem) // AVX512_F{k|b64}
4637 ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
4638 ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Xmm, Mem) // AVX512_BW{k}-VL
4639 ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
4640 ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Ymm, Mem) // AVX512_BW{k}-VL
4641 ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Zmm, Zmm) // AVX512_BW{k}
4642 ASMJIT_INST_3x(vptestmw, Vptestmw, KReg, Zmm, Mem) // AVX512_BW{k}
4643 ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
4644 ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Xmm, Mem) // AVX512_BW{k}-VL
4645 ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
4646 ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Ymm, Mem) // AVX512_BW{k}-VL
4647 ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Zmm, Zmm) // AVX512_BW{k}
4648 ASMJIT_INST_3x(vptestnmb, Vptestnmb, KReg, Zmm, Mem) // AVX512_BW{k}
4649 ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Xmm, Xmm) // AVX512_F{k|b32}-VL
4650 ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Xmm, Mem) // AVX512_F{k|b32}-VL
4651 ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Ymm, Ymm) // AVX512_F{k|b32}-VL
4652 ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Ymm, Mem) // AVX512_F{k|b32}-VL
4653 ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Zmm, Zmm) // AVX512_F{k|b32}
4654 ASMJIT_INST_3x(vptestnmd, Vptestnmd, KReg, Zmm, Mem) // AVX512_F{k|b32}
4655 ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Xmm, Xmm) // AVX512_F{k|b64}-VL
4656 ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Xmm, Mem) // AVX512_F{k|b64}-VL
4657 ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Ymm, Ymm) // AVX512_F{k|b64}-VL
4658 ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Ymm, Mem) // AVX512_F{k|b64}-VL
4659 ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Zmm, Zmm) // AVX512_F{k|b64}
4660 ASMJIT_INST_3x(vptestnmq, Vptestnmq, KReg, Zmm, Mem) // AVX512_F{k|b64}
4661 ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Xmm, Xmm) // AVX512_BW{k}-VL
4662 ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Xmm, Mem) // AVX512_BW{k}-VL
4663 ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Ymm, Ymm) // AVX512_BW{k}-VL
4664 ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Ymm, Mem) // AVX512_BW{k}-VL
4665 ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Zmm, Zmm) // AVX512_BW{k}
4666 ASMJIT_INST_3x(vptestnmw, Vptestnmw, KReg, Zmm, Mem) // AVX512_BW{k}
4667 ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4668 ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4669 ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4670 ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4671 ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4672 ASMJIT_INST_3x(vpunpckhbw, Vpunpckhbw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4673 ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
4674 ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
4675 ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
4676 ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4677 ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4678 ASMJIT_INST_3x(vpunpckhdq, Vpunpckhdq, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4679 ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4680 ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4681 ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
4682 ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4683 ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4684 ASMJIT_INST_3x(vpunpckhqdq, Vpunpckhqdq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4685 ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4686 ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4687 ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4688 ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4689 ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4690 ASMJIT_INST_3x(vpunpckhwd, Vpunpckhwd, Zmm, Zmm, Mem) // AVX512_BW{kz}
4691 ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4692 ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4693 ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4694 ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4695 ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4696 ASMJIT_INST_3x(vpunpcklbw, Vpunpcklbw, Zmm, Zmm, Mem) // AVX512_BW{kz}
4697 ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
4698 ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
4699 ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b32}-VL
4700 ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b32}-VL
4701 ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4702 ASMJIT_INST_3x(vpunpckldq, Vpunpckldq, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4703 ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4704 ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4705 ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Ymm, Ymm, Ymm) // AVX2 AVX512_F{kz|b64}-VL
4706 ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Ymm, Ymm, Mem) // AVX2 AVX512_F{kz|b64}-VL
4707 ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4708 ASMJIT_INST_3x(vpunpcklqdq, Vpunpcklqdq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4709 ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Xmm, Xmm, Xmm) // AVX AVX512_BW{kz}-VL
4710 ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Xmm, Xmm, Mem) // AVX AVX512_BW{kz}-VL
4711 ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Ymm, Ymm, Ymm) // AVX2 AVX512_BW{kz}-VL
4712 ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Ymm, Ymm, Mem) // AVX2 AVX512_BW{kz}-VL
4713 ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Zmm, Zmm, Zmm) // AVX512_BW{kz}
4714 ASMJIT_INST_3x(vpunpcklwd, Vpunpcklwd, Zmm, Zmm, Mem) // AVX512_BW{kz}
4715 ASMJIT_INST_3x(vpxor, Vpxor, Xmm, Xmm, Xmm) // AVX
4716 ASMJIT_INST_3x(vpxor, Vpxor, Xmm, Xmm, Mem) // AVX
4717 ASMJIT_INST_3x(vpxor, Vpxor, Ymm, Ymm, Ymm) // AVX2
4718 ASMJIT_INST_3x(vpxor, Vpxor, Ymm, Ymm, Mem) // AVX2
4719 ASMJIT_INST_3x(vpxord, Vpxord, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
4720 ASMJIT_INST_3x(vpxord, Vpxord, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
4721 ASMJIT_INST_3x(vpxord, Vpxord, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
4722 ASMJIT_INST_3x(vpxord, Vpxord, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
4723 ASMJIT_INST_3x(vpxord, Vpxord, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4724 ASMJIT_INST_3x(vpxord, Vpxord, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4725 ASMJIT_INST_3x(vpxorq, Vpxorq, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4726 ASMJIT_INST_3x(vpxorq, Vpxorq, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
4727 ASMJIT_INST_3x(vpxorq, Vpxorq, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4728 ASMJIT_INST_3x(vpxorq, Vpxorq, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
4729 ASMJIT_INST_3x(vpxorq, Vpxorq, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4730 ASMJIT_INST_3x(vpxorq, Vpxorq, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4731 ASMJIT_INST_4i(vrangepd, Vrangepd, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|b64}-VL
4732 ASMJIT_INST_4i(vrangepd, Vrangepd, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|b64}-VL
4733 ASMJIT_INST_4i(vrangepd, Vrangepd, Ymm, Ymm, Ymm, Imm) // AVX512_DQ{kz|b64}-VL
4734 ASMJIT_INST_4i(vrangepd, Vrangepd, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz|b64}-VL
4735 ASMJIT_INST_4i(vrangepd, Vrangepd, Zmm, Zmm, Zmm, Imm) // AVX512_DQ{kz|sae|b64}
4736 ASMJIT_INST_4i(vrangepd, Vrangepd, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz|sae|b64}
4737 ASMJIT_INST_4i(vrangeps, Vrangeps, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|b32}-VL
4738 ASMJIT_INST_4i(vrangeps, Vrangeps, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|b32}-VL
4739 ASMJIT_INST_4i(vrangeps, Vrangeps, Ymm, Ymm, Ymm, Imm) // AVX512_DQ{kz|b32}-VL
4740 ASMJIT_INST_4i(vrangeps, Vrangeps, Ymm, Ymm, Mem, Imm) // AVX512_DQ{kz|b32}-VL
4741 ASMJIT_INST_4i(vrangeps, Vrangeps, Zmm, Zmm, Zmm, Imm) // AVX512_DQ{kz|sae|b32}
4742 ASMJIT_INST_4i(vrangeps, Vrangeps, Zmm, Zmm, Mem, Imm) // AVX512_DQ{kz|sae|b32}
4743 ASMJIT_INST_4i(vrangesd, Vrangesd, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|sae}
4744 ASMJIT_INST_4i(vrangesd, Vrangesd, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|sae}
4745 ASMJIT_INST_4i(vrangess, Vrangess, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|sae}
4746 ASMJIT_INST_4i(vrangess, Vrangess, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|sae}
4747 ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4748 ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Xmm, Mem) // AVX512_F{kz|b64}-VL
4749 ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4750 ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Ymm, Mem) // AVX512_F{kz|b64}-VL
4751 ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Zmm, Zmm) // AVX512_F{kz|b64}
4752 ASMJIT_INST_2x(vrcp14pd, Vrcp14pd, Zmm, Mem) // AVX512_F{kz|b64}
4753 ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Xmm, Xmm) // AVX512_F{kz|b32}-VL
4754 ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Xmm, Mem) // AVX512_F{kz|b32}-VL
4755 ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Ymm, Ymm) // AVX512_F{kz|b32}-VL
4756 ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Ymm, Mem) // AVX512_F{kz|b32}-VL
4757 ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Zmm, Zmm) // AVX512_F{kz|b32}
4758 ASMJIT_INST_2x(vrcp14ps, Vrcp14ps, Zmm, Mem) // AVX512_F{kz|b32}
4759 ASMJIT_INST_3x(vrcp14sd, Vrcp14sd, Xmm, Xmm, Xmm) // AVX512_F{kz}
4760 ASMJIT_INST_3x(vrcp14sd, Vrcp14sd, Xmm, Xmm, Mem) // AVX512_F{kz}
4761 ASMJIT_INST_3x(vrcp14ss, Vrcp14ss, Xmm, Xmm, Xmm) // AVX512_F{kz}
4762 ASMJIT_INST_3x(vrcp14ss, Vrcp14ss, Xmm, Xmm, Mem) // AVX512_F{kz}
4763 ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Zmm, Zmm) // AVX512_ER{kz|sae|b64}
4764 ASMJIT_INST_2x(vrcp28pd, Vrcp28pd, Zmm, Mem) // AVX512_ER{kz|sae|b64}
4765 ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Zmm, Zmm) // AVX512_ER{kz|sae|b32}
4766 ASMJIT_INST_2x(vrcp28ps, Vrcp28ps, Zmm, Mem) // AVX512_ER{kz|sae|b32}
4767 ASMJIT_INST_3x(vrcp28sd, Vrcp28sd, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae}
4768 ASMJIT_INST_3x(vrcp28sd, Vrcp28sd, Xmm, Xmm, Mem) // AVX512_ER{kz|sae}
4769 ASMJIT_INST_3x(vrcp28ss, Vrcp28ss, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae}
4770 ASMJIT_INST_3x(vrcp28ss, Vrcp28ss, Xmm, Xmm, Mem) // AVX512_ER{kz|sae}
4771 ASMJIT_INST_2x(vrcpps, Vrcpps, Xmm, Xmm) // AVX
4772 ASMJIT_INST_2x(vrcpps, Vrcpps, Xmm, Mem) // AVX
4773 ASMJIT_INST_2x(vrcpps, Vrcpps, Ymm, Ymm) // AVX
4774 ASMJIT_INST_2x(vrcpps, Vrcpps, Ymm, Mem) // AVX
4775 ASMJIT_INST_3x(vrcpss, Vrcpss, Xmm, Xmm, Xmm) // AVX
4776 ASMJIT_INST_3x(vrcpss, Vrcpss, Xmm, Xmm, Mem) // AVX
ASMJIT_INST_3i(vreducepd, Vreducepd, Xmm, Xmm, Imm) // AVX512_DQ{kz|b64}-VL
ASMJIT_INST_3i(vreducepd, Vreducepd, Xmm, Mem, Imm) // AVX512_DQ{kz|b64}-VL
ASMJIT_INST_3i(vreducepd, Vreducepd, Ymm, Ymm, Imm) // AVX512_DQ{kz|b64}-VL
ASMJIT_INST_3i(vreducepd, Vreducepd, Ymm, Mem, Imm) // AVX512_DQ{kz|b64}-VL
ASMJIT_INST_3i(vreducepd, Vreducepd, Zmm, Zmm, Imm) // AVX512_DQ{kz|sae|b64}
ASMJIT_INST_3i(vreducepd, Vreducepd, Zmm, Mem, Imm) // AVX512_DQ{kz|sae|b64}
ASMJIT_INST_3i(vreduceps, Vreduceps, Xmm, Xmm, Imm) // AVX512_DQ{kz|b32}-VL
ASMJIT_INST_3i(vreduceps, Vreduceps, Xmm, Mem, Imm) // AVX512_DQ{kz|b32}-VL
ASMJIT_INST_3i(vreduceps, Vreduceps, Ymm, Ymm, Imm) // AVX512_DQ{kz|b32}-VL
ASMJIT_INST_3i(vreduceps, Vreduceps, Ymm, Mem, Imm) // AVX512_DQ{kz|b32}-VL
ASMJIT_INST_3i(vreduceps, Vreduceps, Zmm, Zmm, Imm) // AVX512_DQ{kz|sae|b32}
ASMJIT_INST_3i(vreduceps, Vreduceps, Zmm, Mem, Imm) // AVX512_DQ{kz|sae|b32}
ASMJIT_INST_4i(vreducesd, Vreducesd, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|sae}
ASMJIT_INST_4i(vreducesd, Vreducesd, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|sae}
ASMJIT_INST_4i(vreducess, Vreducess, Xmm, Xmm, Xmm, Imm) // AVX512_DQ{kz|sae}
ASMJIT_INST_4i(vreducess, Vreducess, Xmm, Xmm, Mem, Imm) // AVX512_DQ{kz|sae}
4793 ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Xmm, Xmm, Imm) // AVX512_F{kz|b64}-VL
4794 ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Xmm, Mem, Imm) // AVX512_F{kz|b64}-VL
4795 ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
4796 ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4797 ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b64}
4798 ASMJIT_INST_3i(vrndscalepd, Vrndscalepd, Zmm, Mem, Imm) // AVX512_F{kz|sae|b64}
4799 ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Xmm, Xmm, Imm) // AVX512_F{kz|b32}-VL
4800 ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Xmm, Mem, Imm) // AVX512_F{kz|b32}-VL
4801 ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
4802 ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4803 ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Zmm, Zmm, Imm) // AVX512_F{kz|sae|b32}
4804 ASMJIT_INST_3i(vrndscaleps, Vrndscaleps, Zmm, Mem, Imm) // AVX512_F{kz|sae|b32}
4805 ASMJIT_INST_4i(vrndscalesd, Vrndscalesd, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
4806 ASMJIT_INST_4i(vrndscalesd, Vrndscalesd, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae}
4807 ASMJIT_INST_4i(vrndscaless, Vrndscaless, Xmm, Xmm, Xmm, Imm) // AVX512_F{kz|sae}
4808 ASMJIT_INST_4i(vrndscaless, Vrndscaless, Xmm, Xmm, Mem, Imm) // AVX512_F{kz|sae}
4809 ASMJIT_INST_3i(vroundpd, Vroundpd, Xmm, Xmm, Imm) // AVX
4810 ASMJIT_INST_3i(vroundpd, Vroundpd, Xmm, Mem, Imm) // AVX
4811 ASMJIT_INST_3i(vroundpd, Vroundpd, Ymm, Ymm, Imm) // AVX
4812 ASMJIT_INST_3i(vroundpd, Vroundpd, Ymm, Mem, Imm) // AVX
4813 ASMJIT_INST_3i(vroundps, Vroundps, Xmm, Xmm, Imm) // AVX
4814 ASMJIT_INST_3i(vroundps, Vroundps, Xmm, Mem, Imm) // AVX
4815 ASMJIT_INST_3i(vroundps, Vroundps, Ymm, Ymm, Imm) // AVX
4816 ASMJIT_INST_3i(vroundps, Vroundps, Ymm, Mem, Imm) // AVX
4817 ASMJIT_INST_4i(vroundsd, Vroundsd, Xmm, Xmm, Xmm, Imm) // AVX
4818 ASMJIT_INST_4i(vroundsd, Vroundsd, Xmm, Xmm, Mem, Imm) // AVX
4819 ASMJIT_INST_4i(vroundss, Vroundss, Xmm, Xmm, Xmm, Imm) // AVX
4820 ASMJIT_INST_4i(vroundss, Vroundss, Xmm, Xmm, Mem, Imm) // AVX
4821 ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4822 ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Xmm, Mem) // AVX512_F{kz|b64}-VL
4823 ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4824 ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Ymm, Mem) // AVX512_F{kz|b64}-VL
4825 ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Zmm, Zmm) // AVX512_F{kz|b64}
4826 ASMJIT_INST_2x(vrsqrt14pd, Vrsqrt14pd, Zmm, Mem) // AVX512_F{kz|b64}
4827 ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Xmm, Xmm) // AVX512_F{kz|b32}-VL
4828 ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Xmm, Mem) // AVX512_F{kz|b32}-VL
4829 ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Ymm, Ymm) // AVX512_F{kz|b32}-VL
4830 ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Ymm, Mem) // AVX512_F{kz|b32}-VL
4831 ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Zmm, Zmm) // AVX512_F{kz|b32}
4832 ASMJIT_INST_2x(vrsqrt14ps, Vrsqrt14ps, Zmm, Mem) // AVX512_F{kz|b32}
4833 ASMJIT_INST_3x(vrsqrt14sd, Vrsqrt14sd, Xmm, Xmm, Xmm) // AVX512_F{kz}
4834 ASMJIT_INST_3x(vrsqrt14sd, Vrsqrt14sd, Xmm, Xmm, Mem) // AVX512_F{kz}
4835 ASMJIT_INST_3x(vrsqrt14ss, Vrsqrt14ss, Xmm, Xmm, Xmm) // AVX512_F{kz}
4836 ASMJIT_INST_3x(vrsqrt14ss, Vrsqrt14ss, Xmm, Xmm, Mem) // AVX512_F{kz}
4837 ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Zmm, Zmm) // AVX512_ER{kz|sae|b64}
4838 ASMJIT_INST_2x(vrsqrt28pd, Vrsqrt28pd, Zmm, Mem) // AVX512_ER{kz|sae|b64}
4839 ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Zmm, Zmm) // AVX512_ER{kz|sae|b32}
4840 ASMJIT_INST_2x(vrsqrt28ps, Vrsqrt28ps, Zmm, Mem) // AVX512_ER{kz|sae|b32}
4841 ASMJIT_INST_3x(vrsqrt28sd, Vrsqrt28sd, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae}
4842 ASMJIT_INST_3x(vrsqrt28sd, Vrsqrt28sd, Xmm, Xmm, Mem) // AVX512_ER{kz|sae}
4843 ASMJIT_INST_3x(vrsqrt28ss, Vrsqrt28ss, Xmm, Xmm, Xmm) // AVX512_ER{kz|sae}
4844 ASMJIT_INST_3x(vrsqrt28ss, Vrsqrt28ss, Xmm, Xmm, Mem) // AVX512_ER{kz|sae}
4845 ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Xmm, Xmm) // AVX
4846 ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Xmm, Mem) // AVX
4847 ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Ymm, Ymm) // AVX
4848 ASMJIT_INST_2x(vrsqrtps, Vrsqrtps, Ymm, Mem) // AVX
4849 ASMJIT_INST_3x(vrsqrtss, Vrsqrtss, Xmm, Xmm, Xmm) // AVX
4850 ASMJIT_INST_3x(vrsqrtss, Vrsqrtss, Xmm, Xmm, Mem) // AVX
4851 ASMJIT_INST_3x(vscalefpd, Vscalefpd, Xmm, Xmm, Xmm) // AVX512_F{kz|b64}-VL
4852 ASMJIT_INST_3x(vscalefpd, Vscalefpd, Xmm, Xmm, Mem) // AVX512_F{kz|b64}-VL
4853 ASMJIT_INST_3x(vscalefpd, Vscalefpd, Ymm, Ymm, Ymm) // AVX512_F{kz|b64}-VL
4854 ASMJIT_INST_3x(vscalefpd, Vscalefpd, Ymm, Ymm, Mem) // AVX512_F{kz|b64}-VL
4855 ASMJIT_INST_3x(vscalefpd, Vscalefpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64}
4856 ASMJIT_INST_3x(vscalefpd, Vscalefpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64}
4857 ASMJIT_INST_3x(vscalefps, Vscalefps, Xmm, Xmm, Xmm) // AVX512_F{kz|b32}-VL
4858 ASMJIT_INST_3x(vscalefps, Vscalefps, Xmm, Xmm, Mem) // AVX512_F{kz|b32}-VL
4859 ASMJIT_INST_3x(vscalefps, Vscalefps, Ymm, Ymm, Ymm) // AVX512_F{kz|b32}-VL
4860 ASMJIT_INST_3x(vscalefps, Vscalefps, Ymm, Ymm, Mem) // AVX512_F{kz|b32}-VL
4861 ASMJIT_INST_3x(vscalefps, Vscalefps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32}
4862 ASMJIT_INST_3x(vscalefps, Vscalefps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32}
4863 ASMJIT_INST_3x(vscalefsd, Vscalefsd, Xmm, Xmm, Xmm) // AVX512_F{kz|er}
4864 ASMJIT_INST_3x(vscalefsd, Vscalefsd, Xmm, Xmm, Mem) // AVX512_F{kz|er}
4865 ASMJIT_INST_3x(vscalefss, Vscalefss, Xmm, Xmm, Xmm) // AVX512_F{kz|er}
4866 ASMJIT_INST_3x(vscalefss, Vscalefss, Xmm, Xmm, Mem) // AVX512_F{kz|er}
4867 ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Xmm) // AVX512_F{k}-VL
4868 ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Ymm) // AVX512_F{k}-VL
4869 ASMJIT_INST_2x(vscatterdpd, Vscatterdpd, Mem, Zmm) // AVX512_F{k}
4870 ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Xmm) // AVX512_F{k}-VL
4871 ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Ymm) // AVX512_F{k}-VL
4872 ASMJIT_INST_2x(vscatterdps, Vscatterdps, Mem, Zmm) // AVX512_F{k}
4873 ASMJIT_INST_1x(vscatterpf0dpd, Vscatterpf0dpd, Mem) // AVX512_PF{k}
4874 ASMJIT_INST_1x(vscatterpf0dps, Vscatterpf0dps, Mem) // AVX512_PF{k}
4875 ASMJIT_INST_1x(vscatterpf0qpd, Vscatterpf0qpd, Mem) // AVX512_PF{k}
4876 ASMJIT_INST_1x(vscatterpf0qps, Vscatterpf0qps, Mem) // AVX512_PF{k}
4877 ASMJIT_INST_1x(vscatterpf1dpd, Vscatterpf1dpd, Mem) // AVX512_PF{k}
4878 ASMJIT_INST_1x(vscatterpf1dps, Vscatterpf1dps, Mem) // AVX512_PF{k}
4879 ASMJIT_INST_1x(vscatterpf1qpd, Vscatterpf1qpd, Mem) // AVX512_PF{k}
4880 ASMJIT_INST_1x(vscatterpf1qps, Vscatterpf1qps, Mem) // AVX512_PF{k}
4881 ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Xmm) // AVX512_F{k}-VL
4882 ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Ymm) // AVX512_F{k}-VL
4883 ASMJIT_INST_2x(vscatterqpd, Vscatterqpd, Mem, Zmm) // AVX512_F{k}
4884 ASMJIT_INST_2x(vscatterqps, Vscatterqps, Mem, Xmm) // AVX512_F{k}-VL
4885 ASMJIT_INST_2x(vscatterqps, Vscatterqps, Mem, Ymm) // AVX512_F{k}
4886 ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
4887 ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4888 ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4889 ASMJIT_INST_4i(vshuff32x4, Vshuff32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4890 ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
4891 ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4892 ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4893 ASMJIT_INST_4i(vshuff64x2, Vshuff64x2, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64}
4894 ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b32}-VL
4895 ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b32}-VL
4896 ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
4897 ASMJIT_INST_4i(vshufi32x4, Vshufi32x4, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4898 ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Ymm, Ymm, Ymm, Imm) // AVX512_F{kz|b64}-VL
4899 ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Ymm, Ymm, Mem, Imm) // AVX512_F{kz|b64}-VL
4900 ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
4901 ASMJIT_INST_4i(vshufi64x2, Vshufi64x2, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64}
ASMJIT_INST_4i(vshufpd, Vshufpd, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_4i(vshufpd, Vshufpd, Xmm, Xmm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_4i(vshufpd, Vshufpd, Ymm, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_4i(vshufpd, Vshufpd, Ymm, Ymm, Mem, Imm) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_4i(vshufpd, Vshufpd, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b64}
ASMJIT_INST_4i(vshufpd, Vshufpd, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b64}
ASMJIT_INST_4i(vshufps, Vshufps, Xmm, Xmm, Xmm, Imm) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_4i(vshufps, Vshufps, Xmm, Xmm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_4i(vshufps, Vshufps, Ymm, Ymm, Ymm, Imm) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_4i(vshufps, Vshufps, Ymm, Ymm, Mem, Imm) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_4i(vshufps, Vshufps, Zmm, Zmm, Zmm, Imm) // AVX512_F{kz|b32}
ASMJIT_INST_4i(vshufps, Vshufps, Zmm, Zmm, Mem, Imm) // AVX512_F{kz|b32}
4914 ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4915 ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4916 ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
4917 ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
4918 ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Zmm, Zmm) // AVX512_F{kz|er|b64}
4919 ASMJIT_INST_2x(vsqrtpd, Vsqrtpd, Zmm, Mem) // AVX512_F{kz|er|b64}
4920 ASMJIT_INST_2x(vsqrtps, Vsqrtps, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
4921 ASMJIT_INST_2x(vsqrtps, Vsqrtps, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
4922 ASMJIT_INST_2x(vsqrtps, Vsqrtps, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
4923 ASMJIT_INST_2x(vsqrtps, Vsqrtps, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
4924 ASMJIT_INST_2x(vsqrtps, Vsqrtps, Zmm, Zmm) // AVX512_F{kz|er|b32}
4925 ASMJIT_INST_2x(vsqrtps, Vsqrtps, Zmm, Mem) // AVX512_F{kz|er|b32}
4926 ASMJIT_INST_3x(vsqrtsd, Vsqrtsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
4927 ASMJIT_INST_3x(vsqrtsd, Vsqrtsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
4928 ASMJIT_INST_3x(vsqrtss, Vsqrtss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
4929 ASMJIT_INST_3x(vsqrtss, Vsqrtss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
4930 ASMJIT_INST_1x(vstmxcsr, Vstmxcsr, Mem) // AVX
ASMJIT_INST_3x(vsubpd, Vsubpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_3x(vsubpd, Vsubpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_3x(vsubpd, Vsubpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_3x(vsubpd, Vsubpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
ASMJIT_INST_3x(vsubpd, Vsubpd, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b64}
ASMJIT_INST_3x(vsubpd, Vsubpd, Zmm, Zmm, Mem) // AVX512_F{kz|er|b64}
ASMJIT_INST_3x(vsubps, Vsubps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_3x(vsubps, Vsubps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_3x(vsubps, Vsubps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_3x(vsubps, Vsubps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
ASMJIT_INST_3x(vsubps, Vsubps, Zmm, Zmm, Zmm) // AVX512_F{kz|er|b32}
ASMJIT_INST_3x(vsubps, Vsubps, Zmm, Zmm, Mem) // AVX512_F{kz|er|b32}
4943 ASMJIT_INST_3x(vsubsd, Vsubsd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
4944 ASMJIT_INST_3x(vsubsd, Vsubsd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
4945 ASMJIT_INST_3x(vsubss, Vsubss, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|er}
4946 ASMJIT_INST_3x(vsubss, Vsubss, Xmm, Xmm, Mem) // AVX AVX512_F{kz|er}
4947 ASMJIT_INST_2x(vtestpd, Vtestpd, Xmm, Xmm) // AVX
4948 ASMJIT_INST_2x(vtestpd, Vtestpd, Xmm, Mem) // AVX
4949 ASMJIT_INST_2x(vtestpd, Vtestpd, Ymm, Ymm) // AVX
4950 ASMJIT_INST_2x(vtestpd, Vtestpd, Ymm, Mem) // AVX
4951 ASMJIT_INST_2x(vtestps, Vtestps, Xmm, Xmm) // AVX
4952 ASMJIT_INST_2x(vtestps, Vtestps, Xmm, Mem) // AVX
4953 ASMJIT_INST_2x(vtestps, Vtestps, Ymm, Ymm) // AVX
4954 ASMJIT_INST_2x(vtestps, Vtestps, Ymm, Mem) // AVX
4955 ASMJIT_INST_2x(vucomisd, Vucomisd, Xmm, Xmm) // AVX AVX512_F{sae}
4956 ASMJIT_INST_2x(vucomisd, Vucomisd, Xmm, Mem) // AVX AVX512_F{sae}
4957 ASMJIT_INST_2x(vucomiss, Vucomiss, Xmm, Xmm) // AVX AVX512_F{sae}
4958 ASMJIT_INST_2x(vucomiss, Vucomiss, Xmm, Mem) // AVX AVX512_F{sae}
4959 ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4960 ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4961 ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
4962 ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
4963 ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4964 ASMJIT_INST_3x(vunpckhpd, Vunpckhpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4965 ASMJIT_INST_3x(vunpckhps, Vunpckhps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
4966 ASMJIT_INST_3x(vunpckhps, Vunpckhps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
4967 ASMJIT_INST_3x(vunpckhps, Vunpckhps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
4968 ASMJIT_INST_3x(vunpckhps, Vunpckhps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
4969 ASMJIT_INST_3x(vunpckhps, Vunpckhps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4970 ASMJIT_INST_3x(vunpckhps, Vunpckhps, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4971 ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b64}-VL
4972 ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b64}-VL
4973 ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b64}-VL
4974 ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b64}-VL
4975 ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Zmm, Zmm, Zmm) // AVX512_F{kz|b64}
4976 ASMJIT_INST_3x(vunpcklpd, Vunpcklpd, Zmm, Zmm, Mem) // AVX512_F{kz|b64}
4977 ASMJIT_INST_3x(vunpcklps, Vunpcklps, Xmm, Xmm, Xmm) // AVX AVX512_F{kz|b32}-VL
4978 ASMJIT_INST_3x(vunpcklps, Vunpcklps, Xmm, Xmm, Mem) // AVX AVX512_F{kz|b32}-VL
4979 ASMJIT_INST_3x(vunpcklps, Vunpcklps, Ymm, Ymm, Ymm) // AVX AVX512_F{kz|b32}-VL
4980 ASMJIT_INST_3x(vunpcklps, Vunpcklps, Ymm, Ymm, Mem) // AVX AVX512_F{kz|b32}-VL
4981 ASMJIT_INST_3x(vunpcklps, Vunpcklps, Zmm, Zmm, Zmm) // AVX512_F{kz|b32}
4982 ASMJIT_INST_3x(vunpcklps, Vunpcklps, Zmm, Zmm, Mem) // AVX512_F{kz|b32}
4983 ASMJIT_INST_3x(vxorpd, Vxorpd, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b64}-VL
4984 ASMJIT_INST_3x(vxorpd, Vxorpd, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b64}-VL
4985 ASMJIT_INST_3x(vxorpd, Vxorpd, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b64}-VL
4986 ASMJIT_INST_3x(vxorpd, Vxorpd, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b64}-VL
4987 ASMJIT_INST_3x(vxorpd, Vxorpd, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b64}
4988 ASMJIT_INST_3x(vxorpd, Vxorpd, Zmm, Zmm, Mem) // AVX512_DQ{kz|b64}
4989 ASMJIT_INST_3x(vxorps, Vxorps, Xmm, Xmm, Xmm) // AVX AVX512_DQ{kz|b32}-VL
4990 ASMJIT_INST_3x(vxorps, Vxorps, Xmm, Xmm, Mem) // AVX AVX512_DQ{kz|b32}-VL
4991 ASMJIT_INST_3x(vxorps, Vxorps, Ymm, Ymm, Ymm) // AVX AVX512_DQ{kz|b32}-VL
4992 ASMJIT_INST_3x(vxorps, Vxorps, Ymm, Ymm, Mem) // AVX AVX512_DQ{kz|b32}-VL
4993 ASMJIT_INST_3x(vxorps, Vxorps, Zmm, Zmm, Zmm) // AVX512_DQ{kz|b32}
4994 ASMJIT_INST_3x(vxorps, Vxorps, Zmm, Zmm, Mem) // AVX512_DQ{kz|b32}
4995 ASMJIT_INST_0x(vzeroall, Vzeroall) // AVX
4996 ASMJIT_INST_0x(vzeroupper, Vzeroupper) // AVX
4997
4998 //! \}
4999
5000 //! \name FMA4 Instructions
5001 //! \{
5002
5003 ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Xmm, Xmm, Xmm, Xmm) // FMA4
5004 ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Xmm, Xmm, Mem, Xmm) // FMA4
5005 ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Xmm, Xmm, Xmm, Mem) // FMA4
5006 ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Ymm, Ymm, Ymm, Ymm) // FMA4
5007 ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Ymm, Ymm, Mem, Ymm) // FMA4
5008 ASMJIT_INST_4x(vfmaddpd, Vfmaddpd, Ymm, Ymm, Ymm, Mem) // FMA4
5009 ASMJIT_INST_4x(vfmaddps, Vfmaddps, Xmm, Xmm, Xmm, Xmm) // FMA4
5010 ASMJIT_INST_4x(vfmaddps, Vfmaddps, Xmm, Xmm, Mem, Xmm) // FMA4
5011 ASMJIT_INST_4x(vfmaddps, Vfmaddps, Xmm, Xmm, Xmm, Mem) // FMA4
5012 ASMJIT_INST_4x(vfmaddps, Vfmaddps, Ymm, Ymm, Ymm, Ymm) // FMA4
5013 ASMJIT_INST_4x(vfmaddps, Vfmaddps, Ymm, Ymm, Mem, Ymm) // FMA4
5014 ASMJIT_INST_4x(vfmaddps, Vfmaddps, Ymm, Ymm, Ymm, Mem) // FMA4
5015 ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Xmm, Xmm) // FMA4
5016 ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Mem, Xmm) // FMA4
5017 ASMJIT_INST_4x(vfmaddsd, Vfmaddsd, Xmm, Xmm, Xmm, Mem) // FMA4
5018 ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Xmm, Xmm) // FMA4
5019 ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Mem, Xmm) // FMA4
5020 ASMJIT_INST_4x(vfmaddss, Vfmaddss, Xmm, Xmm, Xmm, Mem) // FMA4
5021 ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Xmm, Xmm, Xmm, Xmm) // FMA4
5022 ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Xmm, Xmm, Mem, Xmm) // FMA4
5023 ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Xmm, Xmm, Xmm, Mem) // FMA4
5024 ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Ymm, Ymm, Ymm, Ymm) // FMA4
5025 ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Ymm, Ymm, Mem, Ymm) // FMA4
5026 ASMJIT_INST_4x(vfmaddsubpd, Vfmaddsubpd, Ymm, Ymm, Ymm, Mem) // FMA4
5027 ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Xmm, Xmm, Xmm, Xmm) // FMA4
5028 ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Xmm, Xmm, Mem, Xmm) // FMA4
5029 ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Xmm, Xmm, Xmm, Mem) // FMA4
5030 ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Ymm, Ymm, Ymm, Ymm) // FMA4
5031 ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Ymm, Ymm, Mem, Ymm) // FMA4
5032 ASMJIT_INST_4x(vfmaddsubps, Vfmaddsubps, Ymm, Ymm, Ymm, Mem) // FMA4
5033 ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Xmm, Xmm, Xmm, Xmm) // FMA4
5034 ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Xmm, Xmm, Mem, Xmm) // FMA4
5035 ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Xmm, Xmm, Xmm, Mem) // FMA4
5036 ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Ymm, Ymm, Ymm, Ymm) // FMA4
5037 ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Ymm, Ymm, Mem, Ymm) // FMA4
5038 ASMJIT_INST_4x(vfmsubaddpd, Vfmsubaddpd, Ymm, Ymm, Ymm, Mem) // FMA4
5039 ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Xmm, Xmm, Xmm, Xmm) // FMA4
5040 ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Xmm, Xmm, Mem, Xmm) // FMA4
5041 ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Xmm, Xmm, Xmm, Mem) // FMA4
5042 ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Ymm, Ymm, Ymm, Ymm) // FMA4
5043 ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Ymm, Ymm, Mem, Ymm) // FMA4
5044 ASMJIT_INST_4x(vfmsubaddps, Vfmsubaddps, Ymm, Ymm, Ymm, Mem) // FMA4
5045 ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Xmm, Xmm, Xmm, Xmm) // FMA4
5046 ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Xmm, Xmm, Mem, Xmm) // FMA4
5047 ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Xmm, Xmm, Xmm, Mem) // FMA4
5048 ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Ymm, Ymm, Ymm, Ymm) // FMA4
5049 ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Ymm, Ymm, Mem, Ymm) // FMA4
5050 ASMJIT_INST_4x(vfmsubpd, Vfmsubpd, Ymm, Ymm, Ymm, Mem) // FMA4
5051 ASMJIT_INST_4x(vfmsubps, Vfmsubps, Xmm, Xmm, Xmm, Xmm) // FMA4
5052 ASMJIT_INST_4x(vfmsubps, Vfmsubps, Xmm, Xmm, Mem, Xmm) // FMA4
5053 ASMJIT_INST_4x(vfmsubps, Vfmsubps, Xmm, Xmm, Xmm, Mem) // FMA4
5054 ASMJIT_INST_4x(vfmsubps, Vfmsubps, Ymm, Ymm, Ymm, Ymm) // FMA4
5055 ASMJIT_INST_4x(vfmsubps, Vfmsubps, Ymm, Ymm, Mem, Ymm) // FMA4
5056 ASMJIT_INST_4x(vfmsubps, Vfmsubps, Ymm, Ymm, Ymm, Mem) // FMA4
5057 ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Xmm, Xmm) // FMA4
5058 ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Mem, Xmm) // FMA4
5059 ASMJIT_INST_4x(vfmsubsd, Vfmsubsd, Xmm, Xmm, Xmm, Mem) // FMA4
5060 ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Xmm, Xmm) // FMA4
5061 ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Mem, Xmm) // FMA4
5062 ASMJIT_INST_4x(vfmsubss, Vfmsubss, Xmm, Xmm, Xmm, Mem) // FMA4
5063 ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Xmm, Xmm, Xmm, Xmm) // FMA4
5064 ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Xmm, Xmm, Mem, Xmm) // FMA4
5065 ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Xmm, Xmm, Xmm, Mem) // FMA4
5066 ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Ymm, Ymm, Ymm, Ymm) // FMA4
5067 ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Ymm, Ymm, Mem, Ymm) // FMA4
5068 ASMJIT_INST_4x(vfnmaddpd, Vfnmaddpd, Ymm, Ymm, Ymm, Mem) // FMA4
5069 ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Xmm, Xmm, Xmm, Xmm) // FMA4
5070 ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Xmm, Xmm, Mem, Xmm) // FMA4
5071 ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Xmm, Xmm, Xmm, Mem) // FMA4
5072 ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Ymm, Ymm, Ymm, Ymm) // FMA4
5073 ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Ymm, Ymm, Mem, Ymm) // FMA4
5074 ASMJIT_INST_4x(vfnmaddps, Vfnmaddps, Ymm, Ymm, Ymm, Mem) // FMA4
5075 ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Xmm, Xmm) // FMA4
5076 ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Mem, Xmm) // FMA4
5077 ASMJIT_INST_4x(vfnmaddsd, Vfnmaddsd, Xmm, Xmm, Xmm, Mem) // FMA4
5078 ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Xmm, Xmm) // FMA4
5079 ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Mem, Xmm) // FMA4
5080 ASMJIT_INST_4x(vfnmaddss, Vfnmaddss, Xmm, Xmm, Xmm, Mem) // FMA4
5081 ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Xmm, Xmm, Xmm, Xmm) // FMA4
5082 ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Xmm, Xmm, Mem, Xmm) // FMA4
5083 ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Xmm, Xmm, Xmm, Mem) // FMA4
5084 ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Ymm, Ymm, Ymm, Ymm) // FMA4
5085 ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Ymm, Ymm, Mem, Ymm) // FMA4
5086 ASMJIT_INST_4x(vfnmsubpd, Vfnmsubpd, Ymm, Ymm, Ymm, Mem) // FMA4
5087 ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Xmm, Xmm, Xmm, Xmm) // FMA4
5088 ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Xmm, Xmm, Mem, Xmm) // FMA4
5089 ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Xmm, Xmm, Xmm, Mem) // FMA4
5090 ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Ymm, Ymm, Ymm, Ymm) // FMA4
5091 ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Ymm, Ymm, Mem, Ymm) // FMA4
5092 ASMJIT_INST_4x(vfnmsubps, Vfnmsubps, Ymm, Ymm, Ymm, Mem) // FMA4
5093 ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Xmm, Xmm) // FMA4
5094 ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Mem, Xmm) // FMA4
5095 ASMJIT_INST_4x(vfnmsubsd, Vfnmsubsd, Xmm, Xmm, Xmm, Mem) // FMA4
5096 ASMJIT_INST_4x(vfnmsubss, Vfnmsubss, Xmm, Xmm, Xmm, Xmm) // FMA4
5097 ASMJIT_INST_4x(vfnmsubss, Vfnmsubss, Xmm, Xmm, Mem, Xmm) // FMA4
5098 ASMJIT_INST_4x(vfnmsubss, Vfnmsubss, Xmm, Xmm, Xmm, Mem) // FMA4
5099
5100 //! \}
5101
5102 //! \name XOP Instructions (Deprecated)
5103 //! \{
5104
5105 ASMJIT_INST_2x(vfrczpd, Vfrczpd, Xmm, Xmm) // XOP
5106 ASMJIT_INST_2x(vfrczpd, Vfrczpd, Xmm, Mem) // XOP
5107 ASMJIT_INST_2x(vfrczpd, Vfrczpd, Ymm, Ymm) // XOP
5108 ASMJIT_INST_2x(vfrczpd, Vfrczpd, Ymm, Mem) // XOP
5109 ASMJIT_INST_2x(vfrczps, Vfrczps, Xmm, Xmm) // XOP
5110 ASMJIT_INST_2x(vfrczps, Vfrczps, Xmm, Mem) // XOP
5111 ASMJIT_INST_2x(vfrczps, Vfrczps, Ymm, Ymm) // XOP
5112 ASMJIT_INST_2x(vfrczps, Vfrczps, Ymm, Mem) // XOP
5113 ASMJIT_INST_2x(vfrczsd, Vfrczsd, Xmm, Xmm) // XOP
5114 ASMJIT_INST_2x(vfrczsd, Vfrczsd, Xmm, Mem) // XOP
5115 ASMJIT_INST_2x(vfrczss, Vfrczss, Xmm, Xmm) // XOP
5116 ASMJIT_INST_2x(vfrczss, Vfrczss, Xmm, Mem) // XOP
5117 ASMJIT_INST_4x(vpcmov, Vpcmov, Xmm, Xmm, Xmm, Xmm) // XOP
5118 ASMJIT_INST_4x(vpcmov, Vpcmov, Xmm, Xmm, Mem, Xmm) // XOP
5119 ASMJIT_INST_4x(vpcmov, Vpcmov, Xmm, Xmm, Xmm, Mem) // XOP
5120 ASMJIT_INST_4x(vpcmov, Vpcmov, Ymm, Ymm, Ymm, Ymm) // XOP
5121 ASMJIT_INST_4x(vpcmov, Vpcmov, Ymm, Ymm, Mem, Ymm) // XOP
5122 ASMJIT_INST_4x(vpcmov, Vpcmov, Ymm, Ymm, Ymm, Mem) // XOP
5123 ASMJIT_INST_4i(vpcomb, Vpcomb, Xmm, Xmm, Xmm, Imm) // XOP
5124 ASMJIT_INST_4i(vpcomb, Vpcomb, Xmm, Xmm, Mem, Imm) // XOP
5125 ASMJIT_INST_4i(vpcomd, Vpcomd, Xmm, Xmm, Xmm, Imm) // XOP
5126 ASMJIT_INST_4i(vpcomd, Vpcomd, Xmm, Xmm, Mem, Imm) // XOP
5127 ASMJIT_INST_4i(vpcomq, Vpcomq, Xmm, Xmm, Xmm, Imm) // XOP
5128 ASMJIT_INST_4i(vpcomq, Vpcomq, Xmm, Xmm, Mem, Imm) // XOP
5129 ASMJIT_INST_4i(vpcomw, Vpcomw, Xmm, Xmm, Xmm, Imm) // XOP
5130 ASMJIT_INST_4i(vpcomw, Vpcomw, Xmm, Xmm, Mem, Imm) // XOP
5131 ASMJIT_INST_4i(vpcomub, Vpcomub, Xmm, Xmm, Xmm, Imm) // XOP
5132 ASMJIT_INST_4i(vpcomub, Vpcomub, Xmm, Xmm, Mem, Imm) // XOP
5133 ASMJIT_INST_4i(vpcomud, Vpcomud, Xmm, Xmm, Xmm, Imm) // XOP
5134 ASMJIT_INST_4i(vpcomud, Vpcomud, Xmm, Xmm, Mem, Imm) // XOP
5135 ASMJIT_INST_4i(vpcomuq, Vpcomuq, Xmm, Xmm, Xmm, Imm) // XOP
5136 ASMJIT_INST_4i(vpcomuq, Vpcomuq, Xmm, Xmm, Mem, Imm) // XOP
5137 ASMJIT_INST_4i(vpcomuw, Vpcomuw, Xmm, Xmm, Xmm, Imm) // XOP
5138 ASMJIT_INST_4i(vpcomuw, Vpcomuw, Xmm, Xmm, Mem, Imm) // XOP
5139 ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Xmm, Xmm, Xmm, Xmm, Imm) // XOP
5140 ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Xmm, Xmm, Mem, Xmm, Imm) // XOP
5141 ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Xmm, Xmm, Xmm, Mem, Imm) // XOP
5142 ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Ymm, Ymm, Ymm, Ymm, Imm) // XOP
5143 ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Ymm, Ymm, Mem, Ymm, Imm) // XOP
5144 ASMJIT_INST_5i(vpermil2pd, Vpermil2pd, Ymm, Ymm, Ymm, Mem, Imm) // XOP
5145 ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Xmm, Xmm, Xmm, Xmm, Imm) // XOP
5146 ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Xmm, Xmm, Mem, Xmm, Imm) // XOP
5147 ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Xmm, Xmm, Xmm, Mem, Imm) // XOP
5148 ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Ymm, Ymm, Ymm, Ymm, Imm) // XOP
5149 ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Ymm, Ymm, Mem, Ymm, Imm) // XOP
5150 ASMJIT_INST_5i(vpermil2ps, Vpermil2ps, Ymm, Ymm, Ymm, Mem, Imm) // XOP
5151 ASMJIT_INST_2x(vphaddbd, Vphaddbd, Xmm, Xmm) // XOP
5152 ASMJIT_INST_2x(vphaddbd, Vphaddbd, Xmm, Mem) // XOP
5153 ASMJIT_INST_2x(vphaddbq, Vphaddbq, Xmm, Xmm) // XOP
5154 ASMJIT_INST_2x(vphaddbq, Vphaddbq, Xmm, Mem) // XOP
5155 ASMJIT_INST_2x(vphaddbw, Vphaddbw, Xmm, Xmm) // XOP
5156 ASMJIT_INST_2x(vphaddbw, Vphaddbw, Xmm, Mem) // XOP
5157 ASMJIT_INST_2x(vphadddq, Vphadddq, Xmm, Xmm) // XOP
5158 ASMJIT_INST_2x(vphadddq, Vphadddq, Xmm, Mem) // XOP
5159 ASMJIT_INST_2x(vphaddwd, Vphaddwd, Xmm, Xmm) // XOP
5160 ASMJIT_INST_2x(vphaddwd, Vphaddwd, Xmm, Mem) // XOP
5161 ASMJIT_INST_2x(vphaddwq, Vphaddwq, Xmm, Xmm) // XOP
5162 ASMJIT_INST_2x(vphaddwq, Vphaddwq, Xmm, Mem) // XOP
5163 ASMJIT_INST_2x(vphaddubd, Vphaddubd, Xmm, Xmm) // XOP
5164 ASMJIT_INST_2x(vphaddubd, Vphaddubd, Xmm, Mem) // XOP
5165 ASMJIT_INST_2x(vphaddubq, Vphaddubq, Xmm, Xmm) // XOP
5166 ASMJIT_INST_2x(vphaddubq, Vphaddubq, Xmm, Mem) // XOP
5167 ASMJIT_INST_2x(vphaddubw, Vphaddubw, Xmm, Xmm) // XOP
5168 ASMJIT_INST_2x(vphaddubw, Vphaddubw, Xmm, Mem) // XOP
5169 ASMJIT_INST_2x(vphaddudq, Vphaddudq, Xmm, Xmm) // XOP
5170 ASMJIT_INST_2x(vphaddudq, Vphaddudq, Xmm, Mem) // XOP
5171 ASMJIT_INST_2x(vphadduwd, Vphadduwd, Xmm, Xmm) // XOP
5172 ASMJIT_INST_2x(vphadduwd, Vphadduwd, Xmm, Mem) // XOP
5173 ASMJIT_INST_2x(vphadduwq, Vphadduwq, Xmm, Xmm) // XOP
5174 ASMJIT_INST_2x(vphadduwq, Vphadduwq, Xmm, Mem) // XOP
5175 ASMJIT_INST_2x(vphsubbw, Vphsubbw, Xmm, Xmm) // XOP
5176 ASMJIT_INST_2x(vphsubbw, Vphsubbw, Xmm, Mem) // XOP
5177 ASMJIT_INST_2x(vphsubdq, Vphsubdq, Xmm, Xmm) // XOP
5178 ASMJIT_INST_2x(vphsubdq, Vphsubdq, Xmm, Mem) // XOP
5179 ASMJIT_INST_2x(vphsubwd, Vphsubwd, Xmm, Xmm) // XOP
5180 ASMJIT_INST_2x(vphsubwd, Vphsubwd, Xmm, Mem) // XOP
5181 ASMJIT_INST_4x(vpmacsdd, Vpmacsdd, Xmm, Xmm, Xmm, Xmm) // XOP
5182 ASMJIT_INST_4x(vpmacsdd, Vpmacsdd, Xmm, Xmm, Mem, Xmm) // XOP
5183 ASMJIT_INST_4x(vpmacsdqh, Vpmacsdqh, Xmm, Xmm, Xmm, Xmm) // XOP
5184 ASMJIT_INST_4x(vpmacsdqh, Vpmacsdqh, Xmm, Xmm, Mem, Xmm) // XOP
5185 ASMJIT_INST_4x(vpmacsdql, Vpmacsdql, Xmm, Xmm, Xmm, Xmm) // XOP
5186 ASMJIT_INST_4x(vpmacsdql, Vpmacsdql, Xmm, Xmm, Mem, Xmm) // XOP
5187 ASMJIT_INST_4x(vpmacswd, Vpmacswd, Xmm, Xmm, Xmm, Xmm) // XOP
5188 ASMJIT_INST_4x(vpmacswd, Vpmacswd, Xmm, Xmm, Mem, Xmm) // XOP
5189 ASMJIT_INST_4x(vpmacsww, Vpmacsww, Xmm, Xmm, Xmm, Xmm) // XOP
5190 ASMJIT_INST_4x(vpmacsww, Vpmacsww, Xmm, Xmm, Mem, Xmm) // XOP
5191 ASMJIT_INST_4x(vpmacssdd, Vpmacssdd, Xmm, Xmm, Xmm, Xmm) // XOP
5192 ASMJIT_INST_4x(vpmacssdd, Vpmacssdd, Xmm, Xmm, Mem, Xmm) // XOP
5193 ASMJIT_INST_4x(vpmacssdqh, Vpmacssdqh, Xmm, Xmm, Xmm, Xmm) // XOP
5194 ASMJIT_INST_4x(vpmacssdqh, Vpmacssdqh, Xmm, Xmm, Mem, Xmm) // XOP
5195 ASMJIT_INST_4x(vpmacssdql, Vpmacssdql, Xmm, Xmm, Xmm, Xmm) // XOP
5196 ASMJIT_INST_4x(vpmacssdql, Vpmacssdql, Xmm, Xmm, Mem, Xmm) // XOP
5197 ASMJIT_INST_4x(vpmacsswd, Vpmacsswd, Xmm, Xmm, Xmm, Xmm) // XOP
5198 ASMJIT_INST_4x(vpmacsswd, Vpmacsswd, Xmm, Xmm, Mem, Xmm) // XOP
5199 ASMJIT_INST_4x(vpmacssww, Vpmacssww, Xmm, Xmm, Xmm, Xmm) // XOP
5200 ASMJIT_INST_4x(vpmacssww, Vpmacssww, Xmm, Xmm, Mem, Xmm) // XOP
5201 ASMJIT_INST_4x(vpmadcsswd, Vpmadcsswd, Xmm, Xmm, Xmm, Xmm) // XOP
5202 ASMJIT_INST_4x(vpmadcsswd, Vpmadcsswd, Xmm, Xmm, Mem, Xmm) // XOP
5203 ASMJIT_INST_4x(vpmadcswd, Vpmadcswd, Xmm, Xmm, Xmm, Xmm) // XOP
5204 ASMJIT_INST_4x(vpmadcswd, Vpmadcswd, Xmm, Xmm, Mem, Xmm) // XOP
5205 ASMJIT_INST_4x(vpperm, Vpperm, Xmm, Xmm, Xmm, Xmm) // XOP
5206 ASMJIT_INST_4x(vpperm, Vpperm, Xmm, Xmm, Mem, Xmm) // XOP
5207 ASMJIT_INST_4x(vpperm, Vpperm, Xmm, Xmm, Xmm, Mem) // XOP
5208 ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Xmm, Xmm) // XOP
5209 ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Mem, Xmm) // XOP
5210 ASMJIT_INST_3x(vprotb, Vprotb, Xmm, Xmm, Mem) // XOP
5211 ASMJIT_INST_3i(vprotb, Vprotb, Xmm, Xmm, Imm) // XOP
5212 ASMJIT_INST_3i(vprotb, Vprotb, Xmm, Mem, Imm) // XOP
5213 ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Xmm, Xmm) // XOP
5214 ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Mem, Xmm) // XOP
5215 ASMJIT_INST_3x(vprotd, Vprotd, Xmm, Xmm, Mem) // XOP
5216 ASMJIT_INST_3i(vprotd, Vprotd, Xmm, Xmm, Imm) // XOP
5217 ASMJIT_INST_3i(vprotd, Vprotd, Xmm, Mem, Imm) // XOP
5218 ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Xmm, Xmm) // XOP
5219 ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Mem, Xmm) // XOP
5220 ASMJIT_INST_3x(vprotq, Vprotq, Xmm, Xmm, Mem) // XOP
5221 ASMJIT_INST_3i(vprotq, Vprotq, Xmm, Xmm, Imm) // XOP
5222 ASMJIT_INST_3i(vprotq, Vprotq, Xmm, Mem, Imm) // XOP
5223 ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Xmm, Xmm) // XOP
5224 ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Mem, Xmm) // XOP
5225 ASMJIT_INST_3x(vprotw, Vprotw, Xmm, Xmm, Mem) // XOP
5226 ASMJIT_INST_3i(vprotw, Vprotw, Xmm, Xmm, Imm) // XOP
5227 ASMJIT_INST_3i(vprotw, Vprotw, Xmm, Mem, Imm) // XOP
5228 ASMJIT_INST_3x(vpshab, Vpshab, Xmm, Xmm, Xmm) // XOP
5229 ASMJIT_INST_3x(vpshab, Vpshab, Xmm, Mem, Xmm) // XOP
5230 ASMJIT_INST_3x(vpshab, Vpshab, Xmm, Xmm, Mem) // XOP
5231 ASMJIT_INST_3x(vpshad, Vpshad, Xmm, Xmm, Xmm) // XOP
5232 ASMJIT_INST_3x(vpshad, Vpshad, Xmm, Mem, Xmm) // XOP
5233 ASMJIT_INST_3x(vpshad, Vpshad, Xmm, Xmm, Mem) // XOP
5234 ASMJIT_INST_3x(vpshaq, Vpshaq, Xmm, Xmm, Xmm) // XOP
5235 ASMJIT_INST_3x(vpshaq, Vpshaq, Xmm, Mem, Xmm) // XOP
5236 ASMJIT_INST_3x(vpshaq, Vpshaq, Xmm, Xmm, Mem) // XOP
5237 ASMJIT_INST_3x(vpshaw, Vpshaw, Xmm, Xmm, Xmm) // XOP
5238 ASMJIT_INST_3x(vpshaw, Vpshaw, Xmm, Mem, Xmm) // XOP
5239 ASMJIT_INST_3x(vpshaw, Vpshaw, Xmm, Xmm, Mem) // XOP
5240 ASMJIT_INST_3x(vpshlb, Vpshlb, Xmm, Xmm, Xmm) // XOP
5241 ASMJIT_INST_3x(vpshlb, Vpshlb, Xmm, Mem, Xmm) // XOP
5242 ASMJIT_INST_3x(vpshlb, Vpshlb, Xmm, Xmm, Mem) // XOP
5243 ASMJIT_INST_3x(vpshld, Vpshld, Xmm, Xmm, Xmm) // XOP
5244 ASMJIT_INST_3x(vpshld, Vpshld, Xmm, Mem, Xmm) // XOP
5245 ASMJIT_INST_3x(vpshld, Vpshld, Xmm, Xmm, Mem) // XOP
5246 ASMJIT_INST_3x(vpshlq, Vpshlq, Xmm, Xmm, Xmm) // XOP
5247 ASMJIT_INST_3x(vpshlq, Vpshlq, Xmm, Mem, Xmm) // XOP
5248 ASMJIT_INST_3x(vpshlq, Vpshlq, Xmm, Xmm, Mem) // XOP
5249 ASMJIT_INST_3x(vpshlw, Vpshlw, Xmm, Xmm, Xmm) // XOP
5250 ASMJIT_INST_3x(vpshlw, Vpshlw, Xmm, Mem, Xmm) // XOP
5251 ASMJIT_INST_3x(vpshlw, Vpshlw, Xmm, Xmm, Mem) // XOP
5252
5253 //! \}
5254 };
5255
5256 // ============================================================================
5257 // [asmjit::x86::EmitterImplicitT]
5258 // ============================================================================
5259
5260 template<typename This>
5261 struct EmitterImplicitT : public EmitterExplicitT<This> {
5262 //! \name Prefix Options
5263 //! \{
5264
5265 //! Use REP/REPE prefix.
5266 inline This& rep() noexcept { return EmitterExplicitT<This>::_addInstOptions(Inst::kOptionRep); }
5267 //! Use REP/REPE prefix.
5268 inline This& repe() noexcept { return rep(); }
5269 //! Use REP/REPE prefix.
5270 inline This& repz() noexcept { return rep(); }
5271
5272 //! Use REPNE prefix.
5273 inline This& repne() noexcept { return EmitterExplicitT<This>::_addInstOptions(Inst::kOptionRepne); }
5274 //! Use REPNE prefix.
5275 inline This& repnz() noexcept { return repne(); }
5276
5277 //! \}
5278
5279 //! \name Base Instructions & GP Extensions
5280 //! \{
5281
5282 //! \cond
5283 using EmitterExplicitT<This>::_emitter;
5284
5285 // TODO: xrstor and xsave don't have explicit variants yet.
5286 using EmitterExplicitT<This>::cbw;
5287 using EmitterExplicitT<This>::cdq;
5288 using EmitterExplicitT<This>::cdqe;
5289 using EmitterExplicitT<This>::clzero;
5290 using EmitterExplicitT<This>::cqo;
5291 using EmitterExplicitT<This>::cwd;
5292 using EmitterExplicitT<This>::cwde;
5293 using EmitterExplicitT<This>::cmpsd;
5294 using EmitterExplicitT<This>::cmpxchg;
5295 using EmitterExplicitT<This>::cmpxchg8b;
5296 using EmitterExplicitT<This>::cmpxchg16b;
5297 using EmitterExplicitT<This>::cpuid;
5298 using EmitterExplicitT<This>::div;
5299 using EmitterExplicitT<This>::idiv;
5300 using EmitterExplicitT<This>::imul;
5301 using EmitterExplicitT<This>::jecxz;
5302 using EmitterExplicitT<This>::lahf;
5303 using EmitterExplicitT<This>::mulx;
5304 using EmitterExplicitT<This>::movsd;
5305 using EmitterExplicitT<This>::mul;
5306 using EmitterExplicitT<This>::rdmsr;
5307 using EmitterExplicitT<This>::rdpmc;
5308 using EmitterExplicitT<This>::rdtsc;
5309 using EmitterExplicitT<This>::rdtscp;
5310 using EmitterExplicitT<This>::sahf;
5311 using EmitterExplicitT<This>::wrmsr;
5312 using EmitterExplicitT<This>::xgetbv;
5313 using EmitterExplicitT<This>::xsetbv;
5314 //! \endcond
5315
5316 ASMJIT_INST_0x(cbw, Cbw) // ANY [IMPLICIT] AX <- Sign Extend AL
5317 ASMJIT_INST_0x(cdq, Cdq) // ANY [IMPLICIT] EDX:EAX <- Sign Extend EAX
5318 ASMJIT_INST_0x(cdqe, Cdqe) // X64 [IMPLICIT] RAX <- Sign Extend EAX
5319 ASMJIT_INST_2x(cmpxchg, Cmpxchg, Gp, Gp) // I486 [IMPLICIT]
5320 ASMJIT_INST_2x(cmpxchg, Cmpxchg, Mem, Gp) // I486 [IMPLICIT]
5321 ASMJIT_INST_1x(cmpxchg16b, Cmpxchg16b, Mem) // CMPXCHG8B [IMPLICIT] m == RDX:RAX ? m <- RCX:RBX
5322 ASMJIT_INST_1x(cmpxchg8b, Cmpxchg8b, Mem) // CMPXCHG16B[IMPLICIT] m == EDX:EAX ? m <- ECX:EBX
5323 ASMJIT_INST_0x(cpuid, Cpuid) // I486 [IMPLICIT] EAX:EBX:ECX:EDX <- CPUID[EAX:ECX]
5324 ASMJIT_INST_0x(cqo, Cqo) // X64 [IMPLICIT] RDX:RAX <- Sign Extend RAX
5325 ASMJIT_INST_0x(cwd, Cwd) // ANY [IMPLICIT] DX:AX <- Sign Extend AX
5326 ASMJIT_INST_0x(cwde, Cwde) // ANY [IMPLICIT] EAX <- Sign Extend AX
5327 ASMJIT_INST_0x(daa, Daa)
5328 ASMJIT_INST_0x(das, Das)
5329 ASMJIT_INST_1x(div, Div, Gp) // ANY [IMPLICIT] {AH[Rem]: AL[Quot] <- AX / r8} {xDX[Rem]:xAX[Quot] <- DX:AX / r16|r32|r64}
5330 ASMJIT_INST_1x(div, Div, Mem) // ANY [IMPLICIT] {AH[Rem]: AL[Quot] <- AX / m8} {xDX[Rem]:xAX[Quot] <- DX:AX / m16|m32|m64}
5331 ASMJIT_INST_1x(idiv, Idiv, Gp) // ANY [IMPLICIT] {AH[Rem]: AL[Quot] <- AX / r8} {xDX[Rem]:xAX[Quot] <- DX:AX / r16|r32|r64}
5332 ASMJIT_INST_1x(idiv, Idiv, Mem) // ANY [IMPLICIT] {AH[Rem]: AL[Quot] <- AX / m8} {xDX[Rem]:xAX[Quot] <- DX:AX / m16|m32|m64}
5333 ASMJIT_INST_1x(imul, Imul, Gp) // ANY [IMPLICIT] {AX <- AL * r8} {xAX:xDX <- xAX * r16|r32|r64}
5334 ASMJIT_INST_1x(imul, Imul, Mem) // ANY [IMPLICIT] {AX <- AL * m8} {xAX:xDX <- xAX * m16|m32|m64}
5335 ASMJIT_INST_0x(iret, Iret) // ANY [IMPLICIT]
5336 ASMJIT_INST_0x(iretd, Iretd) // ANY [IMPLICIT]
5337 ASMJIT_INST_0x(iretq, Iretq) // X64 [IMPLICIT]
5338 ASMJIT_INST_0x(iretw, Iretw) // ANY [IMPLICIT]
5339 ASMJIT_INST_1x(jecxz, Jecxz, Label) // ANY [IMPLICIT] Short jump if CX/ECX/RCX is zero.
5340 ASMJIT_INST_1x(jecxz, Jecxz, Imm) // ANY [IMPLICIT] Short jump if CX/ECX/RCX is zero.
5341 ASMJIT_INST_1x(jecxz, Jecxz, uint64_t) // ANY [IMPLICIT] Short jump if CX/ECX/RCX is zero.
5342 ASMJIT_INST_0x(lahf, Lahf) // LAHFSAHF [IMPLICIT] AH <- EFL
5343 ASMJIT_INST_1x(loop, Loop, Label) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0.
5344 ASMJIT_INST_1x(loop, Loop, Imm) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0.
5345 ASMJIT_INST_1x(loop, Loop, uint64_t) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0.
5346 ASMJIT_INST_1x(loope, Loope, Label) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
5347 ASMJIT_INST_1x(loope, Loope, Imm) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
5348 ASMJIT_INST_1x(loope, Loope, uint64_t) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 1.
5349 ASMJIT_INST_1x(loopne, Loopne, Label) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
5350 ASMJIT_INST_1x(loopne, Loopne, Imm) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
5351 ASMJIT_INST_1x(loopne, Loopne, uint64_t) // ANY [IMPLICIT] Decrement xCX; short jump if xCX != 0 && ZF == 0.
5352 ASMJIT_INST_1x(mul, Mul, Gp) // ANY [IMPLICIT] {AX <- AL * r8} {xDX:xAX <- xAX * r16|r32|r64}
5353 ASMJIT_INST_1x(mul, Mul, Mem) // ANY [IMPLICIT] {AX <- AL * m8} {xDX:xAX <- xAX * m16|m32|m64}
5354 ASMJIT_INST_0x(rdmsr, Rdmsr) // ANY [IMPLICIT]
5355 ASMJIT_INST_0x(rdpmc, Rdpmc) // ANY [IMPLICIT]
5356 ASMJIT_INST_0x(rdtsc, Rdtsc) // RDTSC [IMPLICIT] EDX:EAX <- CNT
5357 ASMJIT_INST_0x(rdtscp, Rdtscp) // RDTSCP [IMPLICIT] EDX:EAX:EXC <- CNT
5358 ASMJIT_INST_0x(ret, Ret)
5359 ASMJIT_INST_1i(ret, Ret, Imm)
5360 ASMJIT_INST_0x(sahf, Sahf) // LAHFSAHF [IMPLICIT] EFL <- AH
5361 ASMJIT_INST_0x(syscall, Syscall) // X64 [IMPLICIT]
5362 ASMJIT_INST_0x(sysenter, Sysenter) // X64 [IMPLICIT]
5363 ASMJIT_INST_0x(sysexit, Sysexit) // X64 [IMPLICIT]
5364 ASMJIT_INST_0x(sysexit64, Sysexit64) // X64 [IMPLICIT]
5365 ASMJIT_INST_0x(sysret, Sysret) // X64 [IMPLICIT]
5366 ASMJIT_INST_0x(sysret64, Sysret64) // X64 [IMPLICIT]
5367 ASMJIT_INST_0x(wrmsr, Wrmsr) // ANY [IMPLICIT]
5368 ASMJIT_INST_0x(xlatb, Xlatb) // ANY [IMPLICIT]
5369
5370 //! \}
5371
5372 //! \name String Instruction Aliases
5373 //! \{
5374
5375 inline Error cmpsb() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 1), EmitterExplicitT<This>::ptr_zdi(0, 1)); }
5376 inline Error cmpsd() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 4), EmitterExplicitT<This>::ptr_zdi(0, 4)); }
5377 inline Error cmpsq() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 8), EmitterExplicitT<This>::ptr_zdi(0, 8)); }
5378 inline Error cmpsw() { return _emitter()->emit(Inst::kIdCmps, EmitterExplicitT<This>::ptr_zsi(0, 2), EmitterExplicitT<This>::ptr_zdi(0, 2)); }
5379
5380 inline Error lodsb() { return _emitter()->emit(Inst::kIdLods, al , EmitterExplicitT<This>::ptr_zdi(0, 1)); }
5381 inline Error lodsd() { return _emitter()->emit(Inst::kIdLods, eax, EmitterExplicitT<This>::ptr_zdi(0, 4)); }
5382 inline Error lodsq() { return _emitter()->emit(Inst::kIdLods, rax, EmitterExplicitT<This>::ptr_zdi(0, 8)); }
5383 inline Error lodsw() { return _emitter()->emit(Inst::kIdLods, ax , EmitterExplicitT<This>::ptr_zdi(0, 2)); }
5384
5385 inline Error movsb() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 1), EmitterExplicitT<This>::ptr_zsi(0, 1)); }
5386 inline Error movsd() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 4), EmitterExplicitT<This>::ptr_zsi(0, 4)); }
5387 inline Error movsq() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 8), EmitterExplicitT<This>::ptr_zsi(0, 8)); }
5388 inline Error movsw() { return _emitter()->emit(Inst::kIdMovs, EmitterExplicitT<This>::ptr_zdi(0, 2), EmitterExplicitT<This>::ptr_zsi(0, 2)); }
5389
5390 inline Error scasb() { return _emitter()->emit(Inst::kIdScas, al , EmitterExplicitT<This>::ptr_zdi(0, 1)); }
5391 inline Error scasd() { return _emitter()->emit(Inst::kIdScas, eax, EmitterExplicitT<This>::ptr_zdi(0, 4)); }
5392 inline Error scasq() { return _emitter()->emit(Inst::kIdScas, rax, EmitterExplicitT<This>::ptr_zdi(0, 8)); }
5393 inline Error scasw() { return _emitter()->emit(Inst::kIdScas, ax , EmitterExplicitT<This>::ptr_zdi(0, 2)); }
5394
5395 inline Error stosb() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 1), al ); }
5396 inline Error stosd() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 4), eax); }
5397 inline Error stosq() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 8), rax); }
5398 inline Error stosw() { return _emitter()->emit(Inst::kIdStos, EmitterExplicitT<This>::ptr_zdi(0, 2), ax ); }
5399
5400 //! \}
5401
5402 //! \name CL Instructions
5403 //! \{
5404
5405 ASMJIT_INST_0x(clzero, Clzero) // CLZERO [IMPLICIT]
5406
5407 //! \}
5408
5409 //! \name BMI2 Instructions
5410 //! \{
5411
5412 ASMJIT_INST_3x(mulx, Mulx, Gp, Gp, Gp) // BMI2 [IMPLICIT]
5413 ASMJIT_INST_3x(mulx, Mulx, Gp, Gp, Mem) // BMI2 [IMPLICIT]
5414
5415 //! \}
5416
5417 //! \name FXSR & XSAVE Instructions
5418 //! \{
5419
5420 ASMJIT_INST_0x(xgetbv, Xgetbv) // XSAVE [IMPLICIT] EDX:EAX <- XCR[ECX]
5421 ASMJIT_INST_1x(xrstor, Xrstor, Mem) // XSAVE [IMPLICIT]
5422 ASMJIT_INST_1x(xrstor64, Xrstor64, Mem) // XSAVE+X64 [IMPLICIT]
5423 ASMJIT_INST_1x(xrstors, Xrstors, Mem) // XSAVE [IMPLICIT]
5424 ASMJIT_INST_1x(xrstors64, Xrstors64, Mem) // XSAVE+X64 [IMPLICIT]
5425 ASMJIT_INST_1x(xsave, Xsave, Mem) // XSAVE [IMPLICIT]
5426 ASMJIT_INST_1x(xsave64, Xsave64, Mem) // XSAVE+X64 [IMPLICIT]
5427 ASMJIT_INST_1x(xsavec, Xsavec, Mem) // XSAVE [IMPLICIT]
5428 ASMJIT_INST_1x(xsavec64, Xsavec64, Mem) // XSAVE+X64 [IMPLICIT]
5429 ASMJIT_INST_1x(xsaveopt, Xsaveopt, Mem) // XSAVE [IMPLICIT]
5430 ASMJIT_INST_1x(xsaveopt64, Xsaveopt64, Mem) // XSAVE+X64 [IMPLICIT]
5431 ASMJIT_INST_1x(xsaves, Xsaves, Mem) // XSAVE [IMPLICIT]
5432 ASMJIT_INST_1x(xsaves64, Xsaves64, Mem) // XSAVE+X64 [IMPLICIT]
5433 ASMJIT_INST_0x(xsetbv, Xsetbv) // XSAVE [IMPLICIT] XCR[ECX] <- EDX:EAX
5434
5435 //! \}
5436
5437 //! \name Monitor & MWait Instructions
5438 //! \{
5439
5440 ASMJIT_INST_0x(monitor, Monitor)
5441 ASMJIT_INST_0x(monitorx, Monitorx)
5442 ASMJIT_INST_0x(mwait, Mwait)
5443 ASMJIT_INST_0x(mwaitx, Mwaitx)
5444
5445 //! \}
5446
5447 //! \name MMX & SSE Instructions
5448 //! \{
5449
5450 //! \cond
5451 using EmitterExplicitT<This>::blendvpd;
5452 using EmitterExplicitT<This>::blendvps;
5453 using EmitterExplicitT<This>::maskmovq;
5454 using EmitterExplicitT<This>::maskmovdqu;
5455 using EmitterExplicitT<This>::pblendvb;
5456 using EmitterExplicitT<This>::pcmpestri;
5457 using EmitterExplicitT<This>::pcmpestrm;
5458 using EmitterExplicitT<This>::pcmpistri;
5459 using EmitterExplicitT<This>::pcmpistrm;
5460 //! \endcond
5461
5462 ASMJIT_INST_2x(blendvpd, Blendvpd, Xmm, Xmm) // SSE4_1 [IMPLICIT]
5463 ASMJIT_INST_2x(blendvpd, Blendvpd, Xmm, Mem) // SSE4_1 [IMPLICIT]
5464 ASMJIT_INST_2x(blendvps, Blendvps, Xmm, Xmm) // SSE4_1 [IMPLICIT]
5465 ASMJIT_INST_2x(blendvps, Blendvps, Xmm, Mem) // SSE4_1 [IMPLICIT]
5466 ASMJIT_INST_2x(pblendvb, Pblendvb, Xmm, Xmm) // SSE4_1 [IMPLICIT]
5467 ASMJIT_INST_2x(pblendvb, Pblendvb, Xmm, Mem) // SSE4_1 [IMPLICIT]
5468 ASMJIT_INST_2x(maskmovq, Maskmovq, Mm, Mm) // SSE [IMPLICIT]
5469 ASMJIT_INST_2x(maskmovdqu, Maskmovdqu, Xmm, Xmm) // SSE2 [IMPLICIT]
5470 ASMJIT_INST_3i(pcmpestri, Pcmpestri, Xmm, Xmm, Imm) // SSE4_1 [IMPLICIT]
5471 ASMJIT_INST_3i(pcmpestri, Pcmpestri, Xmm, Mem, Imm) // SSE4_1 [IMPLICIT]
5472 ASMJIT_INST_3i(pcmpestrm, Pcmpestrm, Xmm, Xmm, Imm) // SSE4_1 [IMPLICIT]
5473 ASMJIT_INST_3i(pcmpestrm, Pcmpestrm, Xmm, Mem, Imm) // SSE4_1 [IMPLICIT]
5474 ASMJIT_INST_3i(pcmpistri, Pcmpistri, Xmm, Xmm, Imm) // SSE4_1 [IMPLICIT]
5475 ASMJIT_INST_3i(pcmpistri, Pcmpistri, Xmm, Mem, Imm) // SSE4_1 [IMPLICIT]
5476 ASMJIT_INST_3i(pcmpistrm, Pcmpistrm, Xmm, Xmm, Imm) // SSE4_1 [IMPLICIT]
5477 ASMJIT_INST_3i(pcmpistrm, Pcmpistrm, Xmm, Mem, Imm) // SSE4_1 [IMPLICIT]
5478
5479 //! \}
5480
5481 //! \name SHA Instructions
5482 //! \{
5483
5484 using EmitterExplicitT<This>::sha256rnds2;
5485
5486 ASMJIT_INST_2x(sha256rnds2, Sha256rnds2, Xmm, Xmm) // SHA [IMPLICIT]
5487 ASMJIT_INST_2x(sha256rnds2, Sha256rnds2, Xmm, Mem) // SHA [IMPLICIT]
5488
5489 //! \}
5490
5491 //! \name AVX, FMA, and AVX512 Instructions
5492 //! \{
5493
5494 using EmitterExplicitT<This>::vmaskmovdqu;
5495 using EmitterExplicitT<This>::vpcmpestri;
5496 using EmitterExplicitT<This>::vpcmpestrm;
5497 using EmitterExplicitT<This>::vpcmpistri;
5498 using EmitterExplicitT<This>::vpcmpistrm;
5499
5500 ASMJIT_INST_2x(vmaskmovdqu, Vmaskmovdqu, Xmm, Xmm) // AVX [IMPLICIT]
5501 ASMJIT_INST_3i(vpcmpestri, Vpcmpestri, Xmm, Xmm, Imm) // AVX [IMPLICIT]
5502 ASMJIT_INST_3i(vpcmpestri, Vpcmpestri, Xmm, Mem, Imm) // AVX [IMPLICIT]
5503 ASMJIT_INST_3i(vpcmpestrm, Vpcmpestrm, Xmm, Xmm, Imm) // AVX [IMPLICIT]
5504 ASMJIT_INST_3i(vpcmpestrm, Vpcmpestrm, Xmm, Mem, Imm) // AVX [IMPLICIT]
5505 ASMJIT_INST_3i(vpcmpistri, Vpcmpistri, Xmm, Xmm, Imm) // AVX [IMPLICIT]
5506 ASMJIT_INST_3i(vpcmpistri, Vpcmpistri, Xmm, Mem, Imm) // AVX [IMPLICIT]
5507 ASMJIT_INST_3i(vpcmpistrm, Vpcmpistrm, Xmm, Xmm, Imm) // AVX [IMPLICIT]
5508 ASMJIT_INST_3i(vpcmpistrm, Vpcmpistrm, Xmm, Mem, Imm) // AVX [IMPLICIT]
5509
5510 //! \}
5511 };
5512
5513 // ============================================================================
5514 // [asmjit::x86::Emitter]
5515 // ============================================================================
5516
5517 //! Emitter (X86).
5518 //!
5519 //! \note This class cannot be instantiated, you can only cast to it and use
5520 //! it as emitter that emits to either `x86::Assembler`, `x86::Builder`, or
5521 //! `x86::Compiler` (use with caution with `x86::Compiler` as it requires virtual
5522 //! registers).
class Emitter : public BaseEmitter, public EmitterImplicitT<Emitter> {
  // Deleted constructors - instances are only obtained by casting a concrete
  // emitter (Assembler/Builder/Compiler) to `Emitter`, never by construction.
  ASMJIT_NONCONSTRUCTIBLE(Emitter)
};
5526
5527 //! \}
5528
5529 #undef ASMJIT_INST_0x
5530 #undef ASMJIT_INST_1x
5531 #undef ASMJIT_INST_1i
5532 #undef ASMJIT_INST_1c
5533 #undef ASMJIT_INST_2x
5534 #undef ASMJIT_INST_2i
5535 #undef ASMJIT_INST_2c
5536 #undef ASMJIT_INST_3x
5537 #undef ASMJIT_INST_3i
5538 #undef ASMJIT_INST_3ii
5539 #undef ASMJIT_INST_4x
5540 #undef ASMJIT_INST_4i
5541 #undef ASMJIT_INST_4ii
5542 #undef ASMJIT_INST_5x
5543 #undef ASMJIT_INST_5i
5544 #undef ASMJIT_INST_6x
5545
5546 ASMJIT_END_SUB_NAMESPACE
5547
5548 #endif // _ASMJIT_X86_X86EMITTER_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #if defined(ASMJIT_BUILD_X86) && ASMJIT_ARCH_X86
8
9 #include "../core/cpuinfo.h"
10 #include "../core/support.h"
11 #include "../x86/x86features.h"
12
13 // Required by `__cpuidex()` and `_xgetbv()`.
14 #if defined(_MSC_VER)
15 #include <intrin.h>
16 #endif
17
18 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
19
20 // ============================================================================
21 // [asmjit::x86::Features - Detect]
22 // ============================================================================
23
// Output registers filled by the `cpuid` instruction (see `cpuidQuery`).
struct cpuid_t { uint32_t eax, ebx, ecx, edx; };
// Output registers (EDX:EAX pair) filled by `xgetbv` (see `xgetbvQuery`).
struct xgetbv_t { uint32_t eax, edx; };
26
// Executes `cpuid` instruction with `inEax`/`inEcx` as leaf/sub-leaf inputs
// and stores the resulting EAX:EBX:ECX:EDX into `out`.
static inline void cpuidQuery(cpuid_t* out, uint32_t inEax, uint32_t inEcx = 0) noexcept {
#if defined(_MSC_VER)
  __cpuidex(reinterpret_cast<int*>(out), inEax, inEcx);
#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 32
  // EBX is manually saved/restored through EDI instead of being listed as an
  // output, as it may be reserved by the compiler (e.g. as a PIC register).
  __asm__ __volatile__(
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
#elif defined(__GNUC__) && ASMJIT_ARCH_X86 == 64
  // Same RBX save/restore trick as the 32-bit path above.
  __asm__ __volatile__(
    "mov %%rbx, %%rdi\n"
    "cpuid\n"
    "xchg %%rdi, %%rbx\n" : "=a"(out->eax), "=D"(out->ebx), "=c"(out->ecx), "=d"(out->edx) : "a"(inEax), "c"(inEcx));
#else
  #error "[asmjit] x86::cpuidQuery() - Unsupported compiler."
#endif
}
45
// Executes the `xgetbv` instruction for XCR index `inEcx` and stores the
// EDX:EAX result into `out`. Callers in this file gate this on the
// XSAVE|OSXSAVE CPUID bits before calling.
static inline void xgetbvQuery(xgetbv_t* out, uint32_t inEcx) noexcept {
#if defined(_MSC_VER)
  uint64_t value = _xgetbv(inEcx);
  out->eax = uint32_t(value & 0xFFFFFFFFu);
  out->edx = uint32_t(value >> 32);
#elif defined(__GNUC__)
  uint32_t outEax;
  uint32_t outEdx;

  // Replaced, because the world is not perfect:
  //   __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
  // The raw bytes below are the `xgetbv` encoding, so the file builds even
  // with assemblers that don't recognize the mnemonic.
  __asm__ __volatile__(".byte 0x0F, 0x01, 0xD0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));

  out->eax = outEax;
  out->edx = outEdx;
#else
  // Unknown compiler - report zeros (behaves as "no extended state enabled").
  out->eax = 0;
  out->edx = 0;
#endif
}
67
68 // Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
69 static inline void simplifyCpuVendor(CpuInfo& cpu, uint32_t d0, uint32_t d1, uint32_t d2) noexcept {
70 struct Vendor {
71 char normalized[8];
72 union { char text[12]; uint32_t d[3]; };
73 };
74
75 static const Vendor table[] = {
76 { { 'A', 'M', 'D' }, {{ 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' }} },
77 { { 'I', 'N', 'T', 'E', 'L' }, {{ 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' }} },
78 { { 'V', 'I', 'A' }, {{ 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' }} },
79 { { 'V', 'I', 'A' }, {{ 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 }} },
80 { { 'U', 'N', 'K', 'N', 'O', 'W', 'N' }, {{ 0 }} }
81 };
82
83 uint32_t i;
84 for (i = 0; i < ASMJIT_ARRAY_SIZE(table) - 1; i++)
85 if (table[i].d[0] == d0 && table[i].d[1] == d1 && table[i].d[2] == d2)
86 break;
87 memcpy(cpu._vendor.str, table[i].normalized, 8);
88 }
89
// Compacts a CPU brand string in place: collapses runs of spaces into one and
// drops spaces adjacent to '@'. Bytes past the new terminator are zeroed as
// the string is consumed, so no stale characters remain in the buffer.
static inline void simplifyCpuBrand(char* s) noexcept {
  char* dst = s;
  char previous = 0;
  char current = s[0];

  // Clear each source byte right after reading it, which guarantees that
  // everything behind the read cursor is already zeroed.
  s[0] = '\0';

  while (current != 0) {
    // A space is dropped when it follows '@' or precedes another space or '@'.
    bool dropSpace = current == ' ' && (previous == '@' || s[1] == ' ' || s[1] == '@');
    if (!dropSpace) {
      *dst++ = current;
      previous = current;
    }

    current = *++s;
    s[0] = '\0';
  }

  *dst = '\0';
}
112
// Detects the host CPU: vendor, brand string, family/model/stepping, and ISA
// feature bits, queried via CPUID (and XGETBV for OS-enabled extended state).
ASMJIT_FAVOR_SIZE void detectCpu(CpuInfo& cpu) noexcept {
  using Support::bitTest;

  cpuid_t regs;
  xgetbv_t xcr0 { 0, 0 };
  Features& features = cpu._features.as<Features>();

  cpu.reset();
  cpu._archInfo.init(ArchInfo::kIdHost);
  cpu._maxLogicalProcessors = 1;
  features.add(Features::kI486);

  // --------------------------------------------------------------------------
  // [CPUID EAX=0x0]
  // --------------------------------------------------------------------------

  // Get vendor string/id (vendor text is returned in EBX:EDX:ECX order).
  cpuidQuery(&regs, 0x0);

  uint32_t maxId = regs.eax;
  simplifyCpuVendor(cpu, regs.ebx, regs.edx, regs.ecx);

  // --------------------------------------------------------------------------
  // [CPUID EAX=0x1]
  // --------------------------------------------------------------------------

  if (maxId >= 0x1) {
    // Get feature flags in ECX/EDX and family/model in EAX.
    cpuidQuery(&regs, 0x1);

    // Fill family and model fields.
    uint32_t modelId = (regs.eax >> 4) & 0x0F;
    uint32_t familyId = (regs.eax >> 8) & 0x0F;

    // Use extended family and model fields.
    if (familyId == 0x06u || familyId == 0x0Fu)
      modelId += (((regs.eax >> 16) & 0x0Fu) << 4);

    // NOTE(review): Intel SDM computes DisplayFamily as ExtendedFamily +
    // Family without a shift; the `<< 4` below differs from that formula -
    // confirm it's intentional.
    if (familyId == 0x0Fu)
      familyId += (((regs.eax >> 20) & 0xFFu) << 4);

    cpu._modelId = modelId;
    cpu._familyId = familyId;
    cpu._brandId = ((regs.ebx) & 0xFF);
    cpu._processorType = ((regs.eax >> 12) & 0x03);
    cpu._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
    cpu._stepping = ((regs.eax) & 0x0F);
    cpu._cacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;

    if (bitTest(regs.ecx, 0)) features.add(Features::kSSE3);
    if (bitTest(regs.ecx, 1)) features.add(Features::kPCLMULQDQ);
    if (bitTest(regs.ecx, 3)) features.add(Features::kMONITOR);
    if (bitTest(regs.ecx, 5)) features.add(Features::kVMX);
    if (bitTest(regs.ecx, 6)) features.add(Features::kSMX);
    if (bitTest(regs.ecx, 9)) features.add(Features::kSSSE3);
    if (bitTest(regs.ecx, 13)) features.add(Features::kCMPXCHG16B);
    if (bitTest(regs.ecx, 19)) features.add(Features::kSSE4_1);
    if (bitTest(regs.ecx, 20)) features.add(Features::kSSE4_2);
    if (bitTest(regs.ecx, 22)) features.add(Features::kMOVBE);
    if (bitTest(regs.ecx, 23)) features.add(Features::kPOPCNT);
    if (bitTest(regs.ecx, 25)) features.add(Features::kAESNI);
    if (bitTest(regs.ecx, 26)) features.add(Features::kXSAVE);
    if (bitTest(regs.ecx, 27)) features.add(Features::kOSXSAVE);
    if (bitTest(regs.ecx, 30)) features.add(Features::kRDRAND);
    if (bitTest(regs.edx, 0)) features.add(Features::kFPU);
    if (bitTest(regs.edx, 4)) features.add(Features::kRDTSC);
    if (bitTest(regs.edx, 5)) features.add(Features::kMSR);
    if (bitTest(regs.edx, 8)) features.add(Features::kCMPXCHG8B);
    if (bitTest(regs.edx, 15)) features.add(Features::kCMOV);
    if (bitTest(regs.edx, 19)) features.add(Features::kCLFLUSH);
    if (bitTest(regs.edx, 23)) features.add(Features::kMMX);
    if (bitTest(regs.edx, 24)) features.add(Features::kFXSR);
    if (bitTest(regs.edx, 25)) features.add(Features::kSSE, Features::kMMX2);
    if (bitTest(regs.edx, 26)) features.add(Features::kSSE, Features::kSSE2);
    if (bitTest(regs.edx, 28)) features.add(Features::kMT);

    // Get the content of XCR0 if supported by CPU and enabled by OS
    // (ECX bits 26 and 27 are XSAVE and OSXSAVE, both checked above).
    if ((regs.ecx & 0x0C000000u) == 0x0C000000u) {
      xgetbvQuery(&xcr0, 0);
    }

    // Detect AVX+.
    if (bitTest(regs.ecx, 28)) {
      // - XCR0[2:1] == 11b
      //   XMM & YMM states need to be enabled by OS.
      if ((xcr0.eax & 0x00000006u) == 0x00000006u) {
        features.add(Features::kAVX);

        if (bitTest(regs.ecx, 12)) features.add(Features::kFMA);
        if (bitTest(regs.ecx, 29)) features.add(Features::kF16C);
      }
    }
  }

  // --------------------------------------------------------------------------
  // [CPUID EAX=0x7]
  // --------------------------------------------------------------------------

  // Detect new features if the processor supports CPUID-07.
  bool maybeMPX = false;

  if (maxId >= 0x7) {
    cpuidQuery(&regs, 0x7);
    uint32_t maxSubLeafId = regs.eax;

    if (bitTest(regs.ebx, 0)) features.add(Features::kFSGSBASE);
    if (bitTest(regs.ebx, 3)) features.add(Features::kBMI);
    if (bitTest(regs.ebx, 4)) features.add(Features::kHLE);
    if (bitTest(regs.ebx, 7)) features.add(Features::kSMEP);
    if (bitTest(regs.ebx, 8)) features.add(Features::kBMI2);
    if (bitTest(regs.ebx, 9)) features.add(Features::kERMS);
    if (bitTest(regs.ebx, 11)) features.add(Features::kRTM);
    if (bitTest(regs.ebx, 14)) maybeMPX = true;
    if (bitTest(regs.ebx, 18)) features.add(Features::kRDSEED);
    if (bitTest(regs.ebx, 19)) features.add(Features::kADX);
    if (bitTest(regs.ebx, 20)) features.add(Features::kSMAP);
    if (bitTest(regs.ebx, 22)) features.add(Features::kPCOMMIT);
    if (bitTest(regs.ebx, 23)) features.add(Features::kCLFLUSHOPT);
    if (bitTest(regs.ebx, 24)) features.add(Features::kCLWB);
    if (bitTest(regs.ebx, 29)) features.add(Features::kSHA);
    if (bitTest(regs.ecx, 0)) features.add(Features::kPREFETCHWT1);
    if (bitTest(regs.ecx, 22)) features.add(Features::kRDPID);
    if (bitTest(regs.ecx, 25)) features.add(Features::kCLDEMOTE);
    if (bitTest(regs.ecx, 27)) features.add(Features::kMOVDIRI);
    if (bitTest(regs.ecx, 28)) features.add(Features::kMOVDIR64B);
    if (bitTest(regs.ecx, 29)) features.add(Features::kENQCMD);
    if (bitTest(regs.edx, 18)) features.add(Features::kPCONFIG);

    // Detect 'TSX' - Requires at least one of `HLE` and `RTM` features.
    if (features.hasHLE() || features.hasRTM())
      features.add(Features::kTSX);

    // Detect 'AVX2' - Requires AVX as well.
    if (bitTest(regs.ebx, 5) && features.hasAVX())
      features.add(Features::kAVX2);

    // Detect 'AVX_512'.
    if (bitTest(regs.ebx, 16)) {
      // - XCR0[2:1] == 11b - XMM/YMM states need to be enabled by OS.
      // - XCR0[7:5] == 111b - Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by OS.
      if ((xcr0.eax & 0x000000E6u) == 0x000000E6u) {
        features.add(Features::kAVX512_F);

        if (bitTest(regs.ebx, 17)) features.add(Features::kAVX512_DQ);
        if (bitTest(regs.ebx, 21)) features.add(Features::kAVX512_IFMA);
        if (bitTest(regs.ebx, 26)) features.add(Features::kAVX512_PFI);
        if (bitTest(regs.ebx, 27)) features.add(Features::kAVX512_ERI);
        if (bitTest(regs.ebx, 28)) features.add(Features::kAVX512_CDI);
        if (bitTest(regs.ebx, 30)) features.add(Features::kAVX512_BW);
        if (bitTest(regs.ebx, 31)) features.add(Features::kAVX512_VL);
        if (bitTest(regs.ecx, 1)) features.add(Features::kAVX512_VBMI);
        if (bitTest(regs.ecx, 5)) features.add(Features::kWAITPKG);
        if (bitTest(regs.ecx, 6)) features.add(Features::kAVX512_VBMI2);
        if (bitTest(regs.ecx, 8)) features.add(Features::kGFNI);
        if (bitTest(regs.ecx, 9)) features.add(Features::kVAES);
        if (bitTest(regs.ecx, 10)) features.add(Features::kVPCLMULQDQ);
        if (bitTest(regs.ecx, 11)) features.add(Features::kAVX512_VNNI);
        if (bitTest(regs.ecx, 12)) features.add(Features::kAVX512_BITALG);
        if (bitTest(regs.ecx, 14)) features.add(Features::kAVX512_VPOPCNTDQ);
        if (bitTest(regs.edx, 2)) features.add(Features::kAVX512_4VNNIW);
        if (bitTest(regs.edx, 3)) features.add(Features::kAVX512_4FMAPS);
        if (bitTest(regs.edx, 8)) features.add(Features::kAVX512_VP2INTERSECT);
      }
    }

    // Sub-leaf 1 of leaf 0x7 reports BF16; only meaningful with AVX512_F.
    if (maxSubLeafId >= 1 && features.hasAVX512_F()) {
      cpuidQuery(&regs, 0x7, 1);

      if (bitTest(regs.eax, 5)) features.add(Features::kAVX512_BF16);
    }
  }

  // --------------------------------------------------------------------------
  // [CPUID EAX=0xD]
  // --------------------------------------------------------------------------

  if (maxId >= 0xD) {
    cpuidQuery(&regs, 0xD, 0);

    // Both the CPUID result and XCR0 bits have to be set to support MPX.
    if (((regs.eax & xcr0.eax) & 0x00000018u) == 0x00000018u && maybeMPX)
      features.add(Features::kMPX);

    cpuidQuery(&regs, 0xD, 1);
    if (bitTest(regs.eax, 0)) features.add(Features::kXSAVEOPT);
    if (bitTest(regs.eax, 1)) features.add(Features::kXSAVEC);
    if (bitTest(regs.eax, 3)) features.add(Features::kXSAVES);
  }

  // --------------------------------------------------------------------------
  // [CPUID EAX=0x80000000...maxId]
  // --------------------------------------------------------------------------

  // Start of the extended CPUID range; `maxId` is refreshed by the first leaf.
  maxId = 0x80000000u;
  uint32_t i = maxId;

  // The highest EAX that we understand.
  uint32_t kHighestProcessedEAX = 0x80000008u;

  // Several CPUID calls are required to get the whole brand string. It's easy
  // to copy one DWORD at a time instead of performing a byte copy.
  uint32_t* brand = cpu._brand.u32;
  do {
    cpuidQuery(&regs, i);
    switch (i) {
      case 0x80000000u:
        maxId = Support::min<uint32_t>(regs.eax, kHighestProcessedEAX);
        break;

      case 0x80000001u:
        if (bitTest(regs.ecx, 0)) features.add(Features::kLAHFSAHF);
        if (bitTest(regs.ecx, 2)) features.add(Features::kSVM);
        if (bitTest(regs.ecx, 5)) features.add(Features::kLZCNT);
        if (bitTest(regs.ecx, 6)) features.add(Features::kSSE4A);
        if (bitTest(regs.ecx, 7)) features.add(Features::kMSSE);
        if (bitTest(regs.ecx, 8)) features.add(Features::kPREFETCHW);
        if (bitTest(regs.ecx, 12)) features.add(Features::kSKINIT);
        if (bitTest(regs.ecx, 15)) features.add(Features::kLWP);
        if (bitTest(regs.ecx, 21)) features.add(Features::kTBM);
        if (bitTest(regs.ecx, 29)) features.add(Features::kMONITORX);
        if (bitTest(regs.edx, 20)) features.add(Features::kNX);
        if (bitTest(regs.edx, 21)) features.add(Features::kFXSROPT);
        if (bitTest(regs.edx, 22)) features.add(Features::kMMX2);
        if (bitTest(regs.edx, 27)) features.add(Features::kRDTSCP);
        if (bitTest(regs.edx, 30)) features.add(Features::k3DNOW2, Features::kMMX2);
        if (bitTest(regs.edx, 31)) features.add(Features::k3DNOW);

        // XOP/FMA4 also require OS-enabled AVX state, checked earlier.
        if (cpu.hasFeature(Features::kAVX)) {
          if (bitTest(regs.ecx, 11)) features.add(Features::kXOP);
          if (bitTest(regs.ecx, 16)) features.add(Features::kFMA4);
        }

        // These seem to be only supported by AMD.
        if (cpu.isVendor("AMD")) {
          if (bitTest(regs.ecx, 4)) features.add(Features::kALTMOVCR8);
        }
        break;

      case 0x80000002u:
      case 0x80000003u:
      case 0x80000004u:
        // Leaves 0x80000002..4 return 16 bytes of the brand string each.
        *brand++ = regs.eax;
        *brand++ = regs.ebx;
        *brand++ = regs.ecx;
        *brand++ = regs.edx;

        // Go directly to the last one.
        if (i == 0x80000004u) i = 0x80000008u - 1;
        break;

      case 0x80000008u:
        if (bitTest(regs.ebx, 0)) features.add(Features::kCLZERO);
        break;
    }
  } while (++i <= maxId);

  // Simplify CPU brand string a bit by removing some unnecessary spaces.
  simplifyCpuBrand(cpu._brand.str);
}
372
373 ASMJIT_END_SUB_NAMESPACE
374
375 #endif // ASMJIT_BUILD_X86 && ASMJIT_ARCH_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86FEATURES_H
7 #define _ASMJIT_X86_X86FEATURES_H
8
9 #include "../core/features.h"
10
11 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
12
13 //! \addtogroup asmjit_x86
14 //! \{
15
16 // ============================================================================
17 // [asmjit::x86::Features]
18 // ============================================================================
19
20 //! CPU features (X86).
//! CPU features (X86|X64).
//!
//! Extends `BaseFeatures` with the X86-specific feature IDs and convenience
//! `hasXXX()` accessors. Each `Id` enumerator is a plain index tested via
//! `BaseFeatures::has()`, so the textual order of the enumerators defines
//! their numeric values — do not reorder existing entries, only append
//! before `kCount`.
class Features : public BaseFeatures {
public:
  //! CPU feature ID.
  //!
  //! Each ID maps 1:1 to a `hasXXX()` accessor generated below by the
  //! `ASMJIT_X86_FEATURE` macro.
  enum Id : uint32_t {
    kNone = 0,                 //!< No feature (never set, used internally).

    kMT,                       //!< CPU has multi-threading capabilities.
    kNX,                       //!< CPU has Not-Execute-Bit aka DEP (data-execution prevention).

    k3DNOW,                    //!< CPU has 3DNOW (3DNOW base instructions) [AMD].
    k3DNOW2,                   //!< CPU has 3DNOW2 (enhanced 3DNOW) [AMD].
    kADX,                      //!< CPU has ADX (multi-precision add-carry instruction extensions).
    kAESNI,                    //!< CPU has AESNI (AES encode/decode instructions).
    kALTMOVCR8,                //!< CPU has LOCK MOV R<->CR0 (supports `MOV R<->CR8` via `LOCK MOV R<->CR0` in 32-bit mode) [AMD].
    kAVX,                      //!< CPU has AVX (advanced vector extensions).
    kAVX2,                     //!< CPU has AVX2 (advanced vector extensions 2).
    kAVX512_4FMAPS,            //!< CPU has AVX512_4FMAPS (FMA packed single).
    kAVX512_4VNNIW,            //!< CPU has AVX512_4VNNIW (vector NN instructions word variable precision).
    kAVX512_BF16,              //!< CPU has AVX512_BF16 (BFLOAT16 support instruction).
    kAVX512_BITALG,            //!< CPU has AVX512_BITALG (VPOPCNT[B|W], VPSHUFBITQMB).
    kAVX512_BW,                //!< CPU has AVX512_BW (packed BYTE|WORD).
    kAVX512_CDI,               //!< CPU has AVX512_CDI (conflict detection).
    kAVX512_DQ,                //!< CPU has AVX512_DQ (packed DWORD|QWORD).
    kAVX512_ERI,               //!< CPU has AVX512_ERI (exponential and reciprocal).
    kAVX512_F,                 //!< CPU has AVX512_F (AVX512 foundation).
    kAVX512_IFMA,              //!< CPU has AVX512_IFMA (integer fused-multiply-add using 52-bit precision).
    kAVX512_PFI,               //!< CPU has AVX512_PFI (prefetch instructions).
    kAVX512_VBMI,              //!< CPU has AVX512_VBMI (vector byte manipulation).
    kAVX512_VBMI2,             //!< CPU has AVX512_VBMI2 (vector byte manipulation 2).
    kAVX512_VL,                //!< CPU has AVX512_VL (vector length extensions).
    kAVX512_VNNI,              //!< CPU has AVX512_VNNI (vector neural network instructions).
    kAVX512_VP2INTERSECT,      //!< CPU has AVX512_VP2INTERSECT.
    kAVX512_VPOPCNTDQ,         //!< CPU has AVX512_VPOPCNTDQ (VPOPCNT[D|Q] instructions).
    kBMI,                      //!< CPU has BMI (bit manipulation instructions #1).
    kBMI2,                     //!< CPU has BMI2 (bit manipulation instructions #2).
    kCLDEMOTE,                 //!< CPU has CLDEMOTE (cache line demote).
    kCLFLUSH,                  //!< CPU has CLFLUSH (Cache Line flush).
    kCLFLUSHOPT,               //!< CPU has CLFLUSHOPT (Cache Line flush - optimized).
    kCLWB,                     //!< CPU has CLWB.
    kCLZERO,                   //!< CPU has CLZERO.
    kCMOV,                     //!< CPU has CMOV (CMOV and FCMOV instructions).
    kCMPXCHG16B,               //!< CPU has CMPXCHG16B (compare-exchange 16 bytes) [X86_64].
    kCMPXCHG8B,                //!< CPU has CMPXCHG8B (compare-exchange 8 bytes).
    kENCLV,                    //!< CPU has ENCLV.
    kENQCMD,                   //!< CPU has ENQCMD (enqueue stores).
    kERMS,                     //!< CPU has ERMS (enhanced REP MOVSB/STOSB).
    kF16C,                     //!< CPU has F16C.
    kFMA,                      //!< CPU has FMA (fused-multiply-add 3 operand form).
    kFMA4,                     //!< CPU has FMA4 (fused-multiply-add 4 operand form).
    kFPU,                      //!< CPU has FPU (FPU support).
    kFSGSBASE,                 //!< CPU has FSGSBASE.
    kFXSR,                     //!< CPU has FXSR (FXSAVE/FXRSTOR instructions).
    kFXSROPT,                  //!< CPU has FXSROPT (FXSAVE/FXRSTOR is optimized).
    kGEODE,                    //!< CPU has GEODE extensions (3DNOW additions).
    kGFNI,                     //!< CPU has GFNI (Galois field instructions).
    kHLE,                      //!< CPU has HLE.
    kI486,                     //!< CPU has I486 features (I486+ support).
    kLAHFSAHF,                 //!< CPU has LAHF/SAHF (LAHF/SAHF in 64-bit mode) [X86_64].
    kLWP,                      //!< CPU has LWP (lightweight profiling) [AMD].
    kLZCNT,                    //!< CPU has LZCNT (LZCNT instruction).
    kMMX,                      //!< CPU has MMX (MMX base instructions).
    kMMX2,                     //!< CPU has MMX2 (MMX extensions or MMX2).
    kMONITOR,                  //!< CPU has MONITOR (MONITOR/MWAIT instructions).
    kMONITORX,                 //!< CPU has MONITORX (MONITORX/MWAITX instructions).
    kMOVBE,                    //!< CPU has MOVBE (move with byte-order swap).
    kMOVDIR64B,                //!< CPU has MOVDIR64B (move 64 bytes as direct store).
    kMOVDIRI,                  //!< CPU has MOVDIRI (move dword/qword as direct store).
    kMPX,                      //!< CPU has MPX (memory protection extensions).
    kMSR,                      //!< CPU has MSR (RDMSR/WRMSR instructions).
    kMSSE,                     //!< CPU has MSSE (misaligned SSE support).
    kOSXSAVE,                  //!< CPU has OSXSAVE (XSAVE enabled by OS).
    kPCLMULQDQ,                //!< CPU has PCLMULQDQ (packed carry-less multiplication).
    kPCOMMIT,                  //!< CPU has PCOMMIT (PCOMMIT instruction).
    kPCONFIG,                  //!< CPU has PCONFIG (PCONFIG instruction).
    kPOPCNT,                   //!< CPU has POPCNT (POPCNT instruction).
    kPREFETCHW,                //!< CPU has PREFETCHW.
    kPREFETCHWT1,              //!< CPU has PREFETCHWT1.
    kRDPID,                    //!< CPU has RDPID.
    kRDRAND,                   //!< CPU has RDRAND.
    kRDSEED,                   //!< CPU has RDSEED.
    kRDTSC,                    //!< CPU has RDTSC.
    kRDTSCP,                   //!< CPU has RDTSCP.
    kRTM,                      //!< CPU has RTM.
    kSHA,                      //!< CPU has SHA (SHA-1 and SHA-256 instructions).
    kSKINIT,                   //!< CPU has SKINIT (SKINIT/STGI instructions) [AMD].
    kSMAP,                     //!< CPU has SMAP (supervisor-mode access prevention).
    kSMEP,                     //!< CPU has SMEP (supervisor-mode execution prevention).
    kSMX,                      //!< CPU has SMX (safer mode extensions).
    kSSE,                      //!< CPU has SSE.
    kSSE2,                     //!< CPU has SSE2.
    kSSE3,                     //!< CPU has SSE3.
    kSSE4_1,                   //!< CPU has SSE4.1.
    kSSE4_2,                   //!< CPU has SSE4.2.
    kSSE4A,                    //!< CPU has SSE4A [AMD].
    kSSSE3,                    //!< CPU has SSSE3.
    kSVM,                      //!< CPU has SVM (virtualization) [AMD].
    kTBM,                      //!< CPU has TBM (trailing bit manipulation) [AMD].
    kTSX,                      //!< CPU has TSX.
    kVAES,                     //!< CPU has VAES (vector AES 256|512 bit support).
    kVMX,                      //!< CPU has VMX (virtualization) [INTEL].
    kVPCLMULQDQ,               //!< CPU has VPCLMULQDQ (vector PCLMULQDQ 256|512-bit support).
    kWAITPKG,                  //!< CPU has WAITPKG (UMONITOR, UMWAIT, TPAUSE).
    kWBNOINVD,                 //!< CPU has WBNOINVD.
    kXOP,                      //!< CPU has XOP (XOP instructions) [AMD].
    kXSAVE,                    //!< CPU has XSAVE.
    kXSAVEC,                   //!< CPU has XSAVEC.
    kXSAVEOPT,                 //!< CPU has XSAVEOPT.
    kXSAVES,                   //!< CPU has XSAVES.

    kCount                     //!< Count of X86 CPU features.
  };

  //! \name Construction / Destruction
  //! \{

  //! Creates an empty feature set (no features present).
  inline Features() noexcept
    : BaseFeatures() {}
  //! Creates a copy of `other`.
  inline Features(const Features& other) noexcept
    : BaseFeatures(other) {}

  //! \}

  //! \name Overloaded Operators
  //! \{

  //! Copy-assignment (bitwise copy of the underlying feature set).
  inline Features& operator=(const Features& other) noexcept = default;

  //! \}

  //! \name Accessors
  //! \{

  // Generates `bool hasXXX()` for each feature ID above, forwarding to
  // `BaseFeatures::has()`. The list below must stay in sync with `enum Id`
  // (one accessor per feature; the listing order itself is not significant).
#define ASMJIT_X86_FEATURE(FEATURE) \
  inline bool has##FEATURE() const noexcept { return has(k##FEATURE); }

  ASMJIT_X86_FEATURE(MT)
  ASMJIT_X86_FEATURE(NX)

  ASMJIT_X86_FEATURE(3DNOW)
  ASMJIT_X86_FEATURE(3DNOW2)
  ASMJIT_X86_FEATURE(ADX)
  ASMJIT_X86_FEATURE(AESNI)
  ASMJIT_X86_FEATURE(ALTMOVCR8)
  ASMJIT_X86_FEATURE(AVX)
  ASMJIT_X86_FEATURE(AVX2)
  ASMJIT_X86_FEATURE(AVX512_4FMAPS)
  ASMJIT_X86_FEATURE(AVX512_4VNNIW)
  ASMJIT_X86_FEATURE(AVX512_BF16)
  ASMJIT_X86_FEATURE(AVX512_BITALG)
  ASMJIT_X86_FEATURE(AVX512_BW)
  ASMJIT_X86_FEATURE(AVX512_CDI)
  ASMJIT_X86_FEATURE(AVX512_DQ)
  ASMJIT_X86_FEATURE(AVX512_ERI)
  ASMJIT_X86_FEATURE(AVX512_F)
  ASMJIT_X86_FEATURE(AVX512_IFMA)
  ASMJIT_X86_FEATURE(AVX512_PFI)
  ASMJIT_X86_FEATURE(AVX512_VBMI)
  ASMJIT_X86_FEATURE(AVX512_VBMI2)
  ASMJIT_X86_FEATURE(AVX512_VL)
  ASMJIT_X86_FEATURE(AVX512_VNNI)
  ASMJIT_X86_FEATURE(AVX512_VP2INTERSECT)
  ASMJIT_X86_FEATURE(AVX512_VPOPCNTDQ)
  ASMJIT_X86_FEATURE(BMI)
  ASMJIT_X86_FEATURE(BMI2)
  ASMJIT_X86_FEATURE(CLDEMOTE)
  ASMJIT_X86_FEATURE(CLFLUSH)
  ASMJIT_X86_FEATURE(CLFLUSHOPT)
  ASMJIT_X86_FEATURE(CLWB)
  ASMJIT_X86_FEATURE(CLZERO)
  ASMJIT_X86_FEATURE(CMOV)
  ASMJIT_X86_FEATURE(CMPXCHG16B)
  ASMJIT_X86_FEATURE(CMPXCHG8B)
  ASMJIT_X86_FEATURE(ENCLV)
  ASMJIT_X86_FEATURE(ENQCMD)
  ASMJIT_X86_FEATURE(ERMS)
  ASMJIT_X86_FEATURE(F16C)
  ASMJIT_X86_FEATURE(FMA)
  ASMJIT_X86_FEATURE(FMA4)
  ASMJIT_X86_FEATURE(FPU)
  ASMJIT_X86_FEATURE(FSGSBASE)
  ASMJIT_X86_FEATURE(FXSR)
  ASMJIT_X86_FEATURE(FXSROPT)
  ASMJIT_X86_FEATURE(GEODE)
  ASMJIT_X86_FEATURE(GFNI)
  ASMJIT_X86_FEATURE(HLE)
  ASMJIT_X86_FEATURE(I486)
  ASMJIT_X86_FEATURE(LAHFSAHF)
  ASMJIT_X86_FEATURE(LWP)
  ASMJIT_X86_FEATURE(LZCNT)
  ASMJIT_X86_FEATURE(MMX)
  ASMJIT_X86_FEATURE(MMX2)
  ASMJIT_X86_FEATURE(MONITOR)
  ASMJIT_X86_FEATURE(MONITORX)
  ASMJIT_X86_FEATURE(MOVBE)
  ASMJIT_X86_FEATURE(MOVDIR64B)
  ASMJIT_X86_FEATURE(MOVDIRI)
  ASMJIT_X86_FEATURE(MPX)
  ASMJIT_X86_FEATURE(MSR)
  ASMJIT_X86_FEATURE(MSSE)
  ASMJIT_X86_FEATURE(OSXSAVE)
  ASMJIT_X86_FEATURE(PCLMULQDQ)
  ASMJIT_X86_FEATURE(PCOMMIT)
  ASMJIT_X86_FEATURE(PCONFIG)
  ASMJIT_X86_FEATURE(POPCNT)
  ASMJIT_X86_FEATURE(PREFETCHW)
  ASMJIT_X86_FEATURE(PREFETCHWT1)
  ASMJIT_X86_FEATURE(RDPID)
  ASMJIT_X86_FEATURE(RDRAND)
  ASMJIT_X86_FEATURE(RDSEED)
  ASMJIT_X86_FEATURE(RDTSC)
  ASMJIT_X86_FEATURE(RDTSCP)
  ASMJIT_X86_FEATURE(RTM)
  ASMJIT_X86_FEATURE(SHA)
  ASMJIT_X86_FEATURE(SKINIT)
  ASMJIT_X86_FEATURE(SMAP)
  ASMJIT_X86_FEATURE(SMEP)
  ASMJIT_X86_FEATURE(SMX)
  ASMJIT_X86_FEATURE(SSE)
  ASMJIT_X86_FEATURE(SSE2)
  ASMJIT_X86_FEATURE(SSE3)
  ASMJIT_X86_FEATURE(SSSE3)
  ASMJIT_X86_FEATURE(SSE4A)
  ASMJIT_X86_FEATURE(SSE4_1)
  ASMJIT_X86_FEATURE(SSE4_2)
  ASMJIT_X86_FEATURE(SVM)
  ASMJIT_X86_FEATURE(TBM)
  ASMJIT_X86_FEATURE(TSX)
  ASMJIT_X86_FEATURE(XSAVE)
  ASMJIT_X86_FEATURE(XSAVEC)
  ASMJIT_X86_FEATURE(XSAVEOPT)
  ASMJIT_X86_FEATURE(XSAVES)
  ASMJIT_X86_FEATURE(VAES)
  ASMJIT_X86_FEATURE(VMX)
  ASMJIT_X86_FEATURE(VPCLMULQDQ)
  ASMJIT_X86_FEATURE(WAITPKG)
  ASMJIT_X86_FEATURE(WBNOINVD)
  ASMJIT_X86_FEATURE(XOP)

#undef ASMJIT_X86_FEATURE

  //! \}
};
263
264 //! \}
265
266 ASMJIT_END_SUB_NAMESPACE
267
268 #endif // _ASMJIT_X86_X86FEATURES_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86GLOBALS_H
7 #define _ASMJIT_X86_X86GLOBALS_H
8
9 #include "../core/arch.h"
10 #include "../core/inst.h"
11
12 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
13
14 //! \namespace asmjit::x86
15 //! \ingroup asmjit_x86
16 //!
17 //! X86/X64 API.
18
19 //! \addtogroup asmjit_x86
20 //! \{
21
22 // ============================================================================
23 // [asmjit::x86::Inst]
24 // ============================================================================
25
26 //! Instruction.
27 //!
28 //! \note Only used to hold x86-specific enumerations and static functions.
29 struct Inst : public BaseInst {
30 //! Instruction id.
31 enum Id : uint32_t {
32 // ${InstId:Begin}
33 kIdNone = 0, //!< Invalid instruction id.
34 kIdAaa, //!< Instruction 'aaa' (X86).
35 kIdAad, //!< Instruction 'aad' (X86).
36 kIdAam, //!< Instruction 'aam' (X86).
37 kIdAas, //!< Instruction 'aas' (X86).
38 kIdAdc, //!< Instruction 'adc'.
39 kIdAdcx, //!< Instruction 'adcx' {ADX}.
40 kIdAdd, //!< Instruction 'add'.
41 kIdAddpd, //!< Instruction 'addpd' {SSE2}.
42 kIdAddps, //!< Instruction 'addps' {SSE}.
43 kIdAddsd, //!< Instruction 'addsd' {SSE2}.
44 kIdAddss, //!< Instruction 'addss' {SSE}.
45 kIdAddsubpd, //!< Instruction 'addsubpd' {SSE3}.
46 kIdAddsubps, //!< Instruction 'addsubps' {SSE3}.
47 kIdAdox, //!< Instruction 'adox' {ADX}.
48 kIdAesdec, //!< Instruction 'aesdec' {AESNI}.
49 kIdAesdeclast, //!< Instruction 'aesdeclast' {AESNI}.
50 kIdAesenc, //!< Instruction 'aesenc' {AESNI}.
51 kIdAesenclast, //!< Instruction 'aesenclast' {AESNI}.
52 kIdAesimc, //!< Instruction 'aesimc' {AESNI}.
53 kIdAeskeygenassist, //!< Instruction 'aeskeygenassist' {AESNI}.
54 kIdAnd, //!< Instruction 'and'.
55 kIdAndn, //!< Instruction 'andn' {BMI}.
56 kIdAndnpd, //!< Instruction 'andnpd' {SSE2}.
57 kIdAndnps, //!< Instruction 'andnps' {SSE}.
58 kIdAndpd, //!< Instruction 'andpd' {SSE2}.
59 kIdAndps, //!< Instruction 'andps' {SSE}.
60 kIdArpl, //!< Instruction 'arpl' (X86).
61 kIdBextr, //!< Instruction 'bextr' {BMI}.
62 kIdBlcfill, //!< Instruction 'blcfill' {TBM}.
63 kIdBlci, //!< Instruction 'blci' {TBM}.
64 kIdBlcic, //!< Instruction 'blcic' {TBM}.
65 kIdBlcmsk, //!< Instruction 'blcmsk' {TBM}.
66 kIdBlcs, //!< Instruction 'blcs' {TBM}.
67 kIdBlendpd, //!< Instruction 'blendpd' {SSE4_1}.
68 kIdBlendps, //!< Instruction 'blendps' {SSE4_1}.
69 kIdBlendvpd, //!< Instruction 'blendvpd' {SSE4_1}.
70 kIdBlendvps, //!< Instruction 'blendvps' {SSE4_1}.
71 kIdBlsfill, //!< Instruction 'blsfill' {TBM}.
72 kIdBlsi, //!< Instruction 'blsi' {BMI}.
73 kIdBlsic, //!< Instruction 'blsic' {TBM}.
74 kIdBlsmsk, //!< Instruction 'blsmsk' {BMI}.
75 kIdBlsr, //!< Instruction 'blsr' {BMI}.
76 kIdBndcl, //!< Instruction 'bndcl' {MPX}.
77 kIdBndcn, //!< Instruction 'bndcn' {MPX}.
78 kIdBndcu, //!< Instruction 'bndcu' {MPX}.
79 kIdBndldx, //!< Instruction 'bndldx' {MPX}.
80 kIdBndmk, //!< Instruction 'bndmk' {MPX}.
81 kIdBndmov, //!< Instruction 'bndmov' {MPX}.
82 kIdBndstx, //!< Instruction 'bndstx' {MPX}.
83 kIdBound, //!< Instruction 'bound' (X86).
84 kIdBsf, //!< Instruction 'bsf'.
85 kIdBsr, //!< Instruction 'bsr'.
86 kIdBswap, //!< Instruction 'bswap'.
87 kIdBt, //!< Instruction 'bt'.
88 kIdBtc, //!< Instruction 'btc'.
89 kIdBtr, //!< Instruction 'btr'.
90 kIdBts, //!< Instruction 'bts'.
91 kIdBzhi, //!< Instruction 'bzhi' {BMI2}.
92 kIdCall, //!< Instruction 'call'.
93 kIdCbw, //!< Instruction 'cbw'.
94 kIdCdq, //!< Instruction 'cdq'.
95 kIdCdqe, //!< Instruction 'cdqe' (X64).
96 kIdClac, //!< Instruction 'clac' {SMAP}.
97 kIdClc, //!< Instruction 'clc'.
98 kIdCld, //!< Instruction 'cld'.
99 kIdCldemote, //!< Instruction 'cldemote' {CLDEMOTE}.
100 kIdClflush, //!< Instruction 'clflush' {CLFLUSH}.
101 kIdClflushopt, //!< Instruction 'clflushopt' {CLFLUSHOPT}.
102 kIdClgi, //!< Instruction 'clgi' {SVM}.
103 kIdCli, //!< Instruction 'cli'.
104 kIdClts, //!< Instruction 'clts'.
105 kIdClwb, //!< Instruction 'clwb' {CLWB}.
106 kIdClzero, //!< Instruction 'clzero' {CLZERO}.
107 kIdCmc, //!< Instruction 'cmc'.
108 kIdCmova, //!< Instruction 'cmova' {CMOV}.
109 kIdCmovae, //!< Instruction 'cmovae' {CMOV}.
110 kIdCmovb, //!< Instruction 'cmovb' {CMOV}.
111 kIdCmovbe, //!< Instruction 'cmovbe' {CMOV}.
112 kIdCmovc, //!< Instruction 'cmovc' {CMOV}.
113 kIdCmove, //!< Instruction 'cmove' {CMOV}.
114 kIdCmovg, //!< Instruction 'cmovg' {CMOV}.
115 kIdCmovge, //!< Instruction 'cmovge' {CMOV}.
116 kIdCmovl, //!< Instruction 'cmovl' {CMOV}.
117 kIdCmovle, //!< Instruction 'cmovle' {CMOV}.
118 kIdCmovna, //!< Instruction 'cmovna' {CMOV}.
119 kIdCmovnae, //!< Instruction 'cmovnae' {CMOV}.
120 kIdCmovnb, //!< Instruction 'cmovnb' {CMOV}.
121 kIdCmovnbe, //!< Instruction 'cmovnbe' {CMOV}.
122 kIdCmovnc, //!< Instruction 'cmovnc' {CMOV}.
123 kIdCmovne, //!< Instruction 'cmovne' {CMOV}.
124 kIdCmovng, //!< Instruction 'cmovng' {CMOV}.
125 kIdCmovnge, //!< Instruction 'cmovnge' {CMOV}.
126 kIdCmovnl, //!< Instruction 'cmovnl' {CMOV}.
127 kIdCmovnle, //!< Instruction 'cmovnle' {CMOV}.
128 kIdCmovno, //!< Instruction 'cmovno' {CMOV}.
129 kIdCmovnp, //!< Instruction 'cmovnp' {CMOV}.
130 kIdCmovns, //!< Instruction 'cmovns' {CMOV}.
131 kIdCmovnz, //!< Instruction 'cmovnz' {CMOV}.
132 kIdCmovo, //!< Instruction 'cmovo' {CMOV}.
133 kIdCmovp, //!< Instruction 'cmovp' {CMOV}.
134 kIdCmovpe, //!< Instruction 'cmovpe' {CMOV}.
135 kIdCmovpo, //!< Instruction 'cmovpo' {CMOV}.
136 kIdCmovs, //!< Instruction 'cmovs' {CMOV}.
137 kIdCmovz, //!< Instruction 'cmovz' {CMOV}.
138 kIdCmp, //!< Instruction 'cmp'.
139 kIdCmppd, //!< Instruction 'cmppd' {SSE2}.
140 kIdCmpps, //!< Instruction 'cmpps' {SSE}.
141 kIdCmps, //!< Instruction 'cmps'.
142 kIdCmpsd, //!< Instruction 'cmpsd' {SSE2}.
143 kIdCmpss, //!< Instruction 'cmpss' {SSE}.
144 kIdCmpxchg, //!< Instruction 'cmpxchg' {I486}.
145 kIdCmpxchg16b, //!< Instruction 'cmpxchg16b' {CMPXCHG16B} (X64).
146 kIdCmpxchg8b, //!< Instruction 'cmpxchg8b' {CMPXCHG8B}.
147 kIdComisd, //!< Instruction 'comisd' {SSE2}.
148 kIdComiss, //!< Instruction 'comiss' {SSE}.
149 kIdCpuid, //!< Instruction 'cpuid' {I486}.
150 kIdCqo, //!< Instruction 'cqo' (X64).
151 kIdCrc32, //!< Instruction 'crc32' {SSE4_2}.
152 kIdCvtdq2pd, //!< Instruction 'cvtdq2pd' {SSE2}.
153 kIdCvtdq2ps, //!< Instruction 'cvtdq2ps' {SSE2}.
154 kIdCvtpd2dq, //!< Instruction 'cvtpd2dq' {SSE2}.
155 kIdCvtpd2pi, //!< Instruction 'cvtpd2pi' {SSE2}.
156 kIdCvtpd2ps, //!< Instruction 'cvtpd2ps' {SSE2}.
157 kIdCvtpi2pd, //!< Instruction 'cvtpi2pd' {SSE2}.
158 kIdCvtpi2ps, //!< Instruction 'cvtpi2ps' {SSE}.
159 kIdCvtps2dq, //!< Instruction 'cvtps2dq' {SSE2}.
160 kIdCvtps2pd, //!< Instruction 'cvtps2pd' {SSE2}.
161 kIdCvtps2pi, //!< Instruction 'cvtps2pi' {SSE}.
162 kIdCvtsd2si, //!< Instruction 'cvtsd2si' {SSE2}.
163 kIdCvtsd2ss, //!< Instruction 'cvtsd2ss' {SSE2}.
164 kIdCvtsi2sd, //!< Instruction 'cvtsi2sd' {SSE2}.
165 kIdCvtsi2ss, //!< Instruction 'cvtsi2ss' {SSE}.
166 kIdCvtss2sd, //!< Instruction 'cvtss2sd' {SSE2}.
167 kIdCvtss2si, //!< Instruction 'cvtss2si' {SSE}.
168 kIdCvttpd2dq, //!< Instruction 'cvttpd2dq' {SSE2}.
169 kIdCvttpd2pi, //!< Instruction 'cvttpd2pi' {SSE2}.
170 kIdCvttps2dq, //!< Instruction 'cvttps2dq' {SSE2}.
171 kIdCvttps2pi, //!< Instruction 'cvttps2pi' {SSE}.
172 kIdCvttsd2si, //!< Instruction 'cvttsd2si' {SSE2}.
173 kIdCvttss2si, //!< Instruction 'cvttss2si' {SSE}.
174 kIdCwd, //!< Instruction 'cwd'.
175 kIdCwde, //!< Instruction 'cwde'.
176 kIdDaa, //!< Instruction 'daa' (X86).
177 kIdDas, //!< Instruction 'das' (X86).
178 kIdDec, //!< Instruction 'dec'.
179 kIdDiv, //!< Instruction 'div'.
180 kIdDivpd, //!< Instruction 'divpd' {SSE2}.
181 kIdDivps, //!< Instruction 'divps' {SSE}.
182 kIdDivsd, //!< Instruction 'divsd' {SSE2}.
183 kIdDivss, //!< Instruction 'divss' {SSE}.
184 kIdDppd, //!< Instruction 'dppd' {SSE4_1}.
185 kIdDpps, //!< Instruction 'dpps' {SSE4_1}.
186 kIdEmms, //!< Instruction 'emms' {MMX}.
187 kIdEnqcmd, //!< Instruction 'enqcmd' {ENQCMD}.
188 kIdEnqcmds, //!< Instruction 'enqcmds' {ENQCMD}.
189 kIdEnter, //!< Instruction 'enter'.
190 kIdExtractps, //!< Instruction 'extractps' {SSE4_1}.
191 kIdExtrq, //!< Instruction 'extrq' {SSE4A}.
192 kIdF2xm1, //!< Instruction 'f2xm1'.
193 kIdFabs, //!< Instruction 'fabs'.
194 kIdFadd, //!< Instruction 'fadd'.
195 kIdFaddp, //!< Instruction 'faddp'.
196 kIdFbld, //!< Instruction 'fbld'.
197 kIdFbstp, //!< Instruction 'fbstp'.
198 kIdFchs, //!< Instruction 'fchs'.
199 kIdFclex, //!< Instruction 'fclex'.
200 kIdFcmovb, //!< Instruction 'fcmovb' {CMOV}.
201 kIdFcmovbe, //!< Instruction 'fcmovbe' {CMOV}.
202 kIdFcmove, //!< Instruction 'fcmove' {CMOV}.
203 kIdFcmovnb, //!< Instruction 'fcmovnb' {CMOV}.
204 kIdFcmovnbe, //!< Instruction 'fcmovnbe' {CMOV}.
205 kIdFcmovne, //!< Instruction 'fcmovne' {CMOV}.
206 kIdFcmovnu, //!< Instruction 'fcmovnu' {CMOV}.
207 kIdFcmovu, //!< Instruction 'fcmovu' {CMOV}.
208 kIdFcom, //!< Instruction 'fcom'.
209 kIdFcomi, //!< Instruction 'fcomi'.
210 kIdFcomip, //!< Instruction 'fcomip'.
211 kIdFcomp, //!< Instruction 'fcomp'.
212 kIdFcompp, //!< Instruction 'fcompp'.
213 kIdFcos, //!< Instruction 'fcos'.
214 kIdFdecstp, //!< Instruction 'fdecstp'.
215 kIdFdiv, //!< Instruction 'fdiv'.
216 kIdFdivp, //!< Instruction 'fdivp'.
217 kIdFdivr, //!< Instruction 'fdivr'.
218 kIdFdivrp, //!< Instruction 'fdivrp'.
219 kIdFemms, //!< Instruction 'femms' {3DNOW}.
220 kIdFfree, //!< Instruction 'ffree'.
221 kIdFiadd, //!< Instruction 'fiadd'.
222 kIdFicom, //!< Instruction 'ficom'.
223 kIdFicomp, //!< Instruction 'ficomp'.
224 kIdFidiv, //!< Instruction 'fidiv'.
225 kIdFidivr, //!< Instruction 'fidivr'.
226 kIdFild, //!< Instruction 'fild'.
227 kIdFimul, //!< Instruction 'fimul'.
228 kIdFincstp, //!< Instruction 'fincstp'.
229 kIdFinit, //!< Instruction 'finit'.
230 kIdFist, //!< Instruction 'fist'.
231 kIdFistp, //!< Instruction 'fistp'.
232 kIdFisttp, //!< Instruction 'fisttp' {SSE3}.
233 kIdFisub, //!< Instruction 'fisub'.
234 kIdFisubr, //!< Instruction 'fisubr'.
235 kIdFld, //!< Instruction 'fld'.
236 kIdFld1, //!< Instruction 'fld1'.
237 kIdFldcw, //!< Instruction 'fldcw'.
238 kIdFldenv, //!< Instruction 'fldenv'.
239 kIdFldl2e, //!< Instruction 'fldl2e'.
240 kIdFldl2t, //!< Instruction 'fldl2t'.
241 kIdFldlg2, //!< Instruction 'fldlg2'.
242 kIdFldln2, //!< Instruction 'fldln2'.
243 kIdFldpi, //!< Instruction 'fldpi'.
244 kIdFldz, //!< Instruction 'fldz'.
245 kIdFmul, //!< Instruction 'fmul'.
246 kIdFmulp, //!< Instruction 'fmulp'.
247 kIdFnclex, //!< Instruction 'fnclex'.
248 kIdFninit, //!< Instruction 'fninit'.
249 kIdFnop, //!< Instruction 'fnop'.
250 kIdFnsave, //!< Instruction 'fnsave'.
251 kIdFnstcw, //!< Instruction 'fnstcw'.
252 kIdFnstenv, //!< Instruction 'fnstenv'.
253 kIdFnstsw, //!< Instruction 'fnstsw'.
254 kIdFpatan, //!< Instruction 'fpatan'.
255 kIdFprem, //!< Instruction 'fprem'.
256 kIdFprem1, //!< Instruction 'fprem1'.
257 kIdFptan, //!< Instruction 'fptan'.
258 kIdFrndint, //!< Instruction 'frndint'.
259 kIdFrstor, //!< Instruction 'frstor'.
260 kIdFsave, //!< Instruction 'fsave'.
261 kIdFscale, //!< Instruction 'fscale'.
262 kIdFsin, //!< Instruction 'fsin'.
263 kIdFsincos, //!< Instruction 'fsincos'.
264 kIdFsqrt, //!< Instruction 'fsqrt'.
265 kIdFst, //!< Instruction 'fst'.
266 kIdFstcw, //!< Instruction 'fstcw'.
267 kIdFstenv, //!< Instruction 'fstenv'.
268 kIdFstp, //!< Instruction 'fstp'.
269 kIdFstsw, //!< Instruction 'fstsw'.
270 kIdFsub, //!< Instruction 'fsub'.
271 kIdFsubp, //!< Instruction 'fsubp'.
272 kIdFsubr, //!< Instruction 'fsubr'.
273 kIdFsubrp, //!< Instruction 'fsubrp'.
274 kIdFtst, //!< Instruction 'ftst'.
275 kIdFucom, //!< Instruction 'fucom'.
276 kIdFucomi, //!< Instruction 'fucomi'.
277 kIdFucomip, //!< Instruction 'fucomip'.
278 kIdFucomp, //!< Instruction 'fucomp'.
279 kIdFucompp, //!< Instruction 'fucompp'.
280 kIdFwait, //!< Instruction 'fwait'.
281 kIdFxam, //!< Instruction 'fxam'.
282 kIdFxch, //!< Instruction 'fxch'.
283 kIdFxrstor, //!< Instruction 'fxrstor' {FXSR}.
284 kIdFxrstor64, //!< Instruction 'fxrstor64' {FXSR} (X64).
285 kIdFxsave, //!< Instruction 'fxsave' {FXSR}.
286 kIdFxsave64, //!< Instruction 'fxsave64' {FXSR} (X64).
287 kIdFxtract, //!< Instruction 'fxtract'.
288 kIdFyl2x, //!< Instruction 'fyl2x'.
289 kIdFyl2xp1, //!< Instruction 'fyl2xp1'.
290 kIdGetsec, //!< Instruction 'getsec' {SMX}.
291 kIdGf2p8affineinvqb, //!< Instruction 'gf2p8affineinvqb' {GFNI}.
292 kIdGf2p8affineqb, //!< Instruction 'gf2p8affineqb' {GFNI}.
293 kIdGf2p8mulb, //!< Instruction 'gf2p8mulb' {GFNI}.
294 kIdHaddpd, //!< Instruction 'haddpd' {SSE3}.
295 kIdHaddps, //!< Instruction 'haddps' {SSE3}.
296 kIdHlt, //!< Instruction 'hlt'.
297 kIdHsubpd, //!< Instruction 'hsubpd' {SSE3}.
298 kIdHsubps, //!< Instruction 'hsubps' {SSE3}.
299 kIdIdiv, //!< Instruction 'idiv'.
300 kIdImul, //!< Instruction 'imul'.
301 kIdIn, //!< Instruction 'in'.
302 kIdInc, //!< Instruction 'inc'.
303 kIdIns, //!< Instruction 'ins'.
304 kIdInsertps, //!< Instruction 'insertps' {SSE4_1}.
305 kIdInsertq, //!< Instruction 'insertq' {SSE4A}.
306 kIdInt, //!< Instruction 'int'.
307 kIdInt3, //!< Instruction 'int3'.
308 kIdInto, //!< Instruction 'into' (X86).
309 kIdInvd, //!< Instruction 'invd' {I486}.
310 kIdInvept, //!< Instruction 'invept' {VMX}.
311 kIdInvlpg, //!< Instruction 'invlpg' {I486}.
312 kIdInvlpga, //!< Instruction 'invlpga' {SVM}.
313 kIdInvpcid, //!< Instruction 'invpcid' {I486}.
314 kIdInvvpid, //!< Instruction 'invvpid' {VMX}.
315 kIdIret, //!< Instruction 'iret'.
316 kIdIretd, //!< Instruction 'iretd'.
317 kIdIretq, //!< Instruction 'iretq' (X64).
318 kIdIretw, //!< Instruction 'iretw'.
319 kIdJa, //!< Instruction 'ja'.
320 kIdJae, //!< Instruction 'jae'.
321 kIdJb, //!< Instruction 'jb'.
322 kIdJbe, //!< Instruction 'jbe'.
323 kIdJc, //!< Instruction 'jc'.
324 kIdJe, //!< Instruction 'je'.
325 kIdJecxz, //!< Instruction 'jecxz'.
326 kIdJg, //!< Instruction 'jg'.
327 kIdJge, //!< Instruction 'jge'.
328 kIdJl, //!< Instruction 'jl'.
329 kIdJle, //!< Instruction 'jle'.
330 kIdJmp, //!< Instruction 'jmp'.
331 kIdJna, //!< Instruction 'jna'.
332 kIdJnae, //!< Instruction 'jnae'.
333 kIdJnb, //!< Instruction 'jnb'.
334 kIdJnbe, //!< Instruction 'jnbe'.
335 kIdJnc, //!< Instruction 'jnc'.
336 kIdJne, //!< Instruction 'jne'.
337 kIdJng, //!< Instruction 'jng'.
338 kIdJnge, //!< Instruction 'jnge'.
339 kIdJnl, //!< Instruction 'jnl'.
340 kIdJnle, //!< Instruction 'jnle'.
341 kIdJno, //!< Instruction 'jno'.
342 kIdJnp, //!< Instruction 'jnp'.
343 kIdJns, //!< Instruction 'jns'.
344 kIdJnz, //!< Instruction 'jnz'.
345 kIdJo, //!< Instruction 'jo'.
346 kIdJp, //!< Instruction 'jp'.
347 kIdJpe, //!< Instruction 'jpe'.
348 kIdJpo, //!< Instruction 'jpo'.
349 kIdJs, //!< Instruction 'js'.
350 kIdJz, //!< Instruction 'jz'.
351 kIdKaddb, //!< Instruction 'kaddb' {AVX512_DQ}.
352 kIdKaddd, //!< Instruction 'kaddd' {AVX512_BW}.
353 kIdKaddq, //!< Instruction 'kaddq' {AVX512_BW}.
354 kIdKaddw, //!< Instruction 'kaddw' {AVX512_DQ}.
355 kIdKandb, //!< Instruction 'kandb' {AVX512_DQ}.
356 kIdKandd, //!< Instruction 'kandd' {AVX512_BW}.
357 kIdKandnb, //!< Instruction 'kandnb' {AVX512_DQ}.
358 kIdKandnd, //!< Instruction 'kandnd' {AVX512_BW}.
359 kIdKandnq, //!< Instruction 'kandnq' {AVX512_BW}.
360 kIdKandnw, //!< Instruction 'kandnw' {AVX512_F}.
361 kIdKandq, //!< Instruction 'kandq' {AVX512_BW}.
362 kIdKandw, //!< Instruction 'kandw' {AVX512_F}.
363 kIdKmovb, //!< Instruction 'kmovb' {AVX512_DQ}.
364 kIdKmovd, //!< Instruction 'kmovd' {AVX512_BW}.
365 kIdKmovq, //!< Instruction 'kmovq' {AVX512_BW}.
366 kIdKmovw, //!< Instruction 'kmovw' {AVX512_F}.
367 kIdKnotb, //!< Instruction 'knotb' {AVX512_DQ}.
368 kIdKnotd, //!< Instruction 'knotd' {AVX512_BW}.
369 kIdKnotq, //!< Instruction 'knotq' {AVX512_BW}.
370 kIdKnotw, //!< Instruction 'knotw' {AVX512_F}.
371 kIdKorb, //!< Instruction 'korb' {AVX512_DQ}.
372 kIdKord, //!< Instruction 'kord' {AVX512_BW}.
373 kIdKorq, //!< Instruction 'korq' {AVX512_BW}.
374 kIdKortestb, //!< Instruction 'kortestb' {AVX512_DQ}.
375 kIdKortestd, //!< Instruction 'kortestd' {AVX512_BW}.
376 kIdKortestq, //!< Instruction 'kortestq' {AVX512_BW}.
377 kIdKortestw, //!< Instruction 'kortestw' {AVX512_F}.
378 kIdKorw, //!< Instruction 'korw' {AVX512_F}.
379 kIdKshiftlb, //!< Instruction 'kshiftlb' {AVX512_DQ}.
380 kIdKshiftld, //!< Instruction 'kshiftld' {AVX512_BW}.
381 kIdKshiftlq, //!< Instruction 'kshiftlq' {AVX512_BW}.
382 kIdKshiftlw, //!< Instruction 'kshiftlw' {AVX512_F}.
383 kIdKshiftrb, //!< Instruction 'kshiftrb' {AVX512_DQ}.
384 kIdKshiftrd, //!< Instruction 'kshiftrd' {AVX512_BW}.
385 kIdKshiftrq, //!< Instruction 'kshiftrq' {AVX512_BW}.
386 kIdKshiftrw, //!< Instruction 'kshiftrw' {AVX512_F}.
387 kIdKtestb, //!< Instruction 'ktestb' {AVX512_DQ}.
388 kIdKtestd, //!< Instruction 'ktestd' {AVX512_BW}.
389 kIdKtestq, //!< Instruction 'ktestq' {AVX512_BW}.
390 kIdKtestw, //!< Instruction 'ktestw' {AVX512_DQ}.
391 kIdKunpckbw, //!< Instruction 'kunpckbw' {AVX512_F}.
392 kIdKunpckdq, //!< Instruction 'kunpckdq' {AVX512_BW}.
393 kIdKunpckwd, //!< Instruction 'kunpckwd' {AVX512_BW}.
394 kIdKxnorb, //!< Instruction 'kxnorb' {AVX512_DQ}.
395 kIdKxnord, //!< Instruction 'kxnord' {AVX512_BW}.
396 kIdKxnorq, //!< Instruction 'kxnorq' {AVX512_BW}.
397 kIdKxnorw, //!< Instruction 'kxnorw' {AVX512_F}.
398 kIdKxorb, //!< Instruction 'kxorb' {AVX512_DQ}.
399 kIdKxord, //!< Instruction 'kxord' {AVX512_BW}.
400 kIdKxorq, //!< Instruction 'kxorq' {AVX512_BW}.
401 kIdKxorw, //!< Instruction 'kxorw' {AVX512_F}.
402 kIdLahf, //!< Instruction 'lahf' {LAHFSAHF}.
403 kIdLar, //!< Instruction 'lar'.
404 kIdLddqu, //!< Instruction 'lddqu' {SSE3}.
405 kIdLdmxcsr, //!< Instruction 'ldmxcsr' {SSE}.
406 kIdLds, //!< Instruction 'lds' (X86).
407 kIdLea, //!< Instruction 'lea'.
408 kIdLeave, //!< Instruction 'leave'.
409 kIdLes, //!< Instruction 'les' (X86).
410 kIdLfence, //!< Instruction 'lfence' {SSE2}.
411 kIdLfs, //!< Instruction 'lfs'.
412 kIdLgdt, //!< Instruction 'lgdt'.
413 kIdLgs, //!< Instruction 'lgs'.
414 kIdLidt, //!< Instruction 'lidt'.
415 kIdLldt, //!< Instruction 'lldt'.
416 kIdLlwpcb, //!< Instruction 'llwpcb' {LWP}.
417 kIdLmsw, //!< Instruction 'lmsw'.
418 kIdLods, //!< Instruction 'lods'.
419 kIdLoop, //!< Instruction 'loop'.
420 kIdLoope, //!< Instruction 'loope'.
421 kIdLoopne, //!< Instruction 'loopne'.
422 kIdLsl, //!< Instruction 'lsl'.
423 kIdLss, //!< Instruction 'lss'.
424 kIdLtr, //!< Instruction 'ltr'.
425 kIdLwpins, //!< Instruction 'lwpins' {LWP}.
426 kIdLwpval, //!< Instruction 'lwpval' {LWP}.
427 kIdLzcnt, //!< Instruction 'lzcnt' {LZCNT}.
428 kIdMaskmovdqu, //!< Instruction 'maskmovdqu' {SSE2}.
429 kIdMaskmovq, //!< Instruction 'maskmovq' {MMX2}.
430 kIdMaxpd, //!< Instruction 'maxpd' {SSE2}.
431 kIdMaxps, //!< Instruction 'maxps' {SSE}.
432 kIdMaxsd, //!< Instruction 'maxsd' {SSE2}.
433 kIdMaxss, //!< Instruction 'maxss' {SSE}.
434 kIdMfence, //!< Instruction 'mfence' {SSE2}.
435 kIdMinpd, //!< Instruction 'minpd' {SSE2}.
436 kIdMinps, //!< Instruction 'minps' {SSE}.
437 kIdMinsd, //!< Instruction 'minsd' {SSE2}.
438 kIdMinss, //!< Instruction 'minss' {SSE}.
439 kIdMonitor, //!< Instruction 'monitor' {MONITOR}.
440 kIdMonitorx, //!< Instruction 'monitorx' {MONITORX}.
441 kIdMov, //!< Instruction 'mov'.
442 kIdMovapd, //!< Instruction 'movapd' {SSE2}.
443 kIdMovaps, //!< Instruction 'movaps' {SSE}.
444 kIdMovbe, //!< Instruction 'movbe' {MOVBE}.
445 kIdMovd, //!< Instruction 'movd' {MMX|SSE2}.
446 kIdMovddup, //!< Instruction 'movddup' {SSE3}.
447 kIdMovdir64b, //!< Instruction 'movdir64b' {MOVDIR64B}.
448 kIdMovdiri, //!< Instruction 'movdiri' {MOVDIRI}.
449 kIdMovdq2q, //!< Instruction 'movdq2q' {SSE2}.
450 kIdMovdqa, //!< Instruction 'movdqa' {SSE2}.
451 kIdMovdqu, //!< Instruction 'movdqu' {SSE2}.
452 kIdMovhlps, //!< Instruction 'movhlps' {SSE}.
453 kIdMovhpd, //!< Instruction 'movhpd' {SSE2}.
454 kIdMovhps, //!< Instruction 'movhps' {SSE}.
455 kIdMovlhps, //!< Instruction 'movlhps' {SSE}.
456 kIdMovlpd, //!< Instruction 'movlpd' {SSE2}.
457 kIdMovlps, //!< Instruction 'movlps' {SSE}.
458 kIdMovmskpd, //!< Instruction 'movmskpd' {SSE2}.
459 kIdMovmskps, //!< Instruction 'movmskps' {SSE}.
460 kIdMovntdq, //!< Instruction 'movntdq' {SSE2}.
461 kIdMovntdqa, //!< Instruction 'movntdqa' {SSE4_1}.
462 kIdMovnti, //!< Instruction 'movnti' {SSE2}.
463 kIdMovntpd, //!< Instruction 'movntpd' {SSE2}.
464 kIdMovntps, //!< Instruction 'movntps' {SSE}.
465 kIdMovntq, //!< Instruction 'movntq' {MMX2}.
466 kIdMovntsd, //!< Instruction 'movntsd' {SSE4A}.
467 kIdMovntss, //!< Instruction 'movntss' {SSE4A}.
468 kIdMovq, //!< Instruction 'movq' {MMX|SSE2}.
469 kIdMovq2dq, //!< Instruction 'movq2dq' {SSE2}.
470 kIdMovs, //!< Instruction 'movs'.
471 kIdMovsd, //!< Instruction 'movsd' {SSE2}.
472 kIdMovshdup, //!< Instruction 'movshdup' {SSE3}.
473 kIdMovsldup, //!< Instruction 'movsldup' {SSE3}.
474 kIdMovss, //!< Instruction 'movss' {SSE}.
475 kIdMovsx, //!< Instruction 'movsx'.
476 kIdMovsxd, //!< Instruction 'movsxd' (X64).
477 kIdMovupd, //!< Instruction 'movupd' {SSE2}.
478 kIdMovups, //!< Instruction 'movups' {SSE}.
479 kIdMovzx, //!< Instruction 'movzx'.
480 kIdMpsadbw, //!< Instruction 'mpsadbw' {SSE4_1}.
481 kIdMul, //!< Instruction 'mul'.
482 kIdMulpd, //!< Instruction 'mulpd' {SSE2}.
483 kIdMulps, //!< Instruction 'mulps' {SSE}.
484 kIdMulsd, //!< Instruction 'mulsd' {SSE2}.
485 kIdMulss, //!< Instruction 'mulss' {SSE}.
486 kIdMulx, //!< Instruction 'mulx' {BMI2}.
487 kIdMwait, //!< Instruction 'mwait' {MONITOR}.
488 kIdMwaitx, //!< Instruction 'mwaitx' {MONITORX}.
489 kIdNeg, //!< Instruction 'neg'.
490 kIdNop, //!< Instruction 'nop'.
491 kIdNot, //!< Instruction 'not'.
492 kIdOr, //!< Instruction 'or'.
493 kIdOrpd, //!< Instruction 'orpd' {SSE2}.
494 kIdOrps, //!< Instruction 'orps' {SSE}.
495 kIdOut, //!< Instruction 'out'.
496 kIdOuts, //!< Instruction 'outs'.
497 kIdPabsb, //!< Instruction 'pabsb' {SSSE3}.
498 kIdPabsd, //!< Instruction 'pabsd' {SSSE3}.
499 kIdPabsw, //!< Instruction 'pabsw' {SSSE3}.
500 kIdPackssdw, //!< Instruction 'packssdw' {MMX|SSE2}.
501 kIdPacksswb, //!< Instruction 'packsswb' {MMX|SSE2}.
502 kIdPackusdw, //!< Instruction 'packusdw' {SSE4_1}.
503 kIdPackuswb, //!< Instruction 'packuswb' {MMX|SSE2}.
504 kIdPaddb, //!< Instruction 'paddb' {MMX|SSE2}.
505 kIdPaddd, //!< Instruction 'paddd' {MMX|SSE2}.
506 kIdPaddq, //!< Instruction 'paddq' {SSE2}.
507 kIdPaddsb, //!< Instruction 'paddsb' {MMX|SSE2}.
508 kIdPaddsw, //!< Instruction 'paddsw' {MMX|SSE2}.
509 kIdPaddusb, //!< Instruction 'paddusb' {MMX|SSE2}.
510 kIdPaddusw, //!< Instruction 'paddusw' {MMX|SSE2}.
511 kIdPaddw, //!< Instruction 'paddw' {MMX|SSE2}.
512 kIdPalignr, //!< Instruction 'palignr' {SSSE3}.
513 kIdPand, //!< Instruction 'pand' {MMX|SSE2}.
514 kIdPandn, //!< Instruction 'pandn' {MMX|SSE2}.
515 kIdPause, //!< Instruction 'pause'.
516 kIdPavgb, //!< Instruction 'pavgb' {MMX2|SSE2}.
517 kIdPavgusb, //!< Instruction 'pavgusb' {3DNOW}.
518 kIdPavgw, //!< Instruction 'pavgw' {MMX2|SSE2}.
519 kIdPblendvb, //!< Instruction 'pblendvb' {SSE4_1}.
520 kIdPblendw, //!< Instruction 'pblendw' {SSE4_1}.
521 kIdPclmulqdq, //!< Instruction 'pclmulqdq' {PCLMULQDQ}.
522 kIdPcmpeqb, //!< Instruction 'pcmpeqb' {MMX|SSE2}.
523 kIdPcmpeqd, //!< Instruction 'pcmpeqd' {MMX|SSE2}.
524 kIdPcmpeqq, //!< Instruction 'pcmpeqq' {SSE4_1}.
525 kIdPcmpeqw, //!< Instruction 'pcmpeqw' {MMX|SSE2}.
526 kIdPcmpestri, //!< Instruction 'pcmpestri' {SSE4_2}.
527 kIdPcmpestrm, //!< Instruction 'pcmpestrm' {SSE4_2}.
528 kIdPcmpgtb, //!< Instruction 'pcmpgtb' {MMX|SSE2}.
529 kIdPcmpgtd, //!< Instruction 'pcmpgtd' {MMX|SSE2}.
530 kIdPcmpgtq, //!< Instruction 'pcmpgtq' {SSE4_2}.
531 kIdPcmpgtw, //!< Instruction 'pcmpgtw' {MMX|SSE2}.
532 kIdPcmpistri, //!< Instruction 'pcmpistri' {SSE4_2}.
533 kIdPcmpistrm, //!< Instruction 'pcmpistrm' {SSE4_2}.
534 kIdPcommit, //!< Instruction 'pcommit' {PCOMMIT}.
535 kIdPdep, //!< Instruction 'pdep' {BMI2}.
536 kIdPext, //!< Instruction 'pext' {BMI2}.
537 kIdPextrb, //!< Instruction 'pextrb' {SSE4_1}.
538 kIdPextrd, //!< Instruction 'pextrd' {SSE4_1}.
539 kIdPextrq, //!< Instruction 'pextrq' {SSE4_1} (X64).
540 kIdPextrw, //!< Instruction 'pextrw' {MMX2|SSE2|SSE4_1}.
541 kIdPf2id, //!< Instruction 'pf2id' {3DNOW}.
542 kIdPf2iw, //!< Instruction 'pf2iw' {3DNOW2}.
543 kIdPfacc, //!< Instruction 'pfacc' {3DNOW}.
544 kIdPfadd, //!< Instruction 'pfadd' {3DNOW}.
545 kIdPfcmpeq, //!< Instruction 'pfcmpeq' {3DNOW}.
546 kIdPfcmpge, //!< Instruction 'pfcmpge' {3DNOW}.
547 kIdPfcmpgt, //!< Instruction 'pfcmpgt' {3DNOW}.
548 kIdPfmax, //!< Instruction 'pfmax' {3DNOW}.
549 kIdPfmin, //!< Instruction 'pfmin' {3DNOW}.
550 kIdPfmul, //!< Instruction 'pfmul' {3DNOW}.
551 kIdPfnacc, //!< Instruction 'pfnacc' {3DNOW2}.
552 kIdPfpnacc, //!< Instruction 'pfpnacc' {3DNOW2}.
553 kIdPfrcp, //!< Instruction 'pfrcp' {3DNOW}.
554 kIdPfrcpit1, //!< Instruction 'pfrcpit1' {3DNOW}.
555 kIdPfrcpit2, //!< Instruction 'pfrcpit2' {3DNOW}.
556 kIdPfrcpv, //!< Instruction 'pfrcpv' {GEODE}.
557 kIdPfrsqit1, //!< Instruction 'pfrsqit1' {3DNOW}.
558 kIdPfrsqrt, //!< Instruction 'pfrsqrt' {3DNOW}.
559 kIdPfrsqrtv, //!< Instruction 'pfrsqrtv' {GEODE}.
560 kIdPfsub, //!< Instruction 'pfsub' {3DNOW}.
561 kIdPfsubr, //!< Instruction 'pfsubr' {3DNOW}.
562 kIdPhaddd, //!< Instruction 'phaddd' {SSSE3}.
563 kIdPhaddsw, //!< Instruction 'phaddsw' {SSSE3}.
564 kIdPhaddw, //!< Instruction 'phaddw' {SSSE3}.
565 kIdPhminposuw, //!< Instruction 'phminposuw' {SSE4_1}.
566 kIdPhsubd, //!< Instruction 'phsubd' {SSSE3}.
567 kIdPhsubsw, //!< Instruction 'phsubsw' {SSSE3}.
568 kIdPhsubw, //!< Instruction 'phsubw' {SSSE3}.
569 kIdPi2fd, //!< Instruction 'pi2fd' {3DNOW}.
570 kIdPi2fw, //!< Instruction 'pi2fw' {3DNOW2}.
571 kIdPinsrb, //!< Instruction 'pinsrb' {SSE4_1}.
572 kIdPinsrd, //!< Instruction 'pinsrd' {SSE4_1}.
573 kIdPinsrq, //!< Instruction 'pinsrq' {SSE4_1} (X64).
574 kIdPinsrw, //!< Instruction 'pinsrw' {MMX2|SSE2}.
575 kIdPmaddubsw, //!< Instruction 'pmaddubsw' {SSSE3}.
576 kIdPmaddwd, //!< Instruction 'pmaddwd' {MMX|SSE2}.
577 kIdPmaxsb, //!< Instruction 'pmaxsb' {SSE4_1}.
578 kIdPmaxsd, //!< Instruction 'pmaxsd' {SSE4_1}.
579 kIdPmaxsw, //!< Instruction 'pmaxsw' {MMX2|SSE2}.
580 kIdPmaxub, //!< Instruction 'pmaxub' {MMX2|SSE2}.
581 kIdPmaxud, //!< Instruction 'pmaxud' {SSE4_1}.
582 kIdPmaxuw, //!< Instruction 'pmaxuw' {SSE4_1}.
583 kIdPminsb, //!< Instruction 'pminsb' {SSE4_1}.
584 kIdPminsd, //!< Instruction 'pminsd' {SSE4_1}.
585 kIdPminsw, //!< Instruction 'pminsw' {MMX2|SSE2}.
586 kIdPminub, //!< Instruction 'pminub' {MMX2|SSE2}.
587 kIdPminud, //!< Instruction 'pminud' {SSE4_1}.
588 kIdPminuw, //!< Instruction 'pminuw' {SSE4_1}.
589 kIdPmovmskb, //!< Instruction 'pmovmskb' {MMX2|SSE2}.
590 kIdPmovsxbd, //!< Instruction 'pmovsxbd' {SSE4_1}.
591 kIdPmovsxbq, //!< Instruction 'pmovsxbq' {SSE4_1}.
592 kIdPmovsxbw, //!< Instruction 'pmovsxbw' {SSE4_1}.
593 kIdPmovsxdq, //!< Instruction 'pmovsxdq' {SSE4_1}.
594 kIdPmovsxwd, //!< Instruction 'pmovsxwd' {SSE4_1}.
595 kIdPmovsxwq, //!< Instruction 'pmovsxwq' {SSE4_1}.
596 kIdPmovzxbd, //!< Instruction 'pmovzxbd' {SSE4_1}.
597 kIdPmovzxbq, //!< Instruction 'pmovzxbq' {SSE4_1}.
598 kIdPmovzxbw, //!< Instruction 'pmovzxbw' {SSE4_1}.
599 kIdPmovzxdq, //!< Instruction 'pmovzxdq' {SSE4_1}.
600 kIdPmovzxwd, //!< Instruction 'pmovzxwd' {SSE4_1}.
601 kIdPmovzxwq, //!< Instruction 'pmovzxwq' {SSE4_1}.
602 kIdPmuldq, //!< Instruction 'pmuldq' {SSE4_1}.
603 kIdPmulhrsw, //!< Instruction 'pmulhrsw' {SSSE3}.
604 kIdPmulhrw, //!< Instruction 'pmulhrw' {3DNOW}.
605 kIdPmulhuw, //!< Instruction 'pmulhuw' {MMX2|SSE2}.
606 kIdPmulhw, //!< Instruction 'pmulhw' {MMX|SSE2}.
607 kIdPmulld, //!< Instruction 'pmulld' {SSE4_1}.
608 kIdPmullw, //!< Instruction 'pmullw' {MMX|SSE2}.
609 kIdPmuludq, //!< Instruction 'pmuludq' {SSE2}.
610 kIdPop, //!< Instruction 'pop'.
611 kIdPopa, //!< Instruction 'popa' (X86).
612 kIdPopad, //!< Instruction 'popad' (X86).
613 kIdPopcnt, //!< Instruction 'popcnt' {POPCNT}.
614 kIdPopf, //!< Instruction 'popf'.
615 kIdPopfd, //!< Instruction 'popfd' (X86).
616 kIdPopfq, //!< Instruction 'popfq' (X64).
617 kIdPor, //!< Instruction 'por' {MMX|SSE2}.
618 kIdPrefetch, //!< Instruction 'prefetch' {3DNOW}.
619 kIdPrefetchnta, //!< Instruction 'prefetchnta' {MMX2}.
620 kIdPrefetcht0, //!< Instruction 'prefetcht0' {MMX2}.
621 kIdPrefetcht1, //!< Instruction 'prefetcht1' {MMX2}.
622 kIdPrefetcht2, //!< Instruction 'prefetcht2' {MMX2}.
623 kIdPrefetchw, //!< Instruction 'prefetchw' {PREFETCHW}.
624 kIdPrefetchwt1, //!< Instruction 'prefetchwt1' {PREFETCHWT1}.
625 kIdPsadbw, //!< Instruction 'psadbw' {MMX2|SSE2}.
626 kIdPshufb, //!< Instruction 'pshufb' {SSSE3}.
627 kIdPshufd, //!< Instruction 'pshufd' {SSE2}.
628 kIdPshufhw, //!< Instruction 'pshufhw' {SSE2}.
629 kIdPshuflw, //!< Instruction 'pshuflw' {SSE2}.
630 kIdPshufw, //!< Instruction 'pshufw' {MMX2}.
631 kIdPsignb, //!< Instruction 'psignb' {SSSE3}.
632 kIdPsignd, //!< Instruction 'psignd' {SSSE3}.
633 kIdPsignw, //!< Instruction 'psignw' {SSSE3}.
634 kIdPslld, //!< Instruction 'pslld' {MMX|SSE2}.
635 kIdPslldq, //!< Instruction 'pslldq' {SSE2}.
636 kIdPsllq, //!< Instruction 'psllq' {MMX|SSE2}.
637 kIdPsllw, //!< Instruction 'psllw' {MMX|SSE2}.
638 kIdPsrad, //!< Instruction 'psrad' {MMX|SSE2}.
639 kIdPsraw, //!< Instruction 'psraw' {MMX|SSE2}.
640 kIdPsrld, //!< Instruction 'psrld' {MMX|SSE2}.
641 kIdPsrldq, //!< Instruction 'psrldq' {SSE2}.
642 kIdPsrlq, //!< Instruction 'psrlq' {MMX|SSE2}.
643 kIdPsrlw, //!< Instruction 'psrlw' {MMX|SSE2}.
644 kIdPsubb, //!< Instruction 'psubb' {MMX|SSE2}.
645 kIdPsubd, //!< Instruction 'psubd' {MMX|SSE2}.
646 kIdPsubq, //!< Instruction 'psubq' {SSE2}.
647 kIdPsubsb, //!< Instruction 'psubsb' {MMX|SSE2}.
648 kIdPsubsw, //!< Instruction 'psubsw' {MMX|SSE2}.
649 kIdPsubusb, //!< Instruction 'psubusb' {MMX|SSE2}.
650 kIdPsubusw, //!< Instruction 'psubusw' {MMX|SSE2}.
651 kIdPsubw, //!< Instruction 'psubw' {MMX|SSE2}.
652 kIdPswapd, //!< Instruction 'pswapd' {3DNOW2}.
653 kIdPtest, //!< Instruction 'ptest' {SSE4_1}.
654 kIdPunpckhbw, //!< Instruction 'punpckhbw' {MMX|SSE2}.
655 kIdPunpckhdq, //!< Instruction 'punpckhdq' {MMX|SSE2}.
656 kIdPunpckhqdq, //!< Instruction 'punpckhqdq' {SSE2}.
657 kIdPunpckhwd, //!< Instruction 'punpckhwd' {MMX|SSE2}.
658 kIdPunpcklbw, //!< Instruction 'punpcklbw' {MMX|SSE2}.
659 kIdPunpckldq, //!< Instruction 'punpckldq' {MMX|SSE2}.
660 kIdPunpcklqdq, //!< Instruction 'punpcklqdq' {SSE2}.
661 kIdPunpcklwd, //!< Instruction 'punpcklwd' {MMX|SSE2}.
662 kIdPush, //!< Instruction 'push'.
663 kIdPusha, //!< Instruction 'pusha' (X86).
664 kIdPushad, //!< Instruction 'pushad' (X86).
665 kIdPushf, //!< Instruction 'pushf'.
666 kIdPushfd, //!< Instruction 'pushfd' (X86).
667 kIdPushfq, //!< Instruction 'pushfq' (X64).
668 kIdPxor, //!< Instruction 'pxor' {MMX|SSE2}.
669 kIdRcl, //!< Instruction 'rcl'.
670 kIdRcpps, //!< Instruction 'rcpps' {SSE}.
671 kIdRcpss, //!< Instruction 'rcpss' {SSE}.
672 kIdRcr, //!< Instruction 'rcr'.
673 kIdRdfsbase, //!< Instruction 'rdfsbase' {FSGSBASE} (X64).
674 kIdRdgsbase, //!< Instruction 'rdgsbase' {FSGSBASE} (X64).
675 kIdRdmsr, //!< Instruction 'rdmsr' {MSR}.
676 kIdRdpid, //!< Instruction 'rdpid' {RDPID}.
677 kIdRdpmc, //!< Instruction 'rdpmc'.
678 kIdRdrand, //!< Instruction 'rdrand' {RDRAND}.
679 kIdRdseed, //!< Instruction 'rdseed' {RDSEED}.
680 kIdRdtsc, //!< Instruction 'rdtsc' {RDTSC}.
681 kIdRdtscp, //!< Instruction 'rdtscp' {RDTSCP}.
682 kIdRet, //!< Instruction 'ret'.
683 kIdRol, //!< Instruction 'rol'.
684 kIdRor, //!< Instruction 'ror'.
685 kIdRorx, //!< Instruction 'rorx' {BMI2}.
686 kIdRoundpd, //!< Instruction 'roundpd' {SSE4_1}.
687 kIdRoundps, //!< Instruction 'roundps' {SSE4_1}.
688 kIdRoundsd, //!< Instruction 'roundsd' {SSE4_1}.
689 kIdRoundss, //!< Instruction 'roundss' {SSE4_1}.
690 kIdRsm, //!< Instruction 'rsm' (X86).
691 kIdRsqrtps, //!< Instruction 'rsqrtps' {SSE}.
692 kIdRsqrtss, //!< Instruction 'rsqrtss' {SSE}.
693 kIdSahf, //!< Instruction 'sahf' {LAHFSAHF}.
694 kIdSal, //!< Instruction 'sal'.
695 kIdSar, //!< Instruction 'sar'.
696 kIdSarx, //!< Instruction 'sarx' {BMI2}.
697 kIdSbb, //!< Instruction 'sbb'.
698 kIdScas, //!< Instruction 'scas'.
699 kIdSeta, //!< Instruction 'seta'.
700 kIdSetae, //!< Instruction 'setae'.
701 kIdSetb, //!< Instruction 'setb'.
702 kIdSetbe, //!< Instruction 'setbe'.
703 kIdSetc, //!< Instruction 'setc'.
704 kIdSete, //!< Instruction 'sete'.
705 kIdSetg, //!< Instruction 'setg'.
706 kIdSetge, //!< Instruction 'setge'.
707 kIdSetl, //!< Instruction 'setl'.
708 kIdSetle, //!< Instruction 'setle'.
709 kIdSetna, //!< Instruction 'setna'.
710 kIdSetnae, //!< Instruction 'setnae'.
711 kIdSetnb, //!< Instruction 'setnb'.
712 kIdSetnbe, //!< Instruction 'setnbe'.
713 kIdSetnc, //!< Instruction 'setnc'.
714 kIdSetne, //!< Instruction 'setne'.
715 kIdSetng, //!< Instruction 'setng'.
716 kIdSetnge, //!< Instruction 'setnge'.
717 kIdSetnl, //!< Instruction 'setnl'.
718 kIdSetnle, //!< Instruction 'setnle'.
719 kIdSetno, //!< Instruction 'setno'.
720 kIdSetnp, //!< Instruction 'setnp'.
721 kIdSetns, //!< Instruction 'setns'.
722 kIdSetnz, //!< Instruction 'setnz'.
723 kIdSeto, //!< Instruction 'seto'.
724 kIdSetp, //!< Instruction 'setp'.
725 kIdSetpe, //!< Instruction 'setpe'.
726 kIdSetpo, //!< Instruction 'setpo'.
727 kIdSets, //!< Instruction 'sets'.
728 kIdSetz, //!< Instruction 'setz'.
729 kIdSfence, //!< Instruction 'sfence' {MMX2}.
730 kIdSgdt, //!< Instruction 'sgdt'.
731 kIdSha1msg1, //!< Instruction 'sha1msg1' {SHA}.
732 kIdSha1msg2, //!< Instruction 'sha1msg2' {SHA}.
733 kIdSha1nexte, //!< Instruction 'sha1nexte' {SHA}.
734 kIdSha1rnds4, //!< Instruction 'sha1rnds4' {SHA}.
735 kIdSha256msg1, //!< Instruction 'sha256msg1' {SHA}.
736 kIdSha256msg2, //!< Instruction 'sha256msg2' {SHA}.
737 kIdSha256rnds2, //!< Instruction 'sha256rnds2' {SHA}.
738 kIdShl, //!< Instruction 'shl'.
739 kIdShld, //!< Instruction 'shld'.
740 kIdShlx, //!< Instruction 'shlx' {BMI2}.
741 kIdShr, //!< Instruction 'shr'.
742 kIdShrd, //!< Instruction 'shrd'.
743 kIdShrx, //!< Instruction 'shrx' {BMI2}.
744 kIdShufpd, //!< Instruction 'shufpd' {SSE2}.
745 kIdShufps, //!< Instruction 'shufps' {SSE}.
746 kIdSidt, //!< Instruction 'sidt'.
747 kIdSkinit, //!< Instruction 'skinit' {SKINIT}.
748 kIdSldt, //!< Instruction 'sldt'.
749 kIdSlwpcb, //!< Instruction 'slwpcb' {LWP}.
750 kIdSmsw, //!< Instruction 'smsw'.
751 kIdSqrtpd, //!< Instruction 'sqrtpd' {SSE2}.
752 kIdSqrtps, //!< Instruction 'sqrtps' {SSE}.
753 kIdSqrtsd, //!< Instruction 'sqrtsd' {SSE2}.
754 kIdSqrtss, //!< Instruction 'sqrtss' {SSE}.
755 kIdStac, //!< Instruction 'stac' {SMAP}.
756 kIdStc, //!< Instruction 'stc'.
757 kIdStd, //!< Instruction 'std'.
758 kIdStgi, //!< Instruction 'stgi' {SKINIT}.
759 kIdSti, //!< Instruction 'sti'.
760 kIdStmxcsr, //!< Instruction 'stmxcsr' {SSE}.
761 kIdStos, //!< Instruction 'stos'.
762 kIdStr, //!< Instruction 'str'.
763 kIdSub, //!< Instruction 'sub'.
764 kIdSubpd, //!< Instruction 'subpd' {SSE2}.
765 kIdSubps, //!< Instruction 'subps' {SSE}.
766 kIdSubsd, //!< Instruction 'subsd' {SSE2}.
767 kIdSubss, //!< Instruction 'subss' {SSE}.
768 kIdSwapgs, //!< Instruction 'swapgs' (X64).
769 kIdSyscall, //!< Instruction 'syscall' (X64).
770 kIdSysenter, //!< Instruction 'sysenter'.
771 kIdSysexit, //!< Instruction 'sysexit'.
772 kIdSysexit64, //!< Instruction 'sysexit64'.
773 kIdSysret, //!< Instruction 'sysret' (X64).
774 kIdSysret64, //!< Instruction 'sysret64' (X64).
775 kIdT1mskc, //!< Instruction 't1mskc' {TBM}.
776 kIdTest, //!< Instruction 'test'.
777 kIdTzcnt, //!< Instruction 'tzcnt' {BMI}.
778 kIdTzmsk, //!< Instruction 'tzmsk' {TBM}.
779 kIdUcomisd, //!< Instruction 'ucomisd' {SSE2}.
780 kIdUcomiss, //!< Instruction 'ucomiss' {SSE}.
781 kIdUd2, //!< Instruction 'ud2'.
782 kIdUnpckhpd, //!< Instruction 'unpckhpd' {SSE2}.
783 kIdUnpckhps, //!< Instruction 'unpckhps' {SSE}.
784 kIdUnpcklpd, //!< Instruction 'unpcklpd' {SSE2}.
785 kIdUnpcklps, //!< Instruction 'unpcklps' {SSE}.
786 kIdV4fmaddps, //!< Instruction 'v4fmaddps' {AVX512_4FMAPS}.
787 kIdV4fmaddss, //!< Instruction 'v4fmaddss' {AVX512_4FMAPS}.
788 kIdV4fnmaddps, //!< Instruction 'v4fnmaddps' {AVX512_4FMAPS}.
789 kIdV4fnmaddss, //!< Instruction 'v4fnmaddss' {AVX512_4FMAPS}.
790 kIdVaddpd, //!< Instruction 'vaddpd' {AVX|AVX512_F+VL}.
791 kIdVaddps, //!< Instruction 'vaddps' {AVX|AVX512_F+VL}.
792 kIdVaddsd, //!< Instruction 'vaddsd' {AVX|AVX512_F}.
793 kIdVaddss, //!< Instruction 'vaddss' {AVX|AVX512_F}.
794 kIdVaddsubpd, //!< Instruction 'vaddsubpd' {AVX}.
795 kIdVaddsubps, //!< Instruction 'vaddsubps' {AVX}.
796 kIdVaesdec, //!< Instruction 'vaesdec' {AVX|AVX512_F+VL & AESNI|VAES}.
797 kIdVaesdeclast, //!< Instruction 'vaesdeclast' {AVX|AVX512_F+VL & AESNI|VAES}.
798 kIdVaesenc, //!< Instruction 'vaesenc' {AVX|AVX512_F+VL & AESNI|VAES}.
799 kIdVaesenclast, //!< Instruction 'vaesenclast' {AVX|AVX512_F+VL & AESNI|VAES}.
800 kIdVaesimc, //!< Instruction 'vaesimc' {AVX & AESNI}.
801 kIdVaeskeygenassist, //!< Instruction 'vaeskeygenassist' {AVX & AESNI}.
802 kIdValignd, //!< Instruction 'valignd' {AVX512_F+VL}.
803 kIdValignq, //!< Instruction 'valignq' {AVX512_F+VL}.
804 kIdVandnpd, //!< Instruction 'vandnpd' {AVX|AVX512_DQ+VL}.
805 kIdVandnps, //!< Instruction 'vandnps' {AVX|AVX512_DQ+VL}.
806 kIdVandpd, //!< Instruction 'vandpd' {AVX|AVX512_DQ+VL}.
807 kIdVandps, //!< Instruction 'vandps' {AVX|AVX512_DQ+VL}.
808 kIdVblendmb, //!< Instruction 'vblendmb' {AVX512_BW+VL}.
809 kIdVblendmd, //!< Instruction 'vblendmd' {AVX512_F+VL}.
810 kIdVblendmpd, //!< Instruction 'vblendmpd' {AVX512_F+VL}.
811 kIdVblendmps, //!< Instruction 'vblendmps' {AVX512_F+VL}.
812 kIdVblendmq, //!< Instruction 'vblendmq' {AVX512_F+VL}.
813 kIdVblendmw, //!< Instruction 'vblendmw' {AVX512_BW+VL}.
814 kIdVblendpd, //!< Instruction 'vblendpd' {AVX}.
815 kIdVblendps, //!< Instruction 'vblendps' {AVX}.
816 kIdVblendvpd, //!< Instruction 'vblendvpd' {AVX}.
817 kIdVblendvps, //!< Instruction 'vblendvps' {AVX}.
818 kIdVbroadcastf128, //!< Instruction 'vbroadcastf128' {AVX}.
819 kIdVbroadcastf32x2, //!< Instruction 'vbroadcastf32x2' {AVX512_DQ+VL}.
820 kIdVbroadcastf32x4, //!< Instruction 'vbroadcastf32x4' {AVX512_F}.
821 kIdVbroadcastf32x8, //!< Instruction 'vbroadcastf32x8' {AVX512_DQ}.
822 kIdVbroadcastf64x2, //!< Instruction 'vbroadcastf64x2' {AVX512_DQ+VL}.
823 kIdVbroadcastf64x4, //!< Instruction 'vbroadcastf64x4' {AVX512_F}.
824 kIdVbroadcasti128, //!< Instruction 'vbroadcasti128' {AVX2}.
825 kIdVbroadcasti32x2, //!< Instruction 'vbroadcasti32x2' {AVX512_DQ+VL}.
826 kIdVbroadcasti32x4, //!< Instruction 'vbroadcasti32x4' {AVX512_F+VL}.
827 kIdVbroadcasti32x8, //!< Instruction 'vbroadcasti32x8' {AVX512_DQ}.
828 kIdVbroadcasti64x2, //!< Instruction 'vbroadcasti64x2' {AVX512_DQ+VL}.
829 kIdVbroadcasti64x4, //!< Instruction 'vbroadcasti64x4' {AVX512_F}.
830 kIdVbroadcastsd, //!< Instruction 'vbroadcastsd' {AVX|AVX2|AVX512_F+VL}.
831 kIdVbroadcastss, //!< Instruction 'vbroadcastss' {AVX|AVX2|AVX512_F+VL}.
832 kIdVcmppd, //!< Instruction 'vcmppd' {AVX|AVX512_F+VL}.
833 kIdVcmpps, //!< Instruction 'vcmpps' {AVX|AVX512_F+VL}.
834 kIdVcmpsd, //!< Instruction 'vcmpsd' {AVX|AVX512_F}.
835 kIdVcmpss, //!< Instruction 'vcmpss' {AVX|AVX512_F}.
836 kIdVcomisd, //!< Instruction 'vcomisd' {AVX|AVX512_F}.
837 kIdVcomiss, //!< Instruction 'vcomiss' {AVX|AVX512_F}.
838 kIdVcompresspd, //!< Instruction 'vcompresspd' {AVX512_F+VL}.
839 kIdVcompressps, //!< Instruction 'vcompressps' {AVX512_F+VL}.
840 kIdVcvtdq2pd, //!< Instruction 'vcvtdq2pd' {AVX|AVX512_F+VL}.
841 kIdVcvtdq2ps, //!< Instruction 'vcvtdq2ps' {AVX|AVX512_F+VL}.
842 kIdVcvtne2ps2bf16, //!< Instruction 'vcvtne2ps2bf16' {AVX512_BF16+VL}.
843 kIdVcvtneps2bf16, //!< Instruction 'vcvtneps2bf16' {AVX512_BF16+VL}.
844 kIdVcvtpd2dq, //!< Instruction 'vcvtpd2dq' {AVX|AVX512_F+VL}.
845 kIdVcvtpd2ps, //!< Instruction 'vcvtpd2ps' {AVX|AVX512_F+VL}.
846 kIdVcvtpd2qq, //!< Instruction 'vcvtpd2qq' {AVX512_DQ+VL}.
847 kIdVcvtpd2udq, //!< Instruction 'vcvtpd2udq' {AVX512_F+VL}.
848 kIdVcvtpd2uqq, //!< Instruction 'vcvtpd2uqq' {AVX512_DQ+VL}.
849 kIdVcvtph2ps, //!< Instruction 'vcvtph2ps' {AVX512_F+VL & F16C}.
850 kIdVcvtps2dq, //!< Instruction 'vcvtps2dq' {AVX|AVX512_F+VL}.
851 kIdVcvtps2pd, //!< Instruction 'vcvtps2pd' {AVX|AVX512_F+VL}.
852 kIdVcvtps2ph, //!< Instruction 'vcvtps2ph' {AVX512_F+VL & F16C}.
853 kIdVcvtps2qq, //!< Instruction 'vcvtps2qq' {AVX512_DQ+VL}.
854 kIdVcvtps2udq, //!< Instruction 'vcvtps2udq' {AVX512_F+VL}.
855 kIdVcvtps2uqq, //!< Instruction 'vcvtps2uqq' {AVX512_DQ+VL}.
856 kIdVcvtqq2pd, //!< Instruction 'vcvtqq2pd' {AVX512_DQ+VL}.
857 kIdVcvtqq2ps, //!< Instruction 'vcvtqq2ps' {AVX512_DQ+VL}.
858 kIdVcvtsd2si, //!< Instruction 'vcvtsd2si' {AVX|AVX512_F}.
859 kIdVcvtsd2ss, //!< Instruction 'vcvtsd2ss' {AVX|AVX512_F}.
860 kIdVcvtsd2usi, //!< Instruction 'vcvtsd2usi' {AVX512_F}.
861 kIdVcvtsi2sd, //!< Instruction 'vcvtsi2sd' {AVX|AVX512_F}.
862 kIdVcvtsi2ss, //!< Instruction 'vcvtsi2ss' {AVX|AVX512_F}.
863 kIdVcvtss2sd, //!< Instruction 'vcvtss2sd' {AVX|AVX512_F}.
864 kIdVcvtss2si, //!< Instruction 'vcvtss2si' {AVX|AVX512_F}.
865 kIdVcvtss2usi, //!< Instruction 'vcvtss2usi' {AVX512_F}.
866 kIdVcvttpd2dq, //!< Instruction 'vcvttpd2dq' {AVX|AVX512_F+VL}.
867 kIdVcvttpd2qq, //!< Instruction 'vcvttpd2qq' {AVX512_DQ+VL}.
868 kIdVcvttpd2udq, //!< Instruction 'vcvttpd2udq' {AVX512_F+VL}.
869 kIdVcvttpd2uqq, //!< Instruction 'vcvttpd2uqq' {AVX512_DQ+VL}.
870 kIdVcvttps2dq, //!< Instruction 'vcvttps2dq' {AVX|AVX512_F+VL}.
871 kIdVcvttps2qq, //!< Instruction 'vcvttps2qq' {AVX512_DQ+VL}.
872 kIdVcvttps2udq, //!< Instruction 'vcvttps2udq' {AVX512_F+VL}.
873 kIdVcvttps2uqq, //!< Instruction 'vcvttps2uqq' {AVX512_DQ+VL}.
874 kIdVcvttsd2si, //!< Instruction 'vcvttsd2si' {AVX|AVX512_F}.
875 kIdVcvttsd2usi, //!< Instruction 'vcvttsd2usi' {AVX512_F}.
876 kIdVcvttss2si, //!< Instruction 'vcvttss2si' {AVX|AVX512_F}.
877 kIdVcvttss2usi, //!< Instruction 'vcvttss2usi' {AVX512_F}.
878 kIdVcvtudq2pd, //!< Instruction 'vcvtudq2pd' {AVX512_F+VL}.
879 kIdVcvtudq2ps, //!< Instruction 'vcvtudq2ps' {AVX512_F+VL}.
880 kIdVcvtuqq2pd, //!< Instruction 'vcvtuqq2pd' {AVX512_DQ+VL}.
881 kIdVcvtuqq2ps, //!< Instruction 'vcvtuqq2ps' {AVX512_DQ+VL}.
882 kIdVcvtusi2sd, //!< Instruction 'vcvtusi2sd' {AVX512_F}.
883 kIdVcvtusi2ss, //!< Instruction 'vcvtusi2ss' {AVX512_F}.
884 kIdVdbpsadbw, //!< Instruction 'vdbpsadbw' {AVX512_BW+VL}.
885 kIdVdivpd, //!< Instruction 'vdivpd' {AVX|AVX512_F+VL}.
886 kIdVdivps, //!< Instruction 'vdivps' {AVX|AVX512_F+VL}.
887 kIdVdivsd, //!< Instruction 'vdivsd' {AVX|AVX512_F}.
888 kIdVdivss, //!< Instruction 'vdivss' {AVX|AVX512_F}.
889 kIdVdpbf16ps, //!< Instruction 'vdpbf16ps' {AVX512_BF16+VL}.
890 kIdVdppd, //!< Instruction 'vdppd' {AVX}.
891 kIdVdpps, //!< Instruction 'vdpps' {AVX}.
892 kIdVerr, //!< Instruction 'verr'.
893 kIdVerw, //!< Instruction 'verw'.
894 kIdVexp2pd, //!< Instruction 'vexp2pd' {AVX512_ERI}.
895 kIdVexp2ps, //!< Instruction 'vexp2ps' {AVX512_ERI}.
896 kIdVexpandpd, //!< Instruction 'vexpandpd' {AVX512_F+VL}.
897 kIdVexpandps, //!< Instruction 'vexpandps' {AVX512_F+VL}.
898 kIdVextractf128, //!< Instruction 'vextractf128' {AVX}.
899 kIdVextractf32x4, //!< Instruction 'vextractf32x4' {AVX512_F+VL}.
900 kIdVextractf32x8, //!< Instruction 'vextractf32x8' {AVX512_DQ}.
901 kIdVextractf64x2, //!< Instruction 'vextractf64x2' {AVX512_DQ+VL}.
902 kIdVextractf64x4, //!< Instruction 'vextractf64x4' {AVX512_F}.
903 kIdVextracti128, //!< Instruction 'vextracti128' {AVX2}.
904 kIdVextracti32x4, //!< Instruction 'vextracti32x4' {AVX512_F+VL}.
905 kIdVextracti32x8, //!< Instruction 'vextracti32x8' {AVX512_DQ}.
906 kIdVextracti64x2, //!< Instruction 'vextracti64x2' {AVX512_DQ+VL}.
907 kIdVextracti64x4, //!< Instruction 'vextracti64x4' {AVX512_F}.
908 kIdVextractps, //!< Instruction 'vextractps' {AVX|AVX512_F}.
909 kIdVfixupimmpd, //!< Instruction 'vfixupimmpd' {AVX512_F+VL}.
910 kIdVfixupimmps, //!< Instruction 'vfixupimmps' {AVX512_F+VL}.
911 kIdVfixupimmsd, //!< Instruction 'vfixupimmsd' {AVX512_F}.
912 kIdVfixupimmss, //!< Instruction 'vfixupimmss' {AVX512_F}.
913 kIdVfmadd132pd, //!< Instruction 'vfmadd132pd' {FMA|AVX512_F+VL}.
914 kIdVfmadd132ps, //!< Instruction 'vfmadd132ps' {FMA|AVX512_F+VL}.
915 kIdVfmadd132sd, //!< Instruction 'vfmadd132sd' {FMA|AVX512_F}.
916 kIdVfmadd132ss, //!< Instruction 'vfmadd132ss' {FMA|AVX512_F}.
917 kIdVfmadd213pd, //!< Instruction 'vfmadd213pd' {FMA|AVX512_F+VL}.
918 kIdVfmadd213ps, //!< Instruction 'vfmadd213ps' {FMA|AVX512_F+VL}.
919 kIdVfmadd213sd, //!< Instruction 'vfmadd213sd' {FMA|AVX512_F}.
920 kIdVfmadd213ss, //!< Instruction 'vfmadd213ss' {FMA|AVX512_F}.
921 kIdVfmadd231pd, //!< Instruction 'vfmadd231pd' {FMA|AVX512_F+VL}.
922 kIdVfmadd231ps, //!< Instruction 'vfmadd231ps' {FMA|AVX512_F+VL}.
923 kIdVfmadd231sd, //!< Instruction 'vfmadd231sd' {FMA|AVX512_F}.
924 kIdVfmadd231ss, //!< Instruction 'vfmadd231ss' {FMA|AVX512_F}.
925 kIdVfmaddpd, //!< Instruction 'vfmaddpd' {FMA4}.
926 kIdVfmaddps, //!< Instruction 'vfmaddps' {FMA4}.
927 kIdVfmaddsd, //!< Instruction 'vfmaddsd' {FMA4}.
928 kIdVfmaddss, //!< Instruction 'vfmaddss' {FMA4}.
929 kIdVfmaddsub132pd, //!< Instruction 'vfmaddsub132pd' {FMA|AVX512_F+VL}.
930 kIdVfmaddsub132ps, //!< Instruction 'vfmaddsub132ps' {FMA|AVX512_F+VL}.
931 kIdVfmaddsub213pd, //!< Instruction 'vfmaddsub213pd' {FMA|AVX512_F+VL}.
932 kIdVfmaddsub213ps, //!< Instruction 'vfmaddsub213ps' {FMA|AVX512_F+VL}.
933 kIdVfmaddsub231pd, //!< Instruction 'vfmaddsub231pd' {FMA|AVX512_F+VL}.
934 kIdVfmaddsub231ps, //!< Instruction 'vfmaddsub231ps' {FMA|AVX512_F+VL}.
935 kIdVfmaddsubpd, //!< Instruction 'vfmaddsubpd' {FMA4}.
936 kIdVfmaddsubps, //!< Instruction 'vfmaddsubps' {FMA4}.
937 kIdVfmsub132pd, //!< Instruction 'vfmsub132pd' {FMA|AVX512_F+VL}.
938 kIdVfmsub132ps, //!< Instruction 'vfmsub132ps' {FMA|AVX512_F+VL}.
939 kIdVfmsub132sd, //!< Instruction 'vfmsub132sd' {FMA|AVX512_F}.
940 kIdVfmsub132ss, //!< Instruction 'vfmsub132ss' {FMA|AVX512_F}.
941 kIdVfmsub213pd, //!< Instruction 'vfmsub213pd' {FMA|AVX512_F+VL}.
942 kIdVfmsub213ps, //!< Instruction 'vfmsub213ps' {FMA|AVX512_F+VL}.
943 kIdVfmsub213sd, //!< Instruction 'vfmsub213sd' {FMA|AVX512_F}.
944 kIdVfmsub213ss, //!< Instruction 'vfmsub213ss' {FMA|AVX512_F}.
945 kIdVfmsub231pd, //!< Instruction 'vfmsub231pd' {FMA|AVX512_F+VL}.
946 kIdVfmsub231ps, //!< Instruction 'vfmsub231ps' {FMA|AVX512_F+VL}.
947 kIdVfmsub231sd, //!< Instruction 'vfmsub231sd' {FMA|AVX512_F}.
948 kIdVfmsub231ss, //!< Instruction 'vfmsub231ss' {FMA|AVX512_F}.
949 kIdVfmsubadd132pd, //!< Instruction 'vfmsubadd132pd' {FMA|AVX512_F+VL}.
950 kIdVfmsubadd132ps, //!< Instruction 'vfmsubadd132ps' {FMA|AVX512_F+VL}.
951 kIdVfmsubadd213pd, //!< Instruction 'vfmsubadd213pd' {FMA|AVX512_F+VL}.
952 kIdVfmsubadd213ps, //!< Instruction 'vfmsubadd213ps' {FMA|AVX512_F+VL}.
953 kIdVfmsubadd231pd, //!< Instruction 'vfmsubadd231pd' {FMA|AVX512_F+VL}.
954 kIdVfmsubadd231ps, //!< Instruction 'vfmsubadd231ps' {FMA|AVX512_F+VL}.
955 kIdVfmsubaddpd, //!< Instruction 'vfmsubaddpd' {FMA4}.
956 kIdVfmsubaddps, //!< Instruction 'vfmsubaddps' {FMA4}.
957 kIdVfmsubpd, //!< Instruction 'vfmsubpd' {FMA4}.
958 kIdVfmsubps, //!< Instruction 'vfmsubps' {FMA4}.
959 kIdVfmsubsd, //!< Instruction 'vfmsubsd' {FMA4}.
960 kIdVfmsubss, //!< Instruction 'vfmsubss' {FMA4}.
961 kIdVfnmadd132pd, //!< Instruction 'vfnmadd132pd' {FMA|AVX512_F+VL}.
962 kIdVfnmadd132ps, //!< Instruction 'vfnmadd132ps' {FMA|AVX512_F+VL}.
963 kIdVfnmadd132sd, //!< Instruction 'vfnmadd132sd' {FMA|AVX512_F}.
964 kIdVfnmadd132ss, //!< Instruction 'vfnmadd132ss' {FMA|AVX512_F}.
965 kIdVfnmadd213pd, //!< Instruction 'vfnmadd213pd' {FMA|AVX512_F+VL}.
966 kIdVfnmadd213ps, //!< Instruction 'vfnmadd213ps' {FMA|AVX512_F+VL}.
967 kIdVfnmadd213sd, //!< Instruction 'vfnmadd213sd' {FMA|AVX512_F}.
968 kIdVfnmadd213ss, //!< Instruction 'vfnmadd213ss' {FMA|AVX512_F}.
969 kIdVfnmadd231pd, //!< Instruction 'vfnmadd231pd' {FMA|AVX512_F+VL}.
970 kIdVfnmadd231ps, //!< Instruction 'vfnmadd231ps' {FMA|AVX512_F+VL}.
971 kIdVfnmadd231sd, //!< Instruction 'vfnmadd231sd' {FMA|AVX512_F}.
972 kIdVfnmadd231ss, //!< Instruction 'vfnmadd231ss' {FMA|AVX512_F}.
973 kIdVfnmaddpd, //!< Instruction 'vfnmaddpd' {FMA4}.
974 kIdVfnmaddps, //!< Instruction 'vfnmaddps' {FMA4}.
975 kIdVfnmaddsd, //!< Instruction 'vfnmaddsd' {FMA4}.
976 kIdVfnmaddss, //!< Instruction 'vfnmaddss' {FMA4}.
977 kIdVfnmsub132pd, //!< Instruction 'vfnmsub132pd' {FMA|AVX512_F+VL}.
978 kIdVfnmsub132ps, //!< Instruction 'vfnmsub132ps' {FMA|AVX512_F+VL}.
979 kIdVfnmsub132sd, //!< Instruction 'vfnmsub132sd' {FMA|AVX512_F}.
980 kIdVfnmsub132ss, //!< Instruction 'vfnmsub132ss' {FMA|AVX512_F}.
981 kIdVfnmsub213pd, //!< Instruction 'vfnmsub213pd' {FMA|AVX512_F+VL}.
982 kIdVfnmsub213ps, //!< Instruction 'vfnmsub213ps' {FMA|AVX512_F+VL}.
983 kIdVfnmsub213sd, //!< Instruction 'vfnmsub213sd' {FMA|AVX512_F}.
984 kIdVfnmsub213ss, //!< Instruction 'vfnmsub213ss' {FMA|AVX512_F}.
985 kIdVfnmsub231pd, //!< Instruction 'vfnmsub231pd' {FMA|AVX512_F+VL}.
986 kIdVfnmsub231ps, //!< Instruction 'vfnmsub231ps' {FMA|AVX512_F+VL}.
987 kIdVfnmsub231sd, //!< Instruction 'vfnmsub231sd' {FMA|AVX512_F}.
988 kIdVfnmsub231ss, //!< Instruction 'vfnmsub231ss' {FMA|AVX512_F}.
989 kIdVfnmsubpd, //!< Instruction 'vfnmsubpd' {FMA4}.
990 kIdVfnmsubps, //!< Instruction 'vfnmsubps' {FMA4}.
991 kIdVfnmsubsd, //!< Instruction 'vfnmsubsd' {FMA4}.
992 kIdVfnmsubss, //!< Instruction 'vfnmsubss' {FMA4}.
993 kIdVfpclasspd, //!< Instruction 'vfpclasspd' {AVX512_DQ+VL}.
994 kIdVfpclassps, //!< Instruction 'vfpclassps' {AVX512_DQ+VL}.
995 kIdVfpclasssd, //!< Instruction 'vfpclasssd' {AVX512_DQ}.
996 kIdVfpclassss, //!< Instruction 'vfpclassss' {AVX512_DQ}.
997 kIdVfrczpd, //!< Instruction 'vfrczpd' {XOP}.
998 kIdVfrczps, //!< Instruction 'vfrczps' {XOP}.
999 kIdVfrczsd, //!< Instruction 'vfrczsd' {XOP}.
1000 kIdVfrczss, //!< Instruction 'vfrczss' {XOP}.
1001 kIdVgatherdpd, //!< Instruction 'vgatherdpd' {AVX2|AVX512_F+VL}.
1002 kIdVgatherdps, //!< Instruction 'vgatherdps' {AVX2|AVX512_F+VL}.
1003 kIdVgatherpf0dpd, //!< Instruction 'vgatherpf0dpd' {AVX512_PFI}.
1004 kIdVgatherpf0dps, //!< Instruction 'vgatherpf0dps' {AVX512_PFI}.
1005 kIdVgatherpf0qpd, //!< Instruction 'vgatherpf0qpd' {AVX512_PFI}.
1006 kIdVgatherpf0qps, //!< Instruction 'vgatherpf0qps' {AVX512_PFI}.
1007 kIdVgatherpf1dpd, //!< Instruction 'vgatherpf1dpd' {AVX512_PFI}.
1008 kIdVgatherpf1dps, //!< Instruction 'vgatherpf1dps' {AVX512_PFI}.
1009 kIdVgatherpf1qpd, //!< Instruction 'vgatherpf1qpd' {AVX512_PFI}.
1010 kIdVgatherpf1qps, //!< Instruction 'vgatherpf1qps' {AVX512_PFI}.
1011 kIdVgatherqpd, //!< Instruction 'vgatherqpd' {AVX2|AVX512_F+VL}.
1012 kIdVgatherqps, //!< Instruction 'vgatherqps' {AVX2|AVX512_F+VL}.
1013 kIdVgetexppd, //!< Instruction 'vgetexppd' {AVX512_F+VL}.
1014 kIdVgetexpps, //!< Instruction 'vgetexpps' {AVX512_F+VL}.
1015 kIdVgetexpsd, //!< Instruction 'vgetexpsd' {AVX512_F}.
1016 kIdVgetexpss, //!< Instruction 'vgetexpss' {AVX512_F}.
1017 kIdVgetmantpd, //!< Instruction 'vgetmantpd' {AVX512_F+VL}.
1018 kIdVgetmantps, //!< Instruction 'vgetmantps' {AVX512_F+VL}.
1019 kIdVgetmantsd, //!< Instruction 'vgetmantsd' {AVX512_F}.
1020 kIdVgetmantss, //!< Instruction 'vgetmantss' {AVX512_F}.
1021 kIdVgf2p8affineinvqb, //!< Instruction 'vgf2p8affineinvqb' {AVX|AVX512_F+VL & GFNI}.
1022 kIdVgf2p8affineqb, //!< Instruction 'vgf2p8affineqb' {AVX|AVX512_F+VL & GFNI}.
1023 kIdVgf2p8mulb, //!< Instruction 'vgf2p8mulb' {AVX|AVX512_F+VL & GFNI}.
1024 kIdVhaddpd, //!< Instruction 'vhaddpd' {AVX}.
1025 kIdVhaddps, //!< Instruction 'vhaddps' {AVX}.
1026 kIdVhsubpd, //!< Instruction 'vhsubpd' {AVX}.
1027 kIdVhsubps, //!< Instruction 'vhsubps' {AVX}.
1028 kIdVinsertf128, //!< Instruction 'vinsertf128' {AVX}.
1029 kIdVinsertf32x4, //!< Instruction 'vinsertf32x4' {AVX512_F+VL}.
1030 kIdVinsertf32x8, //!< Instruction 'vinsertf32x8' {AVX512_DQ}.
1031 kIdVinsertf64x2, //!< Instruction 'vinsertf64x2' {AVX512_DQ+VL}.
1032 kIdVinsertf64x4, //!< Instruction 'vinsertf64x4' {AVX512_F}.
1033 kIdVinserti128, //!< Instruction 'vinserti128' {AVX2}.
1034 kIdVinserti32x4, //!< Instruction 'vinserti32x4' {AVX512_F+VL}.
1035 kIdVinserti32x8, //!< Instruction 'vinserti32x8' {AVX512_DQ}.
1036 kIdVinserti64x2, //!< Instruction 'vinserti64x2' {AVX512_DQ+VL}.
1037 kIdVinserti64x4, //!< Instruction 'vinserti64x4' {AVX512_F}.
1038 kIdVinsertps, //!< Instruction 'vinsertps' {AVX|AVX512_F}.
1039 kIdVlddqu, //!< Instruction 'vlddqu' {AVX}.
1040 kIdVldmxcsr, //!< Instruction 'vldmxcsr' {AVX}.
1041 kIdVmaskmovdqu, //!< Instruction 'vmaskmovdqu' {AVX}.
1042 kIdVmaskmovpd, //!< Instruction 'vmaskmovpd' {AVX}.
1043 kIdVmaskmovps, //!< Instruction 'vmaskmovps' {AVX}.
1044 kIdVmaxpd, //!< Instruction 'vmaxpd' {AVX|AVX512_F+VL}.
1045 kIdVmaxps, //!< Instruction 'vmaxps' {AVX|AVX512_F+VL}.
1046 kIdVmaxsd, //!< Instruction 'vmaxsd' {AVX|AVX512_F}.
1047 kIdVmaxss, //!< Instruction 'vmaxss' {AVX|AVX512_F}.
1048 kIdVmcall, //!< Instruction 'vmcall' {VMX}.
1049 kIdVmclear, //!< Instruction 'vmclear' {VMX}.
1050 kIdVmfunc, //!< Instruction 'vmfunc' {VMX}.
1051 kIdVminpd, //!< Instruction 'vminpd' {AVX|AVX512_F+VL}.
1052 kIdVminps, //!< Instruction 'vminps' {AVX|AVX512_F+VL}.
1053 kIdVminsd, //!< Instruction 'vminsd' {AVX|AVX512_F}.
1054 kIdVminss, //!< Instruction 'vminss' {AVX|AVX512_F}.
1055 kIdVmlaunch, //!< Instruction 'vmlaunch' {VMX}.
1056 kIdVmload, //!< Instruction 'vmload' {SVM}.
1057 kIdVmmcall, //!< Instruction 'vmmcall' {SVM}.
1058 kIdVmovapd, //!< Instruction 'vmovapd' {AVX|AVX512_F+VL}.
1059 kIdVmovaps, //!< Instruction 'vmovaps' {AVX|AVX512_F+VL}.
1060 kIdVmovd, //!< Instruction 'vmovd' {AVX|AVX512_F}.
1061 kIdVmovddup, //!< Instruction 'vmovddup' {AVX|AVX512_F+VL}.
1062 kIdVmovdqa, //!< Instruction 'vmovdqa' {AVX}.
1063 kIdVmovdqa32, //!< Instruction 'vmovdqa32' {AVX512_F+VL}.
1064 kIdVmovdqa64, //!< Instruction 'vmovdqa64' {AVX512_F+VL}.
1065 kIdVmovdqu, //!< Instruction 'vmovdqu' {AVX}.
1066 kIdVmovdqu16, //!< Instruction 'vmovdqu16' {AVX512_BW+VL}.
1067 kIdVmovdqu32, //!< Instruction 'vmovdqu32' {AVX512_F+VL}.
1068 kIdVmovdqu64, //!< Instruction 'vmovdqu64' {AVX512_F+VL}.
1069 kIdVmovdqu8, //!< Instruction 'vmovdqu8' {AVX512_BW+VL}.
1070 kIdVmovhlps, //!< Instruction 'vmovhlps' {AVX|AVX512_F}.
1071 kIdVmovhpd, //!< Instruction 'vmovhpd' {AVX|AVX512_F}.
1072 kIdVmovhps, //!< Instruction 'vmovhps' {AVX|AVX512_F}.
1073 kIdVmovlhps, //!< Instruction 'vmovlhps' {AVX|AVX512_F}.
1074 kIdVmovlpd, //!< Instruction 'vmovlpd' {AVX|AVX512_F}.
1075 kIdVmovlps, //!< Instruction 'vmovlps' {AVX|AVX512_F}.
1076 kIdVmovmskpd, //!< Instruction 'vmovmskpd' {AVX}.
1077 kIdVmovmskps, //!< Instruction 'vmovmskps' {AVX}.
1078 kIdVmovntdq, //!< Instruction 'vmovntdq' {AVX|AVX512_F+VL}.
1079 kIdVmovntdqa, //!< Instruction 'vmovntdqa' {AVX|AVX2|AVX512_F+VL}.
1080 kIdVmovntpd, //!< Instruction 'vmovntpd' {AVX|AVX512_F+VL}.
1081 kIdVmovntps, //!< Instruction 'vmovntps' {AVX|AVX512_F+VL}.
1082 kIdVmovq, //!< Instruction 'vmovq' {AVX|AVX512_F}.
1083 kIdVmovsd, //!< Instruction 'vmovsd' {AVX|AVX512_F}.
1084 kIdVmovshdup, //!< Instruction 'vmovshdup' {AVX|AVX512_F+VL}.
1085 kIdVmovsldup, //!< Instruction 'vmovsldup' {AVX|AVX512_F+VL}.
1086 kIdVmovss, //!< Instruction 'vmovss' {AVX|AVX512_F}.
1087 kIdVmovupd, //!< Instruction 'vmovupd' {AVX|AVX512_F+VL}.
1088 kIdVmovups, //!< Instruction 'vmovups' {AVX|AVX512_F+VL}.
1089 kIdVmpsadbw, //!< Instruction 'vmpsadbw' {AVX|AVX2}.
1090 kIdVmptrld, //!< Instruction 'vmptrld' {VMX}.
1091 kIdVmptrst, //!< Instruction 'vmptrst' {VMX}.
1092 kIdVmread, //!< Instruction 'vmread' {VMX}.
1093 kIdVmresume, //!< Instruction 'vmresume' {VMX}.
1094 kIdVmrun, //!< Instruction 'vmrun' {SVM}.
1095 kIdVmsave, //!< Instruction 'vmsave' {SVM}.
1096 kIdVmulpd, //!< Instruction 'vmulpd' {AVX|AVX512_F+VL}.
1097 kIdVmulps, //!< Instruction 'vmulps' {AVX|AVX512_F+VL}.
1098 kIdVmulsd, //!< Instruction 'vmulsd' {AVX|AVX512_F}.
1099 kIdVmulss, //!< Instruction 'vmulss' {AVX|AVX512_F}.
1100 kIdVmwrite, //!< Instruction 'vmwrite' {VMX}.
1101 kIdVmxon, //!< Instruction 'vmxon' {VMX}.
1102 kIdVorpd, //!< Instruction 'vorpd' {AVX|AVX512_DQ+VL}.
1103 kIdVorps, //!< Instruction 'vorps' {AVX|AVX512_DQ+VL}.
1104 kIdVp4dpwssd, //!< Instruction 'vp4dpwssd' {AVX512_4VNNIW}.
1105 kIdVp4dpwssds, //!< Instruction 'vp4dpwssds' {AVX512_4VNNIW}.
1106 kIdVpabsb, //!< Instruction 'vpabsb' {AVX|AVX2|AVX512_BW+VL}.
1107 kIdVpabsd, //!< Instruction 'vpabsd' {AVX|AVX2|AVX512_F+VL}.
1108 kIdVpabsq, //!< Instruction 'vpabsq' {AVX512_F+VL}.
1109 kIdVpabsw, //!< Instruction 'vpabsw' {AVX|AVX2|AVX512_BW+VL}.
1110 kIdVpackssdw, //!< Instruction 'vpackssdw' {AVX|AVX2|AVX512_BW+VL}.
1111 kIdVpacksswb, //!< Instruction 'vpacksswb' {AVX|AVX2|AVX512_BW+VL}.
1112 kIdVpackusdw, //!< Instruction 'vpackusdw' {AVX|AVX2|AVX512_BW+VL}.
1113 kIdVpackuswb, //!< Instruction 'vpackuswb' {AVX|AVX2|AVX512_BW+VL}.
1114 kIdVpaddb, //!< Instruction 'vpaddb' {AVX|AVX2|AVX512_BW+VL}.
1115 kIdVpaddd, //!< Instruction 'vpaddd' {AVX|AVX2|AVX512_F+VL}.
1116 kIdVpaddq, //!< Instruction 'vpaddq' {AVX|AVX2|AVX512_F+VL}.
1117 kIdVpaddsb, //!< Instruction 'vpaddsb' {AVX|AVX2|AVX512_BW+VL}.
1118 kIdVpaddsw, //!< Instruction 'vpaddsw' {AVX|AVX2|AVX512_BW+VL}.
1119 kIdVpaddusb, //!< Instruction 'vpaddusb' {AVX|AVX2|AVX512_BW+VL}.
1120 kIdVpaddusw, //!< Instruction 'vpaddusw' {AVX|AVX2|AVX512_BW+VL}.
1121 kIdVpaddw, //!< Instruction 'vpaddw' {AVX|AVX2|AVX512_BW+VL}.
1122 kIdVpalignr, //!< Instruction 'vpalignr' {AVX|AVX2|AVX512_BW+VL}.
1123 kIdVpand, //!< Instruction 'vpand' {AVX|AVX2}.
1124 kIdVpandd, //!< Instruction 'vpandd' {AVX512_F+VL}.
1125 kIdVpandn, //!< Instruction 'vpandn' {AVX|AVX2}.
1126 kIdVpandnd, //!< Instruction 'vpandnd' {AVX512_F+VL}.
1127 kIdVpandnq, //!< Instruction 'vpandnq' {AVX512_F+VL}.
1128 kIdVpandq, //!< Instruction 'vpandq' {AVX512_F+VL}.
1129 kIdVpavgb, //!< Instruction 'vpavgb' {AVX|AVX2|AVX512_BW+VL}.
1130 kIdVpavgw, //!< Instruction 'vpavgw' {AVX|AVX2|AVX512_BW+VL}.
1131 kIdVpblendd, //!< Instruction 'vpblendd' {AVX2}.
1132 kIdVpblendvb, //!< Instruction 'vpblendvb' {AVX|AVX2}.
1133 kIdVpblendw, //!< Instruction 'vpblendw' {AVX|AVX2}.
1134 kIdVpbroadcastb, //!< Instruction 'vpbroadcastb' {AVX2|AVX512_BW+VL}.
1135 kIdVpbroadcastd, //!< Instruction 'vpbroadcastd' {AVX2|AVX512_F+VL}.
1136 kIdVpbroadcastmb2d, //!< Instruction 'vpbroadcastmb2d' {AVX512_CDI+VL}.
1137 kIdVpbroadcastmb2q, //!< Instruction 'vpbroadcastmb2q' {AVX512_CDI+VL}.
1138 kIdVpbroadcastq, //!< Instruction 'vpbroadcastq' {AVX2|AVX512_F+VL}.
1139 kIdVpbroadcastw, //!< Instruction 'vpbroadcastw' {AVX2|AVX512_BW+VL}.
1140 kIdVpclmulqdq, //!< Instruction 'vpclmulqdq' {AVX|AVX512_F+VL & PCLMULQDQ|VPCLMULQDQ}.
1141 kIdVpcmov, //!< Instruction 'vpcmov' {XOP}.
1142 kIdVpcmpb, //!< Instruction 'vpcmpb' {AVX512_BW+VL}.
1143 kIdVpcmpd, //!< Instruction 'vpcmpd' {AVX512_F+VL}.
1144 kIdVpcmpeqb, //!< Instruction 'vpcmpeqb' {AVX|AVX2|AVX512_BW+VL}.
1145 kIdVpcmpeqd, //!< Instruction 'vpcmpeqd' {AVX|AVX2|AVX512_F+VL}.
1146 kIdVpcmpeqq, //!< Instruction 'vpcmpeqq' {AVX|AVX2|AVX512_F+VL}.
1147 kIdVpcmpeqw, //!< Instruction 'vpcmpeqw' {AVX|AVX2|AVX512_BW+VL}.
1148 kIdVpcmpestri, //!< Instruction 'vpcmpestri' {AVX}.
1149 kIdVpcmpestrm, //!< Instruction 'vpcmpestrm' {AVX}.
1150 kIdVpcmpgtb, //!< Instruction 'vpcmpgtb' {AVX|AVX2|AVX512_BW+VL}.
1151 kIdVpcmpgtd, //!< Instruction 'vpcmpgtd' {AVX|AVX2|AVX512_F+VL}.
1152 kIdVpcmpgtq, //!< Instruction 'vpcmpgtq' {AVX|AVX2|AVX512_F+VL}.
1153 kIdVpcmpgtw, //!< Instruction 'vpcmpgtw' {AVX|AVX2|AVX512_BW+VL}.
1154 kIdVpcmpistri, //!< Instruction 'vpcmpistri' {AVX}.
1155 kIdVpcmpistrm, //!< Instruction 'vpcmpistrm' {AVX}.
1156 kIdVpcmpq, //!< Instruction 'vpcmpq' {AVX512_F+VL}.
1157 kIdVpcmpub, //!< Instruction 'vpcmpub' {AVX512_BW+VL}.
1158 kIdVpcmpud, //!< Instruction 'vpcmpud' {AVX512_F+VL}.
1159 kIdVpcmpuq, //!< Instruction 'vpcmpuq' {AVX512_F+VL}.
1160 kIdVpcmpuw, //!< Instruction 'vpcmpuw' {AVX512_BW+VL}.
1161 kIdVpcmpw, //!< Instruction 'vpcmpw' {AVX512_BW+VL}.
1162 kIdVpcomb, //!< Instruction 'vpcomb' {XOP}.
1163 kIdVpcomd, //!< Instruction 'vpcomd' {XOP}.
1164 kIdVpcompressb, //!< Instruction 'vpcompressb' {AVX512_VBMI2+VL}.
1165 kIdVpcompressd, //!< Instruction 'vpcompressd' {AVX512_F+VL}.
1166 kIdVpcompressq, //!< Instruction 'vpcompressq' {AVX512_F+VL}.
1167 kIdVpcompressw, //!< Instruction 'vpcompressw' {AVX512_VBMI2+VL}.
1168 kIdVpcomq, //!< Instruction 'vpcomq' {XOP}.
1169 kIdVpcomub, //!< Instruction 'vpcomub' {XOP}.
1170 kIdVpcomud, //!< Instruction 'vpcomud' {XOP}.
1171 kIdVpcomuq, //!< Instruction 'vpcomuq' {XOP}.
1172 kIdVpcomuw, //!< Instruction 'vpcomuw' {XOP}.
1173 kIdVpcomw, //!< Instruction 'vpcomw' {XOP}.
1174 kIdVpconflictd, //!< Instruction 'vpconflictd' {AVX512_CDI+VL}.
1175 kIdVpconflictq, //!< Instruction 'vpconflictq' {AVX512_CDI+VL}.
1176 kIdVpdpbusd, //!< Instruction 'vpdpbusd' {AVX512_VNNI+VL}.
1177 kIdVpdpbusds, //!< Instruction 'vpdpbusds' {AVX512_VNNI+VL}.
1178 kIdVpdpwssd, //!< Instruction 'vpdpwssd' {AVX512_VNNI+VL}.
1179 kIdVpdpwssds, //!< Instruction 'vpdpwssds' {AVX512_VNNI+VL}.
1180 kIdVperm2f128, //!< Instruction 'vperm2f128' {AVX}.
1181 kIdVperm2i128, //!< Instruction 'vperm2i128' {AVX2}.
1182 kIdVpermb, //!< Instruction 'vpermb' {AVX512_VBMI+VL}.
1183 kIdVpermd, //!< Instruction 'vpermd' {AVX2|AVX512_F+VL}.
1184 kIdVpermi2b, //!< Instruction 'vpermi2b' {AVX512_VBMI+VL}.
1185 kIdVpermi2d, //!< Instruction 'vpermi2d' {AVX512_F+VL}.
1186 kIdVpermi2pd, //!< Instruction 'vpermi2pd' {AVX512_F+VL}.
1187 kIdVpermi2ps, //!< Instruction 'vpermi2ps' {AVX512_F+VL}.
1188 kIdVpermi2q, //!< Instruction 'vpermi2q' {AVX512_F+VL}.
1189 kIdVpermi2w, //!< Instruction 'vpermi2w' {AVX512_BW+VL}.
1190 kIdVpermil2pd, //!< Instruction 'vpermil2pd' {XOP}.
1191 kIdVpermil2ps, //!< Instruction 'vpermil2ps' {XOP}.
1192 kIdVpermilpd, //!< Instruction 'vpermilpd' {AVX|AVX512_F+VL}.
1193 kIdVpermilps, //!< Instruction 'vpermilps' {AVX|AVX512_F+VL}.
1194 kIdVpermpd, //!< Instruction 'vpermpd' {AVX2|AVX512_F+VL}.
1195 kIdVpermps, //!< Instruction 'vpermps' {AVX2|AVX512_F+VL}.
1196 kIdVpermq, //!< Instruction 'vpermq' {AVX2|AVX512_F+VL}.
1197 kIdVpermt2b, //!< Instruction 'vpermt2b' {AVX512_VBMI+VL}.
1198 kIdVpermt2d, //!< Instruction 'vpermt2d' {AVX512_F+VL}.
1199 kIdVpermt2pd, //!< Instruction 'vpermt2pd' {AVX512_F+VL}.
1200 kIdVpermt2ps, //!< Instruction 'vpermt2ps' {AVX512_F+VL}.
1201 kIdVpermt2q, //!< Instruction 'vpermt2q' {AVX512_F+VL}.
1202 kIdVpermt2w, //!< Instruction 'vpermt2w' {AVX512_BW+VL}.
1203 kIdVpermw, //!< Instruction 'vpermw' {AVX512_BW+VL}.
1204 kIdVpexpandb, //!< Instruction 'vpexpandb' {AVX512_VBMI2+VL}.
1205 kIdVpexpandd, //!< Instruction 'vpexpandd' {AVX512_F+VL}.
1206 kIdVpexpandq, //!< Instruction 'vpexpandq' {AVX512_F+VL}.
1207 kIdVpexpandw, //!< Instruction 'vpexpandw' {AVX512_VBMI2+VL}.
1208 kIdVpextrb, //!< Instruction 'vpextrb' {AVX|AVX512_BW}.
1209 kIdVpextrd, //!< Instruction 'vpextrd' {AVX|AVX512_DQ}.
1210 kIdVpextrq, //!< Instruction 'vpextrq' {AVX|AVX512_DQ} (X64).
1211 kIdVpextrw, //!< Instruction 'vpextrw' {AVX|AVX512_BW}.
1212 kIdVpgatherdd, //!< Instruction 'vpgatherdd' {AVX2|AVX512_F+VL}.
1213 kIdVpgatherdq, //!< Instruction 'vpgatherdq' {AVX2|AVX512_F+VL}.
1214 kIdVpgatherqd, //!< Instruction 'vpgatherqd' {AVX2|AVX512_F+VL}.
1215 kIdVpgatherqq, //!< Instruction 'vpgatherqq' {AVX2|AVX512_F+VL}.
1216 kIdVphaddbd, //!< Instruction 'vphaddbd' {XOP}.
1217 kIdVphaddbq, //!< Instruction 'vphaddbq' {XOP}.
1218 kIdVphaddbw, //!< Instruction 'vphaddbw' {XOP}.
1219 kIdVphaddd, //!< Instruction 'vphaddd' {AVX|AVX2}.
1220 kIdVphadddq, //!< Instruction 'vphadddq' {XOP}.
1221 kIdVphaddsw, //!< Instruction 'vphaddsw' {AVX|AVX2}.
1222 kIdVphaddubd, //!< Instruction 'vphaddubd' {XOP}.
1223 kIdVphaddubq, //!< Instruction 'vphaddubq' {XOP}.
1224 kIdVphaddubw, //!< Instruction 'vphaddubw' {XOP}.
1225 kIdVphaddudq, //!< Instruction 'vphaddudq' {XOP}.
1226 kIdVphadduwd, //!< Instruction 'vphadduwd' {XOP}.
1227 kIdVphadduwq, //!< Instruction 'vphadduwq' {XOP}.
1228 kIdVphaddw, //!< Instruction 'vphaddw' {AVX|AVX2}.
1229 kIdVphaddwd, //!< Instruction 'vphaddwd' {XOP}.
1230 kIdVphaddwq, //!< Instruction 'vphaddwq' {XOP}.
1231 kIdVphminposuw, //!< Instruction 'vphminposuw' {AVX}.
1232 kIdVphsubbw, //!< Instruction 'vphsubbw' {XOP}.
1233 kIdVphsubd, //!< Instruction 'vphsubd' {AVX|AVX2}.
1234 kIdVphsubdq, //!< Instruction 'vphsubdq' {XOP}.
1235 kIdVphsubsw, //!< Instruction 'vphsubsw' {AVX|AVX2}.
1236 kIdVphsubw, //!< Instruction 'vphsubw' {AVX|AVX2}.
1237 kIdVphsubwd, //!< Instruction 'vphsubwd' {XOP}.
1238 kIdVpinsrb, //!< Instruction 'vpinsrb' {AVX|AVX512_BW}.
1239 kIdVpinsrd, //!< Instruction 'vpinsrd' {AVX|AVX512_DQ}.
1240 kIdVpinsrq, //!< Instruction 'vpinsrq' {AVX|AVX512_DQ} (X64).
1241 kIdVpinsrw, //!< Instruction 'vpinsrw' {AVX|AVX512_BW}.
1242 kIdVplzcntd, //!< Instruction 'vplzcntd' {AVX512_CDI+VL}.
1243 kIdVplzcntq, //!< Instruction 'vplzcntq' {AVX512_CDI+VL}.
1244 kIdVpmacsdd, //!< Instruction 'vpmacsdd' {XOP}.
1245 kIdVpmacsdqh, //!< Instruction 'vpmacsdqh' {XOP}.
1246 kIdVpmacsdql, //!< Instruction 'vpmacsdql' {XOP}.
1247 kIdVpmacssdd, //!< Instruction 'vpmacssdd' {XOP}.
1248 kIdVpmacssdqh, //!< Instruction 'vpmacssdqh' {XOP}.
1249 kIdVpmacssdql, //!< Instruction 'vpmacssdql' {XOP}.
1250 kIdVpmacsswd, //!< Instruction 'vpmacsswd' {XOP}.
1251 kIdVpmacssww, //!< Instruction 'vpmacssww' {XOP}.
1252 kIdVpmacswd, //!< Instruction 'vpmacswd' {XOP}.
1253 kIdVpmacsww, //!< Instruction 'vpmacsww' {XOP}.
1254 kIdVpmadcsswd, //!< Instruction 'vpmadcsswd' {XOP}.
1255 kIdVpmadcswd, //!< Instruction 'vpmadcswd' {XOP}.
1256 kIdVpmadd52huq, //!< Instruction 'vpmadd52huq' {AVX512_IFMA+VL}.
1257 kIdVpmadd52luq, //!< Instruction 'vpmadd52luq' {AVX512_IFMA+VL}.
1258 kIdVpmaddubsw, //!< Instruction 'vpmaddubsw' {AVX|AVX2|AVX512_BW+VL}.
1259 kIdVpmaddwd, //!< Instruction 'vpmaddwd' {AVX|AVX2|AVX512_BW+VL}.
1260 kIdVpmaskmovd, //!< Instruction 'vpmaskmovd' {AVX2}.
1261 kIdVpmaskmovq, //!< Instruction 'vpmaskmovq' {AVX2}.
1262 kIdVpmaxsb, //!< Instruction 'vpmaxsb' {AVX|AVX2|AVX512_BW+VL}.
1263 kIdVpmaxsd, //!< Instruction 'vpmaxsd' {AVX|AVX2|AVX512_F+VL}.
1264 kIdVpmaxsq, //!< Instruction 'vpmaxsq' {AVX512_F+VL}.
1265 kIdVpmaxsw, //!< Instruction 'vpmaxsw' {AVX|AVX2|AVX512_BW+VL}.
1266 kIdVpmaxub, //!< Instruction 'vpmaxub' {AVX|AVX2|AVX512_BW+VL}.
1267 kIdVpmaxud, //!< Instruction 'vpmaxud' {AVX|AVX2|AVX512_F+VL}.
1268 kIdVpmaxuq, //!< Instruction 'vpmaxuq' {AVX512_F+VL}.
1269 kIdVpmaxuw, //!< Instruction 'vpmaxuw' {AVX|AVX2|AVX512_BW+VL}.
1270 kIdVpminsb, //!< Instruction 'vpminsb' {AVX|AVX2|AVX512_BW+VL}.
1271 kIdVpminsd, //!< Instruction 'vpminsd' {AVX|AVX2|AVX512_F+VL}.
1272 kIdVpminsq, //!< Instruction 'vpminsq' {AVX512_F+VL}.
1273 kIdVpminsw, //!< Instruction 'vpminsw' {AVX|AVX2|AVX512_BW+VL}.
1274 kIdVpminub, //!< Instruction 'vpminub' {AVX|AVX2|AVX512_BW+VL}.
1275 kIdVpminud, //!< Instruction 'vpminud' {AVX|AVX2|AVX512_F+VL}.
1276 kIdVpminuq, //!< Instruction 'vpminuq' {AVX512_F+VL}.
1277 kIdVpminuw, //!< Instruction 'vpminuw' {AVX|AVX2|AVX512_BW+VL}.
1278 kIdVpmovb2m, //!< Instruction 'vpmovb2m' {AVX512_BW+VL}.
1279 kIdVpmovd2m, //!< Instruction 'vpmovd2m' {AVX512_DQ+VL}.
1280 kIdVpmovdb, //!< Instruction 'vpmovdb' {AVX512_F+VL}.
1281 kIdVpmovdw, //!< Instruction 'vpmovdw' {AVX512_F+VL}.
1282 kIdVpmovm2b, //!< Instruction 'vpmovm2b' {AVX512_BW+VL}.
1283 kIdVpmovm2d, //!< Instruction 'vpmovm2d' {AVX512_DQ+VL}.
1284 kIdVpmovm2q, //!< Instruction 'vpmovm2q' {AVX512_DQ+VL}.
1285 kIdVpmovm2w, //!< Instruction 'vpmovm2w' {AVX512_BW+VL}.
1286 kIdVpmovmskb, //!< Instruction 'vpmovmskb' {AVX|AVX2}.
1287 kIdVpmovq2m, //!< Instruction 'vpmovq2m' {AVX512_DQ+VL}.
1288 kIdVpmovqb, //!< Instruction 'vpmovqb' {AVX512_F+VL}.
1289 kIdVpmovqd, //!< Instruction 'vpmovqd' {AVX512_F+VL}.
1290 kIdVpmovqw, //!< Instruction 'vpmovqw' {AVX512_F+VL}.
1291 kIdVpmovsdb, //!< Instruction 'vpmovsdb' {AVX512_F+VL}.
1292 kIdVpmovsdw, //!< Instruction 'vpmovsdw' {AVX512_F+VL}.
1293 kIdVpmovsqb, //!< Instruction 'vpmovsqb' {AVX512_F+VL}.
1294 kIdVpmovsqd, //!< Instruction 'vpmovsqd' {AVX512_F+VL}.
1295 kIdVpmovsqw, //!< Instruction 'vpmovsqw' {AVX512_F+VL}.
1296 kIdVpmovswb, //!< Instruction 'vpmovswb' {AVX512_BW+VL}.
1297 kIdVpmovsxbd, //!< Instruction 'vpmovsxbd' {AVX|AVX2|AVX512_F+VL}.
1298 kIdVpmovsxbq, //!< Instruction 'vpmovsxbq' {AVX|AVX2|AVX512_F+VL}.
1299 kIdVpmovsxbw, //!< Instruction 'vpmovsxbw' {AVX|AVX2|AVX512_BW+VL}.
1300 kIdVpmovsxdq, //!< Instruction 'vpmovsxdq' {AVX|AVX2|AVX512_F+VL}.
1301 kIdVpmovsxwd, //!< Instruction 'vpmovsxwd' {AVX|AVX2|AVX512_F+VL}.
1302 kIdVpmovsxwq, //!< Instruction 'vpmovsxwq' {AVX|AVX2|AVX512_F+VL}.
1303 kIdVpmovusdb, //!< Instruction 'vpmovusdb' {AVX512_F+VL}.
1304 kIdVpmovusdw, //!< Instruction 'vpmovusdw' {AVX512_F+VL}.
1305 kIdVpmovusqb, //!< Instruction 'vpmovusqb' {AVX512_F+VL}.
1306 kIdVpmovusqd, //!< Instruction 'vpmovusqd' {AVX512_F+VL}.
1307 kIdVpmovusqw, //!< Instruction 'vpmovusqw' {AVX512_F+VL}.
1308 kIdVpmovuswb, //!< Instruction 'vpmovuswb' {AVX512_BW+VL}.
1309 kIdVpmovw2m, //!< Instruction 'vpmovw2m' {AVX512_BW+VL}.
1310 kIdVpmovwb, //!< Instruction 'vpmovwb' {AVX512_BW+VL}.
1311 kIdVpmovzxbd, //!< Instruction 'vpmovzxbd' {AVX|AVX2|AVX512_F+VL}.
1312 kIdVpmovzxbq, //!< Instruction 'vpmovzxbq' {AVX|AVX2|AVX512_F+VL}.
1313 kIdVpmovzxbw, //!< Instruction 'vpmovzxbw' {AVX|AVX2|AVX512_BW+VL}.
1314 kIdVpmovzxdq, //!< Instruction 'vpmovzxdq' {AVX|AVX2|AVX512_F+VL}.
1315 kIdVpmovzxwd, //!< Instruction 'vpmovzxwd' {AVX|AVX2|AVX512_F+VL}.
1316 kIdVpmovzxwq, //!< Instruction 'vpmovzxwq' {AVX|AVX2|AVX512_F+VL}.
1317 kIdVpmuldq, //!< Instruction 'vpmuldq' {AVX|AVX2|AVX512_F+VL}.
1318 kIdVpmulhrsw, //!< Instruction 'vpmulhrsw' {AVX|AVX2|AVX512_BW+VL}.
1319 kIdVpmulhuw, //!< Instruction 'vpmulhuw' {AVX|AVX2|AVX512_BW+VL}.
1320 kIdVpmulhw, //!< Instruction 'vpmulhw' {AVX|AVX2|AVX512_BW+VL}.
1321 kIdVpmulld, //!< Instruction 'vpmulld' {AVX|AVX2|AVX512_F+VL}.
1322 kIdVpmullq, //!< Instruction 'vpmullq' {AVX512_DQ+VL}.
1323 kIdVpmullw, //!< Instruction 'vpmullw' {AVX|AVX2|AVX512_BW+VL}.
1324 kIdVpmultishiftqb, //!< Instruction 'vpmultishiftqb' {AVX512_VBMI+VL}.
1325 kIdVpmuludq, //!< Instruction 'vpmuludq' {AVX|AVX2|AVX512_F+VL}.
1326 kIdVpopcntb, //!< Instruction 'vpopcntb' {AVX512_BITALG+VL}.
1327 kIdVpopcntd, //!< Instruction 'vpopcntd' {AVX512_VPOPCNTDQ+VL}.
1328 kIdVpopcntq, //!< Instruction 'vpopcntq' {AVX512_VPOPCNTDQ+VL}.
1329 kIdVpopcntw, //!< Instruction 'vpopcntw' {AVX512_BITALG+VL}.
1330 kIdVpor, //!< Instruction 'vpor' {AVX|AVX2}.
1331 kIdVpord, //!< Instruction 'vpord' {AVX512_F+VL}.
1332 kIdVporq, //!< Instruction 'vporq' {AVX512_F+VL}.
1333 kIdVpperm, //!< Instruction 'vpperm' {XOP}.
1334 kIdVprold, //!< Instruction 'vprold' {AVX512_F+VL}.
1335 kIdVprolq, //!< Instruction 'vprolq' {AVX512_F+VL}.
1336 kIdVprolvd, //!< Instruction 'vprolvd' {AVX512_F+VL}.
1337 kIdVprolvq, //!< Instruction 'vprolvq' {AVX512_F+VL}.
1338 kIdVprord, //!< Instruction 'vprord' {AVX512_F+VL}.
1339 kIdVprorq, //!< Instruction 'vprorq' {AVX512_F+VL}.
1340 kIdVprorvd, //!< Instruction 'vprorvd' {AVX512_F+VL}.
1341 kIdVprorvq, //!< Instruction 'vprorvq' {AVX512_F+VL}.
1342 kIdVprotb, //!< Instruction 'vprotb' {XOP}.
1343 kIdVprotd, //!< Instruction 'vprotd' {XOP}.
1344 kIdVprotq, //!< Instruction 'vprotq' {XOP}.
1345 kIdVprotw, //!< Instruction 'vprotw' {XOP}.
1346 kIdVpsadbw, //!< Instruction 'vpsadbw' {AVX|AVX2|AVX512_BW+VL}.
1347 kIdVpscatterdd, //!< Instruction 'vpscatterdd' {AVX512_F+VL}.
1348 kIdVpscatterdq, //!< Instruction 'vpscatterdq' {AVX512_F+VL}.
1349 kIdVpscatterqd, //!< Instruction 'vpscatterqd' {AVX512_F+VL}.
1350 kIdVpscatterqq, //!< Instruction 'vpscatterqq' {AVX512_F+VL}.
1351 kIdVpshab, //!< Instruction 'vpshab' {XOP}.
1352 kIdVpshad, //!< Instruction 'vpshad' {XOP}.
1353 kIdVpshaq, //!< Instruction 'vpshaq' {XOP}.
1354 kIdVpshaw, //!< Instruction 'vpshaw' {XOP}.
1355 kIdVpshlb, //!< Instruction 'vpshlb' {XOP}.
1356 kIdVpshld, //!< Instruction 'vpshld' {XOP}.
1357 kIdVpshldd, //!< Instruction 'vpshldd' {AVX512_VBMI2+VL}.
1358 kIdVpshldq, //!< Instruction 'vpshldq' {AVX512_VBMI2+VL}.
1359 kIdVpshldvd, //!< Instruction 'vpshldvd' {AVX512_VBMI2+VL}.
1360 kIdVpshldvq, //!< Instruction 'vpshldvq' {AVX512_VBMI2+VL}.
1361 kIdVpshldvw, //!< Instruction 'vpshldvw' {AVX512_VBMI2+VL}.
1362 kIdVpshldw, //!< Instruction 'vpshldw' {AVX512_VBMI2+VL}.
1363 kIdVpshlq, //!< Instruction 'vpshlq' {XOP}.
1364 kIdVpshlw, //!< Instruction 'vpshlw' {XOP}.
1365 kIdVpshrdd, //!< Instruction 'vpshrdd' {AVX512_VBMI2+VL}.
1366 kIdVpshrdq, //!< Instruction 'vpshrdq' {AVX512_VBMI2+VL}.
1367 kIdVpshrdvd, //!< Instruction 'vpshrdvd' {AVX512_VBMI2+VL}.
1368 kIdVpshrdvq, //!< Instruction 'vpshrdvq' {AVX512_VBMI2+VL}.
1369 kIdVpshrdvw, //!< Instruction 'vpshrdvw' {AVX512_VBMI2+VL}.
1370 kIdVpshrdw, //!< Instruction 'vpshrdw' {AVX512_VBMI2+VL}.
1371 kIdVpshufb, //!< Instruction 'vpshufb' {AVX|AVX2|AVX512_BW+VL}.
1372 kIdVpshufbitqmb, //!< Instruction 'vpshufbitqmb' {AVX512_BITALG+VL}.
1373 kIdVpshufd, //!< Instruction 'vpshufd' {AVX|AVX2|AVX512_F+VL}.
1374 kIdVpshufhw, //!< Instruction 'vpshufhw' {AVX|AVX2|AVX512_BW+VL}.
1375 kIdVpshuflw, //!< Instruction 'vpshuflw' {AVX|AVX2|AVX512_BW+VL}.
1376 kIdVpsignb, //!< Instruction 'vpsignb' {AVX|AVX2}.
1377 kIdVpsignd, //!< Instruction 'vpsignd' {AVX|AVX2}.
1378 kIdVpsignw, //!< Instruction 'vpsignw' {AVX|AVX2}.
1379 kIdVpslld, //!< Instruction 'vpslld' {AVX|AVX2|AVX512_F+VL}.
1380 kIdVpslldq, //!< Instruction 'vpslldq' {AVX|AVX2|AVX512_BW+VL}.
1381 kIdVpsllq, //!< Instruction 'vpsllq' {AVX|AVX2|AVX512_F+VL}.
1382 kIdVpsllvd, //!< Instruction 'vpsllvd' {AVX2|AVX512_F+VL}.
1383 kIdVpsllvq, //!< Instruction 'vpsllvq' {AVX2|AVX512_F+VL}.
1384 kIdVpsllvw, //!< Instruction 'vpsllvw' {AVX512_BW+VL}.
1385 kIdVpsllw, //!< Instruction 'vpsllw' {AVX|AVX2|AVX512_BW+VL}.
1386 kIdVpsrad, //!< Instruction 'vpsrad' {AVX|AVX2|AVX512_F+VL}.
1387 kIdVpsraq, //!< Instruction 'vpsraq' {AVX512_F+VL}.
1388 kIdVpsravd, //!< Instruction 'vpsravd' {AVX2|AVX512_F+VL}.
1389 kIdVpsravq, //!< Instruction 'vpsravq' {AVX512_F+VL}.
1390 kIdVpsravw, //!< Instruction 'vpsravw' {AVX512_BW+VL}.
1391 kIdVpsraw, //!< Instruction 'vpsraw' {AVX|AVX2|AVX512_BW+VL}.
1392 kIdVpsrld, //!< Instruction 'vpsrld' {AVX|AVX2|AVX512_F+VL}.
1393 kIdVpsrldq, //!< Instruction 'vpsrldq' {AVX|AVX2|AVX512_BW+VL}.
1394 kIdVpsrlq, //!< Instruction 'vpsrlq' {AVX|AVX2|AVX512_F+VL}.
1395 kIdVpsrlvd, //!< Instruction 'vpsrlvd' {AVX2|AVX512_F+VL}.
1396 kIdVpsrlvq, //!< Instruction 'vpsrlvq' {AVX2|AVX512_F+VL}.
1397 kIdVpsrlvw, //!< Instruction 'vpsrlvw' {AVX512_BW+VL}.
1398 kIdVpsrlw, //!< Instruction 'vpsrlw' {AVX|AVX2|AVX512_BW+VL}.
1399 kIdVpsubb, //!< Instruction 'vpsubb' {AVX|AVX2|AVX512_BW+VL}.
1400 kIdVpsubd, //!< Instruction 'vpsubd' {AVX|AVX2|AVX512_F+VL}.
1401 kIdVpsubq, //!< Instruction 'vpsubq' {AVX|AVX2|AVX512_F+VL}.
1402 kIdVpsubsb, //!< Instruction 'vpsubsb' {AVX|AVX2|AVX512_BW+VL}.
1403 kIdVpsubsw, //!< Instruction 'vpsubsw' {AVX|AVX2|AVX512_BW+VL}.
1404 kIdVpsubusb, //!< Instruction 'vpsubusb' {AVX|AVX2|AVX512_BW+VL}.
1405 kIdVpsubusw, //!< Instruction 'vpsubusw' {AVX|AVX2|AVX512_BW+VL}.
1406 kIdVpsubw, //!< Instruction 'vpsubw' {AVX|AVX2|AVX512_BW+VL}.
1407 kIdVpternlogd, //!< Instruction 'vpternlogd' {AVX512_F+VL}.
1408 kIdVpternlogq, //!< Instruction 'vpternlogq' {AVX512_F+VL}.
1409 kIdVptest, //!< Instruction 'vptest' {AVX}.
1410 kIdVptestmb, //!< Instruction 'vptestmb' {AVX512_BW+VL}.
1411 kIdVptestmd, //!< Instruction 'vptestmd' {AVX512_F+VL}.
1412 kIdVptestmq, //!< Instruction 'vptestmq' {AVX512_F+VL}.
1413 kIdVptestmw, //!< Instruction 'vptestmw' {AVX512_BW+VL}.
1414 kIdVptestnmb, //!< Instruction 'vptestnmb' {AVX512_BW+VL}.
1415 kIdVptestnmd, //!< Instruction 'vptestnmd' {AVX512_F+VL}.
1416 kIdVptestnmq, //!< Instruction 'vptestnmq' {AVX512_F+VL}.
1417 kIdVptestnmw, //!< Instruction 'vptestnmw' {AVX512_BW+VL}.
1418 kIdVpunpckhbw, //!< Instruction 'vpunpckhbw' {AVX|AVX2|AVX512_BW+VL}.
1419 kIdVpunpckhdq, //!< Instruction 'vpunpckhdq' {AVX|AVX2|AVX512_F+VL}.
1420 kIdVpunpckhqdq, //!< Instruction 'vpunpckhqdq' {AVX|AVX2|AVX512_F+VL}.
1421 kIdVpunpckhwd, //!< Instruction 'vpunpckhwd' {AVX|AVX2|AVX512_BW+VL}.
1422 kIdVpunpcklbw, //!< Instruction 'vpunpcklbw' {AVX|AVX2|AVX512_BW+VL}.
1423 kIdVpunpckldq, //!< Instruction 'vpunpckldq' {AVX|AVX2|AVX512_F+VL}.
1424 kIdVpunpcklqdq, //!< Instruction 'vpunpcklqdq' {AVX|AVX2|AVX512_F+VL}.
1425 kIdVpunpcklwd, //!< Instruction 'vpunpcklwd' {AVX|AVX2|AVX512_BW+VL}.
1426 kIdVpxor, //!< Instruction 'vpxor' {AVX|AVX2}.
1427 kIdVpxord, //!< Instruction 'vpxord' {AVX512_F+VL}.
1428 kIdVpxorq, //!< Instruction 'vpxorq' {AVX512_F+VL}.
1429 kIdVrangepd, //!< Instruction 'vrangepd' {AVX512_DQ+VL}.
1430 kIdVrangeps, //!< Instruction 'vrangeps' {AVX512_DQ+VL}.
1431 kIdVrangesd, //!< Instruction 'vrangesd' {AVX512_DQ}.
1432 kIdVrangess, //!< Instruction 'vrangess' {AVX512_DQ}.
1433 kIdVrcp14pd, //!< Instruction 'vrcp14pd' {AVX512_F+VL}.
1434 kIdVrcp14ps, //!< Instruction 'vrcp14ps' {AVX512_F+VL}.
1435 kIdVrcp14sd, //!< Instruction 'vrcp14sd' {AVX512_F}.
1436 kIdVrcp14ss, //!< Instruction 'vrcp14ss' {AVX512_F}.
1437 kIdVrcp28pd, //!< Instruction 'vrcp28pd' {AVX512_ERI}.
1438 kIdVrcp28ps, //!< Instruction 'vrcp28ps' {AVX512_ERI}.
1439 kIdVrcp28sd, //!< Instruction 'vrcp28sd' {AVX512_ERI}.
1440 kIdVrcp28ss, //!< Instruction 'vrcp28ss' {AVX512_ERI}.
1441 kIdVrcpps, //!< Instruction 'vrcpps' {AVX}.
1442 kIdVrcpss, //!< Instruction 'vrcpss' {AVX}.
1443 kIdVreducepd, //!< Instruction 'vreducepd' {AVX512_DQ+VL}.
1444 kIdVreduceps, //!< Instruction 'vreduceps' {AVX512_DQ+VL}.
1445 kIdVreducesd, //!< Instruction 'vreducesd' {AVX512_DQ}.
1446 kIdVreducess, //!< Instruction 'vreducess' {AVX512_DQ}.
1447 kIdVrndscalepd, //!< Instruction 'vrndscalepd' {AVX512_F+VL}.
1448 kIdVrndscaleps, //!< Instruction 'vrndscaleps' {AVX512_F+VL}.
1449 kIdVrndscalesd, //!< Instruction 'vrndscalesd' {AVX512_F}.
1450 kIdVrndscaless, //!< Instruction 'vrndscaless' {AVX512_F}.
1451 kIdVroundpd, //!< Instruction 'vroundpd' {AVX}.
1452 kIdVroundps, //!< Instruction 'vroundps' {AVX}.
1453 kIdVroundsd, //!< Instruction 'vroundsd' {AVX}.
1454 kIdVroundss, //!< Instruction 'vroundss' {AVX}.
1455 kIdVrsqrt14pd, //!< Instruction 'vrsqrt14pd' {AVX512_F+VL}.
1456 kIdVrsqrt14ps, //!< Instruction 'vrsqrt14ps' {AVX512_F+VL}.
1457 kIdVrsqrt14sd, //!< Instruction 'vrsqrt14sd' {AVX512_F}.
1458 kIdVrsqrt14ss, //!< Instruction 'vrsqrt14ss' {AVX512_F}.
1459 kIdVrsqrt28pd, //!< Instruction 'vrsqrt28pd' {AVX512_ERI}.
1460 kIdVrsqrt28ps, //!< Instruction 'vrsqrt28ps' {AVX512_ERI}.
1461 kIdVrsqrt28sd, //!< Instruction 'vrsqrt28sd' {AVX512_ERI}.
1462 kIdVrsqrt28ss, //!< Instruction 'vrsqrt28ss' {AVX512_ERI}.
1463 kIdVrsqrtps, //!< Instruction 'vrsqrtps' {AVX}.
1464 kIdVrsqrtss, //!< Instruction 'vrsqrtss' {AVX}.
1465 kIdVscalefpd, //!< Instruction 'vscalefpd' {AVX512_F+VL}.
1466 kIdVscalefps, //!< Instruction 'vscalefps' {AVX512_F+VL}.
1467 kIdVscalefsd, //!< Instruction 'vscalefsd' {AVX512_F}.
1468 kIdVscalefss, //!< Instruction 'vscalefss' {AVX512_F}.
1469 kIdVscatterdpd, //!< Instruction 'vscatterdpd' {AVX512_F+VL}.
1470 kIdVscatterdps, //!< Instruction 'vscatterdps' {AVX512_F+VL}.
1471 kIdVscatterpf0dpd, //!< Instruction 'vscatterpf0dpd' {AVX512_PFI}.
1472 kIdVscatterpf0dps, //!< Instruction 'vscatterpf0dps' {AVX512_PFI}.
1473 kIdVscatterpf0qpd, //!< Instruction 'vscatterpf0qpd' {AVX512_PFI}.
1474 kIdVscatterpf0qps, //!< Instruction 'vscatterpf0qps' {AVX512_PFI}.
1475 kIdVscatterpf1dpd, //!< Instruction 'vscatterpf1dpd' {AVX512_PFI}.
1476 kIdVscatterpf1dps, //!< Instruction 'vscatterpf1dps' {AVX512_PFI}.
1477 kIdVscatterpf1qpd, //!< Instruction 'vscatterpf1qpd' {AVX512_PFI}.
1478 kIdVscatterpf1qps, //!< Instruction 'vscatterpf1qps' {AVX512_PFI}.
1479 kIdVscatterqpd, //!< Instruction 'vscatterqpd' {AVX512_F+VL}.
1480 kIdVscatterqps, //!< Instruction 'vscatterqps' {AVX512_F+VL}.
1481 kIdVshuff32x4, //!< Instruction 'vshuff32x4' {AVX512_F+VL}.
1482 kIdVshuff64x2, //!< Instruction 'vshuff64x2' {AVX512_F+VL}.
1483 kIdVshufi32x4, //!< Instruction 'vshufi32x4' {AVX512_F+VL}.
1484 kIdVshufi64x2, //!< Instruction 'vshufi64x2' {AVX512_F+VL}.
1485 kIdVshufpd, //!< Instruction 'vshufpd' {AVX|AVX512_F+VL}.
1486 kIdVshufps, //!< Instruction 'vshufps' {AVX|AVX512_F+VL}.
1487 kIdVsqrtpd, //!< Instruction 'vsqrtpd' {AVX|AVX512_F+VL}.
1488 kIdVsqrtps, //!< Instruction 'vsqrtps' {AVX|AVX512_F+VL}.
1489 kIdVsqrtsd, //!< Instruction 'vsqrtsd' {AVX|AVX512_F}.
1490 kIdVsqrtss, //!< Instruction 'vsqrtss' {AVX|AVX512_F}.
1491 kIdVstmxcsr, //!< Instruction 'vstmxcsr' {AVX}.
1492 kIdVsubpd, //!< Instruction 'vsubpd' {AVX|AVX512_F+VL}.
1493 kIdVsubps, //!< Instruction 'vsubps' {AVX|AVX512_F+VL}.
1494 kIdVsubsd, //!< Instruction 'vsubsd' {AVX|AVX512_F}.
1495 kIdVsubss, //!< Instruction 'vsubss' {AVX|AVX512_F}.
1496 kIdVtestpd, //!< Instruction 'vtestpd' {AVX}.
1497 kIdVtestps, //!< Instruction 'vtestps' {AVX}.
1498 kIdVucomisd, //!< Instruction 'vucomisd' {AVX|AVX512_F}.
1499 kIdVucomiss, //!< Instruction 'vucomiss' {AVX|AVX512_F}.
1500 kIdVunpckhpd, //!< Instruction 'vunpckhpd' {AVX|AVX512_F+VL}.
1501 kIdVunpckhps, //!< Instruction 'vunpckhps' {AVX|AVX512_F+VL}.
1502 kIdVunpcklpd, //!< Instruction 'vunpcklpd' {AVX|AVX512_F+VL}.
1503 kIdVunpcklps, //!< Instruction 'vunpcklps' {AVX|AVX512_F+VL}.
1504 kIdVxorpd, //!< Instruction 'vxorpd' {AVX|AVX512_DQ+VL}.
1505 kIdVxorps, //!< Instruction 'vxorps' {AVX|AVX512_DQ+VL}.
1506 kIdVzeroall, //!< Instruction 'vzeroall' {AVX}.
1507 kIdVzeroupper, //!< Instruction 'vzeroupper' {AVX}.
1508 kIdWbinvd, //!< Instruction 'wbinvd'.
1509 kIdWbnoinvd, //!< Instruction 'wbnoinvd' {WBNOINVD}.
1510 kIdWrfsbase, //!< Instruction 'wrfsbase' {FSGSBASE} (X64).
1511 kIdWrgsbase, //!< Instruction 'wrgsbase' {FSGSBASE} (X64).
1512 kIdWrmsr, //!< Instruction 'wrmsr' {MSR}.
1513 kIdXabort, //!< Instruction 'xabort' {RTM}.
1514 kIdXadd, //!< Instruction 'xadd' {I486}.
1515 kIdXbegin, //!< Instruction 'xbegin' {RTM}.
1516 kIdXchg, //!< Instruction 'xchg'.
1517 kIdXend, //!< Instruction 'xend' {RTM}.
1518 kIdXgetbv, //!< Instruction 'xgetbv' {XSAVE}.
1519 kIdXlatb, //!< Instruction 'xlatb'.
1520 kIdXor, //!< Instruction 'xor'.
1521 kIdXorpd, //!< Instruction 'xorpd' {SSE2}.
1522 kIdXorps, //!< Instruction 'xorps' {SSE}.
1523 kIdXrstor, //!< Instruction 'xrstor' {XSAVE}.
1524 kIdXrstor64, //!< Instruction 'xrstor64' {XSAVE} (X64).
1525 kIdXrstors, //!< Instruction 'xrstors' {XSAVES}.
1526 kIdXrstors64, //!< Instruction 'xrstors64' {XSAVES} (X64).
1527 kIdXsave, //!< Instruction 'xsave' {XSAVE}.
1528 kIdXsave64, //!< Instruction 'xsave64' {XSAVE} (X64).
1529 kIdXsavec, //!< Instruction 'xsavec' {XSAVEC}.
1530 kIdXsavec64, //!< Instruction 'xsavec64' {XSAVEC} (X64).
1531 kIdXsaveopt, //!< Instruction 'xsaveopt' {XSAVEOPT}.
1532 kIdXsaveopt64, //!< Instruction 'xsaveopt64' {XSAVEOPT} (X64).
1533 kIdXsaves, //!< Instruction 'xsaves' {XSAVES}.
1534 kIdXsaves64, //!< Instruction 'xsaves64' {XSAVES} (X64).
1535 kIdXsetbv, //!< Instruction 'xsetbv' {XSAVE}.
1536 kIdXtest, //!< Instruction 'xtest' {TSX}.
1537 _kIdCount
1538 // ${InstId:End}
1539 };
1540
  //! Instruction options.
  //!
  //! Options modify how a single instruction is encoded and validated. They
  //! can force a particular prefix (3-byte VEX, EVEX, REX, LOCK, REP/REPNE,
  //! XACQUIRE/XRELEASE), select AVX-512 embedded-rounding / exception
  //! behavior, or carry REX/VEX operand-extension bits (B|X|R|W).
  enum Options : uint32_t {
    // Encoding hints.
    kOptionVex3           = 0x00000400u, //!< Use 3-byte VEX prefix if possible (AVX) (must be 0x00000400).
    kOptionModMR          = 0x00000800u, //!< Use ModMR instead of ModRM when it's available.
    kOptionEvex           = 0x00001000u, //!< Use 4-byte EVEX prefix if possible (AVX-512) (must be 0x00001000).

    // Legacy prefixes (only valid on instructions that allow them).
    kOptionLock           = 0x00002000u, //!< LOCK prefix (lock-enabled instructions only).
    kOptionRep            = 0x00004000u, //!< REP prefix (string instructions only).
    kOptionRepne          = 0x00008000u, //!< REPNE prefix (string instructions only).

    // TSX hint prefixes.
    kOptionXAcquire       = 0x00010000u, //!< XACQUIRE prefix (only allowed instructions).
    kOptionXRelease       = 0x00020000u, //!< XRELEASE prefix (only allowed instructions).

    // AVX-512 options. Bits 21-22 (see the RD/RU/RZ values) select one of the
    // four rounding modes (RN=00, RD=01, RU=10, RZ=11) used with kOptionER.
    kOptionER             = 0x00040000u, //!< AVX-512: embedded-rounding {er} and implicit {sae}.
    kOptionSAE            = 0x00080000u, //!< AVX-512: suppress-all-exceptions {sae}.
    kOptionRN_SAE         = 0x00000000u, //!< AVX-512: round-to-nearest (even) {rn-sae} (bits 00).
    kOptionRD_SAE         = 0x00200000u, //!< AVX-512: round-down (toward -inf) {rd-sae} (bits 01).
    kOptionRU_SAE         = 0x00400000u, //!< AVX-512: round-up (toward +inf) {ru-sae} (bits 10).
    kOptionRZ_SAE         = 0x00600000u, //!< AVX-512: round-toward-zero (truncate) {rz-sae} (bits 11).
    kOptionZMask          = 0x00800000u, //!< AVX-512: Use zeroing {k}{z} instead of merging {k}.
    _kOptionAvx512Mask    = 0x00FC0000u, //!< AVX-512: Mask of all possible AVX-512 options except EVEX prefix flag.

    // REX/VEX operand-extension bits (X64).
    kOptionOpCodeB        = 0x01000000u, //!< REX.B and/or VEX.B field (X64).
    kOptionOpCodeX        = 0x02000000u, //!< REX.X and/or VEX.X field (X64).
    kOptionOpCodeR        = 0x04000000u, //!< REX.R and/or VEX.R field (X64).
    kOptionOpCodeW        = 0x08000000u, //!< REX.W and/or VEX.W field (X64).
    kOptionRex            = 0x40000000u, //!< Force REX prefix (X64).
    _kOptionInvalidRex    = 0x80000000u  //!< Invalid REX prefix (set by X86 or when AH|BH|CH|DH regs are used on X64).
  };
1570
1571 // --------------------------------------------------------------------------
1572 // [Statics]
1573 // --------------------------------------------------------------------------
1574
1575 //! Tests whether the `instId` is defined (counts also Inst::kIdNone, which must be zero).
1576 static inline bool isDefinedId(uint32_t instId) noexcept { return instId < _kIdCount; }
1577 };
1578
1579 // ============================================================================
1580 // [asmjit::x86::Condition]
1581 // ============================================================================
1582
1583 namespace Condition {
  //! Condition code.
  //!
  //! The numeric values 0x0..0xF follow the x86 condition-code encoding
  //! (as used by `Jcc`, `SETcc` and `CMOVcc` — see Intel SDM). Multiple
  //! mnemonic aliases map to the same value (e.g. kB == kC == kNAE == 0x02),
  //! and each even/odd pair of codes is a condition and its negation.
  //! Named aliases (kEqual, kSignedLT, ...) follow the raw codes.
  enum Code : uint32_t {
    kO = 0x00u, //!< OF==1
    kNO = 0x01u, //!< OF==0
    kB = 0x02u, //!< CF==1 (unsigned < )
    kC = 0x02u, //!< CF==1
    kNAE = 0x02u, //!< CF==1 (unsigned < )
    kAE = 0x03u, //!< CF==0 (unsigned >=)
    kNB = 0x03u, //!< CF==0 (unsigned >=)
    kNC = 0x03u, //!< CF==0
    kE = 0x04u, //!< ZF==1 (any_sign ==)
    kZ = 0x04u, //!< ZF==1 (any_sign ==)
    kNE = 0x05u, //!< ZF==0 (any_sign !=)
    kNZ = 0x05u, //!< ZF==0 (any_sign !=)
    kBE = 0x06u, //!< CF==1 | ZF==1 (unsigned <=)
    kNA = 0x06u, //!< CF==1 | ZF==1 (unsigned <=)
    kA = 0x07u, //!< CF==0 & ZF==0 (unsigned > )
    kNBE = 0x07u, //!< CF==0 & ZF==0 (unsigned > )
    kS = 0x08u, //!< SF==1 (is negative)
    kNS = 0x09u, //!< SF==0 (is positive or zero)
    kP = 0x0Au, //!< PF==1
    kPE = 0x0Au, //!< PF==1
    kPO = 0x0Bu, //!< PF==0
    kNP = 0x0Bu, //!< PF==0
    kL = 0x0Cu, //!< SF!=OF (signed < )
    kNGE = 0x0Cu, //!< SF!=OF (signed < )
    kGE = 0x0Du, //!< SF==OF (signed >=)
    kNL = 0x0Du, //!< SF==OF (signed >=)
    kLE = 0x0Eu, //!< ZF==1 | SF!=OF (signed <=)
    kNG = 0x0Eu, //!< ZF==1 | SF!=OF (signed <=)
    kG = 0x0Fu, //!< ZF==0 & SF==OF (signed > )
    kNLE = 0x0Fu, //!< ZF==0 & SF==OF (signed > )
    kCount = 0x10u, //!< Count of raw condition codes (aliases below reuse values).

    kSign = kS, //!< Sign.
    kNotSign = kNS, //!< Not Sign.

    kOverflow = kO, //!< Signed overflow.
    kNotOverflow = kNO, //!< Not signed overflow.

    kEqual = kE, //!< Equal `a == b`.
    kNotEqual = kNE, //!< Not Equal `a != b`.

    kSignedLT = kL, //!< Signed `a < b`.
    kSignedLE = kLE, //!< Signed `a <= b`.
    kSignedGT = kG, //!< Signed `a > b`.
    kSignedGE = kGE, //!< Signed `a >= b`.

    kUnsignedLT = kB, //!< Unsigned `a < b`.
    kUnsignedLE = kBE, //!< Unsigned `a <= b`.
    kUnsignedGT = kA, //!< Unsigned `a > b`.
    kUnsignedGE = kAE, //!< Unsigned `a >= b`.

    kZero = kZ, //!< Alias of kZ (ZF==1).
    kNotZero = kNZ, //!< Alias of kNZ (ZF==0).

    kNegative = kS, //!< Alias of kS (SF==1).
    kPositive = kNS, //!< Alias of kNS (SF==0).

    kParityEven = kP, //!< Alias of kP (PF==1).
    kParityOdd = kPO //!< Alias of kPO (PF==0).
  };
1646
1647 static constexpr uint8_t reverseTable[kCount] = {
1648 kO, kNO, kA , kBE, // O|NO|B |AE
1649 kE, kNE, kAE, kB , // E|NE|BE|A
1650 kS, kNS, kPE, kPO, // S|NS|PE|PO
1651 kG, kLE, kGE, kL // L|GE|LE|G
1652 };
1653
1654 #define ASMJIT_INST_FROM_COND(ID) \
1655 ID##o, ID##no, ID##b , ID##ae, \
1656 ID##e, ID##ne, ID##be, ID##a , \
1657 ID##s, ID##ns, ID##pe, ID##po, \
1658 ID##l, ID##ge, ID##le, ID##g
1659 static constexpr uint16_t jccTable[] = { ASMJIT_INST_FROM_COND(Inst::kIdJ) };
1660 static constexpr uint16_t setccTable[] = { ASMJIT_INST_FROM_COND(Inst::kIdSet) };
1661 static constexpr uint16_t cmovccTable[] = { ASMJIT_INST_FROM_COND(Inst::kIdCmov) };
1662 #undef ASMJIT_INST_FROM_COND
1663
1664 //! Reverse a condition code (reverses the corresponding operands of a comparison).
1665 static constexpr uint32_t reverse(uint32_t cond) noexcept { return reverseTable[cond]; }
1666 //! Negate a condition code.
1667 static constexpr uint32_t negate(uint32_t cond) noexcept { return cond ^ 1u; }
1668
1669 //! Translate a condition code `cond` to a `jcc` instruction id.
1670 static constexpr uint32_t toJcc(uint32_t cond) noexcept { return jccTable[cond]; }
1671 //! Translate a condition code `cond` to a `setcc` instruction id.
1672 static constexpr uint32_t toSetcc(uint32_t cond) noexcept { return setccTable[cond]; }
1673 //! Translate a condition code `cond` to a `cmovcc` instruction id.
1674 static constexpr uint32_t toCmovcc(uint32_t cond) noexcept { return cmovccTable[cond]; }
1675 }
1676
1677 // ============================================================================
1678 // [asmjit::x86::FpuWord]
1679 // ============================================================================
1680
//! FPU control and status word.
namespace FpuWord {
  //! FPU status word.
  enum Status : uint32_t {
    // Exception flags (bits 0-7).
    kStatusInvalid = 0x0001u,
    kStatusDenormalized = 0x0002u,
    kStatusDivByZero = 0x0004u,
    kStatusOverflow = 0x0008u,
    kStatusUnderflow = 0x0010u,
    kStatusPrecision = 0x0020u,
    kStatusStackFault = 0x0040u,
    kStatusInterrupt = 0x0080u,
    // Condition codes and state (bits 8-15); kStatusTop is a 3-bit field (bits 11-13).
    kStatusC0 = 0x0100u,
    kStatusC1 = 0x0200u,
    kStatusC2 = 0x0400u,
    kStatusTop = 0x3800u,
    kStatusC3 = 0x4000u,
    kStatusBusy = 0x8000u
  };

  //! FPU control word.
  enum Control : uint32_t {
    // Bits 0-5: exception masks (EM).
    kControlEM_Mask = 0x003Fu,
    kControlEM_Invalid = 0x0001u,
    kControlEM_Denormal = 0x0002u,
    kControlEM_DivByZero = 0x0004u,
    kControlEM_Overflow = 0x0008u,
    kControlEM_Underflow = 0x0010u,
    kControlEM_Inexact = 0x0020u,

    // Bits 8-9: precision control (PC).
    kControlPC_Mask = 0x0300u,
    kControlPC_Float = 0x0000u,
    kControlPC_Reserved = 0x0100u,
    kControlPC_Double = 0x0200u,
    kControlPC_Extended = 0x0300u,

    // Bits 10-11: rounding control (RC).
    kControlRC_Mask = 0x0C00u,
    kControlRC_Nearest = 0x0000u,
    kControlRC_Down = 0x0400u,
    kControlRC_Up = 0x0800u,
    kControlRC_Truncate = 0x0C00u,

    // Bit 12: infinity control (IC).
    kControlIC_Mask = 0x1000u,
    kControlIC_Projective = 0x0000u,
    kControlIC_Affine = 0x1000u
  };
}
1732
1733 // ============================================================================
1734 // [asmjit::x86::Status]
1735 // ============================================================================
1736
//! CPU and FPU status flags.
namespace Status {
  //! CPU and FPU status flags used by `InstRWInfo`.
  //!
  //! \note These are AsmJit's own flag bits used to describe which status
  //! flags an instruction reads/writes; they do not mirror the hardware
  //! EFLAGS bit layout.
  enum Flags : uint32_t {
    // ------------------------------------------------------------------------
    // [Architecture Neutral Flags - 0x000000FF]
    // ------------------------------------------------------------------------

    kCF = 0x00000001u, //!< Carry flag.
    kOF = 0x00000002u, //!< Signed overflow flag.
    kSF = 0x00000004u, //!< Sign flag (negative/sign, if set).
    kZF = 0x00000008u, //!< Zero and/or equality flag (1 if zero/equal).

    // ------------------------------------------------------------------------
    // [Architecture Specific Flags - 0xFFFFFF00]
    // ------------------------------------------------------------------------

    kAF = 0x00000100u, //!< Adjust flag.
    kPF = 0x00000200u, //!< Parity flag.
    kDF = 0x00000400u, //!< Direction flag.
    kIF = 0x00000800u, //!< Interrupt enable flag.

    kAC = 0x00001000u, //!< Alignment check.

    kC0 = 0x00010000u, //!< FPU C0 status flag.
    kC1 = 0x00020000u, //!< FPU C1 status flag.
    kC2 = 0x00040000u, //!< FPU C2 status flag.
    kC3 = 0x00080000u //!< FPU C3 status flag.
  };
}
1767
1768 // ============================================================================
1769 // [asmjit::x86::Predicate]
1770 // ============================================================================
1771
//! Contains predicates (immediate values) used by SIMD instructions.
namespace Predicate {
  //! A predicate used by CMP[PD|PS|SD|SS] instructions.
  enum Cmp : uint32_t {
    kCmpEQ = 0x00u, //!< Equal (Quiet).
    kCmpLT = 0x01u, //!< Less (Signaling).
    kCmpLE = 0x02u, //!< Less/Equal (Signaling).
    kCmpUNORD = 0x03u, //!< Unordered (Quiet).
    kCmpNEQ = 0x04u, //!< Not Equal (Quiet).
    kCmpNLT = 0x05u, //!< Not Less (Signaling).
    kCmpNLE = 0x06u, //!< Not Less/Equal (Signaling).
    kCmpORD = 0x07u //!< Ordered (Quiet).
  };

  //! A predicate used by [V]PCMP[I|E]STR[I|M] instructions.
  //!
  //! The final control byte is built by OR-ing one choice from each group.
  enum PCmpStr : uint32_t {
    // Source data format (bits 0-1):
    kPCmpStrUB = 0x00u << 0, //!< The source data format is unsigned bytes.
    kPCmpStrUW = 0x01u << 0, //!< The source data format is unsigned words.
    kPCmpStrSB = 0x02u << 0, //!< The source data format is signed bytes.
    kPCmpStrSW = 0x03u << 0, //!< The source data format is signed words.

    // Aggregation operation (bits 2-3):
    kPCmpStrEqualAny = 0x00u << 2, //!< The arithmetic comparison is "equal".
    kPCmpStrRanges = 0x01u << 2, //!< The arithmetic comparison is "greater than or equal"
                                 //!< between even indexed elements and "less than or equal"
                                 //!< between odd indexed elements.
    kPCmpStrEqualEach = 0x02u << 2, //!< The arithmetic comparison is "equal".
    kPCmpStrEqualOrdered = 0x03u << 2, //!< The arithmetic comparison is "equal".

    // Polarity (bits 4-5):
    kPCmpStrPosPolarity = 0x00u << 4, //!< IntRes2 = IntRes1.
    kPCmpStrNegPolarity = 0x01u << 4, //!< IntRes2 = -1 XOR IntRes1.
    kPCmpStrPosMasked = 0x02u << 4, //!< IntRes2 = IntRes1.
    kPCmpStrNegMasked = 0x03u << 4, //!< IntRes2[i] = second[i] == invalid ? IntRes1[i] : ~IntRes1[i].

    // Output selection (bit 6, pcmpstri):
    kPCmpStrOutputLSI = 0x00u << 6, //!< The index returned to ECX is of the least significant set bit in IntRes2.
    kPCmpStrOutputMSI = 0x01u << 6, //!< The index returned to ECX is of the most significant set bit in IntRes2.

    // Output selection (bit 6, pcmpstrm):
    kPCmpStrBitMask = 0x00u << 6, //!< IntRes2 is returned as the mask to the least significant bits of XMM0.
    kPCmpStrIndexMask = 0x01u << 6 //!< IntRes2 is expanded into a byte/word mask and placed in XMM0.
  };

  //! A predicate used by ROUND[PD|PS|SD|SS] instructions.
  enum Round : uint32_t {
    kRoundNearest = 0x00u, //!< Round to nearest (even).
    kRoundDown = 0x01u, //!< Round down toward -INF (floor).
    kRoundUp = 0x02u, //!< Round up toward +INF (ceil).
    kRoundTrunc = 0x03u, //!< Round toward zero (truncate).
    kRoundCurrent = 0x04u, //!< Round to the current rounding mode set (ignores other RC bits).
    kRoundInexact = 0x08u //!< Avoids inexact exception, if set.
  };

  //! A predicate used by VCMP[PD|PS|SD|SS] instructions.
  //!
  //! The first 8 values are compatible with `Cmp`.
  enum VCmp : uint32_t {
    kVCmpEQ_OQ = kCmpEQ, //!< Equal (Quiet , Ordered).
    kVCmpLT_OS = kCmpLT, //!< Less (Signaling, Ordered).
    kVCmpLE_OS = kCmpLE, //!< Less/Equal (Signaling, Ordered).
    kVCmpUNORD_Q = kCmpUNORD, //!< Unordered (Quiet).
    kVCmpNEQ_UQ = kCmpNEQ, //!< Not Equal (Quiet , Unordered).
    kVCmpNLT_US = kCmpNLT, //!< Not Less (Signaling, Unordered).
    kVCmpNLE_US = kCmpNLE, //!< Not Less/Equal (Signaling, Unordered).
    kVCmpORD_Q = kCmpORD, //!< Ordered (Quiet).
    kVCmpEQ_UQ = 0x08u, //!< Equal (Quiet , Unordered).
    kVCmpNGE_US = 0x09u, //!< Not Greater/Equal (Signaling, Unordered).
    kVCmpNGT_US = 0x0Au, //!< Not Greater (Signaling, Unordered).
    kVCmpFALSE_OQ = 0x0Bu, //!< False (Quiet , Ordered).
    kVCmpNEQ_OQ = 0x0Cu, //!< Not Equal (Quiet , Ordered).
    kVCmpGE_OS = 0x0Du, //!< Greater/Equal (Signaling, Ordered).
    kVCmpGT_OS = 0x0Eu, //!< Greater (Signaling, Ordered).
    kVCmpTRUE_UQ = 0x0Fu, //!< True (Quiet , Unordered).
    kVCmpEQ_OS = 0x10u, //!< Equal (Signaling, Ordered).
    kVCmpLT_OQ = 0x11u, //!< Less (Quiet , Ordered).
    kVCmpLE_OQ = 0x12u, //!< Less/Equal (Quiet , Ordered).
    kVCmpUNORD_S = 0x13u, //!< Unordered (Signaling).
    kVCmpNEQ_US = 0x14u, //!< Not Equal (Signaling, Unordered).
    kVCmpNLT_UQ = 0x15u, //!< Not Less (Quiet , Unordered).
    kVCmpNLE_UQ = 0x16u, //!< Not Less/Equal (Quiet , Unordered).
    kVCmpORD_S = 0x17u, //!< Ordered (Signaling).
    kVCmpEQ_US = 0x18u, //!< Equal (Signaling, Unordered).
    kVCmpNGE_UQ = 0x19u, //!< Not Greater/Equal (Quiet , Unordered).
    kVCmpNGT_UQ = 0x1Au, //!< Not Greater (Quiet , Unordered).
    kVCmpFALSE_OS = 0x1Bu, //!< False (Signaling, Ordered).
    kVCmpNEQ_OS = 0x1Cu, //!< Not Equal (Signaling, Ordered).
    kVCmpGE_OQ = 0x1Du, //!< Greater/Equal (Quiet , Ordered).
    kVCmpGT_OQ = 0x1Eu, //!< Greater (Quiet , Ordered).
    kVCmpTRUE_US = 0x1Fu //!< True (Signaling, Unordered).
  };

  //! A predicate used by VFIXUPIMM[PD|PS|SD|SS] instructions (AVX-512).
  //!
  //! \note Values can be combined together to form the final 8-bit mask.
  enum VFixupImm : uint32_t {
    kVFixupImmZEOnZero = 0x01u,
    kVFixupImmIEOnZero = 0x02u,
    kVFixupImmZEOnOne = 0x04u,
    kVFixupImmIEOnOne = 0x08u,
    kVFixupImmIEOnSNaN = 0x10u,
    kVFixupImmIEOnNInf = 0x20u,
    kVFixupImmIEOnNegative= 0x40u,
    kVFixupImmIEOnPInf = 0x80u
  };

  //! A predicate used by VFPCLASS[PD|PS|SD|SS] instructions (AVX-512).
  //!
  //! \note Values can be combined together to form the final 8-bit mask.
  enum VFPClass : uint32_t {
    kVFPClassQNaN = 0x01u, //!< Checks for QNaN.
    kVFPClassPZero = 0x02u, //!< Checks for +0.
    kVFPClassNZero = 0x04u, //!< Checks for -0.
    kVFPClassPInf = 0x08u, //!< Checks for +Inf.
    kVFPClassNInf = 0x10u, //!< Checks for -Inf.
    kVFPClassDenormal = 0x20u, //!< Checks for denormal.
    kVFPClassNegative = 0x40u, //!< Checks for negative finite value.
    kVFPClassSNaN = 0x80u //!< Checks for SNaN.
  };

  //! A predicate used by VGETMANT[PD|PS|SD|SS] instructions (AVX-512).
  enum VGetMant : uint32_t {
    kVGetMant1To2 = 0x00u,
    kVGetMant1Div2To2 = 0x01u,
    kVGetMant1Div2To1 = 0x02u,
    kVGetMant3Div4To3Div2 = 0x03u,
    kVGetMantNoSign = 0x04u,
    kVGetMantQNaNIfSign = 0x08u
  };

  //! A predicate used by VPCMP[U][B|W|D|Q] instructions (AVX-512).
  enum VPCmp : uint32_t {
    kVPCmpEQ = 0x00u, //!< Equal.
    kVPCmpLT = 0x01u, //!< Less.
    kVPCmpLE = 0x02u, //!< Less/Equal.
    kVPCmpFALSE = 0x03u, //!< False.
    kVPCmpNE = 0x04u, //!< Not Equal.
    kVPCmpGE = 0x05u, //!< Greater/Equal.
    kVPCmpGT = 0x06u, //!< Greater.
    kVPCmpTRUE = 0x07u //!< True.
  };

  //! A predicate used by VPCOM[U][B|W|D|Q] instructions (XOP).
  enum VPCom : uint32_t {
    kVPComLT = 0x00u, //!< Less.
    kVPComLE = 0x01u, //!< Less/Equal.
    kVPComGT = 0x02u, //!< Greater.
    kVPComGE = 0x03u, //!< Greater/Equal.
    kVPComEQ = 0x04u, //!< Equal.
    kVPComNE = 0x05u, //!< Not Equal.
    kVPComFALSE = 0x06u, //!< False.
    kVPComTRUE = 0x07u //!< True.
  };

  //! A predicate used by VRANGE[PD|PS|SD|SS] instructions (AVX-512).
  //!
  //! Combine one selection value (bits 0-1) with one sign value (bits 2-3).
  enum VRange : uint32_t {
    kVRangeSelectMin = 0x00u, //!< Select minimum value.
    kVRangeSelectMax = 0x01u, //!< Select maximum value.
    kVRangeSelectAbsMin = 0x02u, //!< Select minimum absolute value.
    kVRangeSelectAbsMax = 0x03u, //!< Select maximum absolute value.
    kVRangeSignSrc1 = 0x00u, //!< Select sign of SRC1.
    kVRangeSignSrc2 = 0x04u, //!< Select sign of SRC2.
    kVRangeSign0 = 0x08u, //!< Set sign to 0.
    kVRangeSign1 = 0x0Cu //!< Set sign to 1.
  };

  //! A predicate used by VREDUCE[PD|PS|SD|SS] instructions (AVX-512).
  enum VReduce : uint32_t {
    kVReduceRoundCurrent = 0x00u, //!< Round to the current mode set.
    kVReduceRoundEven = 0x04u, //!< Round to nearest even.
    kVReduceRoundDown = 0x05u, //!< Round down.
    kVReduceRoundUp = 0x06u, //!< Round up.
    kVReduceRoundTrunc = 0x07u, //!< Truncate.
    kVReduceSuppress = 0x08u //!< Suppress exceptions.
  };

  //! Pack a shuffle constant to be used by SSE/AVX/AVX-512 instructions (2 values).
  //!
  //! \param a Position of the first component [0, 1].
  //! \param b Position of the second component [0, 1].
  //!
  //! Shuffle constants can be used to encode an immediate for these instructions:
  //!   - `shufpd|vshufpd`
  //!
  //! \note Arguments are not masked; they must already be in range.
  static constexpr uint32_t shuf(uint32_t a, uint32_t b) noexcept {
    return (a << 1) | b;
  }

  //! Pack a shuffle constant to be used by SSE/AVX/AVX-512 instructions (4 values).
  //!
  //! \param a Position of the first component [0, 3].
  //! \param b Position of the second component [0, 3].
  //! \param c Position of the third component [0, 3].
  //! \param d Position of the fourth component [0, 3].
  //!
  //! Shuffle constants can be used to encode an immediate for these instructions:
  //!   - `pshufw`
  //!   - `pshuflw|vpshuflw`
  //!   - `pshufhw|vpshufhw`
  //!   - `pshufd|vpshufd`
  //!   - `shufps|vshufps`
  //!
  //! \note Arguments are not masked; they must already be in range.
  static constexpr uint32_t shuf(uint32_t a, uint32_t b, uint32_t c, uint32_t d) noexcept {
    return (a << 6) | (b << 4) | (c << 2) | d;
  }
}
1975
1976 // ============================================================================
1977 // [asmjit::x86::TLog]
1978 // ============================================================================
1979
//! Bitwise ternary logic between 3 operands introduced by AVX-512.
namespace TLog {
  //! A predicate that can be used to create a common predicate for VPTERNLOG[D|Q].
  //!
  //! The 8-bit immediate is a truth table: bit `i` is the result for the
  //! input combination whose 3 bits (A, B, C) form the index `i`. Predicates
  //! therefore compose with ordinary bitwise operators (e.g. `kA & ~kB`).
  enum Operator : uint32_t {
    k0 = 0x00u, //!< 0 value.
    k1 = 0xFFu, //!< 1 value.
    kA = 0xF0u, //!< A value.
    kB = 0xCCu, //!< B value.
    kC = 0xAAu, //!< C value.
    kNotA = kA ^ k1, //!< `!A` expression.
    kNotB = kB ^ k1, //!< `!B` expression.
    kNotC = kC ^ k1, //!< `!C` expression.

    kAB = kA & kB, //!< `A & B` expression.
    kAC = kA & kC, //!< `A & C` expression.
    kBC = kB & kC, //!< `B & C` expression.
    kNotAB = kAB ^ k1, //!< `!(A & B)` expression.
    kNotAC = kAC ^ k1, //!< `!(A & C)` expression.
    kNotBC = kBC ^ k1, //!< `!(B & C)` expression.

    kABC = kAB & kC, //!< `A & B & C` expression.
    kNotABC = kABC ^ k1 //!< `!(A & B & C)` expression.
  };

  //! Creates an immediate that can be used by VPTERNLOG[D|Q] instructions
  //! by specifying the result bit for each of the 8 input combinations.
  static constexpr uint32_t make(uint32_t b000, uint32_t b001, uint32_t b010, uint32_t b011, uint32_t b100, uint32_t b101, uint32_t b110, uint32_t b111) noexcept {
    return (b000 << 0) | (b001 << 1) | (b010 << 2) | (b011 << 3) | (b100 << 4) | (b101 << 5) | (b110 << 6) | (b111 << 7);
  }

  //! Creates an immediate that can be used by VPTERNLOG[D|Q] instructions (truncates `x` to 8 bits).
  static constexpr uint32_t value(uint32_t x) noexcept { return x & 0xFF; }
  //! Negate an immediate that can be used by VPTERNLOG[D|Q] instructions (flips all 8 truth-table bits).
  static constexpr uint32_t negate(uint32_t x) noexcept { return x ^ 0xFF; }
  //! Creates an if/else logic that can be used by VPTERNLOG[D|Q] instructions (`condition ? a : b` per bit).
  static constexpr uint32_t ifElse(uint32_t condition, uint32_t a, uint32_t b) noexcept { return (condition & a) | (negate(condition) & b); }
}
2016
2017 //! \}
2018
2019 ASMJIT_END_SUB_NAMESPACE
2020
2021 #endif // _ASMJIT_X86_X86GLOBALS_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 // ----------------------------------------------------------------------------
7 // IMPORTANT: AsmJit now uses an external instruction database to populate
8 // static tables within this file. Perform the following steps to regenerate
9 // all tables enclosed by ${...}:
10 //
11 // 1. Install node.js environment <https://nodejs.org>
12 // 2. Go to asmjit/tools directory
13 // 3. Get the latest asmdb from <https://github.com/asmjit/asmdb> and
14 // copy/link the `asmdb` directory to `asmjit/tools/asmdb`.
15 // 4. Execute `node tablegen-x86.js`
16 //
17 // Instruction encoding and opcodes were added to the `x86inst.cpp` database
18 // manually in the past and they are not updated by the script as it became
19 // tricky. However, everything else is updated including instruction operands
20 // and tables required to validate them, instruction read/write information
21 // (including registers and flags), and all indexes to all tables.
22 // ----------------------------------------------------------------------------
23
24 #include "../core/api-build_p.h"
25 #ifdef ASMJIT_BUILD_X86
26
27 #include "../core/cpuinfo.h"
28 #include "../core/misc_p.h"
29 #include "../core/support.h"
30 #include "../x86/x86features.h"
31 #include "../x86/x86instapi_p.h"
32 #include "../x86/x86instdb_p.h"
33 #include "../x86/x86opcode_p.h"
34 #include "../x86/x86operand.h"
35
36 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
37
38 // ============================================================================
39 // [asmjit::x86::InstInternal - Text]
40 // ============================================================================
41
42 #ifndef ASMJIT_NO_TEXT
43 Error InstInternal::instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept {
44 ASMJIT_UNUSED(archId);
45
46 if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId)))
47 return DebugUtils::errored(kErrorInvalidInstruction);
48
49 const InstDB::InstInfo& info = InstDB::infoById(instId);
50 return output.appendString(InstDB::_nameData + info._nameDataIndex);
51 }
52
//! Translates an instruction name `s` of length `len` to an instruction id.
//!
//! Returns `Inst::kIdNone` when the name is null, empty, too long, or not
//! found. Passing `len == SIZE_MAX` means `s` is null-terminated and its
//! length is computed here. `archId` is unused (one shared name table).
uint32_t InstInternal::stringToInstId(uint32_t archId, const char* s, size_t len) noexcept {
  ASMJIT_UNUSED(archId);

  if (ASMJIT_UNLIKELY(!s))
    return Inst::kIdNone;

  if (len == SIZE_MAX)
    len = strlen(s);

  if (ASMJIT_UNLIKELY(len == 0 || len > InstDB::kMaxNameSize))
    return Inst::kIdNone;

  // Names are bucketed by first letter; the unsigned subtraction makes any
  // character below 'a' wrap to a large value, so one compare rejects both.
  uint32_t prefix = uint32_t(s[0]) - 'a';
  if (ASMJIT_UNLIKELY(prefix > 'z' - 'a'))
    return Inst::kIdNone;

  // A zero `start` means no instruction begins with this letter.
  uint32_t index = InstDB::instNameIndex[prefix].start;
  if (ASMJIT_UNLIKELY(!index))
    return Inst::kIdNone;

  const char* nameData = InstDB::_nameData;
  const InstDB::InstInfo* table = InstDB::_instInfoTable;

  const InstDB::InstInfo* base = table + index;
  const InstDB::InstInfo* end = table + InstDB::instNameIndex[prefix].end;

  // Binary search using the halving-`lim` idiom: `lim` counts remaining
  // candidates; each iteration probes the middle and either matches,
  // discards the lower half including the probe (advance `base`, `lim--`),
  // or discards the upper half (the `lim >>= 1` in the loop header).
  for (size_t lim = (size_t)(end - base); lim != 0; lim >>= 1) {
    const InstDB::InstInfo* cur = base + (lim >> 1);
    int result = Support::cmpInstName(nameData + cur[0]._nameDataIndex, s, len);

    if (result < 0) {
      base = cur + 1;
      lim--;
      continue;
    }

    if (result > 0)
      continue;

    return uint32_t((size_t)(cur - table));
  }

  return Inst::kIdNone;
}
97 #endif // !ASMJIT_NO_TEXT
98
99 // ============================================================================
100 // [asmjit::x86::InstInternal - Validate]
101 // ============================================================================
102
103 #ifndef ASMJIT_NO_VALIDATION
//! Per-mode (X86/X64) tables consulted by `InstInternal::validate()`.
struct X86ValidationData {
  //! Allowed registers by reg-type (x86::Reg::kType...), one physical-id bitmask per type.
  uint32_t allowedRegMask[Reg::kTypeMax + 1];
  //! Bitmask of register TYPES usable as a memory-operand base register.
  uint32_t allowedMemBaseRegs;
  //! Bitmask of register TYPES usable as a memory-operand index register.
  uint32_t allowedMemIndexRegs;
};
110
// Maps a register type (Reg::kType...) to the matching operand-signature
// flag (InstDB::kOp...); RIP has no operand flag and maps to kOpNone.
#define VALUE(X) \
  (X == Reg::kTypeGpbLo) ? InstDB::kOpGpbLo : \
  (X == Reg::kTypeGpbHi) ? InstDB::kOpGpbHi : \
  (X == Reg::kTypeGpw ) ? InstDB::kOpGpw : \
  (X == Reg::kTypeGpd ) ? InstDB::kOpGpd : \
  (X == Reg::kTypeGpq ) ? InstDB::kOpGpq : \
  (X == Reg::kTypeXmm ) ? InstDB::kOpXmm : \
  (X == Reg::kTypeYmm ) ? InstDB::kOpYmm : \
  (X == Reg::kTypeZmm ) ? InstDB::kOpZmm : \
  (X == Reg::kTypeMm ) ? InstDB::kOpMm : \
  (X == Reg::kTypeKReg ) ? InstDB::kOpKReg : \
  (X == Reg::kTypeSReg ) ? InstDB::kOpSReg : \
  (X == Reg::kTypeCReg ) ? InstDB::kOpCReg : \
  (X == Reg::kTypeDReg ) ? InstDB::kOpDReg : \
  (X == Reg::kTypeSt ) ? InstDB::kOpSt : \
  (X == Reg::kTypeBnd ) ? InstDB::kOpBnd : \
  (X == Reg::kTypeRip ) ? InstDB::kOpNone : InstDB::kOpNone
static const uint32_t _x86OpFlagFromRegType[Reg::kTypeMax + 1] = { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) };
#undef VALUE
130
// Physical-id bitmask allowed per register type in 32-bit mode - bit N set
// means physical id N is encodable (e.g. only 8 GP registers, so 0xFF).
// NOTE(review): SReg mask 0x7E skips id 0 - presumably id 0 is a "no segment"
// sentinel; confirm against x86operand.h.
#define REG_MASK_FROM_REG_TYPE_X86(X) \
  (X == Reg::kTypeGpbLo) ? 0x0000000Fu : \
  (X == Reg::kTypeGpbHi) ? 0x0000000Fu : \
  (X == Reg::kTypeGpw ) ? 0x000000FFu : \
  (X == Reg::kTypeGpd ) ? 0x000000FFu : \
  (X == Reg::kTypeGpq ) ? 0x000000FFu : \
  (X == Reg::kTypeXmm ) ? 0x000000FFu : \
  (X == Reg::kTypeYmm ) ? 0x000000FFu : \
  (X == Reg::kTypeZmm ) ? 0x000000FFu : \
  (X == Reg::kTypeMm ) ? 0x000000FFu : \
  (X == Reg::kTypeKReg ) ? 0x000000FFu : \
  (X == Reg::kTypeSReg ) ? 0x0000007Eu : \
  (X == Reg::kTypeCReg ) ? 0x0000FFFFu : \
  (X == Reg::kTypeDReg ) ? 0x000000FFu : \
  (X == Reg::kTypeSt ) ? 0x000000FFu : \
  (X == Reg::kTypeBnd ) ? 0x0000000Fu : \
  (X == Reg::kTypeRip ) ? 0x00000001u : 0u

// Same as above for 64-bit mode (16 GP registers, 32 XMM/YMM/ZMM registers).
#define REG_MASK_FROM_REG_TYPE_X64(X) \
  (X == Reg::kTypeGpbLo) ? 0x0000FFFFu : \
  (X == Reg::kTypeGpbHi) ? 0x0000000Fu : \
  (X == Reg::kTypeGpw ) ? 0x0000FFFFu : \
  (X == Reg::kTypeGpd ) ? 0x0000FFFFu : \
  (X == Reg::kTypeGpq ) ? 0x0000FFFFu : \
  (X == Reg::kTypeXmm ) ? 0xFFFFFFFFu : \
  (X == Reg::kTypeYmm ) ? 0xFFFFFFFFu : \
  (X == Reg::kTypeZmm ) ? 0xFFFFFFFFu : \
  (X == Reg::kTypeMm ) ? 0x000000FFu : \
  (X == Reg::kTypeKReg ) ? 0x000000FFu : \
  (X == Reg::kTypeSReg ) ? 0x0000007Eu : \
  (X == Reg::kTypeCReg ) ? 0x0000FFFFu : \
  (X == Reg::kTypeDReg ) ? 0x0000FFFFu : \
  (X == Reg::kTypeSt ) ? 0x000000FFu : \
  (X == Reg::kTypeBnd ) ? 0x0000000Fu : \
  (X == Reg::kTypeRip ) ? 0x00000001u : 0u

// 32-bit mode: base/index can be 16/32-bit GP registers (plus RIP/labels as
// base, and vector registers as index for VSIB addressing).
static const X86ValidationData _x86ValidationData = {
  { ASMJIT_LOOKUP_TABLE_32(REG_MASK_FROM_REG_TYPE_X86, 0) },
  (1u << Reg::kTypeGpw) | (1u << Reg::kTypeGpd) | (1u << Reg::kTypeRip) | (1u << Label::kLabelTag),
  (1u << Reg::kTypeGpw) | (1u << Reg::kTypeGpd) | (1u << Reg::kTypeXmm) | (1u << Reg::kTypeYmm) | (1u << Reg::kTypeZmm)
};

// 64-bit mode: base/index can be 32/64-bit GP registers instead.
static const X86ValidationData _x64ValidationData = {
  { ASMJIT_LOOKUP_TABLE_32(REG_MASK_FROM_REG_TYPE_X64, 0) },
  (1u << Reg::kTypeGpd) | (1u << Reg::kTypeGpq) | (1u << Reg::kTypeRip) | (1u << Label::kLabelTag),
  (1u << Reg::kTypeGpd) | (1u << Reg::kTypeGpq) | (1u << Reg::kTypeXmm) | (1u << Reg::kTypeYmm) | (1u << Reg::kTypeZmm)
};

#undef REG_MASK_FROM_REG_TYPE_X64
#undef REG_MASK_FROM_REG_TYPE_X86
181
182 static ASMJIT_INLINE bool x86IsZmmOrM512(const Operand_& op) noexcept {
183 return Reg::isZmm(op) || (op.isMem() && op.size() == 64);
184 }
185
186 static ASMJIT_INLINE bool x86CheckOSig(const InstDB::OpSignature& op, const InstDB::OpSignature& ref, bool& immOutOfRange) noexcept {
187 // Fail if operand types are incompatible.
188 uint32_t opFlags = op.opFlags;
189 if ((opFlags & ref.opFlags) == 0) {
190 // Mark temporarily `immOutOfRange` so we can return a more descriptive error later.
191 if ((opFlags & InstDB::kOpAllImm) && (ref.opFlags & InstDB::kOpAllImm)) {
192 immOutOfRange = true;
193 return true;
194 }
195
196 return false;
197 }
198
199 // Fail if memory specific flags and sizes do not match the signature.
200 uint32_t opMemFlags = op.memFlags;
201 if (opMemFlags != 0) {
202 uint32_t refMemFlags = ref.memFlags;
203 if ((refMemFlags & opMemFlags) == 0)
204 return false;
205
206 if ((refMemFlags & InstDB::kMemOpBaseOnly) && !(opMemFlags & InstDB::kMemOpBaseOnly))
207 return false;
208 }
209
210 // Specific register index.
211 if (opFlags & InstDB::kOpAllRegs) {
212 uint32_t refRegMask = ref.regMask;
213 if (refRegMask && !(op.regMask & refRegMask))
214 return false;
215 }
216
217 return true;
218 }
219
220 ASMJIT_FAVOR_SIZE Error InstInternal::validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept {
221 // Only called when `archId` matches X86 family.
222 ASMJIT_ASSERT(ArchInfo::isX86Family(archId));
223
224 const X86ValidationData* vd;
225 if (archId == ArchInfo::kIdX86)
226 vd = &_x86ValidationData;
227 else
228 vd = &_x64ValidationData;
229
230 uint32_t i;
231 uint32_t mode = InstDB::modeFromArchId(archId);
232
233 // Get the instruction data.
234 uint32_t instId = inst.id();
235 uint32_t options = inst.options();
236
237 if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId)))
238 return DebugUtils::errored(kErrorInvalidInstruction);
239
240 const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
241 const InstDB::CommonInfo& commonInfo = instInfo.commonInfo();
242
243 uint32_t iFlags = instInfo.flags();
244
245 // --------------------------------------------------------------------------
246 // [Validate LOCK|XACQUIRE|XRELEASE]
247 // --------------------------------------------------------------------------
248
249 const uint32_t kLockXAcqRel = Inst::kOptionXAcquire | Inst::kOptionXRelease;
250 if (options & (Inst::kOptionLock | kLockXAcqRel)) {
251 if (options & Inst::kOptionLock) {
252 if (ASMJIT_UNLIKELY(!(iFlags & InstDB::kFlagLock) && !(options & kLockXAcqRel)))
253 return DebugUtils::errored(kErrorInvalidLockPrefix);
254
255 if (ASMJIT_UNLIKELY(opCount < 1 || !operands[0].isMem()))
256 return DebugUtils::errored(kErrorInvalidLockPrefix);
257 }
258
259 if (options & kLockXAcqRel) {
260 if (ASMJIT_UNLIKELY(!(options & Inst::kOptionLock) || (options & kLockXAcqRel) == kLockXAcqRel))
261 return DebugUtils::errored(kErrorInvalidPrefixCombination);
262
263 if (ASMJIT_UNLIKELY((options & Inst::kOptionXAcquire) && !(iFlags & InstDB::kFlagXAcquire)))
264 return DebugUtils::errored(kErrorInvalidXAcquirePrefix);
265
266 if (ASMJIT_UNLIKELY((options & Inst::kOptionXRelease) && !(iFlags & InstDB::kFlagXRelease)))
267 return DebugUtils::errored(kErrorInvalidXReleasePrefix);
268 }
269 }
270
271 // Validate REP and REPNE prefixes.
272 const uint32_t kRepAny = Inst::kOptionRep | Inst::kOptionRepne;
273 if (options & kRepAny) {
274 if (ASMJIT_UNLIKELY((options & kRepAny) == kRepAny))
275 return DebugUtils::errored(kErrorInvalidPrefixCombination);
276
277 if (ASMJIT_UNLIKELY(!(iFlags & InstDB::kFlagRep)))
278 return DebugUtils::errored(kErrorInvalidRepPrefix);
279 }
280
281 // --------------------------------------------------------------------------
282 // [Translate Each Operand to the Corresponding OpSignature]
283 // --------------------------------------------------------------------------
284
285 InstDB::OpSignature oSigTranslated[Globals::kMaxOpCount];
286 uint32_t combinedOpFlags = 0;
287 uint32_t combinedRegMask = 0;
288 const Mem* memOp = nullptr;
289
290 for (i = 0; i < opCount; i++) {
291 const Operand_& op = operands[i];
292 if (op.opType() == Operand::kOpNone)
293 break;
294
295 uint32_t opFlags = 0;
296 uint32_t memFlags = 0;
297 uint32_t regMask = 0;
298
299 switch (op.opType()) {
300 case Operand::kOpReg: {
301 uint32_t regType = op.as<BaseReg>().type();
302 if (ASMJIT_UNLIKELY(regType >= Reg::kTypeCount))
303 return DebugUtils::errored(kErrorInvalidRegType);
304
305 opFlags = _x86OpFlagFromRegType[regType];
306 if (ASMJIT_UNLIKELY(opFlags == 0))
307 return DebugUtils::errored(kErrorInvalidRegType);
308
309 // If `regId` is equal or greater than Operand::kVirtIdMin it means
310 // that the register is virtual and its index will be assigned later
311 // by the register allocator. We must pass unless asked to disallow
312 // virtual registers.
313 // TODO: We need an option to refuse virtual regs here.
314 uint32_t regId = op.id();
315 if (regId < Operand::kVirtIdMin) {
316 if (ASMJIT_UNLIKELY(regId >= 32))
317 return DebugUtils::errored(kErrorInvalidPhysId);
318
319 if (ASMJIT_UNLIKELY(Support::bitTest(vd->allowedRegMask[regType], regId) == 0))
320 return DebugUtils::errored(kErrorInvalidPhysId);
321
322 regMask = Support::bitMask(regId);
323 combinedRegMask |= regMask;
324 }
325 else {
326 regMask = 0xFFFFFFFFu;
327 }
328 break;
329 }
330
331 // TODO: Validate base and index and combine these with `combinedRegMask`.
332 case Operand::kOpMem: {
333 const Mem& m = op.as<Mem>();
334 memOp = &m;
335
336 uint32_t memSize = m.size();
337 uint32_t baseType = m.baseType();
338 uint32_t indexType = m.indexType();
339
340 if (m.segmentId() > 6)
341 return DebugUtils::errored(kErrorInvalidSegment);
342
343 // Validate AVX-512 broadcast {1tox}.
344 if (m.hasBroadcast()) {
345 if (memSize != 0) {
346 // If the size is specified it has to match the broadcast size.
347 if (ASMJIT_UNLIKELY(commonInfo.hasAvx512B32() && memSize != 4))
348 return DebugUtils::errored(kErrorInvalidBroadcast);
349
350 if (ASMJIT_UNLIKELY(commonInfo.hasAvx512B64() && memSize != 8))
351 return DebugUtils::errored(kErrorInvalidBroadcast);
352 }
353 else {
354 // If there is no size we implicitly calculate it so we can validate N in {1toN} properly.
355 memSize = commonInfo.hasAvx512B32() ? 4 : 8;
356 }
357
358 memSize <<= m.getBroadcast();
359 }
360
361 if (baseType) {
362 uint32_t baseId = m.baseId();
363
364 if (m.isRegHome()) {
365 // Home address of a virtual register. In such case we don't want to
366 // validate the type of the base register as it will always be patched
367 // to ESP|RSP.
368 }
369 else {
370 if (ASMJIT_UNLIKELY((vd->allowedMemBaseRegs & (1u << baseType)) == 0))
371 return DebugUtils::errored(kErrorInvalidAddress);
372 }
373
374 // Create information that will be validated only if this is an implicit
375 // memory operand. Basically only usable for string instructions and other
376 // instructions where memory operand is implicit and has 'seg:[reg]' form.
377 if (baseId < Operand::kVirtIdMin) {
378 // Physical base id.
379 regMask = Support::bitMask(baseId);
380 combinedRegMask |= regMask;
381 }
382 else {
383 // Virtual base id - fill the whole mask for implicit mem validation.
384 // The register is not assigned yet, so we cannot predict the phys id.
385 regMask = 0xFFFFFFFFu;
386 }
387
388 if (!indexType && !m.offsetLo32())
389 memFlags |= InstDB::kMemOpBaseOnly;
390 }
391 else {
392 // Base is a 64-bit address.
393 int64_t offset = m.offset();
394 if (!Support::isInt32(offset)) {
395 if (mode == InstDB::kModeX86) {
396 // 32-bit mode: Make sure that the address is either `int32_t` or `uint32_t`.
397 if (!Support::isUInt32(offset))
398 return DebugUtils::errored(kErrorInvalidAddress64Bit);
399 }
400 else {
401 // 64-bit mode: Zero extension is allowed if the address has 32-bit index
402 // register or the address has no index register (it's still encodable).
403 if (indexType) {
404 if (!Support::isUInt32(offset))
405 return DebugUtils::errored(kErrorInvalidAddress64Bit);
406
407 if (indexType != Reg::kTypeGpd)
408 return DebugUtils::errored(kErrorInvalidAddress64BitZeroExtension);
409 }
410 else {
411 // We don't validate absolute 64-bit addresses without an index register
412 // as this also depends on the target's base address. We don't have the
413 // information to do it at this moment.
414 }
415 }
416 }
417 }
418
419 if (indexType) {
420 if (ASMJIT_UNLIKELY((vd->allowedMemIndexRegs & (1u << indexType)) == 0))
421 return DebugUtils::errored(kErrorInvalidAddress);
422
423 if (indexType == Reg::kTypeXmm) {
424 opFlags |= InstDB::kOpVm;
425 memFlags |= InstDB::kMemOpVm32x | InstDB::kMemOpVm64x;
426 }
427 else if (indexType == Reg::kTypeYmm) {
428 opFlags |= InstDB::kOpVm;
429 memFlags |= InstDB::kMemOpVm32y | InstDB::kMemOpVm64y;
430 }
431 else if (indexType == Reg::kTypeZmm) {
432 opFlags |= InstDB::kOpVm;
433 memFlags |= InstDB::kMemOpVm32z | InstDB::kMemOpVm64z;
434 }
435 else {
436 opFlags |= InstDB::kOpMem;
437 if (baseType)
438 memFlags |= InstDB::kMemOpMib;
439 }
440
441 // [RIP + {XMM|YMM|ZMM}] is not allowed.
442 if (baseType == Reg::kTypeRip && (opFlags & InstDB::kOpVm))
443 return DebugUtils::errored(kErrorInvalidAddress);
444
445 uint32_t indexId = m.indexId();
446 if (indexId < Operand::kVirtIdMin)
447 combinedRegMask |= Support::bitMask(indexId);
448
449 // Only used for implicit memory operands having 'seg:[reg]' form, so clear it.
450 regMask = 0;
451 }
452 else {
453 opFlags |= InstDB::kOpMem;
454 }
455
456 switch (memSize) {
457 case 0: memFlags |= InstDB::kMemOpAny ; break;
458 case 1: memFlags |= InstDB::kMemOpM8 ; break;
459 case 2: memFlags |= InstDB::kMemOpM16 ; break;
460 case 4: memFlags |= InstDB::kMemOpM32 ; break;
461 case 6: memFlags |= InstDB::kMemOpM48 ; break;
462 case 8: memFlags |= InstDB::kMemOpM64 ; break;
463 case 10: memFlags |= InstDB::kMemOpM80 ; break;
464 case 16: memFlags |= InstDB::kMemOpM128; break;
465 case 32: memFlags |= InstDB::kMemOpM256; break;
466 case 64: memFlags |= InstDB::kMemOpM512; break;
467 default:
468 return DebugUtils::errored(kErrorInvalidOperandSize);
469 }
470
471 break;
472 }
473
474 case Operand::kOpImm: {
475 uint64_t immValue = op.as<Imm>().u64();
476 uint32_t immFlags = 0;
477
478 if (int64_t(immValue) >= 0) {
479 if (immValue <= 0x7u)
480 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpI32 | InstDB::kOpU32 |
481 InstDB::kOpI16 | InstDB::kOpU16 | InstDB::kOpI8 | InstDB::kOpU8 |
482 InstDB::kOpI4 | InstDB::kOpU4 ;
483 else if (immValue <= 0xFu)
484 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpI32 | InstDB::kOpU32 |
485 InstDB::kOpI16 | InstDB::kOpU16 | InstDB::kOpI8 | InstDB::kOpU8 |
486 InstDB::kOpU4 ;
487 else if (immValue <= 0x7Fu)
488 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpI32 | InstDB::kOpU32 |
489 InstDB::kOpI16 | InstDB::kOpU16 | InstDB::kOpI8 | InstDB::kOpU8 ;
490 else if (immValue <= 0xFFu)
491 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpI32 | InstDB::kOpU32 |
492 InstDB::kOpI16 | InstDB::kOpU16 | InstDB::kOpU8 ;
493 else if (immValue <= 0x7FFFu)
494 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpI32 | InstDB::kOpU32 |
495 InstDB::kOpI16 | InstDB::kOpU16 ;
496 else if (immValue <= 0xFFFFu)
497 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpI32 | InstDB::kOpU32 |
498 InstDB::kOpU16 ;
499 else if (immValue <= 0x7FFFFFFFu)
500 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpI32 | InstDB::kOpU32;
501 else if (immValue <= 0xFFFFFFFFu)
502 immFlags = InstDB::kOpI64 | InstDB::kOpU64 | InstDB::kOpU32;
503 else if (immValue <= 0x7FFFFFFFFFFFFFFFu)
504 immFlags = InstDB::kOpI64 | InstDB::kOpU64;
505 else
506 immFlags = InstDB::kOpU64;
507 }
508 else {
509 immValue = Support::neg(immValue);
510 if (immValue <= 0x8u)
511 immFlags = InstDB::kOpI64 | InstDB::kOpI32 | InstDB::kOpI16 | InstDB::kOpI8 | InstDB::kOpI4;
512 else if (immValue <= 0x80u)
513 immFlags = InstDB::kOpI64 | InstDB::kOpI32 | InstDB::kOpI16 | InstDB::kOpI8;
514 else if (immValue <= 0x8000u)
515 immFlags = InstDB::kOpI64 | InstDB::kOpI32 | InstDB::kOpI16;
516 else if (immValue <= 0x80000000u)
517 immFlags = InstDB::kOpI64 | InstDB::kOpI32;
518 else
519 immFlags = InstDB::kOpI64;
520 }
521 opFlags |= immFlags;
522 break;
523 }
524
525 case Operand::kOpLabel: {
526 opFlags |= InstDB::kOpRel8 | InstDB::kOpRel32;
527 break;
528 }
529
530 default:
531 return DebugUtils::errored(kErrorInvalidState);
532 }
533
534 InstDB::OpSignature& oSigDst = oSigTranslated[i];
535 oSigDst.opFlags = opFlags;
536 oSigDst.memFlags = uint16_t(memFlags);
537 oSigDst.regMask = uint8_t(regMask & 0xFFu);
538 combinedOpFlags |= opFlags;
539 }
540
541 // Decrease the number of operands of those that are none. This is important
542 // as Assembler and Compiler may just pass more operands padded with none
543 // (which means that no operand is given at that index). However, validate
544 // that there are no gaps (like [reg, none, reg] or [none, reg]).
545 if (i < opCount) {
546 while (--opCount > i)
547 if (ASMJIT_UNLIKELY(!operands[opCount].isNone()))
548 return DebugUtils::errored(kErrorInvalidInstruction);
549 }
550
551 // Validate X86 and X64 specific cases.
552 if (mode == InstDB::kModeX86) {
553 // Illegal use of 64-bit register in 32-bit mode.
554 if (ASMJIT_UNLIKELY((combinedOpFlags & InstDB::kOpGpq) != 0))
555 return DebugUtils::errored(kErrorInvalidUseOfGpq);
556 }
557 else {
558 // Illegal use of a high 8-bit register with REX prefix.
559 if (ASMJIT_UNLIKELY((combinedOpFlags & InstDB::kOpGpbHi) != 0 && (combinedRegMask & 0xFFFFFF00u) != 0))
560 return DebugUtils::errored(kErrorInvalidUseOfGpbHi);
561 }
562
563 // --------------------------------------------------------------------------
564 // [Validate Instruction Signature by Comparing Against All `iSig` Rows]
565 // --------------------------------------------------------------------------
566
567 const InstDB::InstSignature* iSig = InstDB::_instSignatureTable + commonInfo._iSignatureIndex;
568 const InstDB::InstSignature* iEnd = iSig + commonInfo._iSignatureCount;
569
570 if (iSig != iEnd) {
571 const InstDB::OpSignature* opSignatureTable = InstDB::_opSignatureTable;
572
573 // If set it means that we matched a signature where only immediate value
574 // was out of bounds. We can return a more descriptive error if we know this.
575 bool globalImmOutOfRange = false;
576
577 do {
578 // Check if the architecture is compatible.
579 if ((iSig->modes & mode) == 0)
580 continue;
581
582 // Compare the operands table with reference operands.
583 uint32_t j = 0;
584 uint32_t iSigCount = iSig->opCount;
585 bool localImmOutOfRange = false;
586
587 if (iSigCount == opCount) {
588 for (j = 0; j < opCount; j++)
589 if (!x86CheckOSig(oSigTranslated[j], opSignatureTable[iSig->operands[j]], localImmOutOfRange))
590 break;
591 }
592 else if (iSigCount - iSig->implicit == opCount) {
593 uint32_t r = 0;
594 for (j = 0; j < opCount && r < iSigCount; j++, r++) {
595 const InstDB::OpSignature* oChk = oSigTranslated + j;
596 const InstDB::OpSignature* oRef;
597 Next:
598 oRef = opSignatureTable + iSig->operands[r];
599 // Skip implicit.
600 if ((oRef->opFlags & InstDB::kOpImplicit) != 0) {
601 if (++r >= iSigCount)
602 break;
603 else
604 goto Next;
605 }
606
607 if (!x86CheckOSig(*oChk, *oRef, localImmOutOfRange))
608 break;
609 }
610 }
611
612 if (j == opCount) {
613 if (!localImmOutOfRange) {
614 // Match, must clear possible `globalImmOutOfRange`.
615 globalImmOutOfRange = false;
616 break;
617 }
618 globalImmOutOfRange = localImmOutOfRange;
619 }
620 } while (++iSig != iEnd);
621
622 if (iSig == iEnd) {
623 if (globalImmOutOfRange)
624 return DebugUtils::errored(kErrorInvalidImmediate);
625 else
626 return DebugUtils::errored(kErrorInvalidInstruction);
627 }
628 }
629
630 // --------------------------------------------------------------------------
631 // [Validate AVX512 Options]
632 // --------------------------------------------------------------------------
633
634 const RegOnly& extraReg = inst.extraReg();
635 const uint32_t kAvx512Options = Inst::kOptionZMask |
636 Inst::kOptionER |
637 Inst::kOptionSAE ;
638
639 if (options & kAvx512Options) {
640 if (commonInfo.hasFlag(InstDB::kFlagEvex)) {
641 // Validate AVX-512 {z}.
642 if ((options & Inst::kOptionZMask)) {
643 if (ASMJIT_UNLIKELY((options & Inst::kOptionZMask) != 0 && !commonInfo.hasAvx512Z()))
644 return DebugUtils::errored(kErrorInvalidKZeroUse);
645 }
646
647 // Validate AVX-512 {sae} and {er}.
648 if (options & (Inst::kOptionSAE | Inst::kOptionER)) {
649 // Rounding control is impossible if the instruction is not reg-to-reg.
650 if (ASMJIT_UNLIKELY(memOp))
651 return DebugUtils::errored(kErrorInvalidEROrSAE);
652
653 // Check if {sae} or {er} is supported by the instruction.
654 if (options & Inst::kOptionER) {
655 // NOTE: if both {sae} and {er} are set, we don't care, as {sae} is implied.
656 if (ASMJIT_UNLIKELY(!commonInfo.hasAvx512ER()))
657 return DebugUtils::errored(kErrorInvalidEROrSAE);
658 }
659 else {
660 if (ASMJIT_UNLIKELY(!commonInfo.hasAvx512SAE()))
661 return DebugUtils::errored(kErrorInvalidEROrSAE);
662 }
663
664 // {sae} and {er} are defined for either scalar ops or vector ops that
665 // require LL to be 10 (512-bit vector operations). We don't need any
666 // more bits in the instruction database to be able to validate this, as
667 // each AVX512 instruction that has broadcast is vector instruction (in
668 // this case we require zmm registers), otherwise it's a scalar instruction,
669 // which is valid.
670 if (commonInfo.hasAvx512B()) {
671 // Supports broadcast, thus we require LL to be '10', which means there
672 // have to be ZMM registers used. We don't calculate LL here, but we know
673 // that it would be '10' if there is at least one ZMM register used.
674
675 // There is no {er}/{sae}-enabled instruction with less than two operands.
676 ASMJIT_ASSERT(opCount >= 2);
677 if (ASMJIT_UNLIKELY(!x86IsZmmOrM512(operands[0]) && !x86IsZmmOrM512(operands[1])))
678 return DebugUtils::errored(kErrorInvalidEROrSAE);
679 }
680 }
681 }
682 else {
683 // Not AVX512 instruction - maybe OpExtra is xCX register used by REP/REPNE
684 // prefix. Otherwise the instruction is invalid.
685 if ((options & kAvx512Options) || (options & kRepAny) == 0)
686 return DebugUtils::errored(kErrorInvalidInstruction);
687 }
688 }
689
690 // --------------------------------------------------------------------------
691 // [Validate {Extra} Register]
692 // --------------------------------------------------------------------------
693
694 if (extraReg.isReg()) {
695 if (options & kRepAny) {
696 // Validate REP|REPNE {cx|ecx|rcx}.
697 if (ASMJIT_UNLIKELY(iFlags & InstDB::kFlagRepIgnored))
698 return DebugUtils::errored(kErrorInvalidExtraReg);
699
700 if (extraReg.isPhysReg()) {
701 if (ASMJIT_UNLIKELY(extraReg.id() != Gp::kIdCx))
702 return DebugUtils::errored(kErrorInvalidExtraReg);
703 }
704
705 // The type of the {...} register must match the type of the base register
706 // of memory operand. So if the memory operand uses 32-bit register the
707 // count register must also be 32-bit, etc...
708 if (ASMJIT_UNLIKELY(!memOp || extraReg.type() != memOp->baseType()))
709 return DebugUtils::errored(kErrorInvalidExtraReg);
710 }
711 else if (commonInfo.hasFlag(InstDB::kFlagEvex)) {
712 // Validate AVX-512 {k}.
713 if (ASMJIT_UNLIKELY(extraReg.type() != Reg::kTypeKReg))
714 return DebugUtils::errored(kErrorInvalidExtraReg);
715
716 if (ASMJIT_UNLIKELY(extraReg.id() == 0 || !commonInfo.hasAvx512K()))
717 return DebugUtils::errored(kErrorInvalidKMaskUse);
718 }
719 else {
720 return DebugUtils::errored(kErrorInvalidExtraReg);
721 }
722 }
723
724 return kErrorOk;
725 }
726 #endif // !ASMJIT_NO_VALIDATION
727
728 // ============================================================================
729 // [asmjit::x86::InstInternal - QueryRWInfo]
730 // ============================================================================
731
732 #ifndef ASMJIT_NO_INTROSPECTION
// Mask of addressable bytes per register group (indexed by `Reg::kGroup*`).
// Used to clamp zero-extension byte masks so they never exceed the size of
// the physical register being written (see `rwZeroExtendNonVec`).
static const uint64_t rwRegGroupByteMask[Reg::kGroupCount] = {
  0x00000000000000FFu, // GP.
  0xFFFFFFFFFFFFFFFFu, // XMM|YMM|ZMM.
  0x00000000000000FFu, // MM.
  0x00000000000000FFu, // KReg.
  0x0000000000000003u, // SReg.
  0x00000000000000FFu, // CReg.
  0x00000000000000FFu, // DReg.
  0x00000000000003FFu, // St().
  0x000000000000FFFFu, // BND.
  0x00000000000000FFu  // RIP.
};
745
746 // TODO: Make universal.
747 static ASMJIT_INLINE uint32_t gpRegSizeByArchId(uint32_t archId) noexcept {
748 static const uint8_t table[] = { 0, 4, 8, 4, 8 };
749 return table[archId];
750 }
751
752 static ASMJIT_INLINE void rwZeroExtendGp(OpRWInfo& opRwInfo, const Gp& reg, uint32_t nativeGpSize) noexcept {
753 ASMJIT_ASSERT(BaseReg::isGp(reg.as<Operand>()));
754 if (reg.size() + 4 == nativeGpSize) {
755 opRwInfo.addOpFlags(OpRWInfo::kZExt);
756 opRwInfo.setExtendByteMask(~opRwInfo.writeByteMask() & 0xFFu);
757 }
758 }
759
760 static ASMJIT_INLINE void rwZeroExtendAvxVec(OpRWInfo& opRwInfo, const Vec& reg) noexcept {
761 ASMJIT_UNUSED(reg);
762
763 uint64_t msk = ~Support::fillTrailingBits(opRwInfo.writeByteMask());
764 if (msk) {
765 opRwInfo.addOpFlags(OpRWInfo::kZExt);
766 opRwInfo.setExtendByteMask(msk);
767 }
768 }
769
770 static ASMJIT_INLINE void rwZeroExtendNonVec(OpRWInfo& opRwInfo, const Reg& reg) noexcept {
771 uint64_t msk = ~Support::fillTrailingBits(opRwInfo.writeByteMask()) & rwRegGroupByteMask[reg.group()];
772 if (msk) {
773 opRwInfo.addOpFlags(OpRWInfo::kZExt);
774 opRwInfo.setExtendByteMask(msk);
775 }
776 }
777
// Queries read/write information of the instruction `inst` combined with the
// given `operands` and fills `out`. Returns `kErrorOk` on success, or
// `kErrorInvalidInstruction` if the instruction id is invalid or the operand
// combination doesn't match any of the special-cased categories handled below.
Error InstInternal::queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept {
  using namespace Status;

  // Only called when `archId` matches X86 family.
  ASMJIT_ASSERT(ArchInfo::isX86Family(archId));

  // Get the instruction data.
  uint32_t instId = inst.id();
  if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId)))
    return DebugUtils::errored(kErrorInvalidInstruction);

  // Read/Write flags.
  const InstDB::CommonInfoTableB& tabB = InstDB::_commonInfoTableB[InstDB::_instInfoTable[instId]._commonInfoIndexB];
  const InstDB::RWFlagsInfoTable& rwFlags = InstDB::_rwFlagsInfoTable[tabB._rwFlagsIndex];

  // Each RWInfo contains two indexes
  //   [0] - OpCount == 2
  //   [1] - OpCount != 2
  // They are used this way as there are instructions that have 2 and 3
  // operand overloads that use different semantics. So instead of adding
  // more special cases we just separated their data tables.
  const InstDB::RWInfo& instRwInfo = InstDB::rwInfo[InstDB::rwInfoIndex[instId * 2u + uint32_t(opCount != 2)]];
  const InstDB::RWInfoRm& instRmInfo = InstDB::rwInfoRm[instRwInfo.rmInfo];

  out._instFlags = 0;
  out._opCount = uint8_t(opCount);
  out._rmFeature = instRmInfo.rmFeature;
  out._extraReg.reset();
  out._readFlags = rwFlags.readFlags;
  out._writeFlags = rwFlags.writeFlags;

  uint32_t nativeGpSize = gpRegSizeByArchId(archId);

  // Short local aliases of commonly combined `OpRWInfo` flags.
  constexpr uint32_t R = OpRWInfo::kRead;
  constexpr uint32_t W = OpRWInfo::kWrite;
  constexpr uint32_t X = OpRWInfo::kRW;
  constexpr uint32_t RegM = OpRWInfo::kRegMem;
  constexpr uint32_t RegPhys = OpRWInfo::kRegPhysId;
  constexpr uint32_t MibRead = OpRWInfo::kMemBaseRead | OpRWInfo::kMemIndexRead;

  // Fast path - most instructions are table-driven (generic category).
  if (ASMJIT_LIKELY(instRwInfo.category == InstDB::RWInfo::kCategoryGeneric)) {
    uint32_t i;
    uint32_t rmOpsMask = 0;
    uint32_t rmMaxSize = 0;

    for (i = 0; i < opCount; i++) {
      OpRWInfo& op = out._operands[i];
      const Operand_& srcOp = operands[i];
      const InstDB::RWInfoOp& rwOpData = InstDB::rwInfoOp[instRwInfo.opInfoIndex[i]];

      if (!srcOp.isRegOrMem()) {
        op.reset();
        continue;
      }

      op._opFlags = rwOpData.flags & ~(OpRWInfo::kZExt);
      op._physId = rwOpData.physId;
      op._rmSize = 0;
      op._resetReserved();

      // Table masks of 0 mean "all bytes of the operand" - fill them in from
      // the operand's size.
      uint64_t rByteMask = rwOpData.rByteMask;
      uint64_t wByteMask = rwOpData.wByteMask;

      if (op.isRead() && !rByteMask) rByteMask = Support::lsbMask<uint64_t>(srcOp.size());
      if (op.isWrite() && !wByteMask) wByteMask = Support::lsbMask<uint64_t>(srcOp.size());

      op._readByteMask = rByteMask;
      op._writeByteMask = wByteMask;
      op._extendByteMask = 0;

      if (srcOp.isReg()) {
        // Zero extension.
        if (op.isWrite()) {
          if (srcOp.as<Reg>().isGp()) {
            // GP registers on X64 are special:
            // - 8-bit and 16-bit writes aren't zero extended.
            // - 32-bit writes ARE zero extended.
            rwZeroExtendGp(op, srcOp.as<Gp>(), nativeGpSize);
          }
          else if (rwOpData.flags & OpRWInfo::kZExt) {
            // Otherwise follow ZExt.
            rwZeroExtendNonVec(op, srcOp.as<Gp>());
          }
        }

        // Aggregate values required to calculate valid Reg/M info.
        rmMaxSize = Support::max(rmMaxSize, srcOp.size());
        rmOpsMask |= Support::bitMask<uint32_t>(i);
      }
      else {
        op.addOpFlags(MibRead);
      }
    }

    // Second pass - annotate register operands that could also be memory
    // (Reg/M forms) with the memory size they would have.
    rmOpsMask &= instRmInfo.rmOpsMask;
    if (rmOpsMask) {
      Support::BitWordIterator<uint32_t> it(rmOpsMask);
      do {
        i = it.next();

        OpRWInfo& op = out._operands[i];
        op.addOpFlags(RegM);

        switch (instRmInfo.category) {
          case InstDB::RWInfoRm::kCategoryFixed:
            op.setRmSize(instRmInfo.fixedSize);
            break;
          case InstDB::RWInfoRm::kCategoryConsistent:
            op.setRmSize(operands[i].size());
            break;
          case InstDB::RWInfoRm::kCategoryHalf:
            op.setRmSize(rmMaxSize / 2u);
            break;
          case InstDB::RWInfoRm::kCategoryQuarter:
            op.setRmSize(rmMaxSize / 4u);
            break;
          case InstDB::RWInfoRm::kCategoryEighth:
            op.setRmSize(rmMaxSize / 8u);
            break;
        }
      } while (it.hasNext());
    }

    return kErrorOk;
  }

  // Special-cased categories - instructions whose RW semantics cannot be
  // described by the generic tables.
  switch (instRwInfo.category) {
    case InstDB::RWInfo::kCategoryMov: {
      // Special case for 'mov' instruction. Here there are some variants that
      // we have to handle as mov can be used to move between GP, segment, control
      // and debug registers. Moving between GP registers also allow to use memory
      // operand.

      if (opCount == 2) {
        if (operands[0].isReg() && operands[1].isReg()) {
          const Reg& o0 = operands[0].as<Reg>();
          const Reg& o1 = operands[1].as<Reg>();

          if (o0.isGp() && o1.isGp()) {
            out._operands[0].reset(W | RegM, operands[0].size());
            out._operands[1].reset(R | RegM, operands[1].size());

            rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);
            return kErrorOk;
          }

          if (o0.isGp() && o1.isSReg()) {
            // Segment registers are 2 bytes; a GP destination of native size
            // is written, but the Reg/M form only stores 2 bytes.
            out._operands[0].reset(W | RegM, nativeGpSize);
            out._operands[0].setRmSize(2);
            out._operands[1].reset(R, 2);
            return kErrorOk;
          }

          if (o0.isSReg() && o1.isGp()) {
            out._operands[0].reset(W, 2);
            out._operands[1].reset(R | RegM, 2);
            out._operands[1].setRmSize(2);
            return kErrorOk;
          }

          if (o0.isGp() && (o1.isCReg() || o1.isDReg())) {
            out._operands[0].reset(W, nativeGpSize);
            out._operands[1].reset(R, nativeGpSize);
            // All arithmetic flags are modeled as written - presumably because
            // MOV from CR/DR leaves them undefined (see Intel SDM).
            out._writeFlags = kOF | kSF | kZF | kAF | kPF | kCF;
            return kErrorOk;
          }

          if ((o0.isCReg() || o0.isDReg()) && o1.isGp()) {
            out._operands[0].reset(W, nativeGpSize);
            out._operands[1].reset(R, nativeGpSize);
            out._writeFlags = kOF | kSF | kZF | kAF | kPF | kCF;
            return kErrorOk;
          }
        }

        if (operands[0].isReg() && operands[1].isMem()) {
          const Reg& o0 = operands[0].as<Reg>();
          const Mem& o1 = operands[1].as<Mem>();

          if (o0.isGp()) {
            // The 64-bit absolute-address form binds the destination to the
            // accumulator (physical id `Gp::kIdAx`).
            if (!o1.isOffset64Bit())
              out._operands[0].reset(W, o0.size());
            else
              out._operands[0].reset(W | RegPhys, o0.size(), Gp::kIdAx);

            out._operands[1].reset(R | MibRead, o0.size());
            rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);
            return kErrorOk;
          }

          if (o0.isSReg()) {
            out._operands[0].reset(W, 2);
            out._operands[1].reset(R, 2);
            return kErrorOk;
          }
        }

        if (operands[0].isMem() && operands[1].isReg()) {
          const Mem& o0 = operands[0].as<Mem>();
          const Reg& o1 = operands[1].as<Reg>();

          if (o1.isGp()) {
            out._operands[0].reset(W | MibRead, o1.size());
            // The 64-bit absolute-address form binds the source to the
            // accumulator (physical id `Gp::kIdAx`).
            if (!o0.isOffset64Bit())
              out._operands[1].reset(R, o1.size());
            else
              out._operands[1].reset(R | RegPhys, o1.size(), Gp::kIdAx);
            return kErrorOk;
          }

          if (o1.isSReg()) {
            out._operands[0].reset(W | MibRead, 2);
            out._operands[1].reset(R, 2);
            return kErrorOk;
          }
        }

        if (Reg::isGp(operands[0]) && operands[1].isImm()) {
          const Reg& o0 = operands[0].as<Reg>();
          out._operands[0].reset(W | RegM, o0.size());
          out._operands[1].reset();

          rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);
          return kErrorOk;
        }

        if (operands[0].isMem() && operands[1].isImm()) {
          // NOTE(review): a memory operand is cast to `Reg` here. Only the
          // generic `size()` accessor (shared by all operands) is used, so
          // behavior is unaffected, but `as<Mem>()` would be the expected cast.
          const Reg& o0 = operands[0].as<Reg>();
          out._operands[0].reset(W | MibRead, o0.size());
          out._operands[1].reset();
          return kErrorOk;
        }
      }
      break;
    }

    case InstDB::RWInfo::kCategoryImul: {
      // Special case for 'imul' instruction.
      //
      // There are 3 variants in general:
      //
      // 1. Standard multiplication: 'A = A * B'.
      // 2. Multiplication with imm: 'A = B * C'.
      // 3. Extended multiplication: 'A:B = B * C'.

      if (opCount == 2) {
        if (operands[0].isReg() && operands[1].isImm()) {
          out._operands[0].reset(X, operands[0].size());
          out._operands[1].reset();

          rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);
          return kErrorOk;
        }

        if (Reg::isGpw(operands[0]) && operands[1].size() == 1) {
          // imul ax, r8/m8 <- AX = AL * r8/m8
          out._operands[0].reset(X | RegPhys, 2, Gp::kIdAx);
          out._operands[0].setReadByteMask(Support::lsbMask<uint64_t>(1));
          out._operands[1].reset(R | RegM, 1);
        }
        else {
          // imul r?, r?/m?
          out._operands[0].reset(X, operands[0].size());
          out._operands[1].reset(R | RegM, operands[0].size());
          rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);
        }

        if (operands[1].isMem())
          out._operands[1].addOpFlags(MibRead);
        return kErrorOk;
      }

      if (opCount == 3) {
        if (operands[2].isImm()) {
          // Variant 2: 'A = B * C' (imm multiplier).
          out._operands[0].reset(W, operands[0].size());
          out._operands[1].reset(R | RegM, operands[1].size());
          out._operands[2].reset();

          rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);
          if (operands[1].isMem())
            out._operands[1].addOpFlags(MibRead);
          return kErrorOk;
        }
        else {
          // Variant 3: 'A:B = B * C' - result split across DX:AX pair.
          out._operands[0].reset(W | RegPhys, operands[0].size(), Gp::kIdDx);
          out._operands[1].reset(X | RegPhys, operands[1].size(), Gp::kIdAx);
          out._operands[2].reset(R | RegM, operands[2].size());

          rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);
          rwZeroExtendGp(out._operands[1], operands[1].as<Gp>(), nativeGpSize);
          if (operands[2].isMem())
            out._operands[2].addOpFlags(MibRead);
          return kErrorOk;
        }
      }
      break;
    }

    case InstDB::RWInfo::kCategoryMovh64: {
      // Special case for 'movhpd|movhps' instructions. Note that this is only
      // required for legacy (non-AVX) variants as AVX instructions use either
      // 2 or 3 operands that use `kCategoryGeneric`.
      if (opCount == 2) {
        if (BaseReg::isVec(operands[0]) && operands[1].isMem()) {
          // Only bytes 8..15 (the high half) of the destination are written.
          out._operands[0].reset(W, 8);
          out._operands[0].setWriteByteMask(Support::lsbMask<uint64_t>(8) << 8);
          out._operands[1].reset(R | MibRead, 8);
          return kErrorOk;
        }

        if (operands[0].isMem() && BaseReg::isVec(operands[1])) {
          // Only bytes 8..15 (the high half) of the source are read.
          out._operands[0].reset(W | MibRead, 8);
          out._operands[1].reset(R, 8);
          out._operands[1].setReadByteMask(Support::lsbMask<uint64_t>(8) << 8);
          return kErrorOk;
        }
      }
      break;
    }

    case InstDB::RWInfo::kCategoryVmaskmov: {
      // Special case for 'vmaskmovpd|vmaskmovps|vpmaskmovd|vpmaskmovq' instructions.
      if (opCount == 3) {
        if (BaseReg::isVec(operands[0]) && BaseReg::isVec(operands[1]) && operands[2].isMem()) {
          out._operands[0].reset(W, operands[0].size());
          out._operands[1].reset(R, operands[1].size());
          // The memory operand's size is taken from the mask register operand.
          out._operands[2].reset(R | MibRead, operands[1].size());

          rwZeroExtendAvxVec(out._operands[0], operands[0].as<Vec>());
          return kErrorOk;
        }

        if (operands[0].isMem() && BaseReg::isVec(operands[1]) && BaseReg::isVec(operands[2])) {
          // The memory destination is modeled as read-write (X), presumably
          // because only mask-selected elements are stored.
          out._operands[0].reset(X | MibRead, operands[1].size());
          out._operands[1].reset(R, operands[1].size());
          out._operands[2].reset(R, operands[2].size());
          return kErrorOk;
        }
      }
      break;
    }

    case InstDB::RWInfo::kCategoryVmovddup: {
      // Special case for 'vmovddup' instruction. This instruction has an
      // interesting semantic as 128-bit XMM version only uses 64-bit memory
      // operand (m64), however, 256/512-bit versions use 256/512-bit memory
      // operand, respectively.
      if (opCount == 2) {
        if (BaseReg::isVec(operands[0]) && BaseReg::isVec(operands[1])) {
          uint32_t o0Size = operands[0].size();
          uint32_t o1Size = o0Size == 16 ? 8 : o0Size;

          out._operands[0].reset(W, o0Size);
          out._operands[1].reset(R | RegM, o1Size);
          // Only the low 8 bytes of each 16-byte lane are read.
          out._operands[1]._readByteMask &= 0x00FF00FF00FF00FFu;

          rwZeroExtendAvxVec(out._operands[0], operands[0].as<Vec>());
          return kErrorOk;
        }

        if (BaseReg::isVec(operands[0]) && operands[1].isMem()) {
          uint32_t o0Size = operands[0].size();
          uint32_t o1Size = o0Size == 16 ? 8 : o0Size;

          out._operands[0].reset(W, o0Size);
          out._operands[1].reset(R | MibRead, o1Size);

          rwZeroExtendAvxVec(out._operands[0], operands[0].as<Vec>());
          return kErrorOk;
        }
      }
      break;
    }

    case InstDB::RWInfo::kCategoryVmovmskpd:
    case InstDB::RWInfo::kCategoryVmovmskps: {
      // Special case for 'vmovmskpd|vmovmskps' instructions.
      if (opCount == 2) {
        if (BaseReg::isGp(operands[0]) && BaseReg::isVec(operands[1])) {
          // Only the low byte of the GP destination receives the mask; the
          // remaining bytes up to the native GP size are zero extended.
          out._operands[0].reset(W, 1);
          out._operands[0].setExtendByteMask(Support::lsbMask<uint32_t>(nativeGpSize - 1) << 1);
          out._operands[1].reset(R, operands[1].size());
          return kErrorOk;
        }
      }
      break;
    }

    case InstDB::RWInfo::kCategoryVmov1_2:
    case InstDB::RWInfo::kCategoryVmov1_4:
    case InstDB::RWInfo::kCategoryVmov1_8: {
      // Special case for instructions where the destination is 1:N (narrowing).
      //
      // Vmov1_2:
      //   vcvtpd2dq|vcvttpd2dq
      //   vcvtpd2udq|vcvttpd2udq
      //   vcvtpd2ps|vcvtps2ph
      //   vcvtqq2ps|vcvtuqq2ps
      //   vpmovwb|vpmovswb|vpmovuswb
      //   vpmovdw|vpmovsdw|vpmovusdw
      //   vpmovqd|vpmovsqd|vpmovusqd
      //
      // Vmov1_4:
      //   vpmovdb|vpmovsdb|vpmovusdb
      //   vpmovqw|vpmovsqw|vpmovusqw
      //
      // Vmov1_8:
      //   pmovmskb|vpmovmskb
      //   vpmovqb|vpmovsqb|vpmovusqb
      // Derive the narrowing shift (1, 2, or 3) from the category ordinal.
      uint32_t shift = instRwInfo.category - InstDB::RWInfo::kCategoryVmov1_2 + 1;

      if (opCount >= 2) {
        if (opCount >= 3) {
          if (opCount > 3)
            return DebugUtils::errored(kErrorInvalidInstruction);
          out._operands[2].reset();
        }

        if (operands[0].isReg() && operands[1].isReg()) {
          uint32_t size1 = operands[1].size();
          uint32_t size0 = size1 >> shift;

          out._operands[0].reset(W, size0);
          out._operands[1].reset(R, size1);

          if (instRmInfo.rmOpsMask & 0x1) {
            out._operands[0].addOpFlags(RegM);
            out._operands[0].setRmSize(size0);
          }

          if (instRmInfo.rmOpsMask & 0x2) {
            out._operands[1].addOpFlags(RegM);
            out._operands[1].setRmSize(size1);
          }

          // Handle 'pmovmskb|vpmovmskb'.
          if (BaseReg::isGp(operands[0]))
            rwZeroExtendGp(out._operands[0], operands[0].as<Gp>(), nativeGpSize);

          if (BaseReg::isVec(operands[0]))
            rwZeroExtendAvxVec(out._operands[0], operands[0].as<Vec>());

          return kErrorOk;
        }

        if (operands[0].isReg() && operands[1].isMem()) {
          // A memory source without explicit size defaults to 16 bytes.
          uint32_t size1 = operands[1].size() ? operands[1].size() : uint32_t(16);
          uint32_t size0 = size1 >> shift;

          out._operands[0].reset(W, size0);
          out._operands[1].reset(R | MibRead, size1);
          return kErrorOk;
        }

        if (operands[0].isMem() && operands[1].isReg()) {
          uint32_t size1 = operands[1].size();
          uint32_t size0 = size1 >> shift;

          out._operands[0].reset(W | MibRead, size0);
          out._operands[1].reset(R, size1);
          return kErrorOk;
        }
      }
      break;
    }

    case InstDB::RWInfo::kCategoryVmov2_1:
    case InstDB::RWInfo::kCategoryVmov4_1:
    case InstDB::RWInfo::kCategoryVmov8_1: {
      // Special case for instructions where the destination is N:1 (widening).
      //
      // Vmov2_1:
      //   vcvtdq2pd|vcvtudq2pd
      //   vcvtps2pd|vcvtph2ps
      //   vcvtps2qq|vcvtps2uqq
      //   vcvttps2qq|vcvttps2uqq
      //   vpmovsxbw|vpmovzxbw
      //   vpmovsxwd|vpmovzxwd
      //   vpmovsxdq|vpmovzxdq
      //
      // Vmov4_1:
      //   vpmovsxbd|vpmovzxbd
      //   vpmovsxwq|vpmovzxwq
      //
      // Vmov8_1:
      //   vpmovsxbq|vpmovzxbq
      // Derive the widening shift (1, 2, or 3) from the category ordinal.
      uint32_t shift = instRwInfo.category - InstDB::RWInfo::kCategoryVmov2_1 + 1;

      if (opCount >= 2) {
        if (opCount >= 3) {
          if (opCount > 3)
            return DebugUtils::errored(kErrorInvalidInstruction);
          out._operands[2].reset();
        }

        uint32_t size0 = operands[0].size();
        uint32_t size1 = size0 >> shift;

        out._operands[0].reset(W, size0);
        out._operands[1].reset(R, size1);

        if (operands[0].isReg() && operands[1].isReg()) {
          if (instRmInfo.rmOpsMask & 0x1) {
            out._operands[0].addOpFlags(RegM);
            out._operands[0].setRmSize(size0);
          }

          if (instRmInfo.rmOpsMask & 0x2) {
            out._operands[1].addOpFlags(RegM);
            out._operands[1].setRmSize(size1);
          }
          return kErrorOk;
        }

        if (operands[0].isReg() && operands[1].isMem()) {
          out._operands[1].addOpFlags(MibRead);
          return kErrorOk;
        }
      }
      break;
    }
  }

  // No special case matched the given operand combination.
  return DebugUtils::errored(kErrorInvalidInstruction);
}
1303 #endif // !ASMJIT_NO_INTROSPECTION
1304
1305 // ============================================================================
1306 // [asmjit::x86::InstInternal - QueryFeatures]
1307 // ============================================================================
1308
1309 #ifndef ASMJIT_NO_INTROSPECTION
// Aggregated register usage of an operand array (see `InstInternal_regAnalysis`):
//   - `regTypeMask` - bit mask of every `Reg` type seen, either as a register
//     operand or as a memory base/index register.
//   - `highVecUsed` - non-zero if any vector register (or vector memory index)
//     with id in range 16..31 is used.
struct RegAnalysis {
  uint32_t regTypeMask;
  uint32_t highVecUsed;

  // Tests whether a register of the given `regType` appears in the operands.
  inline bool hasRegType(uint32_t regType) const noexcept {
    return Support::bitTest(regTypeMask, regType);
  }
};
1318
1319 static RegAnalysis InstInternal_regAnalysis(const Operand_* operands, uint32_t opCount) noexcept {
1320 uint32_t mask = 0;
1321 uint32_t highVecUsed = 0;
1322
1323 for (uint32_t i = 0; i < opCount; i++) {
1324 const Operand_& op = operands[i];
1325 if (op.isReg()) {
1326 const BaseReg& reg = op.as<BaseReg>();
1327 mask |= Support::bitMask(reg.type());
1328 if (reg.isVec())
1329 highVecUsed |= uint32_t(reg.id() >= 16 && reg.id() < 32);
1330 }
1331 else if (op.isMem()) {
1332 const BaseMem& mem = op.as<BaseMem>();
1333 if (mem.hasBaseReg()) mask |= Support::bitMask(mem.baseType());
1334 if (mem.hasIndexReg()) {
1335 mask |= Support::bitMask(mem.indexType());
1336 highVecUsed |= uint32_t(mem.indexId() >= 16 && mem.indexId() < 32);
1337 }
1338 }
1339 }
1340
1341 return RegAnalysis { mask, highVecUsed };
1342 }
1343
1344 Error InstInternal::queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept {
1345 // Only called when `archId` matches X86 family.
1346 ASMJIT_UNUSED(archId);
1347 ASMJIT_ASSERT(ArchInfo::isX86Family(archId));
1348
1349 // Get the instruction data.
1350 uint32_t instId = inst.id();
1351 uint32_t options = inst.options();
1352
1353 if (ASMJIT_UNLIKELY(!Inst::isDefinedId(instId)))
1354 return DebugUtils::errored(kErrorInvalidInstruction);
1355
1356 const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
1357 const InstDB::CommonInfoTableB& tableB = InstDB::_commonInfoTableB[instInfo._commonInfoIndexB];
1358
1359 const uint8_t* fData = tableB.featuresBegin();
1360 const uint8_t* fEnd = tableB.featuresEnd();
1361
1362 // Copy all features to `out`.
1363 out.reset();
1364 do {
1365 uint32_t feature = fData[0];
1366 if (!feature)
1367 break;
1368 out.add(feature);
1369 } while (++fData != fEnd);
1370
1371 // Since AsmJit aggregates instructions that share the same name we have to
1372 // deal with some special cases and also with MMX/SSE and AVX/AVX2 overlaps.
1373 if (fData != tableB.featuresBegin()) {
1374 RegAnalysis regAnalysis = InstInternal_regAnalysis(operands, opCount);
1375
1376 // Handle MMX vs SSE overlap.
1377 if (out.has(Features::kMMX) || out.has(Features::kMMX2)) {
1378 // Only instructions defined by SSE and SSE2 overlap. Instructions
1379 // introduced by newer instruction sets like SSE3+ don't state MMX as
1380 // they require SSE3+.
1381 if (out.has(Features::kSSE) || out.has(Features::kSSE2)) {
1382 if (!regAnalysis.hasRegType(Reg::kTypeXmm)) {
1383 // The instruction doesn't use XMM register(s), thus it's MMX/MMX2 only.
1384 out.remove(Features::kSSE);
1385 out.remove(Features::kSSE2);
1386 }
1387 else {
1388 out.remove(Features::kMMX);
1389 out.remove(Features::kMMX2);
1390 }
1391
1392 // Special case: PEXTRW instruction is MMX/SSE2 instruction. However,
1393 // MMX/SSE version cannot access memory (only register to register
1394 // extract) so when SSE4.1 introduced the whole family of PEXTR/PINSR
1395 // instructions they also introduced PEXTRW with a new opcode 0x15 that
1396 // can extract directly to memory. This instruction is, of course, not
1397 // compatible with MMX/SSE2 and would #UD if SSE4.1 is not supported.
1398 if (instId == Inst::kIdPextrw) {
1399 ASMJIT_ASSERT(out.has(Features::kSSE2));
1400 ASMJIT_ASSERT(out.has(Features::kSSE4_1));
1401
1402 if (opCount >= 1 && operands[0].isMem())
1403 out.remove(Features::kSSE2);
1404 else
1405 out.remove(Features::kSSE4_1);
1406 }
1407 }
1408 }
1409
1410 // Handle PCLMULQDQ vs VPCLMULQDQ.
1411 if (out.has(Features::kVPCLMULQDQ)) {
1412 if (regAnalysis.hasRegType(Reg::kTypeZmm) || Support::bitTest(options, Inst::kOptionEvex)) {
1413 // AVX512_F & VPCLMULQDQ.
1414 out.remove(Features::kAVX, Features::kPCLMULQDQ);
1415 }
1416 else if (regAnalysis.hasRegType(Reg::kTypeYmm)) {
1417 out.remove(Features::kAVX512_F, Features::kAVX512_VL);
1418 }
1419 else {
1420 // AVX & PCLMULQDQ.
1421 out.remove(Features::kAVX512_F, Features::kAVX512_VL, Features::kVPCLMULQDQ);
1422 }
1423 }
1424
1425 // Handle AVX vs AVX2 overlap.
1426 if (out.has(Features::kAVX) && out.has(Features::kAVX2)) {
1427 bool isAVX2 = true;
1428 // Special case: VBROADCASTSS and VBROADCASTSD were introduced in AVX, but
1429 // only version that uses memory as a source operand. AVX2 then added support
1430 // for register source operand.
1431 if (instId == Inst::kIdVbroadcastss || instId == Inst::kIdVbroadcastsd) {
1432 if (opCount > 1 && operands[1].isMem())
1433 isAVX2 = false;
1434 }
1435 else {
1436 // AVX instruction set doesn't support integer operations on YMM registers
1437 // as these were later introcuced by AVX2. In our case we have to check if
1438 // YMM register(s) are in use and if that is the case this is an AVX2 instruction.
1439 if (!(regAnalysis.regTypeMask & Support::bitMask(Reg::kTypeYmm, Reg::kTypeZmm)))
1440 isAVX2 = false;
1441 }
1442
1443 if (isAVX2)
1444 out.remove(Features::kAVX);
1445 else
1446 out.remove(Features::kAVX2);
1447 }
1448
1449 // Handle AVX|AVX2|FMA|F16C vs AVX512 overlap.
1450 if (out.has(Features::kAVX) || out.has(Features::kAVX2) || out.has(Features::kFMA) || out.has(Features::kF16C)) {
1451 // Only AVX512-F|BW|DQ allow to encode AVX/AVX2/FMA/F16C instructions
1452 if (out.has(Features::kAVX512_F) || out.has(Features::kAVX512_BW) || out.has(Features::kAVX512_DQ)) {
1453 uint32_t hasEvex = options & (Inst::kOptionEvex | Inst::_kOptionAvx512Mask);
1454 uint32_t hasKMask = inst.extraReg().type() == Reg::kTypeKReg;
1455 uint32_t hasKOrZmm = regAnalysis.regTypeMask & Support::bitMask(Reg::kTypeZmm, Reg::kTypeKReg);
1456
1457 uint32_t mustUseEvex = 0;
1458
1459 switch (instId) {
1460 // Special case: VPSLLDQ and VPSRLDQ instructions only allow `reg, reg. imm`
1461 // combination in AVX|AVX2 mode, then AVX-512 introduced `reg, reg/mem, imm`
1462 // combination that uses EVEX prefix. This means that if the second operand
1463 // is memory then this is AVX-512_BW instruction and not AVX/AVX2 instruction.
1464 case Inst::kIdVpslldq:
1465 case Inst::kIdVpsrldq:
1466 mustUseEvex = opCount >= 2 && operands[1].isMem();
1467 break;
1468
1469 // Special case: VPBROADCAST[B|D|Q|W] only supports r32/r64 with EVEX prefix.
1470 case Inst::kIdVpbroadcastb:
1471 case Inst::kIdVpbroadcastd:
1472 case Inst::kIdVpbroadcastq:
1473 case Inst::kIdVpbroadcastw:
1474 mustUseEvex = opCount >= 2 && x86::Reg::isGp(operands[1]);
1475 break;
1476
1477 // Special case: VPERMPD only supports YMM predicate in AVX mode, immediate
1478 // precicate is only supported by AVX512-F and newer.
1479 case Inst::kIdVpermpd:
1480 mustUseEvex = opCount >= 3 && !operands[2].isImm();
1481 break;
1482 }
1483
1484 if (!(hasEvex | mustUseEvex | hasKMask | hasKOrZmm | regAnalysis.highVecUsed))
1485 out.remove(Features::kAVX512_F, Features::kAVX512_BW, Features::kAVX512_DQ, Features::kAVX512_VL);
1486 else
1487 out.remove(Features::kAVX, Features::kAVX2, Features::kFMA, Features::kF16C);
1488 }
1489 }
1490
1491 // Clear AVX512_VL if ZMM register is used.
1492 if (regAnalysis.hasRegType(Reg::kTypeZmm))
1493 out.remove(Features::kAVX512_VL);
1494 }
1495
1496 return kErrorOk;
1497 }
1498 #endif // !ASMJIT_NO_INTROSPECTION
1499
1500 // ============================================================================
1501 // [asmjit::x86::InstInternal - Unit]
1502 // ============================================================================
1503
1504 #if defined(ASMJIT_TEST)
1505 UNIT(x86_inst_api_text) {
1506 // All known instructions should be matched.
1507 INFO("Matching all X86 instructions");
1508 for (uint32_t a = 1; a < Inst::_kIdCount; a++) {
1509 StringTmp<128> aName;
1510 EXPECT(InstInternal::instIdToString(0, a, aName) == kErrorOk,
1511 "Failed to get the name of instruction #%u", a);
1512
1513 uint32_t b = InstInternal::stringToInstId(0, aName.data(), aName.size());
1514 StringTmp<128> bName;
1515 InstInternal::instIdToString(0, b, bName);
1516
1517 EXPECT(a == b,
1518 "Instructions do not match \"%s\" (#%u) != \"%s\" (#%u)", aName.data(), a, bName.data(), b);
1519 }
1520 }
1521 #endif
1522
1523 ASMJIT_END_SUB_NAMESPACE
1524
1525 #endif // ASMJIT_BUILD_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86INSTAPI_P_H
7 #define _ASMJIT_X86_X86INSTAPI_P_H
8
9 #include "../core/inst.h"
10 #include "../core/operand.h"
11
12 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
13
14 //! \cond INTERNAL
15 //! \addtogroup asmjit_x86
16 //! \{
17
//! Internal x86-specific implementation of the public instruction API.
namespace InstInternal {

#ifndef ASMJIT_NO_TEXT
//! Appends the name of the instruction `instId` (of the given `archId`) to
//! `output`.
Error instIdToString(uint32_t archId, uint32_t instId, String& output) noexcept;
//! Translates an instruction name `s` of length `len` into an instruction
//! id (the inverse of `instIdToString()`).
uint32_t stringToInstId(uint32_t archId, const char* s, size_t len) noexcept;
#endif // !ASMJIT_NO_TEXT

#ifndef ASMJIT_NO_VALIDATION
//! Validates that `inst` with the given `operands` forms a valid instruction
//! for the given `archId`.
Error validate(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept;
#endif // !ASMJIT_NO_VALIDATION

#ifndef ASMJIT_NO_INTROSPECTION
//! Queries read/write information of `inst` with the given `operands` and
//! stores it to `out`.
Error queryRWInfo(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, InstRWInfo& out) noexcept;
//! Queries CPU features required to execute `inst` with the given `operands`
//! and stores them to `out`.
Error queryFeatures(uint32_t archId, const BaseInst& inst, const Operand_* operands, uint32_t opCount, BaseFeatures& out) noexcept;
#endif // !ASMJIT_NO_INTROSPECTION

} // {InstInternal}
35
36 //! \}
37 //! \endcond
38
39 ASMJIT_END_SUB_NAMESPACE
40
41 #endif // _ASMJIT_X86_X86INSTAPI_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 // ----------------------------------------------------------------------------
7 // IMPORTANT: AsmJit now uses an external instruction database to populate
8 // static tables within this file. Perform the following steps to regenerate
9 // all tables enclosed by ${...}:
10 //
11 // 1. Install node.js environment <https://nodejs.org>
12 // 2. Go to asmjit/tools directory
13 // 3. Get the latest asmdb from <https://github.com/asmjit/asmdb> and
14 // copy/link the `asmdb` directory to `asmjit/tools/asmdb`.
15 // 4. Execute `node tablegen-x86.js`
16 //
17 // Instruction encoding and opcodes were added to the `x86inst.cpp` database
18 // manually in the past and they are not updated by the script as it became
19 // tricky. However, everything else is updated including instruction operands
20 // and tables required to validate them, instruction read/write information
21 // (including registers and flags), and all indexes to all tables.
22 // ----------------------------------------------------------------------------
23
24 #include "../core/api-build_p.h"
25 #ifdef ASMJIT_BUILD_X86
26
27 #include "../core/cpuinfo.h"
28 #include "../core/misc_p.h"
29 #include "../core/support.h"
30 #include "../x86/x86features.h"
31 #include "../x86/x86instdb_p.h"
32 #include "../x86/x86opcode_p.h"
33 #include "../x86/x86operand.h"
34
35 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
36
37 // ============================================================================
38 // [asmjit::x86::InstDB - InstInfo]
39 // ============================================================================
40
// Instruction opcode definitions:
//   - `O` encodes X86|MMX|SSE instructions.
//   - `V` encodes VEX|XOP|EVEX instructions.
//   - `E` encodes EVEX instructions only.
//
// O_ENCODE() merges all opcode fields into a single value. When `VEX` is
// non-zero and the opcode map (MM field) is not the 0F map it forces the
// three-byte VEX3 prefix, because the two-byte VEX2 prefix can only express
// the 0F map.
#define O_ENCODE(VEX, PREFIX, OPCODE, O, L, W, EvexW, N, TT) \
  ((PREFIX) | (OPCODE) | (O) | (L) | (W) | (EvexW) | (N) | (TT) | \
  (VEX && ((PREFIX) & Opcode::kMM_Mask) != Opcode::kMM_0F ? int(Opcode::kMM_ForceVex3) : 0))

#define O(PREFIX, OPCODE, O, LL, W, EvexW, N, TT) (O_ENCODE(0, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kO_##O, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT))
#define V(PREFIX, OPCODE, O, LL, W, EvexW, N, TT) (O_ENCODE(1, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kO_##O, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT))
#define E(PREFIX, OPCODE, O, LL, W, EvexW, N, TT) (O_ENCODE(1, Opcode::k##PREFIX, 0x##OPCODE, Opcode::kO_##O, Opcode::kLL_##LL, Opcode::kW_##W, Opcode::kEvex_W_##EvexW, Opcode::kCDSHL_##N, Opcode::kCDTT_##TT) | Opcode::kMM_ForceEvex)
// FPU opcodes are up to two bytes (e.g. D9F0) - the second byte is shifted
// into the dedicated `kFPU_2B` field of the opcode value.
#define O_FPU(PREFIX, OPCODE, O) (Opcode::kFPU_##PREFIX | (0x##OPCODE & 0xFFu) | ((0x##OPCODE >> 8) << Opcode::kFPU_2B_Shift) | Opcode::kO_##O)

// Don't store `_nameDataIndex` if instruction names are disabled. Since some
// APIs can use `_nameDataIndex` it's much safer if it's zero if it's not defined.
#ifndef ASMJIT_NO_TEXT
  #define NAME_DATA_INDEX(X) X
#else
  #define NAME_DATA_INDEX(X) 0
#endif

// Defines an X86 instruction - one entry of `InstDB::_instInfoTable`. Note
// that `id` and `opcode1` are not stored in the record and only the lowest
// byte of `opcode0` is; the remaining opcode data is presumably looked up
// through `mainOpcodeIndex` / `altOpcodeIndex` - see the tablegen script
// referenced at the top of this file.
#define INST(id, encoding, opcode0, opcode1, mainOpcodeIndex, altOpcodeIndex, nameDataIndex, commomInfoIndexA, commomInfoIndexB) { \
  uint32_t(NAME_DATA_INDEX(nameDataIndex)), \
  uint32_t(commomInfoIndexA), \
  uint32_t(commomInfoIndexB), \
  uint8_t(InstDB::kEncoding##encoding), \
  uint8_t((opcode0) & 0xFFu), \
  uint8_t(mainOpcodeIndex), \
  uint8_t(altOpcodeIndex) \
}
72
73 const InstDB::InstInfo InstDB::_instInfoTable[] = {
74 /*--------------------+--------------------+------------------+--------+------------------+--------+----+----+------+----+----+
75 | Instruction | Instruction | Main Opcode | EVEX |Alternative Opcode| EVEX |Op0X|Op1X|Name-X|IdxA|IdxB|
76 | Id & Name | Encoding | (pp+mmm|op/o|L|w|W|N|TT.)|--(pp+mmm|op/o|L|w|W|N|TT.)| (auto-generated) |
77 +---------------------+--------------------+---------+----+-+-+-+-+----+---------+----+-+-+-+-+----+----+----+------+----+---*/
78 // ${InstInfo:Begin}
79 INST(None , None , 0 , 0 , 0 , 0 , 0 , 0 , 0 ), // #0
80 INST(Aaa , X86Op_xAX , O(000000,37,_,_,_,_,_,_ ), 0 , 0 , 0 , 1 , 1 , 1 ), // #1
81 INST(Aad , X86I_xAX , O(000000,D5,_,_,_,_,_,_ ), 0 , 0 , 0 , 5 , 2 , 1 ), // #2
82 INST(Aam , X86I_xAX , O(000000,D4,_,_,_,_,_,_ ), 0 , 0 , 0 , 9 , 2 , 1 ), // #3
83 INST(Aas , X86Op_xAX , O(000000,3F,_,_,_,_,_,_ ), 0 , 0 , 0 , 13 , 1 , 1 ), // #4
84 INST(Adc , X86Arith , O(000000,10,2,_,x,_,_,_ ), 0 , 1 , 0 , 17 , 3 , 2 ), // #5
85 INST(Adcx , X86Rm , O(660F38,F6,_,_,x,_,_,_ ), 0 , 2 , 0 , 21 , 4 , 3 ), // #6
86 INST(Add , X86Arith , O(000000,00,0,_,x,_,_,_ ), 0 , 0 , 0 , 761 , 3 , 1 ), // #7
87 INST(Addpd , ExtRm , O(660F00,58,_,_,_,_,_,_ ), 0 , 3 , 0 , 4814 , 5 , 4 ), // #8
88 INST(Addps , ExtRm , O(000F00,58,_,_,_,_,_,_ ), 0 , 4 , 0 , 4826 , 5 , 5 ), // #9
89 INST(Addsd , ExtRm , O(F20F00,58,_,_,_,_,_,_ ), 0 , 5 , 0 , 5048 , 6 , 4 ), // #10
90 INST(Addss , ExtRm , O(F30F00,58,_,_,_,_,_,_ ), 0 , 6 , 0 , 2955 , 7 , 5 ), // #11
91 INST(Addsubpd , ExtRm , O(660F00,D0,_,_,_,_,_,_ ), 0 , 3 , 0 , 4553 , 5 , 6 ), // #12
92 INST(Addsubps , ExtRm , O(F20F00,D0,_,_,_,_,_,_ ), 0 , 5 , 0 , 4565 , 5 , 6 ), // #13
93 INST(Adox , X86Rm , O(F30F38,F6,_,_,x,_,_,_ ), 0 , 7 , 0 , 26 , 4 , 7 ), // #14
94 INST(Aesdec , ExtRm , O(660F38,DE,_,_,_,_,_,_ ), 0 , 2 , 0 , 3010 , 5 , 8 ), // #15
95 INST(Aesdeclast , ExtRm , O(660F38,DF,_,_,_,_,_,_ ), 0 , 2 , 0 , 3018 , 5 , 8 ), // #16
96 INST(Aesenc , ExtRm , O(660F38,DC,_,_,_,_,_,_ ), 0 , 2 , 0 , 3030 , 5 , 8 ), // #17
97 INST(Aesenclast , ExtRm , O(660F38,DD,_,_,_,_,_,_ ), 0 , 2 , 0 , 3038 , 5 , 8 ), // #18
98 INST(Aesimc , ExtRm , O(660F38,DB,_,_,_,_,_,_ ), 0 , 2 , 0 , 3050 , 5 , 8 ), // #19
99 INST(Aeskeygenassist , ExtRmi , O(660F3A,DF,_,_,_,_,_,_ ), 0 , 8 , 0 , 3058 , 8 , 8 ), // #20
100 INST(And , X86Arith , O(000000,20,4,_,x,_,_,_ ), 0 , 9 , 0 , 2433 , 9 , 1 ), // #21
101 INST(Andn , VexRvm_Wx , V(000F38,F2,_,0,x,_,_,_ ), 0 , 10 , 0 , 6494 , 10 , 9 ), // #22
102 INST(Andnpd , ExtRm , O(660F00,55,_,_,_,_,_,_ ), 0 , 3 , 0 , 3091 , 5 , 4 ), // #23
103 INST(Andnps , ExtRm , O(000F00,55,_,_,_,_,_,_ ), 0 , 4 , 0 , 3099 , 5 , 5 ), // #24
104 INST(Andpd , ExtRm , O(660F00,54,_,_,_,_,_,_ ), 0 , 3 , 0 , 4067 , 11 , 4 ), // #25
105 INST(Andps , ExtRm , O(000F00,54,_,_,_,_,_,_ ), 0 , 4 , 0 , 4077 , 11 , 5 ), // #26
106 INST(Arpl , X86Mr_NoSize , O(000000,63,_,_,_,_,_,_ ), 0 , 0 , 0 , 31 , 12 , 10 ), // #27
107 INST(Bextr , VexRmv_Wx , V(000F38,F7,_,0,x,_,_,_ ), 0 , 10 , 0 , 36 , 13 , 9 ), // #28
108 INST(Blcfill , VexVm_Wx , V(XOP_M9,01,1,0,x,_,_,_ ), 0 , 11 , 0 , 42 , 14 , 11 ), // #29
109 INST(Blci , VexVm_Wx , V(XOP_M9,02,6,0,x,_,_,_ ), 0 , 12 , 0 , 50 , 14 , 11 ), // #30
110 INST(Blcic , VexVm_Wx , V(XOP_M9,01,5,0,x,_,_,_ ), 0 , 13 , 0 , 55 , 14 , 11 ), // #31
111 INST(Blcmsk , VexVm_Wx , V(XOP_M9,02,1,0,x,_,_,_ ), 0 , 11 , 0 , 61 , 14 , 11 ), // #32
112 INST(Blcs , VexVm_Wx , V(XOP_M9,01,3,0,x,_,_,_ ), 0 , 14 , 0 , 68 , 14 , 11 ), // #33
113 INST(Blendpd , ExtRmi , O(660F3A,0D,_,_,_,_,_,_ ), 0 , 8 , 0 , 3177 , 8 , 12 ), // #34
114 INST(Blendps , ExtRmi , O(660F3A,0C,_,_,_,_,_,_ ), 0 , 8 , 0 , 3186 , 8 , 12 ), // #35
115 INST(Blendvpd , ExtRm_XMM0 , O(660F38,15,_,_,_,_,_,_ ), 0 , 2 , 0 , 3195 , 15 , 12 ), // #36
116 INST(Blendvps , ExtRm_XMM0 , O(660F38,14,_,_,_,_,_,_ ), 0 , 2 , 0 , 3205 , 15 , 12 ), // #37
117 INST(Blsfill , VexVm_Wx , V(XOP_M9,01,2,0,x,_,_,_ ), 0 , 15 , 0 , 73 , 14 , 11 ), // #38
118 INST(Blsi , VexVm_Wx , V(000F38,F3,3,0,x,_,_,_ ), 0 , 16 , 0 , 81 , 14 , 9 ), // #39
119 INST(Blsic , VexVm_Wx , V(XOP_M9,01,6,0,x,_,_,_ ), 0 , 12 , 0 , 86 , 14 , 11 ), // #40
120 INST(Blsmsk , VexVm_Wx , V(000F38,F3,2,0,x,_,_,_ ), 0 , 17 , 0 , 92 , 14 , 9 ), // #41
121 INST(Blsr , VexVm_Wx , V(000F38,F3,1,0,x,_,_,_ ), 0 , 18 , 0 , 99 , 14 , 9 ), // #42
122 INST(Bndcl , X86Rm , O(F30F00,1A,_,_,_,_,_,_ ), 0 , 6 , 0 , 104 , 16 , 13 ), // #43
123 INST(Bndcn , X86Rm , O(F20F00,1B,_,_,_,_,_,_ ), 0 , 5 , 0 , 110 , 16 , 13 ), // #44
124 INST(Bndcu , X86Rm , O(F20F00,1A,_,_,_,_,_,_ ), 0 , 5 , 0 , 116 , 16 , 13 ), // #45
125 INST(Bndldx , X86Rm , O(000F00,1A,_,_,_,_,_,_ ), 0 , 4 , 0 , 122 , 17 , 13 ), // #46
126 INST(Bndmk , X86Rm , O(F30F00,1B,_,_,_,_,_,_ ), 0 , 6 , 0 , 129 , 18 , 13 ), // #47
127 INST(Bndmov , X86Bndmov , O(660F00,1A,_,_,_,_,_,_ ), O(660F00,1B,_,_,_,_,_,_ ), 3 , 1 , 135 , 19 , 13 ), // #48
128 INST(Bndstx , X86Mr , O(000F00,1B,_,_,_,_,_,_ ), 0 , 4 , 0 , 142 , 20 , 13 ), // #49
129 INST(Bound , X86Rm , O(000000,62,_,_,_,_,_,_ ), 0 , 0 , 0 , 149 , 21 , 0 ), // #50
130 INST(Bsf , X86Rm , O(000F00,BC,_,_,x,_,_,_ ), 0 , 4 , 0 , 155 , 22 , 1 ), // #51
131 INST(Bsr , X86Rm , O(000F00,BD,_,_,x,_,_,_ ), 0 , 4 , 0 , 159 , 22 , 1 ), // #52
132 INST(Bswap , X86Bswap , O(000F00,C8,_,_,x,_,_,_ ), 0 , 4 , 0 , 163 , 23 , 0 ), // #53
133 INST(Bt , X86Bt , O(000F00,A3,_,_,x,_,_,_ ), O(000F00,BA,4,_,x,_,_,_ ), 4 , 2 , 169 , 24 , 14 ), // #54
134 INST(Btc , X86Bt , O(000F00,BB,_,_,x,_,_,_ ), O(000F00,BA,7,_,x,_,_,_ ), 4 , 3 , 172 , 25 , 14 ), // #55
135 INST(Btr , X86Bt , O(000F00,B3,_,_,x,_,_,_ ), O(000F00,BA,6,_,x,_,_,_ ), 4 , 4 , 176 , 25 , 14 ), // #56
136 INST(Bts , X86Bt , O(000F00,AB,_,_,x,_,_,_ ), O(000F00,BA,5,_,x,_,_,_ ), 4 , 5 , 180 , 25 , 14 ), // #57
137 INST(Bzhi , VexRmv_Wx , V(000F38,F5,_,0,x,_,_,_ ), 0 , 10 , 0 , 184 , 13 , 15 ), // #58
138 INST(Call , X86Call , O(000000,FF,2,_,_,_,_,_ ), 0 , 1 , 0 , 2848 , 26 , 1 ), // #59
139 INST(Cbw , X86Op_xAX , O(660000,98,_,_,_,_,_,_ ), 0 , 19 , 0 , 189 , 27 , 0 ), // #60
140 INST(Cdq , X86Op_xDX_xAX , O(000000,99,_,_,_,_,_,_ ), 0 , 0 , 0 , 193 , 28 , 0 ), // #61
141 INST(Cdqe , X86Op_xAX , O(000000,98,_,_,1,_,_,_ ), 0 , 20 , 0 , 197 , 29 , 0 ), // #62
142 INST(Clac , X86Op , O(000F01,CA,_,_,_,_,_,_ ), 0 , 21 , 0 , 202 , 30 , 16 ), // #63
143 INST(Clc , X86Op , O(000000,F8,_,_,_,_,_,_ ), 0 , 0 , 0 , 207 , 30 , 17 ), // #64
144 INST(Cld , X86Op , O(000000,FC,_,_,_,_,_,_ ), 0 , 0 , 0 , 211 , 30 , 18 ), // #65
145 INST(Cldemote , X86M_Only , O(000F00,1C,0,_,_,_,_,_ ), 0 , 4 , 0 , 215 , 31 , 19 ), // #66
146 INST(Clflush , X86M_Only , O(000F00,AE,7,_,_,_,_,_ ), 0 , 22 , 0 , 224 , 31 , 20 ), // #67
147 INST(Clflushopt , X86M_Only , O(660F00,AE,7,_,_,_,_,_ ), 0 , 23 , 0 , 232 , 31 , 21 ), // #68
148 INST(Clgi , X86Op , O(000F01,DD,_,_,_,_,_,_ ), 0 , 21 , 0 , 243 , 30 , 22 ), // #69
149 INST(Cli , X86Op , O(000000,FA,_,_,_,_,_,_ ), 0 , 0 , 0 , 248 , 30 , 23 ), // #70
150 INST(Clts , X86Op , O(000F00,06,_,_,_,_,_,_ ), 0 , 4 , 0 , 252 , 30 , 0 ), // #71
151 INST(Clwb , X86M_Only , O(660F00,AE,6,_,_,_,_,_ ), 0 , 24 , 0 , 257 , 31 , 24 ), // #72
152 INST(Clzero , X86Op_MemZAX , O(000F01,FC,_,_,_,_,_,_ ), 0 , 21 , 0 , 262 , 32 , 25 ), // #73
153 INST(Cmc , X86Op , O(000000,F5,_,_,_,_,_,_ ), 0 , 0 , 0 , 269 , 30 , 26 ), // #74
154 INST(Cmova , X86Rm , O(000F00,47,_,_,x,_,_,_ ), 0 , 4 , 0 , 273 , 22 , 27 ), // #75
155 INST(Cmovae , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 279 , 22 , 28 ), // #76
156 INST(Cmovb , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 618 , 22 , 28 ), // #77
157 INST(Cmovbe , X86Rm , O(000F00,46,_,_,x,_,_,_ ), 0 , 4 , 0 , 625 , 22 , 27 ), // #78
158 INST(Cmovc , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 286 , 22 , 28 ), // #79
159 INST(Cmove , X86Rm , O(000F00,44,_,_,x,_,_,_ ), 0 , 4 , 0 , 633 , 22 , 29 ), // #80
160 INST(Cmovg , X86Rm , O(000F00,4F,_,_,x,_,_,_ ), 0 , 4 , 0 , 292 , 22 , 30 ), // #81
161 INST(Cmovge , X86Rm , O(000F00,4D,_,_,x,_,_,_ ), 0 , 4 , 0 , 298 , 22 , 31 ), // #82
162 INST(Cmovl , X86Rm , O(000F00,4C,_,_,x,_,_,_ ), 0 , 4 , 0 , 305 , 22 , 31 ), // #83
163 INST(Cmovle , X86Rm , O(000F00,4E,_,_,x,_,_,_ ), 0 , 4 , 0 , 311 , 22 , 30 ), // #84
164 INST(Cmovna , X86Rm , O(000F00,46,_,_,x,_,_,_ ), 0 , 4 , 0 , 318 , 22 , 27 ), // #85
165 INST(Cmovnae , X86Rm , O(000F00,42,_,_,x,_,_,_ ), 0 , 4 , 0 , 325 , 22 , 28 ), // #86
166 INST(Cmovnb , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 640 , 22 , 28 ), // #87
167 INST(Cmovnbe , X86Rm , O(000F00,47,_,_,x,_,_,_ ), 0 , 4 , 0 , 648 , 22 , 27 ), // #88
168 INST(Cmovnc , X86Rm , O(000F00,43,_,_,x,_,_,_ ), 0 , 4 , 0 , 333 , 22 , 28 ), // #89
169 INST(Cmovne , X86Rm , O(000F00,45,_,_,x,_,_,_ ), 0 , 4 , 0 , 657 , 22 , 29 ), // #90
170 INST(Cmovng , X86Rm , O(000F00,4E,_,_,x,_,_,_ ), 0 , 4 , 0 , 340 , 22 , 30 ), // #91
171 INST(Cmovnge , X86Rm , O(000F00,4C,_,_,x,_,_,_ ), 0 , 4 , 0 , 347 , 22 , 31 ), // #92
172 INST(Cmovnl , X86Rm , O(000F00,4D,_,_,x,_,_,_ ), 0 , 4 , 0 , 355 , 22 , 31 ), // #93
173 INST(Cmovnle , X86Rm , O(000F00,4F,_,_,x,_,_,_ ), 0 , 4 , 0 , 362 , 22 , 30 ), // #94
174 INST(Cmovno , X86Rm , O(000F00,41,_,_,x,_,_,_ ), 0 , 4 , 0 , 370 , 22 , 32 ), // #95
175 INST(Cmovnp , X86Rm , O(000F00,4B,_,_,x,_,_,_ ), 0 , 4 , 0 , 377 , 22 , 33 ), // #96
176 INST(Cmovns , X86Rm , O(000F00,49,_,_,x,_,_,_ ), 0 , 4 , 0 , 384 , 22 , 34 ), // #97
177 INST(Cmovnz , X86Rm , O(000F00,45,_,_,x,_,_,_ ), 0 , 4 , 0 , 391 , 22 , 29 ), // #98
178 INST(Cmovo , X86Rm , O(000F00,40,_,_,x,_,_,_ ), 0 , 4 , 0 , 398 , 22 , 32 ), // #99
179 INST(Cmovp , X86Rm , O(000F00,4A,_,_,x,_,_,_ ), 0 , 4 , 0 , 404 , 22 , 33 ), // #100
180 INST(Cmovpe , X86Rm , O(000F00,4A,_,_,x,_,_,_ ), 0 , 4 , 0 , 410 , 22 , 33 ), // #101
181 INST(Cmovpo , X86Rm , O(000F00,4B,_,_,x,_,_,_ ), 0 , 4 , 0 , 417 , 22 , 33 ), // #102
182 INST(Cmovs , X86Rm , O(000F00,48,_,_,x,_,_,_ ), 0 , 4 , 0 , 424 , 22 , 34 ), // #103
183 INST(Cmovz , X86Rm , O(000F00,44,_,_,x,_,_,_ ), 0 , 4 , 0 , 430 , 22 , 29 ), // #104
184 INST(Cmp , X86Arith , O(000000,38,7,_,x,_,_,_ ), 0 , 25 , 0 , 436 , 33 , 1 ), // #105
185 INST(Cmppd , ExtRmi , O(660F00,C2,_,_,_,_,_,_ ), 0 , 3 , 0 , 3431 , 8 , 4 ), // #106
186 INST(Cmpps , ExtRmi , O(000F00,C2,_,_,_,_,_,_ ), 0 , 4 , 0 , 3438 , 8 , 5 ), // #107
187 INST(Cmps , X86StrMm , O(000000,A6,_,_,_,_,_,_ ), 0 , 0 , 0 , 440 , 34 , 35 ), // #108
188 INST(Cmpsd , ExtRmi , O(F20F00,C2,_,_,_,_,_,_ ), 0 , 5 , 0 , 3445 , 35 , 4 ), // #109
189 INST(Cmpss , ExtRmi , O(F30F00,C2,_,_,_,_,_,_ ), 0 , 6 , 0 , 3452 , 36 , 5 ), // #110
190 INST(Cmpxchg , X86Cmpxchg , O(000F00,B0,_,_,x,_,_,_ ), 0 , 4 , 0 , 445 , 37 , 36 ), // #111
191 INST(Cmpxchg16b , X86Cmpxchg8b_16b , O(000F00,C7,1,_,1,_,_,_ ), 0 , 26 , 0 , 453 , 38 , 37 ), // #112
192 INST(Cmpxchg8b , X86Cmpxchg8b_16b , O(000F00,C7,1,_,_,_,_,_ ), 0 , 27 , 0 , 464 , 39 , 38 ), // #113
193 INST(Comisd , ExtRm , O(660F00,2F,_,_,_,_,_,_ ), 0 , 3 , 0 , 9930 , 6 , 39 ), // #114
194 INST(Comiss , ExtRm , O(000F00,2F,_,_,_,_,_,_ ), 0 , 4 , 0 , 9939 , 7 , 40 ), // #115
195 INST(Cpuid , X86Op , O(000F00,A2,_,_,_,_,_,_ ), 0 , 4 , 0 , 474 , 40 , 41 ), // #116
196 INST(Cqo , X86Op_xDX_xAX , O(000000,99,_,_,1,_,_,_ ), 0 , 20 , 0 , 480 , 41 , 0 ), // #117
197 INST(Crc32 , X86Crc , O(F20F38,F0,_,_,x,_,_,_ ), 0 , 28 , 0 , 484 , 42 , 42 ), // #118
198 INST(Cvtdq2pd , ExtRm , O(F30F00,E6,_,_,_,_,_,_ ), 0 , 6 , 0 , 3499 , 6 , 4 ), // #119
199 INST(Cvtdq2ps , ExtRm , O(000F00,5B,_,_,_,_,_,_ ), 0 , 4 , 0 , 3509 , 5 , 4 ), // #120
200 INST(Cvtpd2dq , ExtRm , O(F20F00,E6,_,_,_,_,_,_ ), 0 , 5 , 0 , 3548 , 5 , 4 ), // #121
201 INST(Cvtpd2pi , ExtRm , O(660F00,2D,_,_,_,_,_,_ ), 0 , 3 , 0 , 490 , 43 , 4 ), // #122
202 INST(Cvtpd2ps , ExtRm , O(660F00,5A,_,_,_,_,_,_ ), 0 , 3 , 0 , 3558 , 5 , 4 ), // #123
203 INST(Cvtpi2pd , ExtRm , O(660F00,2A,_,_,_,_,_,_ ), 0 , 3 , 0 , 499 , 44 , 4 ), // #124
204 INST(Cvtpi2ps , ExtRm , O(000F00,2A,_,_,_,_,_,_ ), 0 , 4 , 0 , 508 , 44 , 5 ), // #125
205 INST(Cvtps2dq , ExtRm , O(660F00,5B,_,_,_,_,_,_ ), 0 , 3 , 0 , 3610 , 5 , 4 ), // #126
206 INST(Cvtps2pd , ExtRm , O(000F00,5A,_,_,_,_,_,_ ), 0 , 4 , 0 , 3620 , 6 , 4 ), // #127
207 INST(Cvtps2pi , ExtRm , O(000F00,2D,_,_,_,_,_,_ ), 0 , 4 , 0 , 517 , 45 , 5 ), // #128
208 INST(Cvtsd2si , ExtRm_Wx , O(F20F00,2D,_,_,x,_,_,_ ), 0 , 5 , 0 , 3692 , 46 , 4 ), // #129
209 INST(Cvtsd2ss , ExtRm , O(F20F00,5A,_,_,_,_,_,_ ), 0 , 5 , 0 , 3702 , 6 , 4 ), // #130
210 INST(Cvtsi2sd , ExtRm_Wx , O(F20F00,2A,_,_,x,_,_,_ ), 0 , 5 , 0 , 3723 , 47 , 4 ), // #131
211 INST(Cvtsi2ss , ExtRm_Wx , O(F30F00,2A,_,_,x,_,_,_ ), 0 , 6 , 0 , 3733 , 47 , 5 ), // #132
212 INST(Cvtss2sd , ExtRm , O(F30F00,5A,_,_,_,_,_,_ ), 0 , 6 , 0 , 3743 , 7 , 4 ), // #133
213 INST(Cvtss2si , ExtRm_Wx , O(F30F00,2D,_,_,x,_,_,_ ), 0 , 6 , 0 , 3753 , 48 , 5 ), // #134
214 INST(Cvttpd2dq , ExtRm , O(660F00,E6,_,_,_,_,_,_ ), 0 , 3 , 0 , 3774 , 5 , 4 ), // #135
215 INST(Cvttpd2pi , ExtRm , O(660F00,2C,_,_,_,_,_,_ ), 0 , 3 , 0 , 526 , 43 , 4 ), // #136
216 INST(Cvttps2dq , ExtRm , O(F30F00,5B,_,_,_,_,_,_ ), 0 , 6 , 0 , 3820 , 5 , 4 ), // #137
217 INST(Cvttps2pi , ExtRm , O(000F00,2C,_,_,_,_,_,_ ), 0 , 4 , 0 , 536 , 45 , 5 ), // #138
218 INST(Cvttsd2si , ExtRm_Wx , O(F20F00,2C,_,_,x,_,_,_ ), 0 , 5 , 0 , 3866 , 46 , 4 ), // #139
219 INST(Cvttss2si , ExtRm_Wx , O(F30F00,2C,_,_,x,_,_,_ ), 0 , 6 , 0 , 3889 , 48 , 5 ), // #140
220 INST(Cwd , X86Op_xDX_xAX , O(660000,99,_,_,_,_,_,_ ), 0 , 19 , 0 , 546 , 49 , 0 ), // #141
221 INST(Cwde , X86Op_xAX , O(000000,98,_,_,_,_,_,_ ), 0 , 0 , 0 , 550 , 50 , 0 ), // #142
222 INST(Daa , X86Op , O(000000,27,_,_,_,_,_,_ ), 0 , 0 , 0 , 555 , 1 , 1 ), // #143
223 INST(Das , X86Op , O(000000,2F,_,_,_,_,_,_ ), 0 , 0 , 0 , 559 , 1 , 1 ), // #144
224 INST(Dec , X86IncDec , O(000000,FE,1,_,x,_,_,_ ), O(000000,48,_,_,x,_,_,_ ), 29 , 6 , 3013 , 51 , 43 ), // #145
225 INST(Div , X86M_GPB_MulDiv , O(000000,F6,6,_,x,_,_,_ ), 0 , 30 , 0 , 780 , 52 , 1 ), // #146
226 INST(Divpd , ExtRm , O(660F00,5E,_,_,_,_,_,_ ), 0 , 3 , 0 , 3988 , 5 , 4 ), // #147
227 INST(Divps , ExtRm , O(000F00,5E,_,_,_,_,_,_ ), 0 , 4 , 0 , 3995 , 5 , 5 ), // #148
228 INST(Divsd , ExtRm , O(F20F00,5E,_,_,_,_,_,_ ), 0 , 5 , 0 , 4002 , 6 , 4 ), // #149
229 INST(Divss , ExtRm , O(F30F00,5E,_,_,_,_,_,_ ), 0 , 6 , 0 , 4009 , 7 , 5 ), // #150
230 INST(Dppd , ExtRmi , O(660F3A,41,_,_,_,_,_,_ ), 0 , 8 , 0 , 4026 , 8 , 12 ), // #151
231 INST(Dpps , ExtRmi , O(660F3A,40,_,_,_,_,_,_ ), 0 , 8 , 0 , 4032 , 8 , 12 ), // #152
232 INST(Emms , X86Op , O(000F00,77,_,_,_,_,_,_ ), 0 , 4 , 0 , 748 , 53 , 44 ), // #153
233 INST(Enqcmd , X86EnqcmdMovdir64b , O(F20F38,F8,_,_,_,_,_,_ ), 0 , 28 , 0 , 563 , 54 , 45 ), // #154
234 INST(Enqcmds , X86EnqcmdMovdir64b , O(F30F38,F8,_,_,_,_,_,_ ), 0 , 7 , 0 , 570 , 54 , 45 ), // #155
235 INST(Enter , X86Enter , O(000000,C8,_,_,_,_,_,_ ), 0 , 0 , 0 , 2856 , 55 , 0 ), // #156
236 INST(Extractps , ExtExtract , O(660F3A,17,_,_,_,_,_,_ ), 0 , 8 , 0 , 4222 , 56 , 12 ), // #157
237 INST(Extrq , ExtExtrq , O(660F00,79,_,_,_,_,_,_ ), O(660F00,78,0,_,_,_,_,_ ), 3 , 7 , 7290 , 57 , 46 ), // #158
238 INST(F2xm1 , FpuOp , O_FPU(00,D9F0,_) , 0 , 31 , 0 , 578 , 30 , 0 ), // #159
239 INST(Fabs , FpuOp , O_FPU(00,D9E1,_) , 0 , 31 , 0 , 584 , 30 , 0 ), // #160
240 INST(Fadd , FpuArith , O_FPU(00,C0C0,0) , 0 , 32 , 0 , 2067 , 58 , 0 ), // #161
241 INST(Faddp , FpuRDef , O_FPU(00,DEC0,_) , 0 , 33 , 0 , 589 , 59 , 0 ), // #162
242 INST(Fbld , X86M_Only , O_FPU(00,00DF,4) , 0 , 34 , 0 , 595 , 60 , 0 ), // #163
243 INST(Fbstp , X86M_Only , O_FPU(00,00DF,6) , 0 , 35 , 0 , 600 , 60 , 0 ), // #164
244 INST(Fchs , FpuOp , O_FPU(00,D9E0,_) , 0 , 31 , 0 , 606 , 30 , 0 ), // #165
245 INST(Fclex , FpuOp , O_FPU(9B,DBE2,_) , 0 , 36 , 0 , 611 , 30 , 0 ), // #166
246 INST(Fcmovb , FpuR , O_FPU(00,DAC0,_) , 0 , 37 , 0 , 617 , 61 , 28 ), // #167
247 INST(Fcmovbe , FpuR , O_FPU(00,DAD0,_) , 0 , 37 , 0 , 624 , 61 , 27 ), // #168
248 INST(Fcmove , FpuR , O_FPU(00,DAC8,_) , 0 , 37 , 0 , 632 , 61 , 29 ), // #169
249 INST(Fcmovnb , FpuR , O_FPU(00,DBC0,_) , 0 , 38 , 0 , 639 , 61 , 28 ), // #170
250 INST(Fcmovnbe , FpuR , O_FPU(00,DBD0,_) , 0 , 38 , 0 , 647 , 61 , 27 ), // #171
251 INST(Fcmovne , FpuR , O_FPU(00,DBC8,_) , 0 , 38 , 0 , 656 , 61 , 29 ), // #172
252 INST(Fcmovnu , FpuR , O_FPU(00,DBD8,_) , 0 , 38 , 0 , 664 , 61 , 33 ), // #173
253 INST(Fcmovu , FpuR , O_FPU(00,DAD8,_) , 0 , 37 , 0 , 672 , 61 , 33 ), // #174
254 INST(Fcom , FpuCom , O_FPU(00,D0D0,2) , 0 , 39 , 0 , 679 , 62 , 0 ), // #175
255 INST(Fcomi , FpuR , O_FPU(00,DBF0,_) , 0 , 38 , 0 , 684 , 61 , 47 ), // #176
256 INST(Fcomip , FpuR , O_FPU(00,DFF0,_) , 0 , 40 , 0 , 690 , 61 , 47 ), // #177
257 INST(Fcomp , FpuCom , O_FPU(00,D8D8,3) , 0 , 41 , 0 , 697 , 62 , 0 ), // #178
258 INST(Fcompp , FpuOp , O_FPU(00,DED9,_) , 0 , 33 , 0 , 703 , 30 , 0 ), // #179
259 INST(Fcos , FpuOp , O_FPU(00,D9FF,_) , 0 , 31 , 0 , 710 , 30 , 0 ), // #180
260 INST(Fdecstp , FpuOp , O_FPU(00,D9F6,_) , 0 , 31 , 0 , 715 , 30 , 0 ), // #181
261 INST(Fdiv , FpuArith , O_FPU(00,F0F8,6) , 0 , 42 , 0 , 723 , 58 , 0 ), // #182
262 INST(Fdivp , FpuRDef , O_FPU(00,DEF8,_) , 0 , 33 , 0 , 728 , 59 , 0 ), // #183
263 INST(Fdivr , FpuArith , O_FPU(00,F8F0,7) , 0 , 43 , 0 , 734 , 58 , 0 ), // #184
264 INST(Fdivrp , FpuRDef , O_FPU(00,DEF0,_) , 0 , 33 , 0 , 740 , 59 , 0 ), // #185
265 INST(Femms , X86Op , O(000F00,0E,_,_,_,_,_,_ ), 0 , 4 , 0 , 747 , 30 , 48 ), // #186
266 INST(Ffree , FpuR , O_FPU(00,DDC0,_) , 0 , 44 , 0 , 753 , 61 , 0 ), // #187
267 INST(Fiadd , FpuM , O_FPU(00,00DA,0) , 0 , 45 , 0 , 759 , 63 , 0 ), // #188
268 INST(Ficom , FpuM , O_FPU(00,00DA,2) , 0 , 46 , 0 , 765 , 63 , 0 ), // #189
269 INST(Ficomp , FpuM , O_FPU(00,00DA,3) , 0 , 47 , 0 , 771 , 63 , 0 ), // #190
270 INST(Fidiv , FpuM , O_FPU(00,00DA,6) , 0 , 35 , 0 , 778 , 63 , 0 ), // #191
271 INST(Fidivr , FpuM , O_FPU(00,00DA,7) , 0 , 48 , 0 , 784 , 63 , 0 ), // #192
272 INST(Fild , FpuM , O_FPU(00,00DB,0) , O_FPU(00,00DF,5) , 45 , 8 , 791 , 64 , 0 ), // #193
273 INST(Fimul , FpuM , O_FPU(00,00DA,1) , 0 , 49 , 0 , 796 , 63 , 0 ), // #194
274 INST(Fincstp , FpuOp , O_FPU(00,D9F7,_) , 0 , 31 , 0 , 802 , 30 , 0 ), // #195
275 INST(Finit , FpuOp , O_FPU(9B,DBE3,_) , 0 , 36 , 0 , 810 , 30 , 0 ), // #196
276 INST(Fist , FpuM , O_FPU(00,00DB,2) , 0 , 46 , 0 , 816 , 63 , 0 ), // #197
277 INST(Fistp , FpuM , O_FPU(00,00DB,3) , O_FPU(00,00DF,7) , 47 , 9 , 821 , 64 , 0 ), // #198
278 INST(Fisttp , FpuM , O_FPU(00,00DB,1) , O_FPU(00,00DD,1) , 49 , 10 , 827 , 64 , 6 ), // #199
279 INST(Fisub , FpuM , O_FPU(00,00DA,4) , 0 , 34 , 0 , 834 , 63 , 0 ), // #200
280 INST(Fisubr , FpuM , O_FPU(00,00DA,5) , 0 , 50 , 0 , 840 , 63 , 0 ), // #201
281 INST(Fld , FpuFldFst , O_FPU(00,00D9,0) , O_FPU(00,00DB,5) , 45 , 11 , 847 , 65 , 0 ), // #202
282 INST(Fld1 , FpuOp , O_FPU(00,D9E8,_) , 0 , 31 , 0 , 851 , 30 , 0 ), // #203
283 INST(Fldcw , X86M_Only , O_FPU(00,00D9,5) , 0 , 50 , 0 , 856 , 66 , 0 ), // #204
284 INST(Fldenv , X86M_Only , O_FPU(00,00D9,4) , 0 , 34 , 0 , 862 , 31 , 0 ), // #205
285 INST(Fldl2e , FpuOp , O_FPU(00,D9EA,_) , 0 , 31 , 0 , 869 , 30 , 0 ), // #206
286 INST(Fldl2t , FpuOp , O_FPU(00,D9E9,_) , 0 , 31 , 0 , 876 , 30 , 0 ), // #207
287 INST(Fldlg2 , FpuOp , O_FPU(00,D9EC,_) , 0 , 31 , 0 , 883 , 30 , 0 ), // #208
288 INST(Fldln2 , FpuOp , O_FPU(00,D9ED,_) , 0 , 31 , 0 , 890 , 30 , 0 ), // #209
289 INST(Fldpi , FpuOp , O_FPU(00,D9EB,_) , 0 , 31 , 0 , 897 , 30 , 0 ), // #210
290 INST(Fldz , FpuOp , O_FPU(00,D9EE,_) , 0 , 31 , 0 , 903 , 30 , 0 ), // #211
291 INST(Fmul , FpuArith , O_FPU(00,C8C8,1) , 0 , 51 , 0 , 2109 , 58 , 0 ), // #212
292 INST(Fmulp , FpuRDef , O_FPU(00,DEC8,_) , 0 , 33 , 0 , 908 , 59 , 0 ), // #213
293 INST(Fnclex , FpuOp , O_FPU(00,DBE2,_) , 0 , 38 , 0 , 914 , 30 , 0 ), // #214
294 INST(Fninit , FpuOp , O_FPU(00,DBE3,_) , 0 , 38 , 0 , 921 , 30 , 0 ), // #215
295 INST(Fnop , FpuOp , O_FPU(00,D9D0,_) , 0 , 31 , 0 , 928 , 30 , 0 ), // #216
296 INST(Fnsave , X86M_Only , O_FPU(00,00DD,6) , 0 , 35 , 0 , 933 , 31 , 0 ), // #217
297 INST(Fnstcw , X86M_Only , O_FPU(00,00D9,7) , 0 , 48 , 0 , 940 , 66 , 0 ), // #218
298 INST(Fnstenv , X86M_Only , O_FPU(00,00D9,6) , 0 , 35 , 0 , 947 , 31 , 0 ), // #219
299 INST(Fnstsw , FpuStsw , O_FPU(00,00DD,7) , O_FPU(00,DFE0,_) , 48 , 12 , 955 , 67 , 0 ), // #220
300 INST(Fpatan , FpuOp , O_FPU(00,D9F3,_) , 0 , 31 , 0 , 962 , 30 , 0 ), // #221
301 INST(Fprem , FpuOp , O_FPU(00,D9F8,_) , 0 , 31 , 0 , 969 , 30 , 0 ), // #222
302 INST(Fprem1 , FpuOp , O_FPU(00,D9F5,_) , 0 , 31 , 0 , 975 , 30 , 0 ), // #223
303 INST(Fptan , FpuOp , O_FPU(00,D9F2,_) , 0 , 31 , 0 , 982 , 30 , 0 ), // #224
304 INST(Frndint , FpuOp , O_FPU(00,D9FC,_) , 0 , 31 , 0 , 988 , 30 , 0 ), // #225
305 INST(Frstor , X86M_Only , O_FPU(00,00DD,4) , 0 , 34 , 0 , 996 , 31 , 0 ), // #226
306 INST(Fsave , X86M_Only , O_FPU(9B,00DD,6) , 0 , 52 , 0 , 1003 , 31 , 0 ), // #227
307 INST(Fscale , FpuOp , O_FPU(00,D9FD,_) , 0 , 31 , 0 , 1009 , 30 , 0 ), // #228
308 INST(Fsin , FpuOp , O_FPU(00,D9FE,_) , 0 , 31 , 0 , 1016 , 30 , 0 ), // #229
309 INST(Fsincos , FpuOp , O_FPU(00,D9FB,_) , 0 , 31 , 0 , 1021 , 30 , 0 ), // #230
310 INST(Fsqrt , FpuOp , O_FPU(00,D9FA,_) , 0 , 31 , 0 , 1029 , 30 , 0 ), // #231
311 INST(Fst , FpuFldFst , O_FPU(00,00D9,2) , 0 , 46 , 0 , 1035 , 68 , 0 ), // #232
312 INST(Fstcw , X86M_Only , O_FPU(9B,00D9,7) , 0 , 53 , 0 , 1039 , 66 , 0 ), // #233
313 INST(Fstenv , X86M_Only , O_FPU(9B,00D9,6) , 0 , 52 , 0 , 1045 , 31 , 0 ), // #234
314 INST(Fstp , FpuFldFst , O_FPU(00,00D9,3) , O(000000,DB,7,_,_,_,_,_ ), 47 , 13 , 1052 , 65 , 0 ), // #235
315 INST(Fstsw , FpuStsw , O_FPU(9B,00DD,7) , O_FPU(9B,DFE0,_) , 53 , 14 , 1057 , 67 , 0 ), // #236
316 INST(Fsub , FpuArith , O_FPU(00,E0E8,4) , 0 , 54 , 0 , 2187 , 58 , 0 ), // #237
317 INST(Fsubp , FpuRDef , O_FPU(00,DEE8,_) , 0 , 33 , 0 , 1063 , 59 , 0 ), // #238
318 INST(Fsubr , FpuArith , O_FPU(00,E8E0,5) , 0 , 55 , 0 , 2193 , 58 , 0 ), // #239
319 INST(Fsubrp , FpuRDef , O_FPU(00,DEE0,_) , 0 , 33 , 0 , 1069 , 59 , 0 ), // #240
320 INST(Ftst , FpuOp , O_FPU(00,D9E4,_) , 0 , 31 , 0 , 1076 , 30 , 0 ), // #241
321 INST(Fucom , FpuRDef , O_FPU(00,DDE0,_) , 0 , 44 , 0 , 1081 , 59 , 0 ), // #242
322 INST(Fucomi , FpuR , O_FPU(00,DBE8,_) , 0 , 38 , 0 , 1087 , 61 , 47 ), // #243
323 INST(Fucomip , FpuR , O_FPU(00,DFE8,_) , 0 , 40 , 0 , 1094 , 61 , 47 ), // #244
324 INST(Fucomp , FpuRDef , O_FPU(00,DDE8,_) , 0 , 44 , 0 , 1102 , 59 , 0 ), // #245
325 INST(Fucompp , FpuOp , O_FPU(00,DAE9,_) , 0 , 37 , 0 , 1109 , 30 , 0 ), // #246
326 INST(Fwait , X86Op , O_FPU(00,009B,_) , 0 , 56 , 0 , 1117 , 30 , 0 ), // #247
327 INST(Fxam , FpuOp , O_FPU(00,D9E5,_) , 0 , 31 , 0 , 1123 , 30 , 0 ), // #248
328 INST(Fxch , FpuR , O_FPU(00,D9C8,_) , 0 , 31 , 0 , 1128 , 59 , 0 ), // #249
329 INST(Fxrstor , X86M_Only , O(000F00,AE,1,_,_,_,_,_ ), 0 , 27 , 0 , 1133 , 31 , 49 ), // #250
330 INST(Fxrstor64 , X86M_Only , O(000F00,AE,1,_,1,_,_,_ ), 0 , 26 , 0 , 1141 , 69 , 49 ), // #251
331 INST(Fxsave , X86M_Only , O(000F00,AE,0,_,_,_,_,_ ), 0 , 4 , 0 , 1151 , 31 , 49 ), // #252
332 INST(Fxsave64 , X86M_Only , O(000F00,AE,0,_,1,_,_,_ ), 0 , 57 , 0 , 1158 , 69 , 49 ), // #253
333 INST(Fxtract , FpuOp , O_FPU(00,D9F4,_) , 0 , 31 , 0 , 1167 , 30 , 0 ), // #254
334 INST(Fyl2x , FpuOp , O_FPU(00,D9F1,_) , 0 , 31 , 0 , 1175 , 30 , 0 ), // #255
335 INST(Fyl2xp1 , FpuOp , O_FPU(00,D9F9,_) , 0 , 31 , 0 , 1181 , 30 , 0 ), // #256
336 INST(Getsec , X86Op , O(000F00,37,_,_,_,_,_,_ ), 0 , 4 , 0 , 1189 , 30 , 50 ), // #257
337 INST(Gf2p8affineinvqb , ExtRmi , O(660F3A,CF,_,_,_,_,_,_ ), 0 , 8 , 0 , 5577 , 8 , 51 ), // #258
338 INST(Gf2p8affineqb , ExtRmi , O(660F3A,CE,_,_,_,_,_,_ ), 0 , 8 , 0 , 5595 , 8 , 51 ), // #259
339 INST(Gf2p8mulb , ExtRm , O(660F38,CF,_,_,_,_,_,_ ), 0 , 2 , 0 , 5610 , 5 , 51 ), // #260
340 INST(Haddpd , ExtRm , O(660F00,7C,_,_,_,_,_,_ ), 0 , 3 , 0 , 5621 , 5 , 6 ), // #261
341 INST(Haddps , ExtRm , O(F20F00,7C,_,_,_,_,_,_ ), 0 , 5 , 0 , 5629 , 5 , 6 ), // #262
342 INST(Hlt , X86Op , O(000000,F4,_,_,_,_,_,_ ), 0 , 0 , 0 , 1196 , 30 , 0 ), // #263
343 INST(Hsubpd , ExtRm , O(660F00,7D,_,_,_,_,_,_ ), 0 , 3 , 0 , 5637 , 5 , 6 ), // #264
344 INST(Hsubps , ExtRm , O(F20F00,7D,_,_,_,_,_,_ ), 0 , 5 , 0 , 5645 , 5 , 6 ), // #265
345 INST(Idiv , X86M_GPB_MulDiv , O(000000,F6,7,_,x,_,_,_ ), 0 , 25 , 0 , 779 , 52 , 1 ), // #266
346 INST(Imul , X86Imul , O(000000,F6,5,_,x,_,_,_ ), 0 , 58 , 0 , 797 , 70 , 1 ), // #267
347 INST(In , X86In , O(000000,EC,_,_,_,_,_,_ ), O(000000,E4,_,_,_,_,_,_ ), 0 , 15 , 10076, 71 , 0 ), // #268
348 INST(Inc , X86IncDec , O(000000,FE,0,_,x,_,_,_ ), O(000000,40,_,_,x,_,_,_ ), 0 , 16 , 1200 , 51 , 43 ), // #269
349 INST(Ins , X86Ins , O(000000,6C,_,_,_,_,_,_ ), 0 , 0 , 0 , 1857 , 72 , 0 ), // #270
350 INST(Insertps , ExtRmi , O(660F3A,21,_,_,_,_,_,_ ), 0 , 8 , 0 , 5781 , 36 , 12 ), // #271
351 INST(Insertq , ExtInsertq , O(F20F00,79,_,_,_,_,_,_ ), O(F20F00,78,_,_,_,_,_,_ ), 5 , 17 , 1204 , 73 , 46 ), // #272
352 INST(Int , X86Int , O(000000,CD,_,_,_,_,_,_ ), 0 , 0 , 0 , 992 , 74 , 0 ), // #273
353 INST(Int3 , X86Op , O(000000,CC,_,_,_,_,_,_ ), 0 , 0 , 0 , 1212 , 30 , 0 ), // #274
354 INST(Into , X86Op , O(000000,CE,_,_,_,_,_,_ ), 0 , 0 , 0 , 1217 , 75 , 52 ), // #275
355 INST(Invd , X86Op , O(000F00,08,_,_,_,_,_,_ ), 0 , 4 , 0 , 10031, 30 , 41 ), // #276
356 INST(Invept , X86Rm_NoSize , O(660F38,80,_,_,_,_,_,_ ), 0 , 2 , 0 , 1222 , 76 , 53 ), // #277
357 INST(Invlpg , X86M_Only , O(000F00,01,7,_,_,_,_,_ ), 0 , 22 , 0 , 1229 , 31 , 41 ), // #278
358 INST(Invlpga , X86Op_xAddr , O(000F01,DF,_,_,_,_,_,_ ), 0 , 21 , 0 , 1236 , 77 , 22 ), // #279
359 INST(Invpcid , X86Rm_NoSize , O(660F38,82,_,_,_,_,_,_ ), 0 , 2 , 0 , 1244 , 76 , 41 ), // #280
360 INST(Invvpid , X86Rm_NoSize , O(660F38,81,_,_,_,_,_,_ ), 0 , 2 , 0 , 1252 , 76 , 53 ), // #281
361 INST(Iret , X86Op , O(000000,CF,_,_,_,_,_,_ ), 0 , 0 , 0 , 1260 , 78 , 1 ), // #282
362 INST(Iretd , X86Op , O(000000,CF,_,_,_,_,_,_ ), 0 , 0 , 0 , 1265 , 78 , 1 ), // #283
363 INST(Iretq , X86Op , O(000000,CF,_,_,1,_,_,_ ), 0 , 20 , 0 , 1271 , 79 , 1 ), // #284
364 INST(Iretw , X86Op , O(660000,CF,_,_,_,_,_,_ ), 0 , 19 , 0 , 1277 , 78 , 1 ), // #285
365 INST(Ja , X86Jcc , O(000F00,87,_,_,_,_,_,_ ), O(000000,77,_,_,_,_,_,_ ), 4 , 18 , 1283 , 80 , 54 ), // #286
366 INST(Jae , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1286 , 80 , 55 ), // #287
367 INST(Jb , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1290 , 80 , 55 ), // #288
368 INST(Jbe , X86Jcc , O(000F00,86,_,_,_,_,_,_ ), O(000000,76,_,_,_,_,_,_ ), 4 , 21 , 1293 , 80 , 54 ), // #289
369 INST(Jc , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1297 , 80 , 55 ), // #290
370 INST(Je , X86Jcc , O(000F00,84,_,_,_,_,_,_ ), O(000000,74,_,_,_,_,_,_ ), 4 , 22 , 1300 , 80 , 56 ), // #291
371 INST(Jecxz , X86JecxzLoop , 0 , O(000000,E3,_,_,_,_,_,_ ), 0 , 23 , 1303 , 81 , 0 ), // #292
372 INST(Jg , X86Jcc , O(000F00,8F,_,_,_,_,_,_ ), O(000000,7F,_,_,_,_,_,_ ), 4 , 24 , 1309 , 80 , 57 ), // #293
373 INST(Jge , X86Jcc , O(000F00,8D,_,_,_,_,_,_ ), O(000000,7D,_,_,_,_,_,_ ), 4 , 25 , 1312 , 80 , 58 ), // #294
374 INST(Jl , X86Jcc , O(000F00,8C,_,_,_,_,_,_ ), O(000000,7C,_,_,_,_,_,_ ), 4 , 26 , 1316 , 80 , 58 ), // #295
375 INST(Jle , X86Jcc , O(000F00,8E,_,_,_,_,_,_ ), O(000000,7E,_,_,_,_,_,_ ), 4 , 27 , 1319 , 80 , 57 ), // #296
376 INST(Jmp , X86Jmp , O(000000,FF,4,_,_,_,_,_ ), O(000000,EB,_,_,_,_,_,_ ), 9 , 28 , 1323 , 82 , 0 ), // #297
377 INST(Jna , X86Jcc , O(000F00,86,_,_,_,_,_,_ ), O(000000,76,_,_,_,_,_,_ ), 4 , 21 , 1327 , 80 , 54 ), // #298
378 INST(Jnae , X86Jcc , O(000F00,82,_,_,_,_,_,_ ), O(000000,72,_,_,_,_,_,_ ), 4 , 20 , 1331 , 80 , 55 ), // #299
379 INST(Jnb , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1336 , 80 , 55 ), // #300
380 INST(Jnbe , X86Jcc , O(000F00,87,_,_,_,_,_,_ ), O(000000,77,_,_,_,_,_,_ ), 4 , 18 , 1340 , 80 , 54 ), // #301
381 INST(Jnc , X86Jcc , O(000F00,83,_,_,_,_,_,_ ), O(000000,73,_,_,_,_,_,_ ), 4 , 19 , 1345 , 80 , 55 ), // #302
382 INST(Jne , X86Jcc , O(000F00,85,_,_,_,_,_,_ ), O(000000,75,_,_,_,_,_,_ ), 4 , 29 , 1349 , 80 , 56 ), // #303
383 INST(Jng , X86Jcc , O(000F00,8E,_,_,_,_,_,_ ), O(000000,7E,_,_,_,_,_,_ ), 4 , 27 , 1353 , 80 , 57 ), // #304
384 INST(Jnge , X86Jcc , O(000F00,8C,_,_,_,_,_,_ ), O(000000,7C,_,_,_,_,_,_ ), 4 , 26 , 1357 , 80 , 58 ), // #305
385 INST(Jnl , X86Jcc , O(000F00,8D,_,_,_,_,_,_ ), O(000000,7D,_,_,_,_,_,_ ), 4 , 25 , 1362 , 80 , 58 ), // #306
386 INST(Jnle , X86Jcc , O(000F00,8F,_,_,_,_,_,_ ), O(000000,7F,_,_,_,_,_,_ ), 4 , 24 , 1366 , 80 , 57 ), // #307
387 INST(Jno , X86Jcc , O(000F00,81,_,_,_,_,_,_ ), O(000000,71,_,_,_,_,_,_ ), 4 , 30 , 1371 , 80 , 52 ), // #308
388 INST(Jnp , X86Jcc , O(000F00,8B,_,_,_,_,_,_ ), O(000000,7B,_,_,_,_,_,_ ), 4 , 31 , 1375 , 80 , 59 ), // #309
389 INST(Jns , X86Jcc , O(000F00,89,_,_,_,_,_,_ ), O(000000,79,_,_,_,_,_,_ ), 4 , 32 , 1379 , 80 , 60 ), // #310
390 INST(Jnz , X86Jcc , O(000F00,85,_,_,_,_,_,_ ), O(000000,75,_,_,_,_,_,_ ), 4 , 29 , 1383 , 80 , 56 ), // #311
391 INST(Jo , X86Jcc , O(000F00,80,_,_,_,_,_,_ ), O(000000,70,_,_,_,_,_,_ ), 4 , 33 , 1387 , 80 , 52 ), // #312
392 INST(Jp , X86Jcc , O(000F00,8A,_,_,_,_,_,_ ), O(000000,7A,_,_,_,_,_,_ ), 4 , 34 , 1390 , 80 , 59 ), // #313
393 INST(Jpe , X86Jcc , O(000F00,8A,_,_,_,_,_,_ ), O(000000,7A,_,_,_,_,_,_ ), 4 , 34 , 1393 , 80 , 59 ), // #314
394 INST(Jpo , X86Jcc , O(000F00,8B,_,_,_,_,_,_ ), O(000000,7B,_,_,_,_,_,_ ), 4 , 31 , 1397 , 80 , 59 ), // #315
395 INST(Js , X86Jcc , O(000F00,88,_,_,_,_,_,_ ), O(000000,78,_,_,_,_,_,_ ), 4 , 35 , 1401 , 80 , 60 ), // #316
396 INST(Jz , X86Jcc , O(000F00,84,_,_,_,_,_,_ ), O(000000,74,_,_,_,_,_,_ ), 4 , 22 , 1404 , 80 , 56 ), // #317
397 INST(Kaddb , VexRvm , V(660F00,4A,_,1,0,_,_,_ ), 0 , 59 , 0 , 1407 , 83 , 61 ), // #318
398 INST(Kaddd , VexRvm , V(660F00,4A,_,1,1,_,_,_ ), 0 , 60 , 0 , 1413 , 83 , 62 ), // #319
399 INST(Kaddq , VexRvm , V(000F00,4A,_,1,1,_,_,_ ), 0 , 61 , 0 , 1419 , 83 , 62 ), // #320
400 INST(Kaddw , VexRvm , V(000F00,4A,_,1,0,_,_,_ ), 0 , 62 , 0 , 1425 , 83 , 61 ), // #321
401 INST(Kandb , VexRvm , V(660F00,41,_,1,0,_,_,_ ), 0 , 59 , 0 , 1431 , 83 , 61 ), // #322
402 INST(Kandd , VexRvm , V(660F00,41,_,1,1,_,_,_ ), 0 , 60 , 0 , 1437 , 83 , 62 ), // #323
403 INST(Kandnb , VexRvm , V(660F00,42,_,1,0,_,_,_ ), 0 , 59 , 0 , 1443 , 83 , 61 ), // #324
404 INST(Kandnd , VexRvm , V(660F00,42,_,1,1,_,_,_ ), 0 , 60 , 0 , 1450 , 83 , 62 ), // #325
405 INST(Kandnq , VexRvm , V(000F00,42,_,1,1,_,_,_ ), 0 , 61 , 0 , 1457 , 83 , 62 ), // #326
406 INST(Kandnw , VexRvm , V(000F00,42,_,1,0,_,_,_ ), 0 , 62 , 0 , 1464 , 83 , 63 ), // #327
407 INST(Kandq , VexRvm , V(000F00,41,_,1,1,_,_,_ ), 0 , 61 , 0 , 1471 , 83 , 62 ), // #328
408 INST(Kandw , VexRvm , V(000F00,41,_,1,0,_,_,_ ), 0 , 62 , 0 , 1477 , 83 , 63 ), // #329
409 INST(Kmovb , VexKmov , V(660F00,90,_,0,0,_,_,_ ), V(660F00,92,_,0,0,_,_,_ ), 63 , 36 , 1483 , 84 , 61 ), // #330
410 INST(Kmovd , VexKmov , V(660F00,90,_,0,1,_,_,_ ), V(F20F00,92,_,0,0,_,_,_ ), 64 , 37 , 7770 , 85 , 62 ), // #331
411 INST(Kmovq , VexKmov , V(000F00,90,_,0,1,_,_,_ ), V(F20F00,92,_,0,1,_,_,_ ), 65 , 38 , 7781 , 86 , 62 ), // #332
412 INST(Kmovw , VexKmov , V(000F00,90,_,0,0,_,_,_ ), V(000F00,92,_,0,0,_,_,_ ), 66 , 39 , 1489 , 87 , 63 ), // #333
413 INST(Knotb , VexRm , V(660F00,44,_,0,0,_,_,_ ), 0 , 63 , 0 , 1495 , 88 , 61 ), // #334
414 INST(Knotd , VexRm , V(660F00,44,_,0,1,_,_,_ ), 0 , 64 , 0 , 1501 , 88 , 62 ), // #335
415 INST(Knotq , VexRm , V(000F00,44,_,0,1,_,_,_ ), 0 , 65 , 0 , 1507 , 88 , 62 ), // #336
416 INST(Knotw , VexRm , V(000F00,44,_,0,0,_,_,_ ), 0 , 66 , 0 , 1513 , 88 , 63 ), // #337
417 INST(Korb , VexRvm , V(660F00,45,_,1,0,_,_,_ ), 0 , 59 , 0 , 1519 , 83 , 61 ), // #338
418 INST(Kord , VexRvm , V(660F00,45,_,1,1,_,_,_ ), 0 , 60 , 0 , 1524 , 83 , 62 ), // #339
419 INST(Korq , VexRvm , V(000F00,45,_,1,1,_,_,_ ), 0 , 61 , 0 , 1529 , 83 , 62 ), // #340
420 INST(Kortestb , VexRm , V(660F00,98,_,0,0,_,_,_ ), 0 , 63 , 0 , 1534 , 88 , 64 ), // #341
421 INST(Kortestd , VexRm , V(660F00,98,_,0,1,_,_,_ ), 0 , 64 , 0 , 1543 , 88 , 65 ), // #342
422 INST(Kortestq , VexRm , V(000F00,98,_,0,1,_,_,_ ), 0 , 65 , 0 , 1552 , 88 , 65 ), // #343
423 INST(Kortestw , VexRm , V(000F00,98,_,0,0,_,_,_ ), 0 , 66 , 0 , 1561 , 88 , 66 ), // #344
424 INST(Korw , VexRvm , V(000F00,45,_,1,0,_,_,_ ), 0 , 62 , 0 , 1570 , 83 , 63 ), // #345
425 INST(Kshiftlb , VexRmi , V(660F3A,32,_,0,0,_,_,_ ), 0 , 67 , 0 , 1575 , 89 , 61 ), // #346
426 INST(Kshiftld , VexRmi , V(660F3A,33,_,0,0,_,_,_ ), 0 , 67 , 0 , 1584 , 89 , 62 ), // #347
427 INST(Kshiftlq , VexRmi , V(660F3A,33,_,0,1,_,_,_ ), 0 , 68 , 0 , 1593 , 89 , 62 ), // #348
428 INST(Kshiftlw , VexRmi , V(660F3A,32,_,0,1,_,_,_ ), 0 , 68 , 0 , 1602 , 89 , 63 ), // #349
429 INST(Kshiftrb , VexRmi , V(660F3A,30,_,0,0,_,_,_ ), 0 , 67 , 0 , 1611 , 89 , 61 ), // #350
430 INST(Kshiftrd , VexRmi , V(660F3A,31,_,0,0,_,_,_ ), 0 , 67 , 0 , 1620 , 89 , 62 ), // #351
431 INST(Kshiftrq , VexRmi , V(660F3A,31,_,0,1,_,_,_ ), 0 , 68 , 0 , 1629 , 89 , 62 ), // #352
432 INST(Kshiftrw , VexRmi , V(660F3A,30,_,0,1,_,_,_ ), 0 , 68 , 0 , 1638 , 89 , 63 ), // #353
433 INST(Ktestb , VexRm , V(660F00,99,_,0,0,_,_,_ ), 0 , 63 , 0 , 1647 , 88 , 64 ), // #354
434 INST(Ktestd , VexRm , V(660F00,99,_,0,1,_,_,_ ), 0 , 64 , 0 , 1654 , 88 , 65 ), // #355
435 INST(Ktestq , VexRm , V(000F00,99,_,0,1,_,_,_ ), 0 , 65 , 0 , 1661 , 88 , 65 ), // #356
436 INST(Ktestw , VexRm , V(000F00,99,_,0,0,_,_,_ ), 0 , 66 , 0 , 1668 , 88 , 64 ), // #357
437 INST(Kunpckbw , VexRvm , V(660F00,4B,_,1,0,_,_,_ ), 0 , 59 , 0 , 1675 , 83 , 63 ), // #358
438 INST(Kunpckdq , VexRvm , V(000F00,4B,_,1,1,_,_,_ ), 0 , 61 , 0 , 1684 , 83 , 62 ), // #359
439 INST(Kunpckwd , VexRvm , V(000F00,4B,_,1,0,_,_,_ ), 0 , 62 , 0 , 1693 , 83 , 62 ), // #360
440 INST(Kxnorb , VexRvm , V(660F00,46,_,1,0,_,_,_ ), 0 , 59 , 0 , 1702 , 83 , 61 ), // #361
441 INST(Kxnord , VexRvm , V(660F00,46,_,1,1,_,_,_ ), 0 , 60 , 0 , 1709 , 83 , 62 ), // #362
442 INST(Kxnorq , VexRvm , V(000F00,46,_,1,1,_,_,_ ), 0 , 61 , 0 , 1716 , 83 , 62 ), // #363
443 INST(Kxnorw , VexRvm , V(000F00,46,_,1,0,_,_,_ ), 0 , 62 , 0 , 1723 , 83 , 63 ), // #364
444 INST(Kxorb , VexRvm , V(660F00,47,_,1,0,_,_,_ ), 0 , 59 , 0 , 1730 , 83 , 61 ), // #365
445 INST(Kxord , VexRvm , V(660F00,47,_,1,1,_,_,_ ), 0 , 60 , 0 , 1736 , 83 , 62 ), // #366
446 INST(Kxorq , VexRvm , V(000F00,47,_,1,1,_,_,_ ), 0 , 61 , 0 , 1742 , 83 , 62 ), // #367
447 INST(Kxorw , VexRvm , V(000F00,47,_,1,0,_,_,_ ), 0 , 62 , 0 , 1748 , 83 , 63 ), // #368
448 INST(Lahf , X86Op , O(000000,9F,_,_,_,_,_,_ ), 0 , 0 , 0 , 1754 , 90 , 67 ), // #369
449 INST(Lar , X86Rm , O(000F00,02,_,_,_,_,_,_ ), 0 , 4 , 0 , 1759 , 91 , 10 ), // #370
450 INST(Lddqu , ExtRm , O(F20F00,F0,_,_,_,_,_,_ ), 0 , 5 , 0 , 5791 , 92 , 6 ), // #371
451 INST(Ldmxcsr , X86M_Only , O(000F00,AE,2,_,_,_,_,_ ), 0 , 69 , 0 , 5798 , 93 , 5 ), // #372
452 INST(Lds , X86Rm , O(000000,C5,_,_,_,_,_,_ ), 0 , 0 , 0 , 1763 , 94 , 0 ), // #373
453 INST(Lea , X86Lea , O(000000,8D,_,_,x,_,_,_ ), 0 , 0 , 0 , 1767 , 95 , 0 ), // #374
454 INST(Leave , X86Op , O(000000,C9,_,_,_,_,_,_ ), 0 , 0 , 0 , 1771 , 30 , 0 ), // #375
455 INST(Les , X86Rm , O(000000,C4,_,_,_,_,_,_ ), 0 , 0 , 0 , 1777 , 94 , 0 ), // #376
456 INST(Lfence , X86Fence , O(000F00,AE,5,_,_,_,_,_ ), 0 , 70 , 0 , 1781 , 30 , 4 ), // #377
457 INST(Lfs , X86Rm , O(000F00,B4,_,_,_,_,_,_ ), 0 , 4 , 0 , 1788 , 96 , 0 ), // #378
458 INST(Lgdt , X86M_Only , O(000F00,01,2,_,_,_,_,_ ), 0 , 69 , 0 , 1792 , 31 , 0 ), // #379
459 INST(Lgs , X86Rm , O(000F00,B5,_,_,_,_,_,_ ), 0 , 4 , 0 , 1797 , 96 , 0 ), // #380
460 INST(Lidt , X86M_Only , O(000F00,01,3,_,_,_,_,_ ), 0 , 71 , 0 , 1801 , 31 , 0 ), // #381
461 INST(Lldt , X86M_NoSize , O(000F00,00,2,_,_,_,_,_ ), 0 , 69 , 0 , 1806 , 97 , 0 ), // #382
462 INST(Llwpcb , VexR_Wx , V(XOP_M9,12,0,0,x,_,_,_ ), 0 , 72 , 0 , 1811 , 98 , 68 ), // #383
463 INST(Lmsw , X86M_NoSize , O(000F00,01,6,_,_,_,_,_ ), 0 , 73 , 0 , 1818 , 97 , 0 ), // #384
464 INST(Lods , X86StrRm , O(000000,AC,_,_,_,_,_,_ ), 0 , 0 , 0 , 1823 , 99 , 69 ), // #385
465 INST(Loop , X86JecxzLoop , 0 , O(000000,E2,_,_,_,_,_,_ ), 0 , 40 , 1828 , 100, 0 ), // #386
466 INST(Loope , X86JecxzLoop , 0 , O(000000,E1,_,_,_,_,_,_ ), 0 , 41 , 1833 , 100, 56 ), // #387
467 INST(Loopne , X86JecxzLoop , 0 , O(000000,E0,_,_,_,_,_,_ ), 0 , 42 , 1839 , 100, 56 ), // #388
468 INST(Lsl , X86Rm , O(000F00,03,_,_,_,_,_,_ ), 0 , 4 , 0 , 1846 , 101, 10 ), // #389
469 INST(Lss , X86Rm , O(000F00,B2,_,_,_,_,_,_ ), 0 , 4 , 0 , 6289 , 96 , 0 ), // #390
470 INST(Ltr , X86M_NoSize , O(000F00,00,3,_,_,_,_,_ ), 0 , 71 , 0 , 1850 , 97 , 0 ), // #391
471 INST(Lwpins , VexVmi4_Wx , V(XOP_MA,12,0,0,x,_,_,_ ), 0 , 74 , 0 , 1854 , 102, 68 ), // #392
472 INST(Lwpval , VexVmi4_Wx , V(XOP_MA,12,1,0,x,_,_,_ ), 0 , 75 , 0 , 1861 , 102, 68 ), // #393
473 INST(Lzcnt , X86Rm_Raw66H , O(F30F00,BD,_,_,x,_,_,_ ), 0 , 6 , 0 , 1868 , 22 , 70 ), // #394
474 INST(Maskmovdqu , ExtRm_ZDI , O(660F00,57,_,_,_,_,_,_ ), 0 , 3 , 0 , 5807 , 103, 4 ), // #395
475 INST(Maskmovq , ExtRm_ZDI , O(000F00,F7,_,_,_,_,_,_ ), 0 , 4 , 0 , 7778 , 104, 71 ), // #396
476 INST(Maxpd , ExtRm , O(660F00,5F,_,_,_,_,_,_ ), 0 , 3 , 0 , 5841 , 5 , 4 ), // #397
477 INST(Maxps , ExtRm , O(000F00,5F,_,_,_,_,_,_ ), 0 , 4 , 0 , 5848 , 5 , 5 ), // #398
478 INST(Maxsd , ExtRm , O(F20F00,5F,_,_,_,_,_,_ ), 0 , 5 , 0 , 7797 , 6 , 4 ), // #399
479 INST(Maxss , ExtRm , O(F30F00,5F,_,_,_,_,_,_ ), 0 , 6 , 0 , 5862 , 7 , 5 ), // #400
480 INST(Mfence , X86Fence , O(000F00,AE,6,_,_,_,_,_ ), 0 , 73 , 0 , 1874 , 30 , 4 ), // #401
481 INST(Minpd , ExtRm , O(660F00,5D,_,_,_,_,_,_ ), 0 , 3 , 0 , 5891 , 5 , 4 ), // #402
482 INST(Minps , ExtRm , O(000F00,5D,_,_,_,_,_,_ ), 0 , 4 , 0 , 5898 , 5 , 5 ), // #403
483 INST(Minsd , ExtRm , O(F20F00,5D,_,_,_,_,_,_ ), 0 , 5 , 0 , 7861 , 6 , 4 ), // #404
484 INST(Minss , ExtRm , O(F30F00,5D,_,_,_,_,_,_ ), 0 , 6 , 0 , 5912 , 7 , 5 ), // #405
485 INST(Monitor , X86Op , O(000F01,C8,_,_,_,_,_,_ ), 0 , 21 , 0 , 1881 , 105, 72 ), // #406
486 INST(Monitorx , X86Op , O(000F01,FA,_,_,_,_,_,_ ), 0 , 21 , 0 , 1889 , 105, 73 ), // #407
487 INST(Mov , X86Mov , 0 , 0 , 0 , 0 , 138 , 106, 0 ), // #408
488 INST(Movapd , ExtMov , O(660F00,28,_,_,_,_,_,_ ), O(660F00,29,_,_,_,_,_,_ ), 3 , 43 , 5943 , 107, 4 ), // #409
489 INST(Movaps , ExtMov , O(000F00,28,_,_,_,_,_,_ ), O(000F00,29,_,_,_,_,_,_ ), 4 , 44 , 5951 , 107, 5 ), // #410
490 INST(Movbe , ExtMovbe , O(000F38,F0,_,_,x,_,_,_ ), O(000F38,F1,_,_,x,_,_,_ ), 76 , 45 , 626 , 108, 74 ), // #411
491 INST(Movd , ExtMovd , O(000F00,6E,_,_,_,_,_,_ ), O(000F00,7E,_,_,_,_,_,_ ), 4 , 46 , 7771 , 109, 75 ), // #412
492 INST(Movddup , ExtMov , O(F20F00,12,_,_,_,_,_,_ ), 0 , 5 , 0 , 5965 , 6 , 6 ), // #413
493 INST(Movdir64b , X86EnqcmdMovdir64b , O(660F38,F8,_,_,_,_,_,_ ), 0 , 2 , 0 , 1898 , 110, 76 ), // #414
494 INST(Movdiri , X86MovntiMovdiri , O(000F38,F9,_,_,_,_,_,_ ), 0 , 76 , 0 , 1908 , 111, 77 ), // #415
495 INST(Movdq2q , ExtMov , O(F20F00,D6,_,_,_,_,_,_ ), 0 , 5 , 0 , 1916 , 112, 4 ), // #416
496 INST(Movdqa , ExtMov , O(660F00,6F,_,_,_,_,_,_ ), O(660F00,7F,_,_,_,_,_,_ ), 3 , 47 , 5974 , 107, 4 ), // #417
497 INST(Movdqu , ExtMov , O(F30F00,6F,_,_,_,_,_,_ ), O(F30F00,7F,_,_,_,_,_,_ ), 6 , 48 , 5811 , 107, 4 ), // #418
498 INST(Movhlps , ExtMov , O(000F00,12,_,_,_,_,_,_ ), 0 , 4 , 0 , 6049 , 113, 5 ), // #419
499 INST(Movhpd , ExtMov , O(660F00,16,_,_,_,_,_,_ ), O(660F00,17,_,_,_,_,_,_ ), 3 , 49 , 6058 , 114, 4 ), // #420
500 INST(Movhps , ExtMov , O(000F00,16,_,_,_,_,_,_ ), O(000F00,17,_,_,_,_,_,_ ), 4 , 50 , 6066 , 114, 5 ), // #421
501 INST(Movlhps , ExtMov , O(000F00,16,_,_,_,_,_,_ ), 0 , 4 , 0 , 6074 , 113, 5 ), // #422
502 INST(Movlpd , ExtMov , O(660F00,12,_,_,_,_,_,_ ), O(660F00,13,_,_,_,_,_,_ ), 3 , 51 , 6083 , 114, 4 ), // #423
503 INST(Movlps , ExtMov , O(000F00,12,_,_,_,_,_,_ ), O(000F00,13,_,_,_,_,_,_ ), 4 , 52 , 6091 , 114, 5 ), // #424
504 INST(Movmskpd , ExtMov , O(660F00,50,_,_,_,_,_,_ ), 0 , 3 , 0 , 6099 , 115, 4 ), // #425
505 INST(Movmskps , ExtMov , O(000F00,50,_,_,_,_,_,_ ), 0 , 4 , 0 , 6109 , 115, 5 ), // #426
506 INST(Movntdq , ExtMov , 0 , O(660F00,E7,_,_,_,_,_,_ ), 0 , 53 , 6119 , 116, 4 ), // #427
507 INST(Movntdqa , ExtMov , O(660F38,2A,_,_,_,_,_,_ ), 0 , 2 , 0 , 6128 , 92 , 12 ), // #428
508 INST(Movnti , X86MovntiMovdiri , O(000F00,C3,_,_,x,_,_,_ ), 0 , 4 , 0 , 1924 , 111, 4 ), // #429
509 INST(Movntpd , ExtMov , 0 , O(660F00,2B,_,_,_,_,_,_ ), 0 , 54 , 6138 , 116, 4 ), // #430
510 INST(Movntps , ExtMov , 0 , O(000F00,2B,_,_,_,_,_,_ ), 0 , 55 , 6147 , 116, 5 ), // #431
511 INST(Movntq , ExtMov , 0 , O(000F00,E7,_,_,_,_,_,_ ), 0 , 56 , 1931 , 117, 71 ), // #432
512 INST(Movntsd , ExtMov , 0 , O(F20F00,2B,_,_,_,_,_,_ ), 0 , 57 , 1938 , 118, 46 ), // #433
513 INST(Movntss , ExtMov , 0 , O(F30F00,2B,_,_,_,_,_,_ ), 0 , 58 , 1946 , 119, 46 ), // #434
514 INST(Movq , ExtMovq , O(000F00,6E,_,_,x,_,_,_ ), O(000F00,7E,_,_,x,_,_,_ ), 4 , 59 , 7782 , 120, 75 ), // #435
515 INST(Movq2dq , ExtRm , O(F30F00,D6,_,_,_,_,_,_ ), 0 , 6 , 0 , 1954 , 121, 4 ), // #436
516 INST(Movs , X86StrMm , O(000000,A4,_,_,_,_,_,_ ), 0 , 0 , 0 , 425 , 122, 69 ), // #437
517 INST(Movsd , ExtMov , O(F20F00,10,_,_,_,_,_,_ ), O(F20F00,11,_,_,_,_,_,_ ), 5 , 60 , 6162 , 123, 4 ), // #438
518 INST(Movshdup , ExtRm , O(F30F00,16,_,_,_,_,_,_ ), 0 , 6 , 0 , 6169 , 5 , 6 ), // #439
519 INST(Movsldup , ExtRm , O(F30F00,12,_,_,_,_,_,_ ), 0 , 6 , 0 , 6179 , 5 , 6 ), // #440
520 INST(Movss , ExtMov , O(F30F00,10,_,_,_,_,_,_ ), O(F30F00,11,_,_,_,_,_,_ ), 6 , 61 , 6189 , 124, 5 ), // #441
521 INST(Movsx , X86MovsxMovzx , O(000F00,BE,_,_,x,_,_,_ ), 0 , 4 , 0 , 1962 , 125, 0 ), // #442
522 INST(Movsxd , X86Rm , O(000000,63,_,_,1,_,_,_ ), 0 , 20 , 0 , 1968 , 126, 0 ), // #443
523 INST(Movupd , ExtMov , O(660F00,10,_,_,_,_,_,_ ), O(660F00,11,_,_,_,_,_,_ ), 3 , 62 , 6196 , 107, 4 ), // #444
524 INST(Movups , ExtMov , O(000F00,10,_,_,_,_,_,_ ), O(000F00,11,_,_,_,_,_,_ ), 4 , 63 , 6204 , 107, 5 ), // #445
525 INST(Movzx , X86MovsxMovzx , O(000F00,B6,_,_,x,_,_,_ ), 0 , 4 , 0 , 1975 , 125, 0 ), // #446
526 INST(Mpsadbw , ExtRmi , O(660F3A,42,_,_,_,_,_,_ ), 0 , 8 , 0 , 6212 , 8 , 12 ), // #447
527 INST(Mul , X86M_GPB_MulDiv , O(000000,F6,4,_,x,_,_,_ ), 0 , 9 , 0 , 798 , 52 , 1 ), // #448
528 INST(Mulpd , ExtRm , O(660F00,59,_,_,_,_,_,_ ), 0 , 3 , 0 , 6266 , 5 , 4 ), // #449
529 INST(Mulps , ExtRm , O(000F00,59,_,_,_,_,_,_ ), 0 , 4 , 0 , 6273 , 5 , 5 ), // #450
530 INST(Mulsd , ExtRm , O(F20F00,59,_,_,_,_,_,_ ), 0 , 5 , 0 , 6280 , 6 , 4 ), // #451
531 INST(Mulss , ExtRm , O(F30F00,59,_,_,_,_,_,_ ), 0 , 6 , 0 , 6287 , 7 , 5 ), // #452
532 INST(Mulx , VexRvm_ZDX_Wx , V(F20F38,F6,_,0,x,_,_,_ ), 0 , 77 , 0 , 1981 , 127, 78 ), // #453
533 INST(Mwait , X86Op , O(000F01,C9,_,_,_,_,_,_ ), 0 , 21 , 0 , 1986 , 128, 72 ), // #454
534 INST(Mwaitx , X86Op , O(000F01,FB,_,_,_,_,_,_ ), 0 , 21 , 0 , 1992 , 129, 73 ), // #455
535 INST(Neg , X86M_GPB , O(000000,F6,3,_,x,_,_,_ ), 0 , 78 , 0 , 1999 , 130, 1 ), // #456
536 INST(Nop , X86M_Nop , O(000000,90,_,_,_,_,_,_ ), 0 , 0 , 0 , 929 , 131, 0 ), // #457
537 INST(Not , X86M_GPB , O(000000,F6,2,_,x,_,_,_ ), 0 , 1 , 0 , 2003 , 130, 0 ), // #458
538 INST(Or , X86Arith , O(000000,08,1,_,x,_,_,_ ), 0 , 29 , 0 , 1138 , 132, 1 ), // #459
539 INST(Orpd , ExtRm , O(660F00,56,_,_,_,_,_,_ ), 0 , 3 , 0 , 9988 , 11 , 4 ), // #460
540 INST(Orps , ExtRm , O(000F00,56,_,_,_,_,_,_ ), 0 , 4 , 0 , 9995 , 11 , 5 ), // #461
541 INST(Out , X86Out , O(000000,EE,_,_,_,_,_,_ ), O(000000,E6,_,_,_,_,_,_ ), 0 , 64 , 2007 , 133, 0 ), // #462
542 INST(Outs , X86Outs , O(000000,6E,_,_,_,_,_,_ ), 0 , 0 , 0 , 2011 , 134, 0 ), // #463
543 INST(Pabsb , ExtRm_P , O(000F38,1C,_,_,_,_,_,_ ), 0 , 76 , 0 , 6341 , 135, 79 ), // #464
544 INST(Pabsd , ExtRm_P , O(000F38,1E,_,_,_,_,_,_ ), 0 , 76 , 0 , 6348 , 135, 79 ), // #465
545 INST(Pabsw , ExtRm_P , O(000F38,1D,_,_,_,_,_,_ ), 0 , 76 , 0 , 6362 , 135, 79 ), // #466
546 INST(Packssdw , ExtRm_P , O(000F00,6B,_,_,_,_,_,_ ), 0 , 4 , 0 , 6369 , 135, 75 ), // #467
547 INST(Packsswb , ExtRm_P , O(000F00,63,_,_,_,_,_,_ ), 0 , 4 , 0 , 6379 , 135, 75 ), // #468
548 INST(Packusdw , ExtRm , O(660F38,2B,_,_,_,_,_,_ ), 0 , 2 , 0 , 6389 , 5 , 12 ), // #469
549 INST(Packuswb , ExtRm_P , O(000F00,67,_,_,_,_,_,_ ), 0 , 4 , 0 , 6399 , 135, 75 ), // #470
550 INST(Paddb , ExtRm_P , O(000F00,FC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6409 , 135, 75 ), // #471
551 INST(Paddd , ExtRm_P , O(000F00,FE,_,_,_,_,_,_ ), 0 , 4 , 0 , 6416 , 135, 75 ), // #472
552 INST(Paddq , ExtRm_P , O(000F00,D4,_,_,_,_,_,_ ), 0 , 4 , 0 , 6423 , 135, 4 ), // #473
553 INST(Paddsb , ExtRm_P , O(000F00,EC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6430 , 135, 75 ), // #474
554 INST(Paddsw , ExtRm_P , O(000F00,ED,_,_,_,_,_,_ ), 0 , 4 , 0 , 6438 , 135, 75 ), // #475
555 INST(Paddusb , ExtRm_P , O(000F00,DC,_,_,_,_,_,_ ), 0 , 4 , 0 , 6446 , 135, 75 ), // #476
556 INST(Paddusw , ExtRm_P , O(000F00,DD,_,_,_,_,_,_ ), 0 , 4 , 0 , 6455 , 135, 75 ), // #477
557 INST(Paddw , ExtRm_P , O(000F00,FD,_,_,_,_,_,_ ), 0 , 4 , 0 , 6464 , 135, 75 ), // #478
558 INST(Palignr , ExtRmi_P , O(000F3A,0F,_,_,_,_,_,_ ), 0 , 79 , 0 , 6471 , 136, 6 ), // #479
559 INST(Pand , ExtRm_P , O(000F00,DB,_,_,_,_,_,_ ), 0 , 4 , 0 , 6480 , 137, 75 ), // #480
560 INST(Pandn , ExtRm_P , O(000F00,DF,_,_,_,_,_,_ ), 0 , 4 , 0 , 6493 , 138, 75 ), // #481
561 INST(Pause , X86Op , O(F30000,90,_,_,_,_,_,_ ), 0 , 80 , 0 , 2016 , 30 , 0 ), // #482
562 INST(Pavgb , ExtRm_P , O(000F00,E0,_,_,_,_,_,_ ), 0 , 4 , 0 , 6523 , 135, 80 ), // #483
563 INST(Pavgusb , Ext3dNow , O(000F0F,BF,_,_,_,_,_,_ ), 0 , 81 , 0 , 2022 , 139, 48 ), // #484
564 INST(Pavgw , ExtRm_P , O(000F00,E3,_,_,_,_,_,_ ), 0 , 4 , 0 , 6530 , 135, 80 ), // #485
565 INST(Pblendvb , ExtRm_XMM0 , O(660F38,10,_,_,_,_,_,_ ), 0 , 2 , 0 , 6546 , 15 , 12 ), // #486
566 INST(Pblendw , ExtRmi , O(660F3A,0E,_,_,_,_,_,_ ), 0 , 8 , 0 , 6556 , 8 , 12 ), // #487
567 INST(Pclmulqdq , ExtRmi , O(660F3A,44,_,_,_,_,_,_ ), 0 , 8 , 0 , 6649 , 8 , 81 ), // #488
568 INST(Pcmpeqb , ExtRm_P , O(000F00,74,_,_,_,_,_,_ ), 0 , 4 , 0 , 6681 , 138, 75 ), // #489
569 INST(Pcmpeqd , ExtRm_P , O(000F00,76,_,_,_,_,_,_ ), 0 , 4 , 0 , 6690 , 138, 75 ), // #490
570 INST(Pcmpeqq , ExtRm , O(660F38,29,_,_,_,_,_,_ ), 0 , 2 , 0 , 6699 , 140, 12 ), // #491
571 INST(Pcmpeqw , ExtRm_P , O(000F00,75,_,_,_,_,_,_ ), 0 , 4 , 0 , 6708 , 138, 75 ), // #492
572 INST(Pcmpestri , ExtRmi , O(660F3A,61,_,_,_,_,_,_ ), 0 , 8 , 0 , 6717 , 141, 82 ), // #493
573 INST(Pcmpestrm , ExtRmi , O(660F3A,60,_,_,_,_,_,_ ), 0 , 8 , 0 , 6728 , 142, 82 ), // #494
574 INST(Pcmpgtb , ExtRm_P , O(000F00,64,_,_,_,_,_,_ ), 0 , 4 , 0 , 6739 , 138, 75 ), // #495
575 INST(Pcmpgtd , ExtRm_P , O(000F00,66,_,_,_,_,_,_ ), 0 , 4 , 0 , 6748 , 138, 75 ), // #496
576 INST(Pcmpgtq , ExtRm , O(660F38,37,_,_,_,_,_,_ ), 0 , 2 , 0 , 6757 , 140, 42 ), // #497
577 INST(Pcmpgtw , ExtRm_P , O(000F00,65,_,_,_,_,_,_ ), 0 , 4 , 0 , 6766 , 138, 75 ), // #498
578 INST(Pcmpistri , ExtRmi , O(660F3A,63,_,_,_,_,_,_ ), 0 , 8 , 0 , 6775 , 143, 82 ), // #499
579 INST(Pcmpistrm , ExtRmi , O(660F3A,62,_,_,_,_,_,_ ), 0 , 8 , 0 , 6786 , 144, 82 ), // #500
580 INST(Pcommit , X86Op_O , O(660F00,AE,7,_,_,_,_,_ ), 0 , 23 , 0 , 2030 , 30 , 83 ), // #501
581 INST(Pdep , VexRvm_Wx , V(F20F38,F5,_,0,x,_,_,_ ), 0 , 77 , 0 , 2038 , 10 , 78 ), // #502
582 INST(Pext , VexRvm_Wx , V(F30F38,F5,_,0,x,_,_,_ ), 0 , 82 , 0 , 2043 , 10 , 78 ), // #503
583 INST(Pextrb , ExtExtract , O(000F3A,14,_,_,_,_,_,_ ), 0 , 79 , 0 , 7273 , 145, 12 ), // #504
584 INST(Pextrd , ExtExtract , O(000F3A,16,_,_,_,_,_,_ ), 0 , 79 , 0 , 7281 , 56 , 12 ), // #505
585 INST(Pextrq , ExtExtract , O(000F3A,16,_,_,1,_,_,_ ), 0 , 83 , 0 , 7289 , 146, 12 ), // #506
586 INST(Pextrw , ExtPextrw , O(000F00,C5,_,_,_,_,_,_ ), O(000F3A,15,_,_,_,_,_,_ ), 4 , 65 , 7297 , 147, 84 ), // #507
587 INST(Pf2id , Ext3dNow , O(000F0F,1D,_,_,_,_,_,_ ), 0 , 81 , 0 , 2048 , 139, 48 ), // #508
588 INST(Pf2iw , Ext3dNow , O(000F0F,1C,_,_,_,_,_,_ ), 0 , 81 , 0 , 2054 , 139, 85 ), // #509
589 INST(Pfacc , Ext3dNow , O(000F0F,AE,_,_,_,_,_,_ ), 0 , 81 , 0 , 2060 , 139, 48 ), // #510
590 INST(Pfadd , Ext3dNow , O(000F0F,9E,_,_,_,_,_,_ ), 0 , 81 , 0 , 2066 , 139, 48 ), // #511
591 INST(Pfcmpeq , Ext3dNow , O(000F0F,B0,_,_,_,_,_,_ ), 0 , 81 , 0 , 2072 , 139, 48 ), // #512
592 INST(Pfcmpge , Ext3dNow , O(000F0F,90,_,_,_,_,_,_ ), 0 , 81 , 0 , 2080 , 139, 48 ), // #513
593 INST(Pfcmpgt , Ext3dNow , O(000F0F,A0,_,_,_,_,_,_ ), 0 , 81 , 0 , 2088 , 139, 48 ), // #514
594 INST(Pfmax , Ext3dNow , O(000F0F,A4,_,_,_,_,_,_ ), 0 , 81 , 0 , 2096 , 139, 48 ), // #515
595 INST(Pfmin , Ext3dNow , O(000F0F,94,_,_,_,_,_,_ ), 0 , 81 , 0 , 2102 , 139, 48 ), // #516
596 INST(Pfmul , Ext3dNow , O(000F0F,B4,_,_,_,_,_,_ ), 0 , 81 , 0 , 2108 , 139, 48 ), // #517
597 INST(Pfnacc , Ext3dNow , O(000F0F,8A,_,_,_,_,_,_ ), 0 , 81 , 0 , 2114 , 139, 85 ), // #518
598 INST(Pfpnacc , Ext3dNow , O(000F0F,8E,_,_,_,_,_,_ ), 0 , 81 , 0 , 2121 , 139, 85 ), // #519
599 INST(Pfrcp , Ext3dNow , O(000F0F,96,_,_,_,_,_,_ ), 0 , 81 , 0 , 2129 , 139, 48 ), // #520
600 INST(Pfrcpit1 , Ext3dNow , O(000F0F,A6,_,_,_,_,_,_ ), 0 , 81 , 0 , 2135 , 139, 48 ), // #521
601 INST(Pfrcpit2 , Ext3dNow , O(000F0F,B6,_,_,_,_,_,_ ), 0 , 81 , 0 , 2144 , 139, 48 ), // #522
602 INST(Pfrcpv , Ext3dNow , O(000F0F,86,_,_,_,_,_,_ ), 0 , 81 , 0 , 2153 , 139, 86 ), // #523
603 INST(Pfrsqit1 , Ext3dNow , O(000F0F,A7,_,_,_,_,_,_ ), 0 , 81 , 0 , 2160 , 139, 48 ), // #524
604 INST(Pfrsqrt , Ext3dNow , O(000F0F,97,_,_,_,_,_,_ ), 0 , 81 , 0 , 2169 , 139, 48 ), // #525
605 INST(Pfrsqrtv , Ext3dNow , O(000F0F,87,_,_,_,_,_,_ ), 0 , 81 , 0 , 2177 , 139, 86 ), // #526
606 INST(Pfsub , Ext3dNow , O(000F0F,9A,_,_,_,_,_,_ ), 0 , 81 , 0 , 2186 , 139, 48 ), // #527
607 INST(Pfsubr , Ext3dNow , O(000F0F,AA,_,_,_,_,_,_ ), 0 , 81 , 0 , 2192 , 139, 48 ), // #528
608 INST(Phaddd , ExtRm_P , O(000F38,02,_,_,_,_,_,_ ), 0 , 76 , 0 , 7376 , 135, 79 ), // #529
609 INST(Phaddsw , ExtRm_P , O(000F38,03,_,_,_,_,_,_ ), 0 , 76 , 0 , 7393 , 135, 79 ), // #530
610 INST(Phaddw , ExtRm_P , O(000F38,01,_,_,_,_,_,_ ), 0 , 76 , 0 , 7462 , 135, 79 ), // #531
611 INST(Phminposuw , ExtRm , O(660F38,41,_,_,_,_,_,_ ), 0 , 2 , 0 , 7488 , 5 , 12 ), // #532
612 INST(Phsubd , ExtRm_P , O(000F38,06,_,_,_,_,_,_ ), 0 , 76 , 0 , 7509 , 135, 79 ), // #533
613 INST(Phsubsw , ExtRm_P , O(000F38,07,_,_,_,_,_,_ ), 0 , 76 , 0 , 7526 , 135, 79 ), // #534
614 INST(Phsubw , ExtRm_P , O(000F38,05,_,_,_,_,_,_ ), 0 , 76 , 0 , 7535 , 135, 79 ), // #535
615 INST(Pi2fd , Ext3dNow , O(000F0F,0D,_,_,_,_,_,_ ), 0 , 81 , 0 , 2199 , 139, 48 ), // #536
616 INST(Pi2fw , Ext3dNow , O(000F0F,0C,_,_,_,_,_,_ ), 0 , 81 , 0 , 2205 , 139, 85 ), // #537
617 INST(Pinsrb , ExtRmi , O(660F3A,20,_,_,_,_,_,_ ), 0 , 8 , 0 , 7552 , 148, 12 ), // #538
618 INST(Pinsrd , ExtRmi , O(660F3A,22,_,_,_,_,_,_ ), 0 , 8 , 0 , 7560 , 149, 12 ), // #539
619 INST(Pinsrq , ExtRmi , O(660F3A,22,_,_,1,_,_,_ ), 0 , 84 , 0 , 7568 , 150, 12 ), // #540
620 INST(Pinsrw , ExtRmi_P , O(000F00,C4,_,_,_,_,_,_ ), 0 , 4 , 0 , 7576 , 151, 80 ), // #541
621 INST(Pmaddubsw , ExtRm_P , O(000F38,04,_,_,_,_,_,_ ), 0 , 76 , 0 , 7746 , 135, 79 ), // #542
622 INST(Pmaddwd , ExtRm_P , O(000F00,F5,_,_,_,_,_,_ ), 0 , 4 , 0 , 7757 , 135, 75 ), // #543
623 INST(Pmaxsb , ExtRm , O(660F38,3C,_,_,_,_,_,_ ), 0 , 2 , 0 , 7788 , 11 , 12 ), // #544
624 INST(Pmaxsd , ExtRm , O(660F38,3D,_,_,_,_,_,_ ), 0 , 2 , 0 , 7796 , 11 , 12 ), // #545
625 INST(Pmaxsw , ExtRm_P , O(000F00,EE,_,_,_,_,_,_ ), 0 , 4 , 0 , 7812 , 137, 80 ), // #546
626 INST(Pmaxub , ExtRm_P , O(000F00,DE,_,_,_,_,_,_ ), 0 , 4 , 0 , 7820 , 137, 80 ), // #547
627 INST(Pmaxud , ExtRm , O(660F38,3F,_,_,_,_,_,_ ), 0 , 2 , 0 , 7828 , 11 , 12 ), // #548
628 INST(Pmaxuw , ExtRm , O(660F38,3E,_,_,_,_,_,_ ), 0 , 2 , 0 , 7844 , 11 , 12 ), // #549
629 INST(Pminsb , ExtRm , O(660F38,38,_,_,_,_,_,_ ), 0 , 2 , 0 , 7852 , 11 , 12 ), // #550
630 INST(Pminsd , ExtRm , O(660F38,39,_,_,_,_,_,_ ), 0 , 2 , 0 , 7860 , 11 , 12 ), // #551
631 INST(Pminsw , ExtRm_P , O(000F00,EA,_,_,_,_,_,_ ), 0 , 4 , 0 , 7876 , 137, 80 ), // #552
632 INST(Pminub , ExtRm_P , O(000F00,DA,_,_,_,_,_,_ ), 0 , 4 , 0 , 7884 , 137, 80 ), // #553
633 INST(Pminud , ExtRm , O(660F38,3B,_,_,_,_,_,_ ), 0 , 2 , 0 , 7892 , 11 , 12 ), // #554
634 INST(Pminuw , ExtRm , O(660F38,3A,_,_,_,_,_,_ ), 0 , 2 , 0 , 7908 , 11 , 12 ), // #555
635 INST(Pmovmskb , ExtRm_P , O(000F00,D7,_,_,_,_,_,_ ), 0 , 4 , 0 , 7986 , 152, 80 ), // #556
636 INST(Pmovsxbd , ExtRm , O(660F38,21,_,_,_,_,_,_ ), 0 , 2 , 0 , 8083 , 7 , 12 ), // #557
637 INST(Pmovsxbq , ExtRm , O(660F38,22,_,_,_,_,_,_ ), 0 , 2 , 0 , 8093 , 153, 12 ), // #558
638 INST(Pmovsxbw , ExtRm , O(660F38,20,_,_,_,_,_,_ ), 0 , 2 , 0 , 8103 , 6 , 12 ), // #559
639 INST(Pmovsxdq , ExtRm , O(660F38,25,_,_,_,_,_,_ ), 0 , 2 , 0 , 8113 , 6 , 12 ), // #560
640 INST(Pmovsxwd , ExtRm , O(660F38,23,_,_,_,_,_,_ ), 0 , 2 , 0 , 8123 , 6 , 12 ), // #561
641 INST(Pmovsxwq , ExtRm , O(660F38,24,_,_,_,_,_,_ ), 0 , 2 , 0 , 8133 , 7 , 12 ), // #562
642 INST(Pmovzxbd , ExtRm , O(660F38,31,_,_,_,_,_,_ ), 0 , 2 , 0 , 8220 , 7 , 12 ), // #563
643 INST(Pmovzxbq , ExtRm , O(660F38,32,_,_,_,_,_,_ ), 0 , 2 , 0 , 8230 , 153, 12 ), // #564
644 INST(Pmovzxbw , ExtRm , O(660F38,30,_,_,_,_,_,_ ), 0 , 2 , 0 , 8240 , 6 , 12 ), // #565
645 INST(Pmovzxdq , ExtRm , O(660F38,35,_,_,_,_,_,_ ), 0 , 2 , 0 , 8250 , 6 , 12 ), // #566
646 INST(Pmovzxwd , ExtRm , O(660F38,33,_,_,_,_,_,_ ), 0 , 2 , 0 , 8260 , 6 , 12 ), // #567
647 INST(Pmovzxwq , ExtRm , O(660F38,34,_,_,_,_,_,_ ), 0 , 2 , 0 , 8270 , 7 , 12 ), // #568
648 INST(Pmuldq , ExtRm , O(660F38,28,_,_,_,_,_,_ ), 0 , 2 , 0 , 8280 , 5 , 12 ), // #569
649 INST(Pmulhrsw , ExtRm_P , O(000F38,0B,_,_,_,_,_,_ ), 0 , 76 , 0 , 8288 , 135, 79 ), // #570
650 INST(Pmulhrw , Ext3dNow , O(000F0F,B7,_,_,_,_,_,_ ), 0 , 81 , 0 , 2211 , 139, 48 ), // #571
651 INST(Pmulhuw , ExtRm_P , O(000F00,E4,_,_,_,_,_,_ ), 0 , 4 , 0 , 8298 , 135, 80 ), // #572
652 INST(Pmulhw , ExtRm_P , O(000F00,E5,_,_,_,_,_,_ ), 0 , 4 , 0 , 8307 , 135, 75 ), // #573
653 INST(Pmulld , ExtRm , O(660F38,40,_,_,_,_,_,_ ), 0 , 2 , 0 , 8315 , 5 , 12 ), // #574
654 INST(Pmullw , ExtRm_P , O(000F00,D5,_,_,_,_,_,_ ), 0 , 4 , 0 , 8331 , 135, 75 ), // #575
655 INST(Pmuludq , ExtRm_P , O(000F00,F4,_,_,_,_,_,_ ), 0 , 4 , 0 , 8354 , 135, 4 ), // #576
656 INST(Pop , X86Pop , O(000000,8F,0,_,_,_,_,_ ), O(000000,58,_,_,_,_,_,_ ), 0 , 66 , 2219 , 154, 0 ), // #577
657 INST(Popa , X86Op , O(660000,61,_,_,_,_,_,_ ), 0 , 19 , 0 , 2223 , 75 , 0 ), // #578
658 INST(Popad , X86Op , O(000000,61,_,_,_,_,_,_ ), 0 , 0 , 0 , 2228 , 75 , 0 ), // #579
659 INST(Popcnt , X86Rm_Raw66H , O(F30F00,B8,_,_,x,_,_,_ ), 0 , 6 , 0 , 2234 , 22 , 87 ), // #580
660 INST(Popf , X86Op , O(660000,9D,_,_,_,_,_,_ ), 0 , 19 , 0 , 2241 , 30 , 88 ), // #581
661 INST(Popfd , X86Op , O(000000,9D,_,_,_,_,_,_ ), 0 , 0 , 0 , 2246 , 75 , 88 ), // #582
662 INST(Popfq , X86Op , O(000000,9D,_,_,_,_,_,_ ), 0 , 0 , 0 , 2252 , 155, 88 ), // #583
663 INST(Por , ExtRm_P , O(000F00,EB,_,_,_,_,_,_ ), 0 , 4 , 0 , 8399 , 137, 75 ), // #584
664 INST(Prefetch , X86M_Only , O(000F00,0D,0,_,_,_,_,_ ), 0 , 4 , 0 , 2258 , 31 , 48 ), // #585
665 INST(Prefetchnta , X86M_Only , O(000F00,18,0,_,_,_,_,_ ), 0 , 4 , 0 , 2267 , 31 , 71 ), // #586
666 INST(Prefetcht0 , X86M_Only , O(000F00,18,1,_,_,_,_,_ ), 0 , 27 , 0 , 2279 , 31 , 71 ), // #587
667 INST(Prefetcht1 , X86M_Only , O(000F00,18,2,_,_,_,_,_ ), 0 , 69 , 0 , 2290 , 31 , 71 ), // #588
668 INST(Prefetcht2 , X86M_Only , O(000F00,18,3,_,_,_,_,_ ), 0 , 71 , 0 , 2301 , 31 , 71 ), // #589
669 INST(Prefetchw , X86M_Only , O(000F00,0D,1,_,_,_,_,_ ), 0 , 27 , 0 , 2312 , 31 , 89 ), // #590
670 INST(Prefetchwt1 , X86M_Only , O(000F00,0D,2,_,_,_,_,_ ), 0 , 69 , 0 , 2322 , 31 , 90 ), // #591
671 INST(Psadbw , ExtRm_P , O(000F00,F6,_,_,_,_,_,_ ), 0 , 4 , 0 , 3980 , 135, 80 ), // #592
672 INST(Pshufb , ExtRm_P , O(000F38,00,_,_,_,_,_,_ ), 0 , 76 , 0 , 8725 , 135, 79 ), // #593
673 INST(Pshufd , ExtRmi , O(660F00,70,_,_,_,_,_,_ ), 0 , 3 , 0 , 8746 , 8 , 4 ), // #594
674 INST(Pshufhw , ExtRmi , O(F30F00,70,_,_,_,_,_,_ ), 0 , 6 , 0 , 8754 , 8 , 4 ), // #595
675 INST(Pshuflw , ExtRmi , O(F20F00,70,_,_,_,_,_,_ ), 0 , 5 , 0 , 8763 , 8 , 4 ), // #596
676 INST(Pshufw , ExtRmi_P , O(000F00,70,_,_,_,_,_,_ ), 0 , 4 , 0 , 2334 , 156, 71 ), // #597
677 INST(Psignb , ExtRm_P , O(000F38,08,_,_,_,_,_,_ ), 0 , 76 , 0 , 8772 , 135, 79 ), // #598
678 INST(Psignd , ExtRm_P , O(000F38,0A,_,_,_,_,_,_ ), 0 , 76 , 0 , 8780 , 135, 79 ), // #599
679 INST(Psignw , ExtRm_P , O(000F38,09,_,_,_,_,_,_ ), 0 , 76 , 0 , 8788 , 135, 79 ), // #600
680 INST(Pslld , ExtRmRi_P , O(000F00,F2,_,_,_,_,_,_ ), O(000F00,72,6,_,_,_,_,_ ), 4 , 67 , 8796 , 157, 75 ), // #601
681 INST(Pslldq , ExtRmRi , 0 , O(660F00,73,7,_,_,_,_,_ ), 0 , 68 , 8803 , 158, 4 ), // #602
682 INST(Psllq , ExtRmRi_P , O(000F00,F3,_,_,_,_,_,_ ), O(000F00,73,6,_,_,_,_,_ ), 4 , 69 , 8811 , 157, 75 ), // #603
683 INST(Psllw , ExtRmRi_P , O(000F00,F1,_,_,_,_,_,_ ), O(000F00,71,6,_,_,_,_,_ ), 4 , 70 , 8842 , 157, 75 ), // #604
684 INST(Psrad , ExtRmRi_P , O(000F00,E2,_,_,_,_,_,_ ), O(000F00,72,4,_,_,_,_,_ ), 4 , 71 , 8849 , 157, 75 ), // #605
685 INST(Psraw , ExtRmRi_P , O(000F00,E1,_,_,_,_,_,_ ), O(000F00,71,4,_,_,_,_,_ ), 4 , 72 , 8887 , 157, 75 ), // #606
686 INST(Psrld , ExtRmRi_P , O(000F00,D2,_,_,_,_,_,_ ), O(000F00,72,2,_,_,_,_,_ ), 4 , 73 , 8894 , 157, 75 ), // #607
687 INST(Psrldq , ExtRmRi , 0 , O(660F00,73,3,_,_,_,_,_ ), 0 , 74 , 8901 , 158, 4 ), // #608
688 INST(Psrlq , ExtRmRi_P , O(000F00,D3,_,_,_,_,_,_ ), O(000F00,73,2,_,_,_,_,_ ), 4 , 75 , 8909 , 157, 75 ), // #609
689 INST(Psrlw , ExtRmRi_P , O(000F00,D1,_,_,_,_,_,_ ), O(000F00,71,2,_,_,_,_,_ ), 4 , 76 , 8940 , 157, 75 ), // #610
690 INST(Psubb , ExtRm_P , O(000F00,F8,_,_,_,_,_,_ ), 0 , 4 , 0 , 8947 , 138, 75 ), // #611
691 INST(Psubd , ExtRm_P , O(000F00,FA,_,_,_,_,_,_ ), 0 , 4 , 0 , 8954 , 138, 75 ), // #612
692 INST(Psubq , ExtRm_P , O(000F00,FB,_,_,_,_,_,_ ), 0 , 4 , 0 , 8961 , 138, 4 ), // #613
693 INST(Psubsb , ExtRm_P , O(000F00,E8,_,_,_,_,_,_ ), 0 , 4 , 0 , 8968 , 138, 75 ), // #614
694 INST(Psubsw , ExtRm_P , O(000F00,E9,_,_,_,_,_,_ ), 0 , 4 , 0 , 8976 , 138, 75 ), // #615
695 INST(Psubusb , ExtRm_P , O(000F00,D8,_,_,_,_,_,_ ), 0 , 4 , 0 , 8984 , 138, 75 ), // #616
696 INST(Psubusw , ExtRm_P , O(000F00,D9,_,_,_,_,_,_ ), 0 , 4 , 0 , 8993 , 138, 75 ), // #617
697 INST(Psubw , ExtRm_P , O(000F00,F9,_,_,_,_,_,_ ), 0 , 4 , 0 , 9002 , 138, 75 ), // #618
698 INST(Pswapd , Ext3dNow , O(000F0F,BB,_,_,_,_,_,_ ), 0 , 81 , 0 , 2341 , 139, 85 ), // #619
699 INST(Ptest , ExtRm , O(660F38,17,_,_,_,_,_,_ ), 0 , 2 , 0 , 9031 , 5 , 91 ), // #620
700 INST(Punpckhbw , ExtRm_P , O(000F00,68,_,_,_,_,_,_ ), 0 , 4 , 0 , 9114 , 135, 75 ), // #621
701 INST(Punpckhdq , ExtRm_P , O(000F00,6A,_,_,_,_,_,_ ), 0 , 4 , 0 , 9125 , 135, 75 ), // #622
702 INST(Punpckhqdq , ExtRm , O(660F00,6D,_,_,_,_,_,_ ), 0 , 3 , 0 , 9136 , 5 , 4 ), // #623
703 INST(Punpckhwd , ExtRm_P , O(000F00,69,_,_,_,_,_,_ ), 0 , 4 , 0 , 9148 , 135, 75 ), // #624
704 INST(Punpcklbw , ExtRm_P , O(000F00,60,_,_,_,_,_,_ ), 0 , 4 , 0 , 9159 , 135, 75 ), // #625
705 INST(Punpckldq , ExtRm_P , O(000F00,62,_,_,_,_,_,_ ), 0 , 4 , 0 , 9170 , 135, 75 ), // #626
706 INST(Punpcklqdq , ExtRm , O(660F00,6C,_,_,_,_,_,_ ), 0 , 3 , 0 , 9181 , 5 , 4 ), // #627
707 INST(Punpcklwd , ExtRm_P , O(000F00,61,_,_,_,_,_,_ ), 0 , 4 , 0 , 9193 , 135, 75 ), // #628
708 INST(Push , X86Push , O(000000,FF,6,_,_,_,_,_ ), O(000000,50,_,_,_,_,_,_ ), 30 , 77 , 2348 , 159, 0 ), // #629
709 INST(Pusha , X86Op , O(660000,60,_,_,_,_,_,_ ), 0 , 19 , 0 , 2353 , 75 , 0 ), // #630
710 INST(Pushad , X86Op , O(000000,60,_,_,_,_,_,_ ), 0 , 0 , 0 , 2359 , 75 , 0 ), // #631
711 INST(Pushf , X86Op , O(660000,9C,_,_,_,_,_,_ ), 0 , 19 , 0 , 2366 , 30 , 92 ), // #632
712 INST(Pushfd , X86Op , O(000000,9C,_,_,_,_,_,_ ), 0 , 0 , 0 , 2372 , 75 , 92 ), // #633
713 INST(Pushfq , X86Op , O(000000,9C,_,_,_,_,_,_ ), 0 , 0 , 0 , 2379 , 155, 92 ), // #634
714 INST(Pxor , ExtRm_P , O(000F00,EF,_,_,_,_,_,_ ), 0 , 4 , 0 , 9204 , 138, 75 ), // #635
715 INST(Rcl , X86Rot , O(000000,D0,2,_,x,_,_,_ ), 0 , 1 , 0 , 2386 , 160, 93 ), // #636
716 INST(Rcpps , ExtRm , O(000F00,53,_,_,_,_,_,_ ), 0 , 4 , 0 , 9332 , 5 , 5 ), // #637
717 INST(Rcpss , ExtRm , O(F30F00,53,_,_,_,_,_,_ ), 0 , 6 , 0 , 9339 , 7 , 5 ), // #638
718 INST(Rcr , X86Rot , O(000000,D0,3,_,x,_,_,_ ), 0 , 78 , 0 , 2390 , 160, 93 ), // #639
719 INST(Rdfsbase , X86M , O(F30F00,AE,0,_,x,_,_,_ ), 0 , 6 , 0 , 2394 , 161, 94 ), // #640
720 INST(Rdgsbase , X86M , O(F30F00,AE,1,_,x,_,_,_ ), 0 , 85 , 0 , 2403 , 161, 94 ), // #641
721 INST(Rdmsr , X86Op , O(000F00,32,_,_,_,_,_,_ ), 0 , 4 , 0 , 2412 , 162, 95 ), // #642
722 INST(Rdpid , X86R_Native , O(F30F00,C7,7,_,_,_,_,_ ), 0 , 86 , 0 , 2418 , 163, 96 ), // #643
723 INST(Rdpmc , X86Op , O(000F00,33,_,_,_,_,_,_ ), 0 , 4 , 0 , 2424 , 162, 0 ), // #644
724 INST(Rdrand , X86M , O(000F00,C7,6,_,x,_,_,_ ), 0 , 73 , 0 , 2430 , 23 , 97 ), // #645
725 INST(Rdseed , X86M , O(000F00,C7,7,_,x,_,_,_ ), 0 , 22 , 0 , 2437 , 23 , 98 ), // #646
726 INST(Rdtsc , X86Op , O(000F00,31,_,_,_,_,_,_ ), 0 , 4 , 0 , 2444 , 28 , 99 ), // #647
727 INST(Rdtscp , X86Op , O(000F01,F9,_,_,_,_,_,_ ), 0 , 21 , 0 , 2450 , 162, 100), // #648
728 INST(Ret , X86Ret , O(000000,C2,_,_,_,_,_,_ ), 0 , 0 , 0 , 2883 , 164, 0 ), // #649
729 INST(Rol , X86Rot , O(000000,D0,0,_,x,_,_,_ ), 0 , 0 , 0 , 2457 , 160, 101), // #650
730 INST(Ror , X86Rot , O(000000,D0,1,_,x,_,_,_ ), 0 , 29 , 0 , 2461 , 160, 101), // #651
731 INST(Rorx , VexRmi_Wx , V(F20F3A,F0,_,0,x,_,_,_ ), 0 , 87 , 0 , 2465 , 165, 78 ), // #652
732 INST(Roundpd , ExtRmi , O(660F3A,09,_,_,_,_,_,_ ), 0 , 8 , 0 , 9434 , 8 , 12 ), // #653
733 INST(Roundps , ExtRmi , O(660F3A,08,_,_,_,_,_,_ ), 0 , 8 , 0 , 9443 , 8 , 12 ), // #654
734 INST(Roundsd , ExtRmi , O(660F3A,0B,_,_,_,_,_,_ ), 0 , 8 , 0 , 9452 , 35 , 12 ), // #655
735 INST(Roundss , ExtRmi , O(660F3A,0A,_,_,_,_,_,_ ), 0 , 8 , 0 , 9461 , 36 , 12 ), // #656
736 INST(Rsm , X86Op , O(000F00,AA,_,_,_,_,_,_ ), 0 , 4 , 0 , 2470 , 75 , 1 ), // #657
737 INST(Rsqrtps , ExtRm , O(000F00,52,_,_,_,_,_,_ ), 0 , 4 , 0 , 9558 , 5 , 5 ), // #658
738 INST(Rsqrtss , ExtRm , O(F30F00,52,_,_,_,_,_,_ ), 0 , 6 , 0 , 9567 , 7 , 5 ), // #659
739 INST(Sahf , X86Op , O(000000,9E,_,_,_,_,_,_ ), 0 , 0 , 0 , 2474 , 90 , 102), // #660
740 INST(Sal , X86Rot , O(000000,D0,4,_,x,_,_,_ ), 0 , 9 , 0 , 2479 , 160, 1 ), // #661
741 INST(Sar , X86Rot , O(000000,D0,7,_,x,_,_,_ ), 0 , 25 , 0 , 2483 , 160, 1 ), // #662
742 INST(Sarx , VexRmv_Wx , V(F30F38,F7,_,0,x,_,_,_ ), 0 , 82 , 0 , 2487 , 13 , 78 ), // #663
743 INST(Sbb , X86Arith , O(000000,18,3,_,x,_,_,_ ), 0 , 78 , 0 , 2492 , 166, 2 ), // #664
744 INST(Scas , X86StrRm , O(000000,AE,_,_,_,_,_,_ ), 0 , 0 , 0 , 2496 , 167, 35 ), // #665
745 INST(Seta , X86Set , O(000F00,97,_,_,_,_,_,_ ), 0 , 4 , 0 , 2501 , 168, 54 ), // #666
746 INST(Setae , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2506 , 168, 55 ), // #667
747 INST(Setb , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2512 , 168, 55 ), // #668
748 INST(Setbe , X86Set , O(000F00,96,_,_,_,_,_,_ ), 0 , 4 , 0 , 2517 , 168, 54 ), // #669
749 INST(Setc , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2523 , 168, 55 ), // #670
750 INST(Sete , X86Set , O(000F00,94,_,_,_,_,_,_ ), 0 , 4 , 0 , 2528 , 168, 56 ), // #671
751 INST(Setg , X86Set , O(000F00,9F,_,_,_,_,_,_ ), 0 , 4 , 0 , 2533 , 168, 57 ), // #672
752 INST(Setge , X86Set , O(000F00,9D,_,_,_,_,_,_ ), 0 , 4 , 0 , 2538 , 168, 58 ), // #673
753 INST(Setl , X86Set , O(000F00,9C,_,_,_,_,_,_ ), 0 , 4 , 0 , 2544 , 168, 58 ), // #674
754 INST(Setle , X86Set , O(000F00,9E,_,_,_,_,_,_ ), 0 , 4 , 0 , 2549 , 168, 57 ), // #675
755 INST(Setna , X86Set , O(000F00,96,_,_,_,_,_,_ ), 0 , 4 , 0 , 2555 , 168, 54 ), // #676
756 INST(Setnae , X86Set , O(000F00,92,_,_,_,_,_,_ ), 0 , 4 , 0 , 2561 , 168, 55 ), // #677
757 INST(Setnb , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2568 , 168, 55 ), // #678
758 INST(Setnbe , X86Set , O(000F00,97,_,_,_,_,_,_ ), 0 , 4 , 0 , 2574 , 168, 54 ), // #679
759 INST(Setnc , X86Set , O(000F00,93,_,_,_,_,_,_ ), 0 , 4 , 0 , 2581 , 168, 55 ), // #680
760 INST(Setne , X86Set , O(000F00,95,_,_,_,_,_,_ ), 0 , 4 , 0 , 2587 , 168, 56 ), // #681
761 INST(Setng , X86Set , O(000F00,9E,_,_,_,_,_,_ ), 0 , 4 , 0 , 2593 , 168, 57 ), // #682
762 INST(Setnge , X86Set , O(000F00,9C,_,_,_,_,_,_ ), 0 , 4 , 0 , 2599 , 168, 58 ), // #683
763 INST(Setnl , X86Set , O(000F00,9D,_,_,_,_,_,_ ), 0 , 4 , 0 , 2606 , 168, 58 ), // #684
764 INST(Setnle , X86Set , O(000F00,9F,_,_,_,_,_,_ ), 0 , 4 , 0 , 2612 , 168, 57 ), // #685
765 INST(Setno , X86Set , O(000F00,91,_,_,_,_,_,_ ), 0 , 4 , 0 , 2619 , 168, 52 ), // #686
766 INST(Setnp , X86Set , O(000F00,9B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2625 , 168, 59 ), // #687
767 INST(Setns , X86Set , O(000F00,99,_,_,_,_,_,_ ), 0 , 4 , 0 , 2631 , 168, 60 ), // #688
768 INST(Setnz , X86Set , O(000F00,95,_,_,_,_,_,_ ), 0 , 4 , 0 , 2637 , 168, 56 ), // #689
769 INST(Seto , X86Set , O(000F00,90,_,_,_,_,_,_ ), 0 , 4 , 0 , 2643 , 168, 52 ), // #690
770 INST(Setp , X86Set , O(000F00,9A,_,_,_,_,_,_ ), 0 , 4 , 0 , 2648 , 168, 59 ), // #691
771 INST(Setpe , X86Set , O(000F00,9A,_,_,_,_,_,_ ), 0 , 4 , 0 , 2653 , 168, 59 ), // #692
772 INST(Setpo , X86Set , O(000F00,9B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2659 , 168, 59 ), // #693
773 INST(Sets , X86Set , O(000F00,98,_,_,_,_,_,_ ), 0 , 4 , 0 , 2665 , 168, 60 ), // #694
774 INST(Setz , X86Set , O(000F00,94,_,_,_,_,_,_ ), 0 , 4 , 0 , 2670 , 168, 56 ), // #695
775 INST(Sfence , X86Fence , O(000F00,AE,7,_,_,_,_,_ ), 0 , 22 , 0 , 2675 , 30 , 71 ), // #696
776 INST(Sgdt , X86M_Only , O(000F00,01,0,_,_,_,_,_ ), 0 , 4 , 0 , 2682 , 31 , 0 ), // #697
777 INST(Sha1msg1 , ExtRm , O(000F38,C9,_,_,_,_,_,_ ), 0 , 76 , 0 , 2687 , 5 , 103), // #698
778 INST(Sha1msg2 , ExtRm , O(000F38,CA,_,_,_,_,_,_ ), 0 , 76 , 0 , 2696 , 5 , 103), // #699
779 INST(Sha1nexte , ExtRm , O(000F38,C8,_,_,_,_,_,_ ), 0 , 76 , 0 , 2705 , 5 , 103), // #700
780 INST(Sha1rnds4 , ExtRmi , O(000F3A,CC,_,_,_,_,_,_ ), 0 , 79 , 0 , 2715 , 8 , 103), // #701
781 INST(Sha256msg1 , ExtRm , O(000F38,CC,_,_,_,_,_,_ ), 0 , 76 , 0 , 2725 , 5 , 103), // #702
782 INST(Sha256msg2 , ExtRm , O(000F38,CD,_,_,_,_,_,_ ), 0 , 76 , 0 , 2736 , 5 , 103), // #703
783 INST(Sha256rnds2 , ExtRm_XMM0 , O(000F38,CB,_,_,_,_,_,_ ), 0 , 76 , 0 , 2747 , 15 , 103), // #704
784 INST(Shl , X86Rot , O(000000,D0,4,_,x,_,_,_ ), 0 , 9 , 0 , 2759 , 160, 1 ), // #705
785 INST(Shld , X86ShldShrd , O(000F00,A4,_,_,x,_,_,_ ), 0 , 4 , 0 , 8603 , 169, 1 ), // #706
786 INST(Shlx , VexRmv_Wx , V(660F38,F7,_,0,x,_,_,_ ), 0 , 88 , 0 , 2763 , 13 , 78 ), // #707
787 INST(Shr , X86Rot , O(000000,D0,5,_,x,_,_,_ ), 0 , 58 , 0 , 2768 , 160, 1 ), // #708
788 INST(Shrd , X86ShldShrd , O(000F00,AC,_,_,x,_,_,_ ), 0 , 4 , 0 , 2772 , 169, 1 ), // #709
789 INST(Shrx , VexRmv_Wx , V(F20F38,F7,_,0,x,_,_,_ ), 0 , 77 , 0 , 2777 , 13 , 78 ), // #710
790 INST(Shufpd , ExtRmi , O(660F00,C6,_,_,_,_,_,_ ), 0 , 3 , 0 , 9828 , 8 , 4 ), // #711
791 INST(Shufps , ExtRmi , O(000F00,C6,_,_,_,_,_,_ ), 0 , 4 , 0 , 9836 , 8 , 5 ), // #712
792 INST(Sidt , X86M_Only , O(000F00,01,1,_,_,_,_,_ ), 0 , 27 , 0 , 2782 , 31 , 0 ), // #713
793 INST(Skinit , X86Op_xAX , O(000F01,DE,_,_,_,_,_,_ ), 0 , 21 , 0 , 2787 , 50 , 104), // #714
794 INST(Sldt , X86M , O(000F00,00,0,_,_,_,_,_ ), 0 , 4 , 0 , 2794 , 170, 0 ), // #715
795 INST(Slwpcb , VexR_Wx , V(XOP_M9,12,1,0,x,_,_,_ ), 0 , 11 , 0 , 2799 , 98 , 68 ), // #716
796 INST(Smsw , X86M , O(000F00,01,4,_,_,_,_,_ ), 0 , 89 , 0 , 2806 , 170, 0 ), // #717
797 INST(Sqrtpd , ExtRm , O(660F00,51,_,_,_,_,_,_ ), 0 , 3 , 0 , 9844 , 5 , 4 ), // #718
798 INST(Sqrtps , ExtRm , O(000F00,51,_,_,_,_,_,_ ), 0 , 4 , 0 , 9559 , 5 , 5 ), // #719
799 INST(Sqrtsd , ExtRm , O(F20F00,51,_,_,_,_,_,_ ), 0 , 5 , 0 , 9860 , 6 , 4 ), // #720
800 INST(Sqrtss , ExtRm , O(F30F00,51,_,_,_,_,_,_ ), 0 , 6 , 0 , 9568 , 7 , 5 ), // #721
801 INST(Stac , X86Op , O(000F01,CB,_,_,_,_,_,_ ), 0 , 21 , 0 , 2811 , 30 , 16 ), // #722
802 INST(Stc , X86Op , O(000000,F9,_,_,_,_,_,_ ), 0 , 0 , 0 , 2816 , 30 , 17 ), // #723
803 INST(Std , X86Op , O(000000,FD,_,_,_,_,_,_ ), 0 , 0 , 0 , 6586 , 30 , 18 ), // #724
804 INST(Stgi , X86Op , O(000F01,DC,_,_,_,_,_,_ ), 0 , 21 , 0 , 2820 , 30 , 104), // #725
805 INST(Sti , X86Op , O(000000,FB,_,_,_,_,_,_ ), 0 , 0 , 0 , 2825 , 30 , 23 ), // #726
806 INST(Stmxcsr , X86M_Only , O(000F00,AE,3,_,_,_,_,_ ), 0 , 71 , 0 , 9876 , 93 , 5 ), // #727
807 INST(Stos , X86StrMr , O(000000,AA,_,_,_,_,_,_ ), 0 , 0 , 0 , 2829 , 171, 69 ), // #728
808 INST(Str , X86M , O(000F00,00,1,_,_,_,_,_ ), 0 , 27 , 0 , 2834 , 170, 0 ), // #729
809 INST(Sub , X86Arith , O(000000,28,5,_,x,_,_,_ ), 0 , 58 , 0 , 836 , 166, 1 ), // #730
810 INST(Subpd , ExtRm , O(660F00,5C,_,_,_,_,_,_ ), 0 , 3 , 0 , 4556 , 5 , 4 ), // #731
811 INST(Subps , ExtRm , O(000F00,5C,_,_,_,_,_,_ ), 0 , 4 , 0 , 4568 , 5 , 5 ), // #732
812 INST(Subsd , ExtRm , O(F20F00,5C,_,_,_,_,_,_ ), 0 , 5 , 0 , 5244 , 6 , 4 ), // #733
813 INST(Subss , ExtRm , O(F30F00,5C,_,_,_,_,_,_ ), 0 , 6 , 0 , 5254 , 7 , 5 ), // #734
814 INST(Swapgs , X86Op , O(000F01,F8,_,_,_,_,_,_ ), 0 , 21 , 0 , 2838 , 155, 0 ), // #735
815 INST(Syscall , X86Op , O(000F00,05,_,_,_,_,_,_ ), 0 , 4 , 0 , 2845 , 155, 0 ), // #736
816 INST(Sysenter , X86Op , O(000F00,34,_,_,_,_,_,_ ), 0 , 4 , 0 , 2853 , 30 , 0 ), // #737
817 INST(Sysexit , X86Op , O(000F00,35,_,_,_,_,_,_ ), 0 , 4 , 0 , 2862 , 30 , 0 ), // #738
818 INST(Sysexit64 , X86Op , O(000F00,35,_,_,_,_,_,_ ), 0 , 4 , 0 , 2870 , 30 , 0 ), // #739
819 INST(Sysret , X86Op , O(000F00,07,_,_,_,_,_,_ ), 0 , 4 , 0 , 2880 , 155, 0 ), // #740
820 INST(Sysret64 , X86Op , O(000F00,07,_,_,_,_,_,_ ), 0 , 4 , 0 , 2887 , 155, 0 ), // #741
821 INST(T1mskc , VexVm_Wx , V(XOP_M9,01,7,0,x,_,_,_ ), 0 , 90 , 0 , 2896 , 14 , 11 ), // #742
822 INST(Test , X86Test , O(000000,84,_,_,x,_,_,_ ), O(000000,F6,_,_,x,_,_,_ ), 0 , 78 , 9032 , 172, 1 ), // #743
823 INST(Tzcnt , X86Rm_Raw66H , O(F30F00,BC,_,_,x,_,_,_ ), 0 , 6 , 0 , 2903 , 22 , 9 ), // #744
824 INST(Tzmsk , VexVm_Wx , V(XOP_M9,01,4,0,x,_,_,_ ), 0 , 91 , 0 , 2909 , 14 , 11 ), // #745
825 INST(Ucomisd , ExtRm , O(660F00,2E,_,_,_,_,_,_ ), 0 , 3 , 0 , 9929 , 6 , 39 ), // #746
826 INST(Ucomiss , ExtRm , O(000F00,2E,_,_,_,_,_,_ ), 0 , 4 , 0 , 9938 , 7 , 40 ), // #747
827 INST(Ud2 , X86Op , O(000F00,0B,_,_,_,_,_,_ ), 0 , 4 , 0 , 2915 , 30 , 0 ), // #748
828 INST(Unpckhpd , ExtRm , O(660F00,15,_,_,_,_,_,_ ), 0 , 3 , 0 , 9947 , 5 , 4 ), // #749
829 INST(Unpckhps , ExtRm , O(000F00,15,_,_,_,_,_,_ ), 0 , 4 , 0 , 9957 , 5 , 5 ), // #750
830 INST(Unpcklpd , ExtRm , O(660F00,14,_,_,_,_,_,_ ), 0 , 3 , 0 , 9967 , 5 , 4 ), // #751
831 INST(Unpcklps , ExtRm , O(000F00,14,_,_,_,_,_,_ ), 0 , 4 , 0 , 9977 , 5 , 5 ), // #752
832 INST(V4fmaddps , VexRm_T1_4X , E(F20F38,9A,_,2,_,0,2,T4X), 0 , 92 , 0 , 2919 , 173, 105), // #753
833 INST(V4fmaddss , VexRm_T1_4X , E(F20F38,9B,_,2,_,0,2,T4X), 0 , 92 , 0 , 2929 , 174, 105), // #754
834 INST(V4fnmaddps , VexRm_T1_4X , E(F20F38,AA,_,2,_,0,2,T4X), 0 , 92 , 0 , 2939 , 173, 105), // #755
835 INST(V4fnmaddss , VexRm_T1_4X , E(F20F38,AB,_,2,_,0,2,T4X), 0 , 92 , 0 , 2950 , 174, 105), // #756
836 INST(Vaddpd , VexRvm_Lx , V(660F00,58,_,x,I,1,4,FV ), 0 , 93 , 0 , 2961 , 175, 106), // #757
837 INST(Vaddps , VexRvm_Lx , V(000F00,58,_,x,I,0,4,FV ), 0 , 94 , 0 , 2968 , 176, 106), // #758
838 INST(Vaddsd , VexRvm , V(F20F00,58,_,I,I,1,3,T1S), 0 , 95 , 0 , 2975 , 177, 107), // #759
839 INST(Vaddss , VexRvm , V(F30F00,58,_,I,I,0,2,T1S), 0 , 96 , 0 , 2982 , 178, 107), // #760
840 INST(Vaddsubpd , VexRvm_Lx , V(660F00,D0,_,x,I,_,_,_ ), 0 , 63 , 0 , 2989 , 179, 108), // #761
841 INST(Vaddsubps , VexRvm_Lx , V(F20F00,D0,_,x,I,_,_,_ ), 0 , 97 , 0 , 2999 , 179, 108), // #762
842 INST(Vaesdec , VexRvm_Lx , V(660F38,DE,_,x,I,_,4,FVM), 0 , 98 , 0 , 3009 , 180, 109), // #763
843 INST(Vaesdeclast , VexRvm_Lx , V(660F38,DF,_,x,I,_,4,FVM), 0 , 98 , 0 , 3017 , 180, 109), // #764
844 INST(Vaesenc , VexRvm_Lx , V(660F38,DC,_,x,I,_,4,FVM), 0 , 98 , 0 , 3029 , 180, 109), // #765
845 INST(Vaesenclast , VexRvm_Lx , V(660F38,DD,_,x,I,_,4,FVM), 0 , 98 , 0 , 3037 , 180, 109), // #766
846 INST(Vaesimc , VexRm , V(660F38,DB,_,0,I,_,_,_ ), 0 , 88 , 0 , 3049 , 181, 110), // #767
847 INST(Vaeskeygenassist , VexRmi , V(660F3A,DF,_,0,I,_,_,_ ), 0 , 67 , 0 , 3057 , 182, 110), // #768
848 INST(Valignd , VexRvmi_Lx , E(660F3A,03,_,x,_,0,4,FV ), 0 , 99 , 0 , 3074 , 183, 111), // #769
849 INST(Valignq , VexRvmi_Lx , E(660F3A,03,_,x,_,1,4,FV ), 0 , 100, 0 , 3082 , 184, 111), // #770
850 INST(Vandnpd , VexRvm_Lx , V(660F00,55,_,x,I,1,4,FV ), 0 , 93 , 0 , 3090 , 185, 112), // #771
851 INST(Vandnps , VexRvm_Lx , V(000F00,55,_,x,I,0,4,FV ), 0 , 94 , 0 , 3098 , 186, 112), // #772
852 INST(Vandpd , VexRvm_Lx , V(660F00,54,_,x,I,1,4,FV ), 0 , 93 , 0 , 3106 , 187, 112), // #773
853 INST(Vandps , VexRvm_Lx , V(000F00,54,_,x,I,0,4,FV ), 0 , 94 , 0 , 3113 , 188, 112), // #774
854 INST(Vblendmb , VexRvm_Lx , E(660F38,66,_,x,_,0,4,FVM), 0 , 101, 0 , 3120 , 189, 113), // #775
855 INST(Vblendmd , VexRvm_Lx , E(660F38,64,_,x,_,0,4,FV ), 0 , 102, 0 , 3129 , 190, 111), // #776
856 INST(Vblendmpd , VexRvm_Lx , E(660F38,65,_,x,_,1,4,FV ), 0 , 103, 0 , 3138 , 191, 111), // #777
857 INST(Vblendmps , VexRvm_Lx , E(660F38,65,_,x,_,0,4,FV ), 0 , 102, 0 , 3148 , 190, 111), // #778
858 INST(Vblendmq , VexRvm_Lx , E(660F38,64,_,x,_,1,4,FV ), 0 , 103, 0 , 3158 , 191, 111), // #779
859 INST(Vblendmw , VexRvm_Lx , E(660F38,66,_,x,_,1,4,FVM), 0 , 104, 0 , 3167 , 189, 113), // #780
860 INST(Vblendpd , VexRvmi_Lx , V(660F3A,0D,_,x,I,_,_,_ ), 0 , 67 , 0 , 3176 , 192, 108), // #781
861 INST(Vblendps , VexRvmi_Lx , V(660F3A,0C,_,x,I,_,_,_ ), 0 , 67 , 0 , 3185 , 192, 108), // #782
862 INST(Vblendvpd , VexRvmr_Lx , V(660F3A,4B,_,x,0,_,_,_ ), 0 , 67 , 0 , 3194 , 193, 108), // #783
863 INST(Vblendvps , VexRvmr_Lx , V(660F3A,4A,_,x,0,_,_,_ ), 0 , 67 , 0 , 3204 , 193, 108), // #784
864 INST(Vbroadcastf128 , VexRm , V(660F38,1A,_,1,0,_,_,_ ), 0 , 105, 0 , 3214 , 194, 108), // #785
865 INST(Vbroadcastf32x2 , VexRm_Lx , E(660F38,19,_,x,_,0,3,T2 ), 0 , 106, 0 , 3229 , 195, 114), // #786
866 INST(Vbroadcastf32x4 , VexRm_Lx , E(660F38,1A,_,x,_,0,4,T4 ), 0 , 107, 0 , 3245 , 196, 63 ), // #787
867 INST(Vbroadcastf32x8 , VexRm , E(660F38,1B,_,2,_,0,5,T8 ), 0 , 108, 0 , 3261 , 197, 61 ), // #788
868 INST(Vbroadcastf64x2 , VexRm_Lx , E(660F38,1A,_,x,_,1,4,T2 ), 0 , 109, 0 , 3277 , 196, 114), // #789
869 INST(Vbroadcastf64x4 , VexRm , E(660F38,1B,_,2,_,1,5,T4 ), 0 , 110, 0 , 3293 , 197, 63 ), // #790
870 INST(Vbroadcasti128 , VexRm , V(660F38,5A,_,1,0,_,_,_ ), 0 , 105, 0 , 3309 , 194, 115), // #791
871 INST(Vbroadcasti32x2 , VexRm_Lx , E(660F38,59,_,x,_,0,3,T2 ), 0 , 106, 0 , 3324 , 198, 114), // #792
872 INST(Vbroadcasti32x4 , VexRm_Lx , E(660F38,5A,_,x,_,0,4,T4 ), 0 , 107, 0 , 3340 , 196, 111), // #793
873 INST(Vbroadcasti32x8 , VexRm , E(660F38,5B,_,2,_,0,5,T8 ), 0 , 108, 0 , 3356 , 197, 61 ), // #794
874 INST(Vbroadcasti64x2 , VexRm_Lx , E(660F38,5A,_,x,_,1,4,T2 ), 0 , 109, 0 , 3372 , 196, 114), // #795
875 INST(Vbroadcasti64x4 , VexRm , E(660F38,5B,_,2,_,1,5,T4 ), 0 , 110, 0 , 3388 , 197, 63 ), // #796
876 INST(Vbroadcastsd , VexRm_Lx , V(660F38,19,_,x,0,1,3,T1S), 0 , 111, 0 , 3404 , 199, 116), // #797
877 INST(Vbroadcastss , VexRm_Lx , V(660F38,18,_,x,0,0,2,T1S), 0 , 112, 0 , 3417 , 200, 116), // #798
878 INST(Vcmppd , VexRvmi_Lx , V(660F00,C2,_,x,I,1,4,FV ), 0 , 93 , 0 , 3430 , 201, 106), // #799
879 INST(Vcmpps , VexRvmi_Lx , V(000F00,C2,_,x,I,0,4,FV ), 0 , 94 , 0 , 3437 , 202, 106), // #800
880 INST(Vcmpsd , VexRvmi , V(F20F00,C2,_,I,I,1,3,T1S), 0 , 95 , 0 , 3444 , 203, 107), // #801
881 INST(Vcmpss , VexRvmi , V(F30F00,C2,_,I,I,0,2,T1S), 0 , 96 , 0 , 3451 , 204, 107), // #802
882 INST(Vcomisd , VexRm , V(660F00,2F,_,I,I,1,3,T1S), 0 , 113, 0 , 3458 , 205, 117), // #803
883 INST(Vcomiss , VexRm , V(000F00,2F,_,I,I,0,2,T1S), 0 , 114, 0 , 3466 , 206, 117), // #804
884 INST(Vcompresspd , VexMr_Lx , E(660F38,8A,_,x,_,1,3,T1S), 0 , 115, 0 , 3474 , 207, 111), // #805
885 INST(Vcompressps , VexMr_Lx , E(660F38,8A,_,x,_,0,2,T1S), 0 , 116, 0 , 3486 , 207, 111), // #806
886 INST(Vcvtdq2pd , VexRm_Lx , V(F30F00,E6,_,x,I,0,3,HV ), 0 , 117, 0 , 3498 , 208, 106), // #807
887 INST(Vcvtdq2ps , VexRm_Lx , V(000F00,5B,_,x,I,0,4,FV ), 0 , 94 , 0 , 3508 , 209, 106), // #808
888 INST(Vcvtne2ps2bf16 , VexRvm , E(F20F38,72,_,_,_,0,_,_ ), 0 , 118, 0 , 3518 , 190, 118), // #809
889 INST(Vcvtneps2bf16 , VexRm , E(F30F38,72,_,_,_,0,_,_ ), 0 , 119, 0 , 3533 , 210, 118), // #810
890 INST(Vcvtpd2dq , VexRm_Lx , V(F20F00,E6,_,x,I,1,4,FV ), 0 , 120, 0 , 3547 , 211, 106), // #811
891 INST(Vcvtpd2ps , VexRm_Lx , V(660F00,5A,_,x,I,1,4,FV ), 0 , 93 , 0 , 3557 , 211, 106), // #812
892 INST(Vcvtpd2qq , VexRm_Lx , E(660F00,7B,_,x,_,1,4,FV ), 0 , 121, 0 , 3567 , 212, 114), // #813
893 INST(Vcvtpd2udq , VexRm_Lx , E(000F00,79,_,x,_,1,4,FV ), 0 , 122, 0 , 3577 , 213, 111), // #814
894 INST(Vcvtpd2uqq , VexRm_Lx , E(660F00,79,_,x,_,1,4,FV ), 0 , 121, 0 , 3588 , 212, 114), // #815
895 INST(Vcvtph2ps , VexRm_Lx , V(660F38,13,_,x,0,0,3,HVM), 0 , 123, 0 , 3599 , 214, 119), // #816
896 INST(Vcvtps2dq , VexRm_Lx , V(660F00,5B,_,x,I,0,4,FV ), 0 , 124, 0 , 3609 , 209, 106), // #817
897 INST(Vcvtps2pd , VexRm_Lx , V(000F00,5A,_,x,I,0,4,HV ), 0 , 125, 0 , 3619 , 215, 106), // #818
898 INST(Vcvtps2ph , VexMri_Lx , V(660F3A,1D,_,x,0,0,3,HVM), 0 , 126, 0 , 3629 , 216, 119), // #819
899 INST(Vcvtps2qq , VexRm_Lx , E(660F00,7B,_,x,_,0,3,HV ), 0 , 127, 0 , 3639 , 217, 114), // #820
900 INST(Vcvtps2udq , VexRm_Lx , E(000F00,79,_,x,_,0,4,FV ), 0 , 128, 0 , 3649 , 218, 111), // #821
901 INST(Vcvtps2uqq , VexRm_Lx , E(660F00,79,_,x,_,0,3,HV ), 0 , 127, 0 , 3660 , 217, 114), // #822
902 INST(Vcvtqq2pd , VexRm_Lx , E(F30F00,E6,_,x,_,1,4,FV ), 0 , 129, 0 , 3671 , 212, 114), // #823
903 INST(Vcvtqq2ps , VexRm_Lx , E(000F00,5B,_,x,_,1,4,FV ), 0 , 122, 0 , 3681 , 213, 114), // #824
904 INST(Vcvtsd2si , VexRm_Wx , V(F20F00,2D,_,I,x,x,3,T1F), 0 , 130, 0 , 3691 , 219, 107), // #825
905 INST(Vcvtsd2ss , VexRvm , V(F20F00,5A,_,I,I,1,3,T1S), 0 , 95 , 0 , 3701 , 177, 107), // #826
906 INST(Vcvtsd2usi , VexRm_Wx , E(F20F00,79,_,I,_,x,3,T1F), 0 , 131, 0 , 3711 , 220, 63 ), // #827
907 INST(Vcvtsi2sd , VexRvm_Wx , V(F20F00,2A,_,I,x,x,2,T1W), 0 , 132, 0 , 3722 , 221, 107), // #828
908 INST(Vcvtsi2ss , VexRvm_Wx , V(F30F00,2A,_,I,x,x,2,T1W), 0 , 133, 0 , 3732 , 221, 107), // #829
909 INST(Vcvtss2sd , VexRvm , V(F30F00,5A,_,I,I,0,2,T1S), 0 , 96 , 0 , 3742 , 222, 107), // #830
910 INST(Vcvtss2si , VexRm_Wx , V(F30F00,2D,_,I,x,x,2,T1F), 0 , 134, 0 , 3752 , 223, 107), // #831
911 INST(Vcvtss2usi , VexRm_Wx , E(F30F00,79,_,I,_,x,2,T1F), 0 , 135, 0 , 3762 , 224, 63 ), // #832
912 INST(Vcvttpd2dq , VexRm_Lx , V(660F00,E6,_,x,I,1,4,FV ), 0 , 93 , 0 , 3773 , 225, 106), // #833
913 INST(Vcvttpd2qq , VexRm_Lx , E(660F00,7A,_,x,_,1,4,FV ), 0 , 121, 0 , 3784 , 226, 111), // #834
914 INST(Vcvttpd2udq , VexRm_Lx , E(000F00,78,_,x,_,1,4,FV ), 0 , 122, 0 , 3795 , 227, 111), // #835
915 INST(Vcvttpd2uqq , VexRm_Lx , E(660F00,78,_,x,_,1,4,FV ), 0 , 121, 0 , 3807 , 226, 114), // #836
916 INST(Vcvttps2dq , VexRm_Lx , V(F30F00,5B,_,x,I,0,4,FV ), 0 , 136, 0 , 3819 , 228, 106), // #837
917 INST(Vcvttps2qq , VexRm_Lx , E(660F00,7A,_,x,_,0,3,HV ), 0 , 127, 0 , 3830 , 229, 114), // #838
918 INST(Vcvttps2udq , VexRm_Lx , E(000F00,78,_,x,_,0,4,FV ), 0 , 128, 0 , 3841 , 230, 111), // #839
919 INST(Vcvttps2uqq , VexRm_Lx , E(660F00,78,_,x,_,0,3,HV ), 0 , 127, 0 , 3853 , 229, 114), // #840
920 INST(Vcvttsd2si , VexRm_Wx , V(F20F00,2C,_,I,x,x,3,T1F), 0 , 130, 0 , 3865 , 231, 107), // #841
921 INST(Vcvttsd2usi , VexRm_Wx , E(F20F00,78,_,I,_,x,3,T1F), 0 , 131, 0 , 3876 , 232, 63 ), // #842
922 INST(Vcvttss2si , VexRm_Wx , V(F30F00,2C,_,I,x,x,2,T1F), 0 , 134, 0 , 3888 , 233, 107), // #843
923 INST(Vcvttss2usi , VexRm_Wx , E(F30F00,78,_,I,_,x,2,T1F), 0 , 135, 0 , 3899 , 234, 63 ), // #844
924 INST(Vcvtudq2pd , VexRm_Lx , E(F30F00,7A,_,x,_,0,3,HV ), 0 , 137, 0 , 3911 , 235, 111), // #845
925 INST(Vcvtudq2ps , VexRm_Lx , E(F20F00,7A,_,x,_,0,4,FV ), 0 , 138, 0 , 3922 , 218, 111), // #846
926 INST(Vcvtuqq2pd , VexRm_Lx , E(F30F00,7A,_,x,_,1,4,FV ), 0 , 129, 0 , 3933 , 212, 114), // #847
927 INST(Vcvtuqq2ps , VexRm_Lx , E(F20F00,7A,_,x,_,1,4,FV ), 0 , 139, 0 , 3944 , 213, 114), // #848
928 INST(Vcvtusi2sd , VexRvm_Wx , E(F20F00,7B,_,I,_,x,2,T1W), 0 , 140, 0 , 3955 , 236, 63 ), // #849
929 INST(Vcvtusi2ss , VexRvm_Wx , E(F30F00,7B,_,I,_,x,2,T1W), 0 , 141, 0 , 3966 , 236, 63 ), // #850
930 INST(Vdbpsadbw , VexRvmi_Lx , E(660F3A,42,_,x,_,0,4,FVM), 0 , 142, 0 , 3977 , 237, 113), // #851
931 INST(Vdivpd , VexRvm_Lx , V(660F00,5E,_,x,I,1,4,FV ), 0 , 93 , 0 , 3987 , 175, 106), // #852
932 INST(Vdivps , VexRvm_Lx , V(000F00,5E,_,x,I,0,4,FV ), 0 , 94 , 0 , 3994 , 176, 106), // #853
933 INST(Vdivsd , VexRvm , V(F20F00,5E,_,I,I,1,3,T1S), 0 , 95 , 0 , 4001 , 177, 107), // #854
934 INST(Vdivss , VexRvm , V(F30F00,5E,_,I,I,0,2,T1S), 0 , 96 , 0 , 4008 , 178, 107), // #855
935 INST(Vdpbf16ps , VexRvm , E(F30F38,52,_,_,_,0,_,_ ), 0 , 119, 0 , 4015 , 190, 118), // #856
936 INST(Vdppd , VexRvmi_Lx , V(660F3A,41,_,x,I,_,_,_ ), 0 , 67 , 0 , 4025 , 238, 108), // #857
937 INST(Vdpps , VexRvmi_Lx , V(660F3A,40,_,x,I,_,_,_ ), 0 , 67 , 0 , 4031 , 192, 108), // #858
938 INST(Verr , X86M_NoSize , O(000F00,00,4,_,_,_,_,_ ), 0 , 89 , 0 , 4037 , 97 , 10 ), // #859
939 INST(Verw , X86M_NoSize , O(000F00,00,5,_,_,_,_,_ ), 0 , 70 , 0 , 4042 , 97 , 10 ), // #860
940 INST(Vexp2pd , VexRm , E(660F38,C8,_,2,_,1,4,FV ), 0 , 143, 0 , 4047 , 239, 120), // #861
941 INST(Vexp2ps , VexRm , E(660F38,C8,_,2,_,0,4,FV ), 0 , 144, 0 , 4055 , 240, 120), // #862
942 INST(Vexpandpd , VexRm_Lx , E(660F38,88,_,x,_,1,3,T1S), 0 , 115, 0 , 4063 , 241, 111), // #863
943 INST(Vexpandps , VexRm_Lx , E(660F38,88,_,x,_,0,2,T1S), 0 , 116, 0 , 4073 , 241, 111), // #864
944 INST(Vextractf128 , VexMri , V(660F3A,19,_,1,0,_,_,_ ), 0 , 145, 0 , 4083 , 242, 108), // #865
945 INST(Vextractf32x4 , VexMri_Lx , E(660F3A,19,_,x,_,0,4,T4 ), 0 , 146, 0 , 4096 , 243, 111), // #866
946 INST(Vextractf32x8 , VexMri , E(660F3A,1B,_,2,_,0,5,T8 ), 0 , 147, 0 , 4110 , 244, 61 ), // #867
947 INST(Vextractf64x2 , VexMri_Lx , E(660F3A,19,_,x,_,1,4,T2 ), 0 , 148, 0 , 4124 , 243, 114), // #868
948 INST(Vextractf64x4 , VexMri , E(660F3A,1B,_,2,_,1,5,T4 ), 0 , 149, 0 , 4138 , 244, 63 ), // #869
949 INST(Vextracti128 , VexMri , V(660F3A,39,_,1,0,_,_,_ ), 0 , 145, 0 , 4152 , 242, 115), // #870
950 INST(Vextracti32x4 , VexMri_Lx , E(660F3A,39,_,x,_,0,4,T4 ), 0 , 146, 0 , 4165 , 243, 111), // #871
951 INST(Vextracti32x8 , VexMri , E(660F3A,3B,_,2,_,0,5,T8 ), 0 , 147, 0 , 4179 , 244, 61 ), // #872
952 INST(Vextracti64x2 , VexMri_Lx , E(660F3A,39,_,x,_,1,4,T2 ), 0 , 148, 0 , 4193 , 243, 114), // #873
953 INST(Vextracti64x4 , VexMri , E(660F3A,3B,_,2,_,1,5,T4 ), 0 , 149, 0 , 4207 , 244, 63 ), // #874
954 INST(Vextractps , VexMri , V(660F3A,17,_,0,I,I,2,T1S), 0 , 150, 0 , 4221 , 245, 107), // #875
955 INST(Vfixupimmpd , VexRvmi_Lx , E(660F3A,54,_,x,_,1,4,FV ), 0 , 100, 0 , 4232 , 246, 111), // #876
956 INST(Vfixupimmps , VexRvmi_Lx , E(660F3A,54,_,x,_,0,4,FV ), 0 , 99 , 0 , 4244 , 247, 111), // #877
957 INST(Vfixupimmsd , VexRvmi , E(660F3A,55,_,I,_,1,3,T1S), 0 , 151, 0 , 4256 , 248, 63 ), // #878
958 INST(Vfixupimmss , VexRvmi , E(660F3A,55,_,I,_,0,2,T1S), 0 , 152, 0 , 4268 , 249, 63 ), // #879
959 INST(Vfmadd132pd , VexRvm_Lx , V(660F38,98,_,x,1,1,4,FV ), 0 , 153, 0 , 4280 , 175, 121), // #880
960 INST(Vfmadd132ps , VexRvm_Lx , V(660F38,98,_,x,0,0,4,FV ), 0 , 154, 0 , 4292 , 176, 121), // #881
961 INST(Vfmadd132sd , VexRvm , V(660F38,99,_,I,1,1,3,T1S), 0 , 155, 0 , 4304 , 177, 122), // #882
962 INST(Vfmadd132ss , VexRvm , V(660F38,99,_,I,0,0,2,T1S), 0 , 112, 0 , 4316 , 178, 122), // #883
963 INST(Vfmadd213pd , VexRvm_Lx , V(660F38,A8,_,x,1,1,4,FV ), 0 , 153, 0 , 4328 , 175, 121), // #884
964 INST(Vfmadd213ps , VexRvm_Lx , V(660F38,A8,_,x,0,0,4,FV ), 0 , 154, 0 , 4340 , 176, 121), // #885
965 INST(Vfmadd213sd , VexRvm , V(660F38,A9,_,I,1,1,3,T1S), 0 , 155, 0 , 4352 , 177, 122), // #886
966 INST(Vfmadd213ss , VexRvm , V(660F38,A9,_,I,0,0,2,T1S), 0 , 112, 0 , 4364 , 178, 122), // #887
967 INST(Vfmadd231pd , VexRvm_Lx , V(660F38,B8,_,x,1,1,4,FV ), 0 , 153, 0 , 4376 , 175, 121), // #888
968 INST(Vfmadd231ps , VexRvm_Lx , V(660F38,B8,_,x,0,0,4,FV ), 0 , 154, 0 , 4388 , 176, 121), // #889
969 INST(Vfmadd231sd , VexRvm , V(660F38,B9,_,I,1,1,3,T1S), 0 , 155, 0 , 4400 , 177, 122), // #890
970 INST(Vfmadd231ss , VexRvm , V(660F38,B9,_,I,0,0,2,T1S), 0 , 112, 0 , 4412 , 178, 122), // #891
971 INST(Vfmaddpd , Fma4_Lx , V(660F3A,69,_,x,x,_,_,_ ), 0 , 67 , 0 , 4424 , 250, 123), // #892
972 INST(Vfmaddps , Fma4_Lx , V(660F3A,68,_,x,x,_,_,_ ), 0 , 67 , 0 , 4433 , 250, 123), // #893
973 INST(Vfmaddsd , Fma4 , V(660F3A,6B,_,0,x,_,_,_ ), 0 , 67 , 0 , 4442 , 251, 123), // #894
974 INST(Vfmaddss , Fma4 , V(660F3A,6A,_,0,x,_,_,_ ), 0 , 67 , 0 , 4451 , 252, 123), // #895
975 INST(Vfmaddsub132pd , VexRvm_Lx , V(660F38,96,_,x,1,1,4,FV ), 0 , 153, 0 , 4460 , 175, 121), // #896
976 INST(Vfmaddsub132ps , VexRvm_Lx , V(660F38,96,_,x,0,0,4,FV ), 0 , 154, 0 , 4475 , 176, 121), // #897
977 INST(Vfmaddsub213pd , VexRvm_Lx , V(660F38,A6,_,x,1,1,4,FV ), 0 , 153, 0 , 4490 , 175, 121), // #898
978 INST(Vfmaddsub213ps , VexRvm_Lx , V(660F38,A6,_,x,0,0,4,FV ), 0 , 154, 0 , 4505 , 176, 121), // #899
979 INST(Vfmaddsub231pd , VexRvm_Lx , V(660F38,B6,_,x,1,1,4,FV ), 0 , 153, 0 , 4520 , 175, 121), // #900
980 INST(Vfmaddsub231ps , VexRvm_Lx , V(660F38,B6,_,x,0,0,4,FV ), 0 , 154, 0 , 4535 , 176, 121), // #901
981 INST(Vfmaddsubpd , Fma4_Lx , V(660F3A,5D,_,x,x,_,_,_ ), 0 , 67 , 0 , 4550 , 250, 123), // #902
982 INST(Vfmaddsubps , Fma4_Lx , V(660F3A,5C,_,x,x,_,_,_ ), 0 , 67 , 0 , 4562 , 250, 123), // #903
983 INST(Vfmsub132pd , VexRvm_Lx , V(660F38,9A,_,x,1,1,4,FV ), 0 , 153, 0 , 4574 , 175, 121), // #904
984 INST(Vfmsub132ps , VexRvm_Lx , V(660F38,9A,_,x,0,0,4,FV ), 0 , 154, 0 , 4586 , 176, 121), // #905
985 INST(Vfmsub132sd , VexRvm , V(660F38,9B,_,I,1,1,3,T1S), 0 , 155, 0 , 4598 , 177, 122), // #906
986 INST(Vfmsub132ss , VexRvm , V(660F38,9B,_,I,0,0,2,T1S), 0 , 112, 0 , 4610 , 178, 122), // #907
987 INST(Vfmsub213pd , VexRvm_Lx , V(660F38,AA,_,x,1,1,4,FV ), 0 , 153, 0 , 4622 , 175, 121), // #908
988 INST(Vfmsub213ps , VexRvm_Lx , V(660F38,AA,_,x,0,0,4,FV ), 0 , 154, 0 , 4634 , 176, 121), // #909
989 INST(Vfmsub213sd , VexRvm , V(660F38,AB,_,I,1,1,3,T1S), 0 , 155, 0 , 4646 , 177, 122), // #910
990 INST(Vfmsub213ss , VexRvm , V(660F38,AB,_,I,0,0,2,T1S), 0 , 112, 0 , 4658 , 178, 122), // #911
991 INST(Vfmsub231pd , VexRvm_Lx , V(660F38,BA,_,x,1,1,4,FV ), 0 , 153, 0 , 4670 , 175, 121), // #912
992 INST(Vfmsub231ps , VexRvm_Lx , V(660F38,BA,_,x,0,0,4,FV ), 0 , 154, 0 , 4682 , 176, 121), // #913
993 INST(Vfmsub231sd , VexRvm , V(660F38,BB,_,I,1,1,3,T1S), 0 , 155, 0 , 4694 , 177, 122), // #914
994 INST(Vfmsub231ss , VexRvm , V(660F38,BB,_,I,0,0,2,T1S), 0 , 112, 0 , 4706 , 178, 122), // #915
995 INST(Vfmsubadd132pd , VexRvm_Lx , V(660F38,97,_,x,1,1,4,FV ), 0 , 153, 0 , 4718 , 175, 121), // #916
996 INST(Vfmsubadd132ps , VexRvm_Lx , V(660F38,97,_,x,0,0,4,FV ), 0 , 154, 0 , 4733 , 176, 121), // #917
997 INST(Vfmsubadd213pd , VexRvm_Lx , V(660F38,A7,_,x,1,1,4,FV ), 0 , 153, 0 , 4748 , 175, 121), // #918
998 INST(Vfmsubadd213ps , VexRvm_Lx , V(660F38,A7,_,x,0,0,4,FV ), 0 , 154, 0 , 4763 , 176, 121), // #919
999 INST(Vfmsubadd231pd , VexRvm_Lx , V(660F38,B7,_,x,1,1,4,FV ), 0 , 153, 0 , 4778 , 175, 121), // #920
1000 INST(Vfmsubadd231ps , VexRvm_Lx , V(660F38,B7,_,x,0,0,4,FV ), 0 , 154, 0 , 4793 , 176, 121), // #921
1001 INST(Vfmsubaddpd , Fma4_Lx , V(660F3A,5F,_,x,x,_,_,_ ), 0 , 67 , 0 , 4808 , 250, 123), // #922
1002 INST(Vfmsubaddps , Fma4_Lx , V(660F3A,5E,_,x,x,_,_,_ ), 0 , 67 , 0 , 4820 , 250, 123), // #923
1003 INST(Vfmsubpd , Fma4_Lx , V(660F3A,6D,_,x,x,_,_,_ ), 0 , 67 , 0 , 4832 , 250, 123), // #924
1004 INST(Vfmsubps , Fma4_Lx , V(660F3A,6C,_,x,x,_,_,_ ), 0 , 67 , 0 , 4841 , 250, 123), // #925
1005 INST(Vfmsubsd , Fma4 , V(660F3A,6F,_,0,x,_,_,_ ), 0 , 67 , 0 , 4850 , 251, 123), // #926
1006 INST(Vfmsubss , Fma4 , V(660F3A,6E,_,0,x,_,_,_ ), 0 , 67 , 0 , 4859 , 252, 123), // #927
1007 INST(Vfnmadd132pd , VexRvm_Lx , V(660F38,9C,_,x,1,1,4,FV ), 0 , 153, 0 , 4868 , 175, 121), // #928
1008 INST(Vfnmadd132ps , VexRvm_Lx , V(660F38,9C,_,x,0,0,4,FV ), 0 , 154, 0 , 4881 , 176, 121), // #929
1009 INST(Vfnmadd132sd , VexRvm , V(660F38,9D,_,I,1,1,3,T1S), 0 , 155, 0 , 4894 , 177, 122), // #930
1010 INST(Vfnmadd132ss , VexRvm , V(660F38,9D,_,I,0,0,2,T1S), 0 , 112, 0 , 4907 , 178, 122), // #931
1011 INST(Vfnmadd213pd , VexRvm_Lx , V(660F38,AC,_,x,1,1,4,FV ), 0 , 153, 0 , 4920 , 175, 121), // #932
1012 INST(Vfnmadd213ps , VexRvm_Lx , V(660F38,AC,_,x,0,0,4,FV ), 0 , 154, 0 , 4933 , 176, 121), // #933
1013 INST(Vfnmadd213sd , VexRvm , V(660F38,AD,_,I,1,1,3,T1S), 0 , 155, 0 , 4946 , 177, 122), // #934
1014 INST(Vfnmadd213ss , VexRvm , V(660F38,AD,_,I,0,0,2,T1S), 0 , 112, 0 , 4959 , 178, 122), // #935
1015 INST(Vfnmadd231pd , VexRvm_Lx , V(660F38,BC,_,x,1,1,4,FV ), 0 , 153, 0 , 4972 , 175, 121), // #936
1016 INST(Vfnmadd231ps , VexRvm_Lx , V(660F38,BC,_,x,0,0,4,FV ), 0 , 154, 0 , 4985 , 176, 121), // #937
1017 INST(Vfnmadd231sd , VexRvm , V(660F38,BD,_,I,1,1,3,T1S), 0 , 155, 0 , 4998 , 177, 122), // #938
1018 INST(Vfnmadd231ss , VexRvm , V(660F38,BD,_,I,0,0,2,T1S), 0 , 112, 0 , 5011 , 178, 122), // #939
1019 INST(Vfnmaddpd , Fma4_Lx , V(660F3A,79,_,x,x,_,_,_ ), 0 , 67 , 0 , 5024 , 250, 123), // #940
1020 INST(Vfnmaddps , Fma4_Lx , V(660F3A,78,_,x,x,_,_,_ ), 0 , 67 , 0 , 5034 , 250, 123), // #941
1021 INST(Vfnmaddsd , Fma4 , V(660F3A,7B,_,0,x,_,_,_ ), 0 , 67 , 0 , 5044 , 251, 123), // #942
1022 INST(Vfnmaddss , Fma4 , V(660F3A,7A,_,0,x,_,_,_ ), 0 , 67 , 0 , 5054 , 252, 123), // #943
1023 INST(Vfnmsub132pd , VexRvm_Lx , V(660F38,9E,_,x,1,1,4,FV ), 0 , 153, 0 , 5064 , 175, 121), // #944
1024 INST(Vfnmsub132ps , VexRvm_Lx , V(660F38,9E,_,x,0,0,4,FV ), 0 , 154, 0 , 5077 , 176, 121), // #945
1025 INST(Vfnmsub132sd , VexRvm , V(660F38,9F,_,I,1,1,3,T1S), 0 , 155, 0 , 5090 , 177, 122), // #946
1026 INST(Vfnmsub132ss , VexRvm , V(660F38,9F,_,I,0,0,2,T1S), 0 , 112, 0 , 5103 , 178, 122), // #947
1027 INST(Vfnmsub213pd , VexRvm_Lx , V(660F38,AE,_,x,1,1,4,FV ), 0 , 153, 0 , 5116 , 175, 121), // #948
1028 INST(Vfnmsub213ps , VexRvm_Lx , V(660F38,AE,_,x,0,0,4,FV ), 0 , 154, 0 , 5129 , 176, 121), // #949
1029 INST(Vfnmsub213sd , VexRvm , V(660F38,AF,_,I,1,1,3,T1S), 0 , 155, 0 , 5142 , 177, 122), // #950
1030 INST(Vfnmsub213ss , VexRvm , V(660F38,AF,_,I,0,0,2,T1S), 0 , 112, 0 , 5155 , 178, 122), // #951
1031 INST(Vfnmsub231pd , VexRvm_Lx , V(660F38,BE,_,x,1,1,4,FV ), 0 , 153, 0 , 5168 , 175, 121), // #952
1032 INST(Vfnmsub231ps , VexRvm_Lx , V(660F38,BE,_,x,0,0,4,FV ), 0 , 154, 0 , 5181 , 176, 121), // #953
1033 INST(Vfnmsub231sd , VexRvm , V(660F38,BF,_,I,1,1,3,T1S), 0 , 155, 0 , 5194 , 177, 122), // #954
1034 INST(Vfnmsub231ss , VexRvm , V(660F38,BF,_,I,0,0,2,T1S), 0 , 112, 0 , 5207 , 178, 122), // #955
1035 INST(Vfnmsubpd , Fma4_Lx , V(660F3A,7D,_,x,x,_,_,_ ), 0 , 67 , 0 , 5220 , 250, 123), // #956
1036 INST(Vfnmsubps , Fma4_Lx , V(660F3A,7C,_,x,x,_,_,_ ), 0 , 67 , 0 , 5230 , 250, 123), // #957
1037 INST(Vfnmsubsd , Fma4 , V(660F3A,7F,_,0,x,_,_,_ ), 0 , 67 , 0 , 5240 , 251, 123), // #958
1038 INST(Vfnmsubss , Fma4 , V(660F3A,7E,_,0,x,_,_,_ ), 0 , 67 , 0 , 5250 , 252, 123), // #959
1039 INST(Vfpclasspd , VexRmi_Lx , E(660F3A,66,_,x,_,1,4,FV ), 0 , 100, 0 , 5260 , 253, 114), // #960
1040 INST(Vfpclassps , VexRmi_Lx , E(660F3A,66,_,x,_,0,4,FV ), 0 , 99 , 0 , 5271 , 254, 114), // #961
1041 INST(Vfpclasssd , VexRmi_Lx , E(660F3A,67,_,I,_,1,3,T1S), 0 , 151, 0 , 5282 , 255, 61 ), // #962
1042 INST(Vfpclassss , VexRmi_Lx , E(660F3A,67,_,I,_,0,2,T1S), 0 , 152, 0 , 5293 , 256, 61 ), // #963
1043 INST(Vfrczpd , VexRm_Lx , V(XOP_M9,81,_,x,0,_,_,_ ), 0 , 72 , 0 , 5304 , 257, 124), // #964
1044 INST(Vfrczps , VexRm_Lx , V(XOP_M9,80,_,x,0,_,_,_ ), 0 , 72 , 0 , 5312 , 257, 124), // #965
1045 INST(Vfrczsd , VexRm , V(XOP_M9,83,_,0,0,_,_,_ ), 0 , 72 , 0 , 5320 , 258, 124), // #966
1046 INST(Vfrczss , VexRm , V(XOP_M9,82,_,0,0,_,_,_ ), 0 , 72 , 0 , 5328 , 259, 124), // #967
1047 INST(Vgatherdpd , VexRmvRm_VM , V(660F38,92,_,x,1,_,_,_ ), V(660F38,92,_,x,_,1,3,T1S), 156, 79 , 5336 , 260, 125), // #968
1048 INST(Vgatherdps , VexRmvRm_VM , V(660F38,92,_,x,0,_,_,_ ), V(660F38,92,_,x,_,0,2,T1S), 88 , 80 , 5347 , 261, 125), // #969
1049 INST(Vgatherpf0dpd , VexM_VM , E(660F38,C6,1,2,_,1,3,T1S), 0 , 157, 0 , 5358 , 262, 126), // #970
1050 INST(Vgatherpf0dps , VexM_VM , E(660F38,C6,1,2,_,0,2,T1S), 0 , 158, 0 , 5372 , 263, 126), // #971
1051 INST(Vgatherpf0qpd , VexM_VM , E(660F38,C7,1,2,_,1,3,T1S), 0 , 157, 0 , 5386 , 264, 126), // #972
1052 INST(Vgatherpf0qps , VexM_VM , E(660F38,C7,1,2,_,0,2,T1S), 0 , 158, 0 , 5400 , 264, 126), // #973
1053 INST(Vgatherpf1dpd , VexM_VM , E(660F38,C6,2,2,_,1,3,T1S), 0 , 159, 0 , 5414 , 262, 126), // #974
1054 INST(Vgatherpf1dps , VexM_VM , E(660F38,C6,2,2,_,0,2,T1S), 0 , 160, 0 , 5428 , 263, 126), // #975
1055 INST(Vgatherpf1qpd , VexM_VM , E(660F38,C7,2,2,_,1,3,T1S), 0 , 159, 0 , 5442 , 264, 126), // #976
1056 INST(Vgatherpf1qps , VexM_VM , E(660F38,C7,2,2,_,0,2,T1S), 0 , 160, 0 , 5456 , 264, 126), // #977
1057 INST(Vgatherqpd , VexRmvRm_VM , V(660F38,93,_,x,1,_,_,_ ), V(660F38,93,_,x,_,1,3,T1S), 156, 81 , 5470 , 265, 125), // #978
1058 INST(Vgatherqps , VexRmvRm_VM , V(660F38,93,_,x,0,_,_,_ ), V(660F38,93,_,x,_,0,2,T1S), 88 , 82 , 5481 , 266, 125), // #979
1059 INST(Vgetexppd , VexRm_Lx , E(660F38,42,_,x,_,1,4,FV ), 0 , 103, 0 , 5492 , 226, 111), // #980
1060 INST(Vgetexpps , VexRm_Lx , E(660F38,42,_,x,_,0,4,FV ), 0 , 102, 0 , 5502 , 230, 111), // #981
1061 INST(Vgetexpsd , VexRvm , E(660F38,43,_,I,_,1,3,T1S), 0 , 115, 0 , 5512 , 267, 63 ), // #982
1062 INST(Vgetexpss , VexRvm , E(660F38,43,_,I,_,0,2,T1S), 0 , 116, 0 , 5522 , 268, 63 ), // #983
1063 INST(Vgetmantpd , VexRmi_Lx , E(660F3A,26,_,x,_,1,4,FV ), 0 , 100, 0 , 5532 , 269, 111), // #984
1064 INST(Vgetmantps , VexRmi_Lx , E(660F3A,26,_,x,_,0,4,FV ), 0 , 99 , 0 , 5543 , 270, 111), // #985
1065 INST(Vgetmantsd , VexRvmi , E(660F3A,27,_,I,_,1,3,T1S), 0 , 151, 0 , 5554 , 248, 63 ), // #986
1066 INST(Vgetmantss , VexRvmi , E(660F3A,27,_,I,_,0,2,T1S), 0 , 152, 0 , 5565 , 249, 63 ), // #987
1067 INST(Vgf2p8affineinvqb, VexRvmi_Lx , V(660F3A,CF,_,x,1,1,4,FV ), 0 , 161, 0 , 5576 , 271, 127), // #988
1068 INST(Vgf2p8affineqb , VexRvmi_Lx , V(660F3A,CE,_,x,1,1,4,FV ), 0 , 161, 0 , 5594 , 271, 127), // #989
1069 INST(Vgf2p8mulb , VexRvm_Lx , V(660F38,CF,_,x,0,0,4,FV ), 0 , 154, 0 , 5609 , 272, 127), // #990
1070 INST(Vhaddpd , VexRvm_Lx , V(660F00,7C,_,x,I,_,_,_ ), 0 , 63 , 0 , 5620 , 179, 108), // #991
1071 INST(Vhaddps , VexRvm_Lx , V(F20F00,7C,_,x,I,_,_,_ ), 0 , 97 , 0 , 5628 , 179, 108), // #992
1072 INST(Vhsubpd , VexRvm_Lx , V(660F00,7D,_,x,I,_,_,_ ), 0 , 63 , 0 , 5636 , 179, 108), // #993
1073 INST(Vhsubps , VexRvm_Lx , V(F20F00,7D,_,x,I,_,_,_ ), 0 , 97 , 0 , 5644 , 179, 108), // #994
1074 INST(Vinsertf128 , VexRvmi , V(660F3A,18,_,1,0,_,_,_ ), 0 , 145, 0 , 5652 , 273, 108), // #995
1075 INST(Vinsertf32x4 , VexRvmi_Lx , E(660F3A,18,_,x,_,0,4,T4 ), 0 , 146, 0 , 5664 , 274, 111), // #996
1076 INST(Vinsertf32x8 , VexRvmi , E(660F3A,1A,_,2,_,0,5,T8 ), 0 , 147, 0 , 5677 , 275, 61 ), // #997
1077 INST(Vinsertf64x2 , VexRvmi_Lx , E(660F3A,18,_,x,_,1,4,T2 ), 0 , 148, 0 , 5690 , 274, 114), // #998
1078 INST(Vinsertf64x4 , VexRvmi , E(660F3A,1A,_,2,_,1,5,T4 ), 0 , 149, 0 , 5703 , 275, 63 ), // #999
1079 INST(Vinserti128 , VexRvmi , V(660F3A,38,_,1,0,_,_,_ ), 0 , 145, 0 , 5716 , 273, 115), // #1000
1080 INST(Vinserti32x4 , VexRvmi_Lx , E(660F3A,38,_,x,_,0,4,T4 ), 0 , 146, 0 , 5728 , 274, 111), // #1001
1081 INST(Vinserti32x8 , VexRvmi , E(660F3A,3A,_,2,_,0,5,T8 ), 0 , 147, 0 , 5741 , 275, 61 ), // #1002
1082 INST(Vinserti64x2 , VexRvmi_Lx , E(660F3A,38,_,x,_,1,4,T2 ), 0 , 148, 0 , 5754 , 274, 114), // #1003
1083 INST(Vinserti64x4 , VexRvmi , E(660F3A,3A,_,2,_,1,5,T4 ), 0 , 149, 0 , 5767 , 275, 63 ), // #1004
1084 INST(Vinsertps , VexRvmi , V(660F3A,21,_,0,I,0,2,T1S), 0 , 150, 0 , 5780 , 276, 107), // #1005
1085 INST(Vlddqu , VexRm_Lx , V(F20F00,F0,_,x,I,_,_,_ ), 0 , 97 , 0 , 5790 , 277, 108), // #1006
1086 INST(Vldmxcsr , VexM , V(000F00,AE,2,0,I,_,_,_ ), 0 , 162, 0 , 5797 , 278, 108), // #1007
1087 INST(Vmaskmovdqu , VexRm_ZDI , V(660F00,F7,_,0,I,_,_,_ ), 0 , 63 , 0 , 5806 , 279, 108), // #1008
1088 INST(Vmaskmovpd , VexRvmMvr_Lx , V(660F38,2D,_,x,0,_,_,_ ), V(660F38,2F,_,x,0,_,_,_ ), 88 , 83 , 5818 , 280, 108), // #1009
1089 INST(Vmaskmovps , VexRvmMvr_Lx , V(660F38,2C,_,x,0,_,_,_ ), V(660F38,2E,_,x,0,_,_,_ ), 88 , 84 , 5829 , 280, 108), // #1010
1090 INST(Vmaxpd , VexRvm_Lx , V(660F00,5F,_,x,I,1,4,FV ), 0 , 93 , 0 , 5840 , 281, 106), // #1011
1091 INST(Vmaxps , VexRvm_Lx , V(000F00,5F,_,x,I,0,4,FV ), 0 , 94 , 0 , 5847 , 282, 106), // #1012
1092 INST(Vmaxsd , VexRvm , V(F20F00,5F,_,I,I,1,3,T1S), 0 , 95 , 0 , 5854 , 283, 106), // #1013
1093 INST(Vmaxss , VexRvm , V(F30F00,5F,_,I,I,0,2,T1S), 0 , 96 , 0 , 5861 , 222, 106), // #1014
1094 INST(Vmcall , X86Op , O(000F01,C1,_,_,_,_,_,_ ), 0 , 21 , 0 , 5868 , 30 , 53 ), // #1015
1095 INST(Vmclear , X86M_Only , O(660F00,C7,6,_,_,_,_,_ ), 0 , 24 , 0 , 5875 , 284, 53 ), // #1016
1096 INST(Vmfunc , X86Op , O(000F01,D4,_,_,_,_,_,_ ), 0 , 21 , 0 , 5883 , 30 , 53 ), // #1017
1097 INST(Vminpd , VexRvm_Lx , V(660F00,5D,_,x,I,1,4,FV ), 0 , 93 , 0 , 5890 , 281, 106), // #1018
1098 INST(Vminps , VexRvm_Lx , V(000F00,5D,_,x,I,0,4,FV ), 0 , 94 , 0 , 5897 , 282, 106), // #1019
1099 INST(Vminsd , VexRvm , V(F20F00,5D,_,I,I,1,3,T1S), 0 , 95 , 0 , 5904 , 283, 106), // #1020
1100 INST(Vminss , VexRvm , V(F30F00,5D,_,I,I,0,2,T1S), 0 , 96 , 0 , 5911 , 222, 106), // #1021
1101 INST(Vmlaunch , X86Op , O(000F01,C2,_,_,_,_,_,_ ), 0 , 21 , 0 , 5918 , 30 , 53 ), // #1022
1102 INST(Vmload , X86Op_xAX , O(000F01,DA,_,_,_,_,_,_ ), 0 , 21 , 0 , 5927 , 285, 22 ), // #1023
1103 INST(Vmmcall , X86Op , O(000F01,D9,_,_,_,_,_,_ ), 0 , 21 , 0 , 5934 , 30 , 22 ), // #1024
1104 INST(Vmovapd , VexRmMr_Lx , V(660F00,28,_,x,I,1,4,FVM), V(660F00,29,_,x,I,1,4,FVM), 163, 85 , 5942 , 286, 106), // #1025
1105 INST(Vmovaps , VexRmMr_Lx , V(000F00,28,_,x,I,0,4,FVM), V(000F00,29,_,x,I,0,4,FVM), 164, 86 , 5950 , 286, 106), // #1026
1106 INST(Vmovd , VexMovdMovq , V(660F00,6E,_,0,0,0,2,T1S), V(660F00,7E,_,0,0,0,2,T1S), 165, 87 , 5958 , 287, 107), // #1027
1107 INST(Vmovddup , VexRm_Lx , V(F20F00,12,_,x,I,1,3,DUP), 0 , 166, 0 , 5964 , 288, 106), // #1028
1108 INST(Vmovdqa , VexRmMr_Lx , V(660F00,6F,_,x,I,_,_,_ ), V(660F00,7F,_,x,I,_,_,_ ), 63 , 88 , 5973 , 289, 108), // #1029
1109 INST(Vmovdqa32 , VexRmMr_Lx , E(660F00,6F,_,x,_,0,4,FVM), E(660F00,7F,_,x,_,0,4,FVM), 167, 89 , 5981 , 290, 111), // #1030
1110 INST(Vmovdqa64 , VexRmMr_Lx , E(660F00,6F,_,x,_,1,4,FVM), E(660F00,7F,_,x,_,1,4,FVM), 168, 90 , 5991 , 290, 111), // #1031
1111 INST(Vmovdqu , VexRmMr_Lx , V(F30F00,6F,_,x,I,_,_,_ ), V(F30F00,7F,_,x,I,_,_,_ ), 169, 91 , 6001 , 289, 108), // #1032
1112 INST(Vmovdqu16 , VexRmMr_Lx , E(F20F00,6F,_,x,_,1,4,FVM), E(F20F00,7F,_,x,_,1,4,FVM), 170, 92 , 6009 , 290, 113), // #1033
1113 INST(Vmovdqu32 , VexRmMr_Lx , E(F30F00,6F,_,x,_,0,4,FVM), E(F30F00,7F,_,x,_,0,4,FVM), 171, 93 , 6019 , 290, 111), // #1034
1114 INST(Vmovdqu64 , VexRmMr_Lx , E(F30F00,6F,_,x,_,1,4,FVM), E(F30F00,7F,_,x,_,1,4,FVM), 172, 94 , 6029 , 290, 111), // #1035
1115 INST(Vmovdqu8 , VexRmMr_Lx , E(F20F00,6F,_,x,_,0,4,FVM), E(F20F00,7F,_,x,_,0,4,FVM), 173, 95 , 6039 , 290, 113), // #1036
1116 INST(Vmovhlps , VexRvm , V(000F00,12,_,0,I,0,_,_ ), 0 , 66 , 0 , 6048 , 291, 107), // #1037
1117 INST(Vmovhpd , VexRvmMr , V(660F00,16,_,0,I,1,3,T1S), V(660F00,17,_,0,I,1,3,T1S), 113, 96 , 6057 , 292, 107), // #1038
1118 INST(Vmovhps , VexRvmMr , V(000F00,16,_,0,I,0,3,T2 ), V(000F00,17,_,0,I,0,3,T2 ), 174, 97 , 6065 , 292, 107), // #1039
1119 INST(Vmovlhps , VexRvm , V(000F00,16,_,0,I,0,_,_ ), 0 , 66 , 0 , 6073 , 291, 107), // #1040
1120 INST(Vmovlpd , VexRvmMr , V(660F00,12,_,0,I,1,3,T1S), V(660F00,13,_,0,I,1,3,T1S), 113, 98 , 6082 , 292, 107), // #1041
1121 INST(Vmovlps , VexRvmMr , V(000F00,12,_,0,I,0,3,T2 ), V(000F00,13,_,0,I,0,3,T2 ), 174, 99 , 6090 , 292, 107), // #1042
1122 INST(Vmovmskpd , VexRm_Lx , V(660F00,50,_,x,I,_,_,_ ), 0 , 63 , 0 , 6098 , 293, 108), // #1043
1123 INST(Vmovmskps , VexRm_Lx , V(000F00,50,_,x,I,_,_,_ ), 0 , 66 , 0 , 6108 , 293, 108), // #1044
1124 INST(Vmovntdq , VexMr_Lx , V(660F00,E7,_,x,I,0,4,FVM), 0 , 175, 0 , 6118 , 294, 106), // #1045
1125 INST(Vmovntdqa , VexRm_Lx , V(660F38,2A,_,x,I,0,4,FVM), 0 , 98 , 0 , 6127 , 295, 116), // #1046
1126 INST(Vmovntpd , VexMr_Lx , V(660F00,2B,_,x,I,1,4,FVM), 0 , 163, 0 , 6137 , 294, 106), // #1047
1127 INST(Vmovntps , VexMr_Lx , V(000F00,2B,_,x,I,0,4,FVM), 0 , 164, 0 , 6146 , 294, 106), // #1048
1128 INST(Vmovq , VexMovdMovq , V(660F00,6E,_,0,I,1,3,T1S), V(660F00,7E,_,0,I,1,3,T1S), 113, 100, 6155 , 296, 107), // #1049
1129 INST(Vmovsd , VexMovssMovsd , V(F20F00,10,_,I,I,1,3,T1S), V(F20F00,11,_,I,I,1,3,T1S), 95 , 101, 6161 , 297, 107), // #1050
1130 INST(Vmovshdup , VexRm_Lx , V(F30F00,16,_,x,I,0,4,FVM), 0 , 176, 0 , 6168 , 298, 106), // #1051
1131 INST(Vmovsldup , VexRm_Lx , V(F30F00,12,_,x,I,0,4,FVM), 0 , 176, 0 , 6178 , 298, 106), // #1052
1132 INST(Vmovss , VexMovssMovsd , V(F30F00,10,_,I,I,0,2,T1S), V(F30F00,11,_,I,I,0,2,T1S), 96 , 102, 6188 , 299, 107), // #1053
1133 INST(Vmovupd , VexRmMr_Lx , V(660F00,10,_,x,I,1,4,FVM), V(660F00,11,_,x,I,1,4,FVM), 163, 103, 6195 , 286, 106), // #1054
1134 INST(Vmovups , VexRmMr_Lx , V(000F00,10,_,x,I,0,4,FVM), V(000F00,11,_,x,I,0,4,FVM), 164, 104, 6203 , 286, 106), // #1055
1135 INST(Vmpsadbw , VexRvmi_Lx , V(660F3A,42,_,x,I,_,_,_ ), 0 , 67 , 0 , 6211 , 192, 128), // #1056
1136 INST(Vmptrld , X86M_Only , O(000F00,C7,6,_,_,_,_,_ ), 0 , 73 , 0 , 6220 , 284, 53 ), // #1057
1137 INST(Vmptrst , X86M_Only , O(000F00,C7,7,_,_,_,_,_ ), 0 , 22 , 0 , 6228 , 284, 53 ), // #1058
1138 INST(Vmread , X86Mr_NoSize , O(000F00,78,_,_,_,_,_,_ ), 0 , 4 , 0 , 6236 , 300, 53 ), // #1059
1139 INST(Vmresume , X86Op , O(000F01,C3,_,_,_,_,_,_ ), 0 , 21 , 0 , 6243 , 30 , 53 ), // #1060
1140 INST(Vmrun , X86Op_xAX , O(000F01,D8,_,_,_,_,_,_ ), 0 , 21 , 0 , 6252 , 285, 22 ), // #1061
1141 INST(Vmsave , X86Op_xAX , O(000F01,DB,_,_,_,_,_,_ ), 0 , 21 , 0 , 6258 , 285, 22 ), // #1062
1142 INST(Vmulpd , VexRvm_Lx , V(660F00,59,_,x,I,1,4,FV ), 0 , 93 , 0 , 6265 , 175, 106), // #1063
1143 INST(Vmulps , VexRvm_Lx , V(000F00,59,_,x,I,0,4,FV ), 0 , 94 , 0 , 6272 , 176, 106), // #1064
1144 INST(Vmulsd , VexRvm_Lx , V(F20F00,59,_,I,I,1,3,T1S), 0 , 95 , 0 , 6279 , 177, 107), // #1065
1145 INST(Vmulss , VexRvm_Lx , V(F30F00,59,_,I,I,0,2,T1S), 0 , 96 , 0 , 6286 , 178, 107), // #1066
1146 INST(Vmwrite , X86Rm_NoSize , O(000F00,79,_,_,_,_,_,_ ), 0 , 4 , 0 , 6293 , 301, 53 ), // #1067
1147 INST(Vmxon , X86M_Only , O(F30F00,C7,6,_,_,_,_,_ ), 0 , 177, 0 , 6301 , 284, 53 ), // #1068
1148 INST(Vorpd , VexRvm_Lx , V(660F00,56,_,x,I,1,4,FV ), 0 , 93 , 0 , 6307 , 187, 112), // #1069
1149 INST(Vorps , VexRvm_Lx , V(000F00,56,_,x,I,0,4,FV ), 0 , 94 , 0 , 6313 , 188, 112), // #1070
1150 INST(Vp4dpwssd , VexRm_T1_4X , E(F20F38,52,_,2,_,0,2,T4X), 0 , 92 , 0 , 6319 , 173, 129), // #1071
1151 INST(Vp4dpwssds , VexRm_T1_4X , E(F20F38,53,_,2,_,0,2,T4X), 0 , 92 , 0 , 6329 , 173, 129), // #1072
1152 INST(Vpabsb , VexRm_Lx , V(660F38,1C,_,x,I,_,4,FVM), 0 , 98 , 0 , 6340 , 298, 130), // #1073
1153 INST(Vpabsd , VexRm_Lx , V(660F38,1E,_,x,I,0,4,FV ), 0 , 154, 0 , 6347 , 298, 116), // #1074
1154 INST(Vpabsq , VexRm_Lx , E(660F38,1F,_,x,_,1,4,FV ), 0 , 103, 0 , 6354 , 241, 111), // #1075
1155 INST(Vpabsw , VexRm_Lx , V(660F38,1D,_,x,I,_,4,FVM), 0 , 98 , 0 , 6361 , 298, 130), // #1076
1156 INST(Vpackssdw , VexRvm_Lx , V(660F00,6B,_,x,I,0,4,FV ), 0 , 124, 0 , 6368 , 186, 130), // #1077
1157 INST(Vpacksswb , VexRvm_Lx , V(660F00,63,_,x,I,I,4,FVM), 0 , 175, 0 , 6378 , 272, 130), // #1078
1158 INST(Vpackusdw , VexRvm_Lx , V(660F38,2B,_,x,I,0,4,FV ), 0 , 154, 0 , 6388 , 186, 130), // #1079
1159 INST(Vpackuswb , VexRvm_Lx , V(660F00,67,_,x,I,I,4,FVM), 0 , 175, 0 , 6398 , 272, 130), // #1080
1160 INST(Vpaddb , VexRvm_Lx , V(660F00,FC,_,x,I,I,4,FVM), 0 , 175, 0 , 6408 , 272, 130), // #1081
1161 INST(Vpaddd , VexRvm_Lx , V(660F00,FE,_,x,I,0,4,FV ), 0 , 124, 0 , 6415 , 186, 116), // #1082
1162 INST(Vpaddq , VexRvm_Lx , V(660F00,D4,_,x,I,1,4,FV ), 0 , 93 , 0 , 6422 , 185, 116), // #1083
1163 INST(Vpaddsb , VexRvm_Lx , V(660F00,EC,_,x,I,I,4,FVM), 0 , 175, 0 , 6429 , 272, 130), // #1084
1164 INST(Vpaddsw , VexRvm_Lx , V(660F00,ED,_,x,I,I,4,FVM), 0 , 175, 0 , 6437 , 272, 130), // #1085
1165 INST(Vpaddusb , VexRvm_Lx , V(660F00,DC,_,x,I,I,4,FVM), 0 , 175, 0 , 6445 , 272, 130), // #1086
1166 INST(Vpaddusw , VexRvm_Lx , V(660F00,DD,_,x,I,I,4,FVM), 0 , 175, 0 , 6454 , 272, 130), // #1087
1167 INST(Vpaddw , VexRvm_Lx , V(660F00,FD,_,x,I,I,4,FVM), 0 , 175, 0 , 6463 , 272, 130), // #1088
1168 INST(Vpalignr , VexRvmi_Lx , V(660F3A,0F,_,x,I,I,4,FVM), 0 , 178, 0 , 6470 , 271, 130), // #1089
1169 INST(Vpand , VexRvm_Lx , V(660F00,DB,_,x,I,_,_,_ ), 0 , 63 , 0 , 6479 , 302, 128), // #1090
1170 INST(Vpandd , VexRvm_Lx , E(660F00,DB,_,x,_,0,4,FV ), 0 , 179, 0 , 6485 , 303, 111), // #1091
1171 INST(Vpandn , VexRvm_Lx , V(660F00,DF,_,x,I,_,_,_ ), 0 , 63 , 0 , 6492 , 304, 128), // #1092
1172 INST(Vpandnd , VexRvm_Lx , E(660F00,DF,_,x,_,0,4,FV ), 0 , 179, 0 , 6499 , 305, 111), // #1093
1173 INST(Vpandnq , VexRvm_Lx , E(660F00,DF,_,x,_,1,4,FV ), 0 , 121, 0 , 6507 , 306, 111), // #1094
1174 INST(Vpandq , VexRvm_Lx , E(660F00,DB,_,x,_,1,4,FV ), 0 , 121, 0 , 6515 , 307, 111), // #1095
1175 INST(Vpavgb , VexRvm_Lx , V(660F00,E0,_,x,I,I,4,FVM), 0 , 175, 0 , 6522 , 272, 130), // #1096
1176 INST(Vpavgw , VexRvm_Lx , V(660F00,E3,_,x,I,I,4,FVM), 0 , 175, 0 , 6529 , 272, 130), // #1097
1177 INST(Vpblendd , VexRvmi_Lx , V(660F3A,02,_,x,0,_,_,_ ), 0 , 67 , 0 , 6536 , 192, 115), // #1098
1178 INST(Vpblendvb , VexRvmr , V(660F3A,4C,_,x,0,_,_,_ ), 0 , 67 , 0 , 6545 , 193, 128), // #1099
1179 INST(Vpblendw , VexRvmi_Lx , V(660F3A,0E,_,x,I,_,_,_ ), 0 , 67 , 0 , 6555 , 192, 128), // #1100
1180 INST(Vpbroadcastb , VexRm_Lx_Bcst , V(660F38,78,_,x,0,0,0,T1S), E(660F38,7A,_,x,0,0,0,T1S), 180, 105, 6564 , 308, 131), // #1101
1181 INST(Vpbroadcastd , VexRm_Lx_Bcst , V(660F38,58,_,x,0,0,2,T1S), E(660F38,7C,_,x,0,0,0,T1S), 112, 106, 6577 , 309, 125), // #1102
1182 INST(Vpbroadcastmb2d , VexRm_Lx , E(F30F38,3A,_,x,_,0,_,_ ), 0 , 119, 0 , 6590 , 310, 132), // #1103
1183 INST(Vpbroadcastmb2q , VexRm_Lx , E(F30F38,2A,_,x,_,1,_,_ ), 0 , 181, 0 , 6606 , 310, 132), // #1104
1184 INST(Vpbroadcastq , VexRm_Lx_Bcst , V(660F38,59,_,x,0,1,3,T1S), E(660F38,7C,_,x,0,1,0,T1S), 111, 107, 6622 , 311, 125), // #1105
1185 INST(Vpbroadcastw , VexRm_Lx_Bcst , V(660F38,79,_,x,0,0,1,T1S), E(660F38,7B,_,x,0,0,0,T1S), 182, 108, 6635 , 312, 131), // #1106
1186 INST(Vpclmulqdq , VexRvmi_Lx , V(660F3A,44,_,x,I,_,4,FVM), 0 , 178, 0 , 6648 , 313, 133), // #1107
1187 INST(Vpcmov , VexRvrmRvmr_Lx , V(XOP_M8,A2,_,x,x,_,_,_ ), 0 , 183, 0 , 6659 , 250, 124), // #1108
1188 INST(Vpcmpb , VexRvmi_Lx , E(660F3A,3F,_,x,_,0,4,FVM), 0 , 142, 0 , 6666 , 314, 113), // #1109
1189 INST(Vpcmpd , VexRvmi_Lx , E(660F3A,1F,_,x,_,0,4,FV ), 0 , 99 , 0 , 6673 , 315, 111), // #1110
1190 INST(Vpcmpeqb , VexRvm_Lx , V(660F00,74,_,x,I,I,4,FV ), 0 , 124, 0 , 6680 , 316, 130), // #1111
1191 INST(Vpcmpeqd , VexRvm_Lx , V(660F00,76,_,x,I,0,4,FVM), 0 , 175, 0 , 6689 , 317, 116), // #1112
1192 INST(Vpcmpeqq , VexRvm_Lx , V(660F38,29,_,x,I,1,4,FVM), 0 , 184, 0 , 6698 , 318, 116), // #1113
1193 INST(Vpcmpeqw , VexRvm_Lx , V(660F00,75,_,x,I,I,4,FV ), 0 , 124, 0 , 6707 , 316, 130), // #1114
1194 INST(Vpcmpestri , VexRmi , V(660F3A,61,_,0,I,_,_,_ ), 0 , 67 , 0 , 6716 , 319, 134), // #1115
1195 INST(Vpcmpestrm , VexRmi , V(660F3A,60,_,0,I,_,_,_ ), 0 , 67 , 0 , 6727 , 320, 134), // #1116
1196 INST(Vpcmpgtb , VexRvm_Lx , V(660F00,64,_,x,I,I,4,FV ), 0 , 124, 0 , 6738 , 316, 130), // #1117
1197 INST(Vpcmpgtd , VexRvm_Lx , V(660F00,66,_,x,I,0,4,FVM), 0 , 175, 0 , 6747 , 317, 116), // #1118
1198 INST(Vpcmpgtq , VexRvm_Lx , V(660F38,37,_,x,I,1,4,FVM), 0 , 184, 0 , 6756 , 318, 116), // #1119
1199 INST(Vpcmpgtw , VexRvm_Lx , V(660F00,65,_,x,I,I,4,FV ), 0 , 124, 0 , 6765 , 316, 130), // #1120
1200 INST(Vpcmpistri , VexRmi , V(660F3A,63,_,0,I,_,_,_ ), 0 , 67 , 0 , 6774 , 321, 134), // #1121
1201 INST(Vpcmpistrm , VexRmi , V(660F3A,62,_,0,I,_,_,_ ), 0 , 67 , 0 , 6785 , 322, 134), // #1122
1202 INST(Vpcmpq , VexRvmi_Lx , E(660F3A,1F,_,x,_,1,4,FV ), 0 , 100, 0 , 6796 , 323, 111), // #1123
1203 INST(Vpcmpub , VexRvmi_Lx , E(660F3A,3E,_,x,_,0,4,FVM), 0 , 142, 0 , 6803 , 314, 113), // #1124
1204 INST(Vpcmpud , VexRvmi_Lx , E(660F3A,1E,_,x,_,0,4,FV ), 0 , 99 , 0 , 6811 , 315, 111), // #1125
1205 INST(Vpcmpuq , VexRvmi_Lx , E(660F3A,1E,_,x,_,1,4,FV ), 0 , 100, 0 , 6819 , 323, 111), // #1126
1206 INST(Vpcmpuw , VexRvmi_Lx , E(660F3A,3E,_,x,_,1,4,FVM), 0 , 185, 0 , 6827 , 323, 113), // #1127
1207 INST(Vpcmpw , VexRvmi_Lx , E(660F3A,3F,_,x,_,1,4,FVM), 0 , 185, 0 , 6835 , 323, 113), // #1128
1208 INST(Vpcomb , VexRvmi , V(XOP_M8,CC,_,0,0,_,_,_ ), 0 , 183, 0 , 6842 , 238, 124), // #1129
1209 INST(Vpcomd , VexRvmi , V(XOP_M8,CE,_,0,0,_,_,_ ), 0 , 183, 0 , 6849 , 238, 124), // #1130
1210 INST(Vpcompressb , VexMr_Lx , E(660F38,63,_,x,_,0,0,T1S), 0 , 186, 0 , 6856 , 207, 135), // #1131
1211 INST(Vpcompressd , VexMr_Lx , E(660F38,8B,_,x,_,0,2,T1S), 0 , 116, 0 , 6868 , 207, 111), // #1132
1212 INST(Vpcompressq , VexMr_Lx , E(660F38,8B,_,x,_,1,3,T1S), 0 , 115, 0 , 6880 , 207, 111), // #1133
1213 INST(Vpcompressw , VexMr_Lx , E(660F38,63,_,x,_,1,1,T1S), 0 , 187, 0 , 6892 , 207, 135), // #1134
1214 INST(Vpcomq , VexRvmi , V(XOP_M8,CF,_,0,0,_,_,_ ), 0 , 183, 0 , 6904 , 238, 124), // #1135
1215 INST(Vpcomub , VexRvmi , V(XOP_M8,EC,_,0,0,_,_,_ ), 0 , 183, 0 , 6911 , 238, 124), // #1136
1216 INST(Vpcomud , VexRvmi , V(XOP_M8,EE,_,0,0,_,_,_ ), 0 , 183, 0 , 6919 , 238, 124), // #1137
1217 INST(Vpcomuq , VexRvmi , V(XOP_M8,EF,_,0,0,_,_,_ ), 0 , 183, 0 , 6927 , 238, 124), // #1138
1218 INST(Vpcomuw , VexRvmi , V(XOP_M8,ED,_,0,0,_,_,_ ), 0 , 183, 0 , 6935 , 238, 124), // #1139
1219 INST(Vpcomw , VexRvmi , V(XOP_M8,CD,_,0,0,_,_,_ ), 0 , 183, 0 , 6943 , 238, 124), // #1140
1220 INST(Vpconflictd , VexRm_Lx , E(660F38,C4,_,x,_,0,4,FV ), 0 , 102, 0 , 6950 , 324, 132), // #1141
1221 INST(Vpconflictq , VexRm_Lx , E(660F38,C4,_,x,_,1,4,FV ), 0 , 103, 0 , 6962 , 324, 132), // #1142
1222 INST(Vpdpbusd , VexRvm_Lx , E(660F38,50,_,x,_,0,4,FV ), 0 , 102, 0 , 6974 , 190, 136), // #1143
1223 INST(Vpdpbusds , VexRvm_Lx , E(660F38,51,_,x,_,0,4,FV ), 0 , 102, 0 , 6983 , 190, 136), // #1144
1224 INST(Vpdpwssd , VexRvm_Lx , E(660F38,52,_,x,_,0,4,FV ), 0 , 102, 0 , 6993 , 190, 136), // #1145
1225 INST(Vpdpwssds , VexRvm_Lx , E(660F38,53,_,x,_,0,4,FV ), 0 , 102, 0 , 7002 , 190, 136), // #1146
1226 INST(Vperm2f128 , VexRvmi , V(660F3A,06,_,1,0,_,_,_ ), 0 , 145, 0 , 7012 , 325, 108), // #1147
1227 INST(Vperm2i128 , VexRvmi , V(660F3A,46,_,1,0,_,_,_ ), 0 , 145, 0 , 7023 , 325, 115), // #1148
1228 INST(Vpermb , VexRvm_Lx , E(660F38,8D,_,x,_,0,4,FVM), 0 , 101, 0 , 7034 , 189, 137), // #1149
1229 INST(Vpermd , VexRvm_Lx , V(660F38,36,_,x,0,0,4,FV ), 0 , 154, 0 , 7041 , 326, 125), // #1150
1230 INST(Vpermi2b , VexRvm_Lx , E(660F38,75,_,x,_,0,4,FVM), 0 , 101, 0 , 7048 , 189, 137), // #1151
1231 INST(Vpermi2d , VexRvm_Lx , E(660F38,76,_,x,_,0,4,FV ), 0 , 102, 0 , 7057 , 190, 111), // #1152
1232 INST(Vpermi2pd , VexRvm_Lx , E(660F38,77,_,x,_,1,4,FV ), 0 , 103, 0 , 7066 , 191, 111), // #1153
1233 INST(Vpermi2ps , VexRvm_Lx , E(660F38,77,_,x,_,0,4,FV ), 0 , 102, 0 , 7076 , 190, 111), // #1154
1234 INST(Vpermi2q , VexRvm_Lx , E(660F38,76,_,x,_,1,4,FV ), 0 , 103, 0 , 7086 , 191, 111), // #1155
1235 INST(Vpermi2w , VexRvm_Lx , E(660F38,75,_,x,_,1,4,FVM), 0 , 104, 0 , 7095 , 189, 113), // #1156
1236 INST(Vpermil2pd , VexRvrmiRvmri_Lx , V(660F3A,49,_,x,x,_,_,_ ), 0 , 67 , 0 , 7104 , 327, 124), // #1157
1237 INST(Vpermil2ps , VexRvrmiRvmri_Lx , V(660F3A,48,_,x,x,_,_,_ ), 0 , 67 , 0 , 7115 , 327, 124), // #1158
1238 INST(Vpermilpd , VexRvmRmi_Lx , V(660F38,0D,_,x,0,1,4,FV ), V(660F3A,05,_,x,0,1,4,FV ), 188, 109, 7126 , 328, 106), // #1159
1239 INST(Vpermilps , VexRvmRmi_Lx , V(660F38,0C,_,x,0,0,4,FV ), V(660F3A,04,_,x,0,0,4,FV ), 154, 110, 7136 , 328, 106), // #1160
1240 INST(Vpermpd , VexRvmRmi_Lx , E(660F38,16,_,x,1,1,4,FV ), V(660F3A,01,_,x,1,1,4,FV ), 189, 111, 7146 , 329, 125), // #1161
1241 INST(Vpermps , VexRvm_Lx , V(660F38,16,_,x,0,0,4,FV ), 0 , 154, 0 , 7154 , 326, 125), // #1162
1242 INST(Vpermq , VexRvmRmi_Lx , V(660F38,36,_,x,_,1,4,FV ), V(660F3A,00,_,x,1,1,4,FV ), 188, 112, 7162 , 329, 125), // #1163
1243 INST(Vpermt2b , VexRvm_Lx , E(660F38,7D,_,x,_,0,4,FVM), 0 , 101, 0 , 7169 , 189, 137), // #1164
1244 INST(Vpermt2d , VexRvm_Lx , E(660F38,7E,_,x,_,0,4,FV ), 0 , 102, 0 , 7178 , 190, 111), // #1165
1245 INST(Vpermt2pd , VexRvm_Lx , E(660F38,7F,_,x,_,1,4,FV ), 0 , 103, 0 , 7187 , 191, 111), // #1166
1246 INST(Vpermt2ps , VexRvm_Lx , E(660F38,7F,_,x,_,0,4,FV ), 0 , 102, 0 , 7197 , 190, 111), // #1167
1247 INST(Vpermt2q , VexRvm_Lx , E(660F38,7E,_,x,_,1,4,FV ), 0 , 103, 0 , 7207 , 191, 111), // #1168
1248 INST(Vpermt2w , VexRvm_Lx , E(660F38,7D,_,x,_,1,4,FVM), 0 , 104, 0 , 7216 , 189, 113), // #1169
1249 INST(Vpermw , VexRvm_Lx , E(660F38,8D,_,x,_,1,4,FVM), 0 , 104, 0 , 7225 , 189, 113), // #1170
1250 INST(Vpexpandb , VexRm_Lx , E(660F38,62,_,x,_,0,0,T1S), 0 , 186, 0 , 7232 , 241, 135), // #1171
1251 INST(Vpexpandd , VexRm_Lx , E(660F38,89,_,x,_,0,2,T1S), 0 , 116, 0 , 7242 , 241, 111), // #1172
1252 INST(Vpexpandq , VexRm_Lx , E(660F38,89,_,x,_,1,3,T1S), 0 , 115, 0 , 7252 , 241, 111), // #1173
1253 INST(Vpexpandw , VexRm_Lx , E(660F38,62,_,x,_,1,1,T1S), 0 , 187, 0 , 7262 , 241, 135), // #1174
1254 INST(Vpextrb , VexMri , V(660F3A,14,_,0,0,I,0,T1S), 0 , 190, 0 , 7272 , 330, 138), // #1175
1255 INST(Vpextrd , VexMri , V(660F3A,16,_,0,0,0,2,T1S), 0 , 150, 0 , 7280 , 245, 139), // #1176
1256 INST(Vpextrq , VexMri , V(660F3A,16,_,0,1,1,3,T1S), 0 , 191, 0 , 7288 , 331, 139), // #1177
1257 INST(Vpextrw , VexMri , V(660F3A,15,_,0,0,I,1,T1S), 0 , 192, 0 , 7296 , 332, 138), // #1178
1258 INST(Vpgatherdd , VexRmvRm_VM , V(660F38,90,_,x,0,_,_,_ ), V(660F38,90,_,x,_,0,2,T1S), 88 , 113, 7304 , 261, 125), // #1179
1259 INST(Vpgatherdq , VexRmvRm_VM , V(660F38,90,_,x,1,_,_,_ ), V(660F38,90,_,x,_,1,3,T1S), 156, 114, 7315 , 260, 125), // #1180
1260 INST(Vpgatherqd , VexRmvRm_VM , V(660F38,91,_,x,0,_,_,_ ), V(660F38,91,_,x,_,0,2,T1S), 88 , 115, 7326 , 266, 125), // #1181
1261 INST(Vpgatherqq , VexRmvRm_VM , V(660F38,91,_,x,1,_,_,_ ), V(660F38,91,_,x,_,1,3,T1S), 156, 116, 7337 , 265, 125), // #1182
1262 INST(Vphaddbd , VexRm , V(XOP_M9,C2,_,0,0,_,_,_ ), 0 , 72 , 0 , 7348 , 181, 124), // #1183
1263 INST(Vphaddbq , VexRm , V(XOP_M9,C3,_,0,0,_,_,_ ), 0 , 72 , 0 , 7357 , 181, 124), // #1184
1264 INST(Vphaddbw , VexRm , V(XOP_M9,C1,_,0,0,_,_,_ ), 0 , 72 , 0 , 7366 , 181, 124), // #1185
1265 INST(Vphaddd , VexRvm_Lx , V(660F38,02,_,x,I,_,_,_ ), 0 , 88 , 0 , 7375 , 179, 128), // #1186
1266 INST(Vphadddq , VexRm , V(XOP_M9,CB,_,0,0,_,_,_ ), 0 , 72 , 0 , 7383 , 181, 124), // #1187
1267 INST(Vphaddsw , VexRvm_Lx , V(660F38,03,_,x,I,_,_,_ ), 0 , 88 , 0 , 7392 , 179, 128), // #1188
1268 INST(Vphaddubd , VexRm , V(XOP_M9,D2,_,0,0,_,_,_ ), 0 , 72 , 0 , 7401 , 181, 124), // #1189
1269 INST(Vphaddubq , VexRm , V(XOP_M9,D3,_,0,0,_,_,_ ), 0 , 72 , 0 , 7411 , 181, 124), // #1190
1270 INST(Vphaddubw , VexRm , V(XOP_M9,D1,_,0,0,_,_,_ ), 0 , 72 , 0 , 7421 , 181, 124), // #1191
1271 INST(Vphaddudq , VexRm , V(XOP_M9,DB,_,0,0,_,_,_ ), 0 , 72 , 0 , 7431 , 181, 124), // #1192
1272 INST(Vphadduwd , VexRm , V(XOP_M9,D6,_,0,0,_,_,_ ), 0 , 72 , 0 , 7441 , 181, 124), // #1193
1273 INST(Vphadduwq , VexRm , V(XOP_M9,D7,_,0,0,_,_,_ ), 0 , 72 , 0 , 7451 , 181, 124), // #1194
1274 INST(Vphaddw , VexRvm_Lx , V(660F38,01,_,x,I,_,_,_ ), 0 , 88 , 0 , 7461 , 179, 128), // #1195
1275 INST(Vphaddwd , VexRm , V(XOP_M9,C6,_,0,0,_,_,_ ), 0 , 72 , 0 , 7469 , 181, 124), // #1196
1276 INST(Vphaddwq , VexRm , V(XOP_M9,C7,_,0,0,_,_,_ ), 0 , 72 , 0 , 7478 , 181, 124), // #1197
1277 INST(Vphminposuw , VexRm , V(660F38,41,_,0,I,_,_,_ ), 0 , 88 , 0 , 7487 , 181, 108), // #1198
1278 INST(Vphsubbw , VexRm , V(XOP_M9,E1,_,0,0,_,_,_ ), 0 , 72 , 0 , 7499 , 181, 124), // #1199
1279 INST(Vphsubd , VexRvm_Lx , V(660F38,06,_,x,I,_,_,_ ), 0 , 88 , 0 , 7508 , 179, 128), // #1200
1280 INST(Vphsubdq , VexRm , V(XOP_M9,E3,_,0,0,_,_,_ ), 0 , 72 , 0 , 7516 , 181, 124), // #1201
1281 INST(Vphsubsw , VexRvm_Lx , V(660F38,07,_,x,I,_,_,_ ), 0 , 88 , 0 , 7525 , 179, 128), // #1202
1282 INST(Vphsubw , VexRvm_Lx , V(660F38,05,_,x,I,_,_,_ ), 0 , 88 , 0 , 7534 , 179, 128), // #1203
1283 INST(Vphsubwd , VexRm , V(XOP_M9,E2,_,0,0,_,_,_ ), 0 , 72 , 0 , 7542 , 181, 124), // #1204
1284 INST(Vpinsrb , VexRvmi , V(660F3A,20,_,0,0,I,0,T1S), 0 , 190, 0 , 7551 , 333, 138), // #1205
1285 INST(Vpinsrd , VexRvmi , V(660F3A,22,_,0,0,0,2,T1S), 0 , 150, 0 , 7559 , 334, 139), // #1206
1286 INST(Vpinsrq , VexRvmi , V(660F3A,22,_,0,1,1,3,T1S), 0 , 191, 0 , 7567 , 335, 139), // #1207
1287 INST(Vpinsrw , VexRvmi , V(660F00,C4,_,0,0,I,1,T1S), 0 , 193, 0 , 7575 , 336, 138), // #1208
1288 INST(Vplzcntd , VexRm_Lx , E(660F38,44,_,x,_,0,4,FV ), 0 , 102, 0 , 7583 , 324, 132), // #1209
1289 INST(Vplzcntq , VexRm_Lx , E(660F38,44,_,x,_,1,4,FV ), 0 , 103, 0 , 7592 , 337, 132), // #1210
1290 INST(Vpmacsdd , VexRvmr , V(XOP_M8,9E,_,0,0,_,_,_ ), 0 , 183, 0 , 7601 , 338, 124), // #1211
1291 INST(Vpmacsdqh , VexRvmr , V(XOP_M8,9F,_,0,0,_,_,_ ), 0 , 183, 0 , 7610 , 338, 124), // #1212
1292 INST(Vpmacsdql , VexRvmr , V(XOP_M8,97,_,0,0,_,_,_ ), 0 , 183, 0 , 7620 , 338, 124), // #1213
1293 INST(Vpmacssdd , VexRvmr , V(XOP_M8,8E,_,0,0,_,_,_ ), 0 , 183, 0 , 7630 , 338, 124), // #1214
1294 INST(Vpmacssdqh , VexRvmr , V(XOP_M8,8F,_,0,0,_,_,_ ), 0 , 183, 0 , 7640 , 338, 124), // #1215
1295 INST(Vpmacssdql , VexRvmr , V(XOP_M8,87,_,0,0,_,_,_ ), 0 , 183, 0 , 7651 , 338, 124), // #1216
1296 INST(Vpmacsswd , VexRvmr , V(XOP_M8,86,_,0,0,_,_,_ ), 0 , 183, 0 , 7662 , 338, 124), // #1217
1297 INST(Vpmacssww , VexRvmr , V(XOP_M8,85,_,0,0,_,_,_ ), 0 , 183, 0 , 7672 , 338, 124), // #1218
1298 INST(Vpmacswd , VexRvmr , V(XOP_M8,96,_,0,0,_,_,_ ), 0 , 183, 0 , 7682 , 338, 124), // #1219
1299 INST(Vpmacsww , VexRvmr , V(XOP_M8,95,_,0,0,_,_,_ ), 0 , 183, 0 , 7691 , 338, 124), // #1220
1300 INST(Vpmadcsswd , VexRvmr , V(XOP_M8,A6,_,0,0,_,_,_ ), 0 , 183, 0 , 7700 , 338, 124), // #1221
1301 INST(Vpmadcswd , VexRvmr , V(XOP_M8,B6,_,0,0,_,_,_ ), 0 , 183, 0 , 7711 , 338, 124), // #1222
1302 INST(Vpmadd52huq , VexRvm_Lx , E(660F38,B5,_,x,_,1,4,FV ), 0 , 103, 0 , 7721 , 191, 140), // #1223
1303 INST(Vpmadd52luq , VexRvm_Lx , E(660F38,B4,_,x,_,1,4,FV ), 0 , 103, 0 , 7733 , 191, 140), // #1224
1304 INST(Vpmaddubsw , VexRvm_Lx , V(660F38,04,_,x,I,I,4,FVM), 0 , 98 , 0 , 7745 , 272, 130), // #1225
1305 INST(Vpmaddwd , VexRvm_Lx , V(660F00,F5,_,x,I,I,4,FVM), 0 , 175, 0 , 7756 , 272, 130), // #1226
1306 INST(Vpmaskmovd , VexRvmMvr_Lx , V(660F38,8C,_,x,0,_,_,_ ), V(660F38,8E,_,x,0,_,_,_ ), 88 , 117, 7765 , 280, 115), // #1227
1307 INST(Vpmaskmovq , VexRvmMvr_Lx , V(660F38,8C,_,x,1,_,_,_ ), V(660F38,8E,_,x,1,_,_,_ ), 156, 118, 7776 , 280, 115), // #1228
1308 INST(Vpmaxsb , VexRvm_Lx , V(660F38,3C,_,x,I,I,4,FVM), 0 , 98 , 0 , 7787 , 339, 130), // #1229
1309 INST(Vpmaxsd , VexRvm_Lx , V(660F38,3D,_,x,I,0,4,FV ), 0 , 154, 0 , 7795 , 188, 116), // #1230
1310 INST(Vpmaxsq , VexRvm_Lx , E(660F38,3D,_,x,_,1,4,FV ), 0 , 103, 0 , 7803 , 191, 111), // #1231
1311 INST(Vpmaxsw , VexRvm_Lx , V(660F00,EE,_,x,I,I,4,FVM), 0 , 175, 0 , 7811 , 339, 130), // #1232
1312 INST(Vpmaxub , VexRvm_Lx , V(660F00,DE,_,x,I,I,4,FVM), 0 , 175, 0 , 7819 , 339, 130), // #1233
1313 INST(Vpmaxud , VexRvm_Lx , V(660F38,3F,_,x,I,0,4,FV ), 0 , 154, 0 , 7827 , 188, 116), // #1234
1314 INST(Vpmaxuq , VexRvm_Lx , E(660F38,3F,_,x,_,1,4,FV ), 0 , 103, 0 , 7835 , 191, 111), // #1235
1315 INST(Vpmaxuw , VexRvm_Lx , V(660F38,3E,_,x,I,I,4,FVM), 0 , 98 , 0 , 7843 , 339, 130), // #1236
1316 INST(Vpminsb , VexRvm_Lx , V(660F38,38,_,x,I,I,4,FVM), 0 , 98 , 0 , 7851 , 339, 130), // #1237
1317 INST(Vpminsd , VexRvm_Lx , V(660F38,39,_,x,I,0,4,FV ), 0 , 154, 0 , 7859 , 188, 116), // #1238
1318 INST(Vpminsq , VexRvm_Lx , E(660F38,39,_,x,_,1,4,FV ), 0 , 103, 0 , 7867 , 191, 111), // #1239
1319 INST(Vpminsw , VexRvm_Lx , V(660F00,EA,_,x,I,I,4,FVM), 0 , 175, 0 , 7875 , 339, 130), // #1240
1320 INST(Vpminub , VexRvm_Lx , V(660F00,DA,_,x,I,_,4,FVM), 0 , 175, 0 , 7883 , 339, 130), // #1241
1321 INST(Vpminud , VexRvm_Lx , V(660F38,3B,_,x,I,0,4,FV ), 0 , 154, 0 , 7891 , 188, 116), // #1242
1322 INST(Vpminuq , VexRvm_Lx , E(660F38,3B,_,x,_,1,4,FV ), 0 , 103, 0 , 7899 , 191, 111), // #1243
1323 INST(Vpminuw , VexRvm_Lx , V(660F38,3A,_,x,I,_,4,FVM), 0 , 98 , 0 , 7907 , 339, 130), // #1244
1324 INST(Vpmovb2m , VexRm_Lx , E(F30F38,29,_,x,_,0,_,_ ), 0 , 119, 0 , 7915 , 340, 113), // #1245
1325 INST(Vpmovd2m , VexRm_Lx , E(F30F38,39,_,x,_,0,_,_ ), 0 , 119, 0 , 7924 , 340, 114), // #1246
1326 INST(Vpmovdb , VexMr_Lx , E(F30F38,31,_,x,_,0,2,QVM), 0 , 194, 0 , 7933 , 341, 111), // #1247
1327 INST(Vpmovdw , VexMr_Lx , E(F30F38,33,_,x,_,0,3,HVM), 0 , 195, 0 , 7941 , 342, 111), // #1248
1328 INST(Vpmovm2b , VexRm_Lx , E(F30F38,28,_,x,_,0,_,_ ), 0 , 119, 0 , 7949 , 310, 113), // #1249
1329 INST(Vpmovm2d , VexRm_Lx , E(F30F38,38,_,x,_,0,_,_ ), 0 , 119, 0 , 7958 , 310, 114), // #1250
1330 INST(Vpmovm2q , VexRm_Lx , E(F30F38,38,_,x,_,1,_,_ ), 0 , 181, 0 , 7967 , 310, 114), // #1251
1331 INST(Vpmovm2w , VexRm_Lx , E(F30F38,28,_,x,_,1,_,_ ), 0 , 181, 0 , 7976 , 310, 113), // #1252
1332 INST(Vpmovmskb , VexRm_Lx , V(660F00,D7,_,x,I,_,_,_ ), 0 , 63 , 0 , 7985 , 293, 128), // #1253
1333 INST(Vpmovq2m , VexRm_Lx , E(F30F38,39,_,x,_,1,_,_ ), 0 , 181, 0 , 7995 , 340, 114), // #1254
1334 INST(Vpmovqb , VexMr_Lx , E(F30F38,32,_,x,_,0,1,OVM), 0 , 196, 0 , 8004 , 343, 111), // #1255
1335 INST(Vpmovqd , VexMr_Lx , E(F30F38,35,_,x,_,0,3,HVM), 0 , 195, 0 , 8012 , 342, 111), // #1256
1336 INST(Vpmovqw , VexMr_Lx , E(F30F38,34,_,x,_,0,2,QVM), 0 , 194, 0 , 8020 , 341, 111), // #1257
1337 INST(Vpmovsdb , VexMr_Lx , E(F30F38,21,_,x,_,0,2,QVM), 0 , 194, 0 , 8028 , 341, 111), // #1258
1338 INST(Vpmovsdw , VexMr_Lx , E(F30F38,23,_,x,_,0,3,HVM), 0 , 195, 0 , 8037 , 342, 111), // #1259
1339 INST(Vpmovsqb , VexMr_Lx , E(F30F38,22,_,x,_,0,1,OVM), 0 , 196, 0 , 8046 , 343, 111), // #1260
1340 INST(Vpmovsqd , VexMr_Lx , E(F30F38,25,_,x,_,0,3,HVM), 0 , 195, 0 , 8055 , 342, 111), // #1261
1341 INST(Vpmovsqw , VexMr_Lx , E(F30F38,24,_,x,_,0,2,QVM), 0 , 194, 0 , 8064 , 341, 111), // #1262
1342 INST(Vpmovswb , VexMr_Lx , E(F30F38,20,_,x,_,0,3,HVM), 0 , 195, 0 , 8073 , 342, 113), // #1263
1343 INST(Vpmovsxbd , VexRm_Lx , V(660F38,21,_,x,I,I,2,QVM), 0 , 197, 0 , 8082 , 344, 116), // #1264
1344 INST(Vpmovsxbq , VexRm_Lx , V(660F38,22,_,x,I,I,1,OVM), 0 , 198, 0 , 8092 , 345, 116), // #1265
1345 INST(Vpmovsxbw , VexRm_Lx , V(660F38,20,_,x,I,I,3,HVM), 0 , 123, 0 , 8102 , 346, 130), // #1266
1346 INST(Vpmovsxdq , VexRm_Lx , V(660F38,25,_,x,I,0,3,HVM), 0 , 123, 0 , 8112 , 346, 116), // #1267
1347 INST(Vpmovsxwd , VexRm_Lx , V(660F38,23,_,x,I,I,3,HVM), 0 , 123, 0 , 8122 , 346, 116), // #1268
1348 INST(Vpmovsxwq , VexRm_Lx , V(660F38,24,_,x,I,I,2,QVM), 0 , 197, 0 , 8132 , 344, 116), // #1269
1349 INST(Vpmovusdb , VexMr_Lx , E(F30F38,11,_,x,_,0,2,QVM), 0 , 194, 0 , 8142 , 341, 111), // #1270
1350 INST(Vpmovusdw , VexMr_Lx , E(F30F38,13,_,x,_,0,3,HVM), 0 , 195, 0 , 8152 , 342, 111), // #1271
1351 INST(Vpmovusqb , VexMr_Lx , E(F30F38,12,_,x,_,0,1,OVM), 0 , 196, 0 , 8162 , 343, 111), // #1272
1352 INST(Vpmovusqd , VexMr_Lx , E(F30F38,15,_,x,_,0,3,HVM), 0 , 195, 0 , 8172 , 342, 111), // #1273
1353 INST(Vpmovusqw , VexMr_Lx , E(F30F38,14,_,x,_,0,2,QVM), 0 , 194, 0 , 8182 , 341, 111), // #1274
1354 INST(Vpmovuswb , VexMr_Lx , E(F30F38,10,_,x,_,0,3,HVM), 0 , 195, 0 , 8192 , 342, 113), // #1275
1355 INST(Vpmovw2m , VexRm_Lx , E(F30F38,29,_,x,_,1,_,_ ), 0 , 181, 0 , 8202 , 340, 113), // #1276
1356 INST(Vpmovwb , VexMr_Lx , E(F30F38,30,_,x,_,0,3,HVM), 0 , 195, 0 , 8211 , 342, 113), // #1277
1357 INST(Vpmovzxbd , VexRm_Lx , V(660F38,31,_,x,I,I,2,QVM), 0 , 197, 0 , 8219 , 344, 116), // #1278
1358 INST(Vpmovzxbq , VexRm_Lx , V(660F38,32,_,x,I,I,1,OVM), 0 , 198, 0 , 8229 , 345, 116), // #1279
1359 INST(Vpmovzxbw , VexRm_Lx , V(660F38,30,_,x,I,I,3,HVM), 0 , 123, 0 , 8239 , 346, 130), // #1280
1360 INST(Vpmovzxdq , VexRm_Lx , V(660F38,35,_,x,I,0,3,HVM), 0 , 123, 0 , 8249 , 346, 116), // #1281
1361 INST(Vpmovzxwd , VexRm_Lx , V(660F38,33,_,x,I,I,3,HVM), 0 , 123, 0 , 8259 , 346, 116), // #1282
1362 INST(Vpmovzxwq , VexRm_Lx , V(660F38,34,_,x,I,I,2,QVM), 0 , 197, 0 , 8269 , 344, 116), // #1283
1363 INST(Vpmuldq , VexRvm_Lx , V(660F38,28,_,x,I,1,4,FV ), 0 , 188, 0 , 8279 , 185, 116), // #1284
1364 INST(Vpmulhrsw , VexRvm_Lx , V(660F38,0B,_,x,I,I,4,FVM), 0 , 98 , 0 , 8287 , 272, 130), // #1285
1365 INST(Vpmulhuw , VexRvm_Lx , V(660F00,E4,_,x,I,I,4,FVM), 0 , 175, 0 , 8297 , 272, 130), // #1286
1366 INST(Vpmulhw , VexRvm_Lx , V(660F00,E5,_,x,I,I,4,FVM), 0 , 175, 0 , 8306 , 272, 130), // #1287
1367 INST(Vpmulld , VexRvm_Lx , V(660F38,40,_,x,I,0,4,FV ), 0 , 154, 0 , 8314 , 186, 116), // #1288
1368 INST(Vpmullq , VexRvm_Lx , E(660F38,40,_,x,_,1,4,FV ), 0 , 103, 0 , 8322 , 191, 114), // #1289
1369 INST(Vpmullw , VexRvm_Lx , V(660F00,D5,_,x,I,I,4,FVM), 0 , 175, 0 , 8330 , 272, 130), // #1290
1370 INST(Vpmultishiftqb , VexRvm_Lx , E(660F38,83,_,x,_,1,4,FV ), 0 , 103, 0 , 8338 , 191, 137), // #1291
1371 INST(Vpmuludq , VexRvm_Lx , V(660F00,F4,_,x,I,1,4,FV ), 0 , 93 , 0 , 8353 , 185, 116), // #1292
1372 INST(Vpopcntb , VexRm_Lx , E(660F38,54,_,x,_,0,4,FV ), 0 , 102, 0 , 8362 , 241, 141), // #1293
1373 INST(Vpopcntd , VexRm_Lx , E(660F38,55,_,x,_,0,4,FVM), 0 , 101, 0 , 8371 , 324, 142), // #1294
1374 INST(Vpopcntq , VexRm_Lx , E(660F38,55,_,x,_,1,4,FVM), 0 , 104, 0 , 8380 , 337, 142), // #1295
1375 INST(Vpopcntw , VexRm_Lx , E(660F38,54,_,x,_,1,4,FV ), 0 , 103, 0 , 8389 , 241, 141), // #1296
1376 INST(Vpor , VexRvm_Lx , V(660F00,EB,_,x,I,_,_,_ ), 0 , 63 , 0 , 8398 , 302, 128), // #1297
1377 INST(Vpord , VexRvm_Lx , E(660F00,EB,_,x,_,0,4,FV ), 0 , 179, 0 , 8403 , 303, 111), // #1298
1378 INST(Vporq , VexRvm_Lx , E(660F00,EB,_,x,_,1,4,FV ), 0 , 121, 0 , 8409 , 307, 111), // #1299
1379 INST(Vpperm , VexRvrmRvmr , V(XOP_M8,A3,_,0,x,_,_,_ ), 0 , 183, 0 , 8415 , 347, 124), // #1300
1380 INST(Vprold , VexVmi_Lx , E(660F00,72,1,x,_,0,4,FV ), 0 , 199, 0 , 8422 , 348, 111), // #1301
1381 INST(Vprolq , VexVmi_Lx , E(660F00,72,1,x,_,1,4,FV ), 0 , 200, 0 , 8429 , 349, 111), // #1302
1382 INST(Vprolvd , VexRvm_Lx , E(660F38,15,_,x,_,0,4,FV ), 0 , 102, 0 , 8436 , 190, 111), // #1303
1383 INST(Vprolvq , VexRvm_Lx , E(660F38,15,_,x,_,1,4,FV ), 0 , 103, 0 , 8444 , 191, 111), // #1304
1384 INST(Vprord , VexVmi_Lx , E(660F00,72,0,x,_,0,4,FV ), 0 , 179, 0 , 8452 , 348, 111), // #1305
1385 INST(Vprorq , VexVmi_Lx , E(660F00,72,0,x,_,1,4,FV ), 0 , 121, 0 , 8459 , 349, 111), // #1306
1386 INST(Vprorvd , VexRvm_Lx , E(660F38,14,_,x,_,0,4,FV ), 0 , 102, 0 , 8466 , 190, 111), // #1307
1387 INST(Vprorvq , VexRvm_Lx , E(660F38,14,_,x,_,1,4,FV ), 0 , 103, 0 , 8474 , 191, 111), // #1308
1388 INST(Vprotb , VexRvmRmvRmi , V(XOP_M9,90,_,0,x,_,_,_ ), V(XOP_M8,C0,_,0,x,_,_,_ ), 72 , 119, 8482 , 350, 124), // #1309
1389 INST(Vprotd , VexRvmRmvRmi , V(XOP_M9,92,_,0,x,_,_,_ ), V(XOP_M8,C2,_,0,x,_,_,_ ), 72 , 120, 8489 , 350, 124), // #1310
1390 INST(Vprotq , VexRvmRmvRmi , V(XOP_M9,93,_,0,x,_,_,_ ), V(XOP_M8,C3,_,0,x,_,_,_ ), 72 , 121, 8496 , 350, 124), // #1311
1391 INST(Vprotw , VexRvmRmvRmi , V(XOP_M9,91,_,0,x,_,_,_ ), V(XOP_M8,C1,_,0,x,_,_,_ ), 72 , 122, 8503 , 350, 124), // #1312
1392 INST(Vpsadbw , VexRvm_Lx , V(660F00,F6,_,x,I,I,4,FVM), 0 , 175, 0 , 8510 , 180, 130), // #1313
1393 INST(Vpscatterdd , VexMr_VM , E(660F38,A0,_,x,_,0,2,T1S), 0 , 116, 0 , 8518 , 351, 111), // #1314
1394 INST(Vpscatterdq , VexMr_VM , E(660F38,A0,_,x,_,1,3,T1S), 0 , 115, 0 , 8530 , 351, 111), // #1315
1395 INST(Vpscatterqd , VexMr_VM , E(660F38,A1,_,x,_,0,2,T1S), 0 , 116, 0 , 8542 , 352, 111), // #1316
1396 INST(Vpscatterqq , VexMr_VM , E(660F38,A1,_,x,_,1,3,T1S), 0 , 115, 0 , 8554 , 353, 111), // #1317
1397 INST(Vpshab , VexRvmRmv , V(XOP_M9,98,_,0,x,_,_,_ ), 0 , 72 , 0 , 8566 , 354, 124), // #1318
1398 INST(Vpshad , VexRvmRmv , V(XOP_M9,9A,_,0,x,_,_,_ ), 0 , 72 , 0 , 8573 , 354, 124), // #1319
1399 INST(Vpshaq , VexRvmRmv , V(XOP_M9,9B,_,0,x,_,_,_ ), 0 , 72 , 0 , 8580 , 354, 124), // #1320
1400 INST(Vpshaw , VexRvmRmv , V(XOP_M9,99,_,0,x,_,_,_ ), 0 , 72 , 0 , 8587 , 354, 124), // #1321
1401 INST(Vpshlb , VexRvmRmv , V(XOP_M9,94,_,0,x,_,_,_ ), 0 , 72 , 0 , 8594 , 354, 124), // #1322
1402 INST(Vpshld , VexRvmRmv , V(XOP_M9,96,_,0,x,_,_,_ ), 0 , 72 , 0 , 8601 , 354, 124), // #1323
1403 INST(Vpshldd , VexRvmi_Lx , E(660F3A,71,_,x,_,0,4,FV ), 0 , 99 , 0 , 8608 , 183, 135), // #1324
1404 INST(Vpshldq , VexRvmi_Lx , E(660F3A,71,_,x,_,1,4,FV ), 0 , 100, 0 , 8616 , 184, 135), // #1325
1405 INST(Vpshldvd , VexRvm_Lx , E(660F38,71,_,x,_,0,4,FV ), 0 , 102, 0 , 8624 , 190, 135), // #1326
1406 INST(Vpshldvq , VexRvm_Lx , E(660F38,71,_,x,_,1,4,FV ), 0 , 103, 0 , 8633 , 191, 135), // #1327
1407 INST(Vpshldvw , VexRvm_Lx , E(660F38,70,_,x,_,0,4,FVM), 0 , 101, 0 , 8642 , 189, 135), // #1328
1408 INST(Vpshldw , VexRvmi_Lx , E(660F3A,70,_,x,_,0,4,FVM), 0 , 142, 0 , 8651 , 237, 135), // #1329
1409 INST(Vpshlq , VexRvmRmv , V(XOP_M9,97,_,0,x,_,_,_ ), 0 , 72 , 0 , 8659 , 354, 124), // #1330
1410 INST(Vpshlw , VexRvmRmv , V(XOP_M9,95,_,0,x,_,_,_ ), 0 , 72 , 0 , 8666 , 354, 124), // #1331
1411 INST(Vpshrdd , VexRvmi_Lx , E(660F3A,73,_,x,_,0,4,FV ), 0 , 99 , 0 , 8673 , 183, 135), // #1332
1412 INST(Vpshrdq , VexRvmi_Lx , E(660F3A,73,_,x,_,1,4,FV ), 0 , 100, 0 , 8681 , 184, 135), // #1333
1413 INST(Vpshrdvd , VexRvm_Lx , E(660F38,73,_,x,_,0,4,FV ), 0 , 102, 0 , 8689 , 190, 135), // #1334
1414 INST(Vpshrdvq , VexRvm_Lx , E(660F38,73,_,x,_,1,4,FV ), 0 , 103, 0 , 8698 , 191, 135), // #1335
1415 INST(Vpshrdvw , VexRvm_Lx , E(660F38,72,_,x,_,0,4,FVM), 0 , 101, 0 , 8707 , 189, 135), // #1336
1416 INST(Vpshrdw , VexRvmi_Lx , E(660F3A,72,_,x,_,0,4,FVM), 0 , 142, 0 , 8716 , 237, 135), // #1337
1417 INST(Vpshufb , VexRvm_Lx , V(660F38,00,_,x,I,I,4,FVM), 0 , 98 , 0 , 8724 , 272, 130), // #1338
1418 INST(Vpshufbitqmb , VexRvm_Lx , E(660F38,8F,_,x,0,0,4,FVM), 0 , 101, 0 , 8732 , 355, 141), // #1339
1419 INST(Vpshufd , VexRmi_Lx , V(660F00,70,_,x,I,0,4,FV ), 0 , 124, 0 , 8745 , 356, 116), // #1340
1420 INST(Vpshufhw , VexRmi_Lx , V(F30F00,70,_,x,I,I,4,FVM), 0 , 176, 0 , 8753 , 357, 130), // #1341
1421 INST(Vpshuflw , VexRmi_Lx , V(F20F00,70,_,x,I,I,4,FVM), 0 , 201, 0 , 8762 , 357, 130), // #1342
1422 INST(Vpsignb , VexRvm_Lx , V(660F38,08,_,x,I,_,_,_ ), 0 , 88 , 0 , 8771 , 179, 128), // #1343
1423 INST(Vpsignd , VexRvm_Lx , V(660F38,0A,_,x,I,_,_,_ ), 0 , 88 , 0 , 8779 , 179, 128), // #1344
1424 INST(Vpsignw , VexRvm_Lx , V(660F38,09,_,x,I,_,_,_ ), 0 , 88 , 0 , 8787 , 179, 128), // #1345
1425 INST(Vpslld , VexRvmVmi_Lx , V(660F00,F2,_,x,I,0,4,128), V(660F00,72,6,x,I,0,4,FV ), 202, 123, 8795 , 358, 116), // #1346
1426 INST(Vpslldq , VexEvexVmi_Lx , V(660F00,73,7,x,I,I,4,FVM), 0 , 203, 0 , 8802 , 359, 130), // #1347
1427 INST(Vpsllq , VexRvmVmi_Lx , V(660F00,F3,_,x,I,1,4,128), V(660F00,73,6,x,I,1,4,FV ), 204, 124, 8810 , 360, 116), // #1348
1428 INST(Vpsllvd , VexRvm_Lx , V(660F38,47,_,x,0,0,4,FV ), 0 , 154, 0 , 8817 , 186, 125), // #1349
1429 INST(Vpsllvq , VexRvm_Lx , V(660F38,47,_,x,1,1,4,FV ), 0 , 153, 0 , 8825 , 185, 125), // #1350
1430 INST(Vpsllvw , VexRvm_Lx , E(660F38,12,_,x,_,1,4,FVM), 0 , 104, 0 , 8833 , 189, 113), // #1351
1431 INST(Vpsllw , VexRvmVmi_Lx , V(660F00,F1,_,x,I,I,4,FVM), V(660F00,71,6,x,I,I,4,FVM), 175, 125, 8841 , 361, 130), // #1352
1432 INST(Vpsrad , VexRvmVmi_Lx , V(660F00,E2,_,x,I,0,4,128), V(660F00,72,4,x,I,0,4,FV ), 202, 126, 8848 , 358, 116), // #1353
1433 INST(Vpsraq , VexRvmVmi_Lx , E(660F00,E2,_,x,_,1,4,128), E(660F00,72,4,x,_,1,4,FV ), 205, 127, 8855 , 362, 111), // #1354
1434 INST(Vpsravd , VexRvm_Lx , V(660F38,46,_,x,0,0,4,FV ), 0 , 154, 0 , 8862 , 186, 125), // #1355
1435 INST(Vpsravq , VexRvm_Lx , E(660F38,46,_,x,_,1,4,FV ), 0 , 103, 0 , 8870 , 191, 111), // #1356
1436 INST(Vpsravw , VexRvm_Lx , E(660F38,11,_,x,_,1,4,FVM), 0 , 104, 0 , 8878 , 189, 113), // #1357
1437 INST(Vpsraw , VexRvmVmi_Lx , V(660F00,E1,_,x,I,I,4,128), V(660F00,71,4,x,I,I,4,FVM), 202, 128, 8886 , 361, 130), // #1358
1438 INST(Vpsrld , VexRvmVmi_Lx , V(660F00,D2,_,x,I,0,4,128), V(660F00,72,2,x,I,0,4,FV ), 202, 129, 8893 , 358, 116), // #1359
1439 INST(Vpsrldq , VexEvexVmi_Lx , V(660F00,73,3,x,I,I,4,FVM), 0 , 206, 0 , 8900 , 359, 130), // #1360
1440 INST(Vpsrlq , VexRvmVmi_Lx , V(660F00,D3,_,x,I,1,4,128), V(660F00,73,2,x,I,1,4,FV ), 204, 130, 8908 , 360, 116), // #1361
1441 INST(Vpsrlvd , VexRvm_Lx , V(660F38,45,_,x,0,0,4,FV ), 0 , 154, 0 , 8915 , 186, 125), // #1362
1442 INST(Vpsrlvq , VexRvm_Lx , V(660F38,45,_,x,1,1,4,FV ), 0 , 153, 0 , 8923 , 185, 125), // #1363
1443 INST(Vpsrlvw , VexRvm_Lx , E(660F38,10,_,x,_,1,4,FVM), 0 , 104, 0 , 8931 , 189, 113), // #1364
1444 INST(Vpsrlw , VexRvmVmi_Lx , V(660F00,D1,_,x,I,I,4,128), V(660F00,71,2,x,I,I,4,FVM), 202, 131, 8939 , 361, 130), // #1365
1445 INST(Vpsubb , VexRvm_Lx , V(660F00,F8,_,x,I,I,4,FVM), 0 , 175, 0 , 8946 , 363, 130), // #1366
1446 INST(Vpsubd , VexRvm_Lx , V(660F00,FA,_,x,I,0,4,FV ), 0 , 124, 0 , 8953 , 364, 116), // #1367
1447 INST(Vpsubq , VexRvm_Lx , V(660F00,FB,_,x,I,1,4,FV ), 0 , 93 , 0 , 8960 , 365, 116), // #1368
1448 INST(Vpsubsb , VexRvm_Lx , V(660F00,E8,_,x,I,I,4,FVM), 0 , 175, 0 , 8967 , 363, 130), // #1369
1449 INST(Vpsubsw , VexRvm_Lx , V(660F00,E9,_,x,I,I,4,FVM), 0 , 175, 0 , 8975 , 363, 130), // #1370
1450 INST(Vpsubusb , VexRvm_Lx , V(660F00,D8,_,x,I,I,4,FVM), 0 , 175, 0 , 8983 , 363, 130), // #1371
1451 INST(Vpsubusw , VexRvm_Lx , V(660F00,D9,_,x,I,I,4,FVM), 0 , 175, 0 , 8992 , 363, 130), // #1372
1452 INST(Vpsubw , VexRvm_Lx , V(660F00,F9,_,x,I,I,4,FVM), 0 , 175, 0 , 9001 , 363, 130), // #1373
1453 INST(Vpternlogd , VexRvmi_Lx , E(660F3A,25,_,x,_,0,4,FV ), 0 , 99 , 0 , 9008 , 183, 111), // #1374
1454 INST(Vpternlogq , VexRvmi_Lx , E(660F3A,25,_,x,_,1,4,FV ), 0 , 100, 0 , 9019 , 184, 111), // #1375
1455 INST(Vptest , VexRm_Lx , V(660F38,17,_,x,I,_,_,_ ), 0 , 88 , 0 , 9030 , 257, 134), // #1376
1456 INST(Vptestmb , VexRvm_Lx , E(660F38,26,_,x,_,0,4,FVM), 0 , 101, 0 , 9037 , 355, 113), // #1377
1457 INST(Vptestmd , VexRvm_Lx , E(660F38,27,_,x,_,0,4,FV ), 0 , 102, 0 , 9046 , 366, 111), // #1378
1458 INST(Vptestmq , VexRvm_Lx , E(660F38,27,_,x,_,1,4,FV ), 0 , 103, 0 , 9055 , 367, 111), // #1379
1459 INST(Vptestmw , VexRvm_Lx , E(660F38,26,_,x,_,1,4,FVM), 0 , 104, 0 , 9064 , 355, 113), // #1380
1460 INST(Vptestnmb , VexRvm_Lx , E(F30F38,26,_,x,_,0,4,FVM), 0 , 207, 0 , 9073 , 355, 113), // #1381
1461 INST(Vptestnmd , VexRvm_Lx , E(F30F38,27,_,x,_,0,4,FV ), 0 , 208, 0 , 9083 , 366, 111), // #1382
1462 INST(Vptestnmq , VexRvm_Lx , E(F30F38,27,_,x,_,1,4,FV ), 0 , 209, 0 , 9093 , 367, 111), // #1383
1463 INST(Vptestnmw , VexRvm_Lx , E(F30F38,26,_,x,_,1,4,FVM), 0 , 210, 0 , 9103 , 355, 113), // #1384
1464 INST(Vpunpckhbw , VexRvm_Lx , V(660F00,68,_,x,I,I,4,FVM), 0 , 175, 0 , 9113 , 272, 130), // #1385
1465 INST(Vpunpckhdq , VexRvm_Lx , V(660F00,6A,_,x,I,0,4,FV ), 0 , 124, 0 , 9124 , 186, 116), // #1386
1466 INST(Vpunpckhqdq , VexRvm_Lx , V(660F00,6D,_,x,I,1,4,FV ), 0 , 93 , 0 , 9135 , 185, 116), // #1387
1467 INST(Vpunpckhwd , VexRvm_Lx , V(660F00,69,_,x,I,I,4,FVM), 0 , 175, 0 , 9147 , 272, 130), // #1388
1468 INST(Vpunpcklbw , VexRvm_Lx , V(660F00,60,_,x,I,I,4,FVM), 0 , 175, 0 , 9158 , 272, 130), // #1389
1469 INST(Vpunpckldq , VexRvm_Lx , V(660F00,62,_,x,I,0,4,FV ), 0 , 124, 0 , 9169 , 186, 116), // #1390
1470 INST(Vpunpcklqdq , VexRvm_Lx , V(660F00,6C,_,x,I,1,4,FV ), 0 , 93 , 0 , 9180 , 185, 116), // #1391
1471 INST(Vpunpcklwd , VexRvm_Lx , V(660F00,61,_,x,I,I,4,FVM), 0 , 175, 0 , 9192 , 272, 130), // #1392
1472 INST(Vpxor , VexRvm_Lx , V(660F00,EF,_,x,I,_,_,_ ), 0 , 63 , 0 , 9203 , 304, 128), // #1393
1473 INST(Vpxord , VexRvm_Lx , E(660F00,EF,_,x,_,0,4,FV ), 0 , 179, 0 , 9209 , 305, 111), // #1394
1474 INST(Vpxorq , VexRvm_Lx , E(660F00,EF,_,x,_,1,4,FV ), 0 , 121, 0 , 9216 , 306, 111), // #1395
1475 INST(Vrangepd , VexRvmi_Lx , E(660F3A,50,_,x,_,1,4,FV ), 0 , 100, 0 , 9223 , 246, 114), // #1396
1476 INST(Vrangeps , VexRvmi_Lx , E(660F3A,50,_,x,_,0,4,FV ), 0 , 99 , 0 , 9232 , 247, 114), // #1397
1477 INST(Vrangesd , VexRvmi , E(660F3A,51,_,I,_,1,3,T1S), 0 , 151, 0 , 9241 , 248, 61 ), // #1398
1478 INST(Vrangess , VexRvmi , E(660F3A,51,_,I,_,0,2,T1S), 0 , 152, 0 , 9250 , 249, 61 ), // #1399
1479 INST(Vrcp14pd , VexRm_Lx , E(660F38,4C,_,x,_,1,4,FV ), 0 , 103, 0 , 9259 , 337, 111), // #1400
1480 INST(Vrcp14ps , VexRm_Lx , E(660F38,4C,_,x,_,0,4,FV ), 0 , 102, 0 , 9268 , 324, 111), // #1401
1481 INST(Vrcp14sd , VexRvm , E(660F38,4D,_,I,_,1,3,T1S), 0 , 115, 0 , 9277 , 368, 63 ), // #1402
1482 INST(Vrcp14ss , VexRvm , E(660F38,4D,_,I,_,0,2,T1S), 0 , 116, 0 , 9286 , 369, 63 ), // #1403
1483 INST(Vrcp28pd , VexRm , E(660F38,CA,_,2,_,1,4,FV ), 0 , 143, 0 , 9295 , 239, 120), // #1404
1484 INST(Vrcp28ps , VexRm , E(660F38,CA,_,2,_,0,4,FV ), 0 , 144, 0 , 9304 , 240, 120), // #1405
1485 INST(Vrcp28sd , VexRvm , E(660F38,CB,_,I,_,1,3,T1S), 0 , 115, 0 , 9313 , 267, 120), // #1406
1486 INST(Vrcp28ss , VexRvm , E(660F38,CB,_,I,_,0,2,T1S), 0 , 116, 0 , 9322 , 268, 120), // #1407
1487 INST(Vrcpps , VexRm_Lx , V(000F00,53,_,x,I,_,_,_ ), 0 , 66 , 0 , 9331 , 257, 108), // #1408
1488 INST(Vrcpss , VexRvm , V(F30F00,53,_,I,I,_,_,_ ), 0 , 169, 0 , 9338 , 370, 108), // #1409
1489 INST(Vreducepd , VexRmi_Lx , E(660F3A,56,_,x,_,1,4,FV ), 0 , 100, 0 , 9345 , 349, 114), // #1410
1490 INST(Vreduceps , VexRmi_Lx , E(660F3A,56,_,x,_,0,4,FV ), 0 , 99 , 0 , 9355 , 348, 114), // #1411
1491 INST(Vreducesd , VexRvmi , E(660F3A,57,_,I,_,1,3,T1S), 0 , 151, 0 , 9365 , 371, 61 ), // #1412
1492 INST(Vreducess , VexRvmi , E(660F3A,57,_,I,_,0,2,T1S), 0 , 152, 0 , 9375 , 372, 61 ), // #1413
1493 INST(Vrndscalepd , VexRmi_Lx , E(660F3A,09,_,x,_,1,4,FV ), 0 , 100, 0 , 9385 , 269, 111), // #1414
1494 INST(Vrndscaleps , VexRmi_Lx , E(660F3A,08,_,x,_,0,4,FV ), 0 , 99 , 0 , 9397 , 270, 111), // #1415
1495 INST(Vrndscalesd , VexRvmi , E(660F3A,0B,_,I,_,1,3,T1S), 0 , 151, 0 , 9409 , 248, 63 ), // #1416
1496 INST(Vrndscaless , VexRvmi , E(660F3A,0A,_,I,_,0,2,T1S), 0 , 152, 0 , 9421 , 249, 63 ), // #1417
1497 INST(Vroundpd , VexRmi_Lx , V(660F3A,09,_,x,I,_,_,_ ), 0 , 67 , 0 , 9433 , 373, 108), // #1418
1498 INST(Vroundps , VexRmi_Lx , V(660F3A,08,_,x,I,_,_,_ ), 0 , 67 , 0 , 9442 , 373, 108), // #1419
1499 INST(Vroundsd , VexRvmi , V(660F3A,0B,_,I,I,_,_,_ ), 0 , 67 , 0 , 9451 , 374, 108), // #1420
1500 INST(Vroundss , VexRvmi , V(660F3A,0A,_,I,I,_,_,_ ), 0 , 67 , 0 , 9460 , 375, 108), // #1421
1501 INST(Vrsqrt14pd , VexRm_Lx , E(660F38,4E,_,x,_,1,4,FV ), 0 , 103, 0 , 9469 , 337, 111), // #1422
1502 INST(Vrsqrt14ps , VexRm_Lx , E(660F38,4E,_,x,_,0,4,FV ), 0 , 102, 0 , 9480 , 324, 111), // #1423
1503 INST(Vrsqrt14sd , VexRvm , E(660F38,4F,_,I,_,1,3,T1S), 0 , 115, 0 , 9491 , 368, 63 ), // #1424
1504 INST(Vrsqrt14ss , VexRvm , E(660F38,4F,_,I,_,0,2,T1S), 0 , 116, 0 , 9502 , 369, 63 ), // #1425
1505 INST(Vrsqrt28pd , VexRm , E(660F38,CC,_,2,_,1,4,FV ), 0 , 143, 0 , 9513 , 239, 120), // #1426
1506 INST(Vrsqrt28ps , VexRm , E(660F38,CC,_,2,_,0,4,FV ), 0 , 144, 0 , 9524 , 240, 120), // #1427
1507 INST(Vrsqrt28sd , VexRvm , E(660F38,CD,_,I,_,1,3,T1S), 0 , 115, 0 , 9535 , 267, 120), // #1428
1508 INST(Vrsqrt28ss , VexRvm , E(660F38,CD,_,I,_,0,2,T1S), 0 , 116, 0 , 9546 , 268, 120), // #1429
1509 INST(Vrsqrtps , VexRm_Lx , V(000F00,52,_,x,I,_,_,_ ), 0 , 66 , 0 , 9557 , 257, 108), // #1430
1510 INST(Vrsqrtss , VexRvm , V(F30F00,52,_,I,I,_,_,_ ), 0 , 169, 0 , 9566 , 370, 108), // #1431
1511 INST(Vscalefpd , VexRvm_Lx , E(660F38,2C,_,x,_,1,4,FV ), 0 , 103, 0 , 9575 , 376, 111), // #1432
1512 INST(Vscalefps , VexRvm_Lx , E(660F38,2C,_,x,_,0,4,FV ), 0 , 102, 0 , 9585 , 377, 111), // #1433
1513 INST(Vscalefsd , VexRvm , E(660F38,2D,_,I,_,1,3,T1S), 0 , 115, 0 , 9595 , 378, 63 ), // #1434
1514 INST(Vscalefss , VexRvm , E(660F38,2D,_,I,_,0,2,T1S), 0 , 116, 0 , 9605 , 379, 63 ), // #1435
1515 INST(Vscatterdpd , VexMr_Lx , E(660F38,A2,_,x,_,1,3,T1S), 0 , 115, 0 , 9615 , 380, 111), // #1436
1516 INST(Vscatterdps , VexMr_Lx , E(660F38,A2,_,x,_,0,2,T1S), 0 , 116, 0 , 9627 , 351, 111), // #1437
1517 INST(Vscatterpf0dpd , VexM_VM , E(660F38,C6,5,2,_,1,3,T1S), 0 , 211, 0 , 9639 , 262, 126), // #1438
1518 INST(Vscatterpf0dps , VexM_VM , E(660F38,C6,5,2,_,0,2,T1S), 0 , 212, 0 , 9654 , 263, 126), // #1439
1519 INST(Vscatterpf0qpd , VexM_VM , E(660F38,C7,5,2,_,1,3,T1S), 0 , 211, 0 , 9669 , 264, 126), // #1440
1520 INST(Vscatterpf0qps , VexM_VM , E(660F38,C7,5,2,_,0,2,T1S), 0 , 212, 0 , 9684 , 264, 126), // #1441
1521 INST(Vscatterpf1dpd , VexM_VM , E(660F38,C6,6,2,_,1,3,T1S), 0 , 213, 0 , 9699 , 262, 126), // #1442
1522 INST(Vscatterpf1dps , VexM_VM , E(660F38,C6,6,2,_,0,2,T1S), 0 , 214, 0 , 9714 , 263, 126), // #1443
1523 INST(Vscatterpf1qpd , VexM_VM , E(660F38,C7,6,2,_,1,3,T1S), 0 , 213, 0 , 9729 , 264, 126), // #1444
1524 INST(Vscatterpf1qps , VexM_VM , E(660F38,C7,6,2,_,0,2,T1S), 0 , 214, 0 , 9744 , 264, 126), // #1445
1525 INST(Vscatterqpd , VexMr_Lx , E(660F38,A3,_,x,_,1,3,T1S), 0 , 115, 0 , 9759 , 353, 111), // #1446
1526 INST(Vscatterqps , VexMr_Lx , E(660F38,A3,_,x,_,0,2,T1S), 0 , 116, 0 , 9771 , 352, 111), // #1447
1527 INST(Vshuff32x4 , VexRvmi_Lx , E(660F3A,23,_,x,_,0,4,FV ), 0 , 99 , 0 , 9783 , 381, 111), // #1448
1528 INST(Vshuff64x2 , VexRvmi_Lx , E(660F3A,23,_,x,_,1,4,FV ), 0 , 100, 0 , 9794 , 382, 111), // #1449
1529 INST(Vshufi32x4 , VexRvmi_Lx , E(660F3A,43,_,x,_,0,4,FV ), 0 , 99 , 0 , 9805 , 381, 111), // #1450
1530 INST(Vshufi64x2 , VexRvmi_Lx , E(660F3A,43,_,x,_,1,4,FV ), 0 , 100, 0 , 9816 , 382, 111), // #1451
1531 INST(Vshufpd , VexRvmi_Lx , V(660F00,C6,_,x,I,1,4,FV ), 0 , 93 , 0 , 9827 , 383, 106), // #1452
1532 INST(Vshufps , VexRvmi_Lx , V(000F00,C6,_,x,I,0,4,FV ), 0 , 94 , 0 , 9835 , 384, 106), // #1453
1533 INST(Vsqrtpd , VexRm_Lx , V(660F00,51,_,x,I,1,4,FV ), 0 , 93 , 0 , 9843 , 385, 106), // #1454
1534 INST(Vsqrtps , VexRm_Lx , V(000F00,51,_,x,I,0,4,FV ), 0 , 94 , 0 , 9851 , 209, 106), // #1455
1535 INST(Vsqrtsd , VexRvm , V(F20F00,51,_,I,I,1,3,T1S), 0 , 95 , 0 , 9859 , 177, 107), // #1456
1536 INST(Vsqrtss , VexRvm , V(F30F00,51,_,I,I,0,2,T1S), 0 , 96 , 0 , 9867 , 178, 107), // #1457
1537 INST(Vstmxcsr , VexM , V(000F00,AE,3,0,I,_,_,_ ), 0 , 215, 0 , 9875 , 278, 108), // #1458
1538 INST(Vsubpd , VexRvm_Lx , V(660F00,5C,_,x,I,1,4,FV ), 0 , 93 , 0 , 9884 , 175, 106), // #1459
1539 INST(Vsubps , VexRvm_Lx , V(000F00,5C,_,x,I,0,4,FV ), 0 , 94 , 0 , 9891 , 176, 106), // #1460
1540 INST(Vsubsd , VexRvm , V(F20F00,5C,_,I,I,1,3,T1S), 0 , 95 , 0 , 9898 , 177, 107), // #1461
1541 INST(Vsubss , VexRvm , V(F30F00,5C,_,I,I,0,2,T1S), 0 , 96 , 0 , 9905 , 178, 107), // #1462
1542 INST(Vtestpd , VexRm_Lx , V(660F38,0F,_,x,0,_,_,_ ), 0 , 88 , 0 , 9912 , 257, 134), // #1463
1543 INST(Vtestps , VexRm_Lx , V(660F38,0E,_,x,0,_,_,_ ), 0 , 88 , 0 , 9920 , 257, 134), // #1464
1544 INST(Vucomisd , VexRm , V(660F00,2E,_,I,I,1,3,T1S), 0 , 113, 0 , 9928 , 205, 117), // #1465
1545 INST(Vucomiss , VexRm , V(000F00,2E,_,I,I,0,2,T1S), 0 , 114, 0 , 9937 , 206, 117), // #1466
1546 INST(Vunpckhpd , VexRvm_Lx , V(660F00,15,_,x,I,1,4,FV ), 0 , 93 , 0 , 9946 , 185, 106), // #1467
1547 INST(Vunpckhps , VexRvm_Lx , V(000F00,15,_,x,I,0,4,FV ), 0 , 94 , 0 , 9956 , 186, 106), // #1468
1548 INST(Vunpcklpd , VexRvm_Lx , V(660F00,14,_,x,I,1,4,FV ), 0 , 93 , 0 , 9966 , 185, 106), // #1469
1549 INST(Vunpcklps , VexRvm_Lx , V(000F00,14,_,x,I,0,4,FV ), 0 , 94 , 0 , 9976 , 186, 106), // #1470
1550 INST(Vxorpd , VexRvm_Lx , V(660F00,57,_,x,I,1,4,FV ), 0 , 93 , 0 , 9986 , 365, 112), // #1471
1551 INST(Vxorps , VexRvm_Lx , V(000F00,57,_,x,I,0,4,FV ), 0 , 94 , 0 , 9993 , 364, 112), // #1472
1552 INST(Vzeroall , VexOp , V(000F00,77,_,1,I,_,_,_ ), 0 , 62 , 0 , 10000, 386, 108), // #1473
1553 INST(Vzeroupper , VexOp , V(000F00,77,_,0,I,_,_,_ ), 0 , 66 , 0 , 10009, 386, 108), // #1474
1554 INST(Wbinvd , X86Op , O(000F00,09,_,_,_,_,_,_ ), 0 , 4 , 0 , 10020, 30 , 0 ), // #1475
1555 INST(Wbnoinvd , X86Op , O(F30F00,09,_,_,_,_,_,_ ), 0 , 6 , 0 , 10027, 30 , 143), // #1476
1556 INST(Wrfsbase , X86M , O(F30F00,AE,2,_,x,_,_,_ ), 0 , 216, 0 , 10036, 161, 94 ), // #1477
1557 INST(Wrgsbase , X86M , O(F30F00,AE,3,_,x,_,_,_ ), 0 , 217, 0 , 10045, 161, 94 ), // #1478
1558 INST(Wrmsr , X86Op , O(000F00,30,_,_,_,_,_,_ ), 0 , 4 , 0 , 10054, 162, 95 ), // #1479
1559 INST(Xabort , X86Op_O_I8 , O(000000,C6,7,_,_,_,_,_ ), 0 , 25 , 0 , 10060, 74 , 144), // #1480
1560 INST(Xadd , X86Xadd , O(000F00,C0,_,_,x,_,_,_ ), 0 , 4 , 0 , 10067, 387, 36 ), // #1481
1561 INST(Xbegin , X86JmpRel , O(000000,C7,7,_,_,_,_,_ ), 0 , 25 , 0 , 10072, 388, 144), // #1482
1562 INST(Xchg , X86Xchg , O(000000,86,_,_,x,_,_,_ ), 0 , 0 , 0 , 448 , 389, 0 ), // #1483
1563 INST(Xend , X86Op , O(000F01,D5,_,_,_,_,_,_ ), 0 , 21 , 0 , 10079, 30 , 144), // #1484
1564 INST(Xgetbv , X86Op , O(000F01,D0,_,_,_,_,_,_ ), 0 , 21 , 0 , 10084, 162, 145), // #1485
1565 INST(Xlatb , X86Op , O(000000,D7,_,_,_,_,_,_ ), 0 , 0 , 0 , 10091, 30 , 0 ), // #1486
1566 INST(Xor , X86Arith , O(000000,30,6,_,x,_,_,_ ), 0 , 30 , 0 , 9205 , 166, 1 ), // #1487
1567 INST(Xorpd , ExtRm , O(660F00,57,_,_,_,_,_,_ ), 0 , 3 , 0 , 9987 , 140, 4 ), // #1488
1568 INST(Xorps , ExtRm , O(000F00,57,_,_,_,_,_,_ ), 0 , 4 , 0 , 9994 , 140, 5 ), // #1489
1569 INST(Xrstor , X86M_Only , O(000F00,AE,5,_,_,_,_,_ ), 0 , 70 , 0 , 1134 , 390, 145), // #1490
1570 INST(Xrstor64 , X86M_Only , O(000F00,AE,5,_,1,_,_,_ ), 0 , 218, 0 , 1142 , 391, 145), // #1491
1571 INST(Xrstors , X86M_Only , O(000F00,C7,3,_,_,_,_,_ ), 0 , 71 , 0 , 10097, 390, 146), // #1492
1572 INST(Xrstors64 , X86M_Only , O(000F00,C7,3,_,1,_,_,_ ), 0 , 219, 0 , 10105, 391, 146), // #1493
1573 INST(Xsave , X86M_Only , O(000F00,AE,4,_,_,_,_,_ ), 0 , 89 , 0 , 1152 , 390, 145), // #1494
1574 INST(Xsave64 , X86M_Only , O(000F00,AE,4,_,1,_,_,_ ), 0 , 220, 0 , 1159 , 391, 145), // #1495
1575 INST(Xsavec , X86M_Only , O(000F00,C7,4,_,_,_,_,_ ), 0 , 89 , 0 , 10115, 390, 147), // #1496
1576 INST(Xsavec64 , X86M_Only , O(000F00,C7,4,_,1,_,_,_ ), 0 , 220, 0 , 10122, 391, 147), // #1497
1577 INST(Xsaveopt , X86M_Only , O(000F00,AE,6,_,_,_,_,_ ), 0 , 73 , 0 , 10131, 390, 148), // #1498
1578 INST(Xsaveopt64 , X86M_Only , O(000F00,AE,6,_,1,_,_,_ ), 0 , 221, 0 , 10140, 391, 148), // #1499
1579 INST(Xsaves , X86M_Only , O(000F00,C7,5,_,_,_,_,_ ), 0 , 70 , 0 , 10151, 390, 146), // #1500
1580 INST(Xsaves64 , X86M_Only , O(000F00,C7,5,_,1,_,_,_ ), 0 , 218, 0 , 10158, 391, 146), // #1501
1581 INST(Xsetbv , X86Op , O(000F01,D1,_,_,_,_,_,_ ), 0 , 21 , 0 , 10167, 162, 145), // #1502
1582 INST(Xtest , X86Op , O(000F01,D6,_,_,_,_,_,_ ), 0 , 21 , 0 , 10174, 30 , 149) // #1503
1583 // ${InstInfo:End}
1584 };
1585 #undef NAME_DATA_INDEX
1586 #undef INST
1587
1588 // ============================================================================
1589 // [asmjit::x86::InstDB - Opcode Tables]
1590 // ============================================================================
1591
1592 // ${MainOpcodeTable:Begin}
1593 // ------------------- Automatically generated, do not edit -------------------
1594 const uint32_t InstDB::_mainOpcodeTable[] = {
1595 O(000000,00,0,0,0,0,0,_ ), // #0 [ref=55x]
1596 O(000000,00,2,0,0,0,0,_ ), // #1 [ref=4x]
1597 O(660F38,00,0,0,0,0,0,_ ), // #2 [ref=42x]
1598 O(660F00,00,0,0,0,0,0,_ ), // #3 [ref=38x]
1599 O(000F00,00,0,0,0,0,0,_ ), // #4 [ref=231x]
1600 O(F20F00,00,0,0,0,0,0,_ ), // #5 [ref=24x]
1601 O(F30F00,00,0,0,0,0,0,_ ), // #6 [ref=29x]
1602 O(F30F38,00,0,0,0,0,0,_ ), // #7 [ref=2x]
1603 O(660F3A,00,0,0,0,0,0,_ ), // #8 [ref=22x]
1604 O(000000,00,4,0,0,0,0,_ ), // #9 [ref=5x]
1605 V(000F38,00,0,0,0,0,0,_ ), // #10 [ref=3x]
1606 V(XOP_M9,00,1,0,0,0,0,_ ), // #11 [ref=3x]
1607 V(XOP_M9,00,6,0,0,0,0,_ ), // #12 [ref=2x]
1608 V(XOP_M9,00,5,0,0,0,0,_ ), // #13 [ref=1x]
1609 V(XOP_M9,00,3,0,0,0,0,_ ), // #14 [ref=1x]
1610 V(XOP_M9,00,2,0,0,0,0,_ ), // #15 [ref=1x]
1611 V(000F38,00,3,0,0,0,0,_ ), // #16 [ref=1x]
1612 V(000F38,00,2,0,0,0,0,_ ), // #17 [ref=1x]
1613 V(000F38,00,1,0,0,0,0,_ ), // #18 [ref=1x]
1614 O(660000,00,0,0,0,0,0,_ ), // #19 [ref=7x]
1615 O(000000,00,0,0,1,0,0,_ ), // #20 [ref=4x]
1616 O(000F01,00,0,0,0,0,0,_ ), // #21 [ref=25x]
1617 O(000F00,00,7,0,0,0,0,_ ), // #22 [ref=5x]
1618 O(660F00,00,7,0,0,0,0,_ ), // #23 [ref=2x]
1619 O(660F00,00,6,0,0,0,0,_ ), // #24 [ref=2x]
1620 O(000000,00,7,0,0,0,0,_ ), // #25 [ref=5x]
1621 O(000F00,00,1,0,1,0,0,_ ), // #26 [ref=2x]
1622 O(000F00,00,1,0,0,0,0,_ ), // #27 [ref=6x]
1623 O(F20F38,00,0,0,0,0,0,_ ), // #28 [ref=2x]
1624 O(000000,00,1,0,0,0,0,_ ), // #29 [ref=3x]
1625 O(000000,00,6,0,0,0,0,_ ), // #30 [ref=3x]
1626 O_FPU(00,D900,_) , // #31 [ref=29x]
1627 O_FPU(00,C000,0) , // #32 [ref=1x]
1628 O_FPU(00,DE00,_) , // #33 [ref=7x]
1629 O_FPU(00,0000,4) , // #34 [ref=4x]
1630 O_FPU(00,0000,6) , // #35 [ref=4x]
1631 O_FPU(9B,DB00,_) , // #36 [ref=2x]
1632 O_FPU(00,DA00,_) , // #37 [ref=5x]
1633 O_FPU(00,DB00,_) , // #38 [ref=8x]
1634 O_FPU(00,D000,2) , // #39 [ref=1x]
1635 O_FPU(00,DF00,_) , // #40 [ref=2x]
1636 O_FPU(00,D800,3) , // #41 [ref=1x]
1637 O_FPU(00,F000,6) , // #42 [ref=1x]
1638 O_FPU(00,F800,7) , // #43 [ref=1x]
1639 O_FPU(00,DD00,_) , // #44 [ref=3x]
1640 O_FPU(00,0000,0) , // #45 [ref=3x]
1641 O_FPU(00,0000,2) , // #46 [ref=3x]
1642 O_FPU(00,0000,3) , // #47 [ref=3x]
1643 O_FPU(00,0000,7) , // #48 [ref=3x]
1644 O_FPU(00,0000,1) , // #49 [ref=2x]
1645 O_FPU(00,0000,5) , // #50 [ref=2x]
1646 O_FPU(00,C800,1) , // #51 [ref=1x]
1647 O_FPU(9B,0000,6) , // #52 [ref=2x]
1648 O_FPU(9B,0000,7) , // #53 [ref=2x]
1649 O_FPU(00,E000,4) , // #54 [ref=1x]
1650 O_FPU(00,E800,5) , // #55 [ref=1x]
1651 O_FPU(00,0000,_) , // #56 [ref=1x]
1652 O(000F00,00,0,0,1,0,0,_ ), // #57 [ref=1x]
1653 O(000000,00,5,0,0,0,0,_ ), // #58 [ref=3x]
1654 V(660F00,00,0,1,0,0,0,_ ), // #59 [ref=7x]
1655 V(660F00,00,0,1,1,0,0,_ ), // #60 [ref=6x]
1656 V(000F00,00,0,1,1,0,0,_ ), // #61 [ref=7x]
1657 V(000F00,00,0,1,0,0,0,_ ), // #62 [ref=8x]
1658 V(660F00,00,0,0,0,0,0,_ ), // #63 [ref=15x]
1659 V(660F00,00,0,0,1,0,0,_ ), // #64 [ref=4x]
1660 V(000F00,00,0,0,1,0,0,_ ), // #65 [ref=4x]
1661 V(000F00,00,0,0,0,0,0,_ ), // #66 [ref=10x]
1662 V(660F3A,00,0,0,0,0,0,_ ), // #67 [ref=45x]
1663 V(660F3A,00,0,0,1,0,0,_ ), // #68 [ref=4x]
1664 O(000F00,00,2,0,0,0,0,_ ), // #69 [ref=5x]
1665 O(000F00,00,5,0,0,0,0,_ ), // #70 [ref=4x]
1666 O(000F00,00,3,0,0,0,0,_ ), // #71 [ref=5x]
1667 V(XOP_M9,00,0,0,0,0,0,_ ), // #72 [ref=32x]
1668 O(000F00,00,6,0,0,0,0,_ ), // #73 [ref=5x]
1669 V(XOP_MA,00,0,0,0,0,0,_ ), // #74 [ref=1x]
1670 V(XOP_MA,00,1,0,0,0,0,_ ), // #75 [ref=1x]
1671 O(000F38,00,0,0,0,0,0,_ ), // #76 [ref=23x]
1672 V(F20F38,00,0,0,0,0,0,_ ), // #77 [ref=3x]
1673 O(000000,00,3,0,0,0,0,_ ), // #78 [ref=3x]
1674 O(000F3A,00,0,0,0,0,0,_ ), // #79 [ref=4x]
1675 O(F30000,00,0,0,0,0,0,_ ), // #80 [ref=1x]
1676 O(000F0F,00,0,0,0,0,0,_ ), // #81 [ref=26x]
1677 V(F30F38,00,0,0,0,0,0,_ ), // #82 [ref=2x]
1678 O(000F3A,00,0,0,1,0,0,_ ), // #83 [ref=1x]
1679 O(660F3A,00,0,0,1,0,0,_ ), // #84 [ref=1x]
1680 O(F30F00,00,1,0,0,0,0,_ ), // #85 [ref=1x]
1681 O(F30F00,00,7,0,0,0,0,_ ), // #86 [ref=1x]
1682 V(F20F3A,00,0,0,0,0,0,_ ), // #87 [ref=1x]
1683 V(660F38,00,0,0,0,0,0,_ ), // #88 [ref=22x]
1684 O(000F00,00,4,0,0,0,0,_ ), // #89 [ref=4x]
1685 V(XOP_M9,00,7,0,0,0,0,_ ), // #90 [ref=1x]
1686 V(XOP_M9,00,4,0,0,0,0,_ ), // #91 [ref=1x]
1687 E(F20F38,00,0,2,0,0,2,T4X), // #92 [ref=6x]
1688 V(660F00,00,0,0,0,1,4,FV ), // #93 [ref=22x]
1689 V(000F00,00,0,0,0,0,4,FV ), // #94 [ref=16x]
1690 V(F20F00,00,0,0,0,1,3,T1S), // #95 [ref=10x]
1691 V(F30F00,00,0,0,0,0,2,T1S), // #96 [ref=10x]
1692 V(F20F00,00,0,0,0,0,0,_ ), // #97 [ref=4x]
1693 V(660F38,00,0,0,0,0,4,FVM), // #98 [ref=14x]
1694 E(660F3A,00,0,0,0,0,4,FV ), // #99 [ref=14x]
1695 E(660F3A,00,0,0,0,1,4,FV ), // #100 [ref=14x]
1696 E(660F38,00,0,0,0,0,4,FVM), // #101 [ref=9x]
1697 E(660F38,00,0,0,0,0,4,FV ), // #102 [ref=22x]
1698 E(660F38,00,0,0,0,1,4,FV ), // #103 [ref=28x]
1699 E(660F38,00,0,0,0,1,4,FVM), // #104 [ref=9x]
1700 V(660F38,00,0,1,0,0,0,_ ), // #105 [ref=2x]
1701 E(660F38,00,0,0,0,0,3,T2 ), // #106 [ref=2x]
1702 E(660F38,00,0,0,0,0,4,T4 ), // #107 [ref=2x]
1703 E(660F38,00,0,2,0,0,5,T8 ), // #108 [ref=2x]
1704 E(660F38,00,0,0,0,1,4,T2 ), // #109 [ref=2x]
1705 E(660F38,00,0,2,0,1,5,T4 ), // #110 [ref=2x]
1706 V(660F38,00,0,0,0,1,3,T1S), // #111 [ref=2x]
1707 V(660F38,00,0,0,0,0,2,T1S), // #112 [ref=14x]
1708 V(660F00,00,0,0,0,1,3,T1S), // #113 [ref=5x]
1709 V(000F00,00,0,0,0,0,2,T1S), // #114 [ref=2x]
1710 E(660F38,00,0,0,0,1,3,T1S), // #115 [ref=14x]
1711 E(660F38,00,0,0,0,0,2,T1S), // #116 [ref=14x]
1712 V(F30F00,00,0,0,0,0,3,HV ), // #117 [ref=1x]
1713 E(F20F38,00,0,0,0,0,0,_ ), // #118 [ref=1x]
1714 E(F30F38,00,0,0,0,0,0,_ ), // #119 [ref=7x]
1715 V(F20F00,00,0,0,0,1,4,FV ), // #120 [ref=1x]
1716 E(660F00,00,0,0,0,1,4,FV ), // #121 [ref=9x]
1717 E(000F00,00,0,0,0,1,4,FV ), // #122 [ref=3x]
1718 V(660F38,00,0,0,0,0,3,HVM), // #123 [ref=7x]
1719 V(660F00,00,0,0,0,0,4,FV ), // #124 [ref=11x]
1720 V(000F00,00,0,0,0,0,4,HV ), // #125 [ref=1x]
1721 V(660F3A,00,0,0,0,0,3,HVM), // #126 [ref=1x]
1722 E(660F00,00,0,0,0,0,3,HV ), // #127 [ref=4x]
1723 E(000F00,00,0,0,0,0,4,FV ), // #128 [ref=2x]
1724 E(F30F00,00,0,0,0,1,4,FV ), // #129 [ref=2x]
1725 V(F20F00,00,0,0,0,0,3,T1F), // #130 [ref=2x]
1726 E(F20F00,00,0,0,0,0,3,T1F), // #131 [ref=2x]
1727 V(F20F00,00,0,0,0,0,2,T1W), // #132 [ref=1x]
1728 V(F30F00,00,0,0,0,0,2,T1W), // #133 [ref=1x]
1729 V(F30F00,00,0,0,0,0,2,T1F), // #134 [ref=2x]
1730 E(F30F00,00,0,0,0,0,2,T1F), // #135 [ref=2x]
1731 V(F30F00,00,0,0,0,0,4,FV ), // #136 [ref=1x]
1732 E(F30F00,00,0,0,0,0,3,HV ), // #137 [ref=1x]
1733 E(F20F00,00,0,0,0,0,4,FV ), // #138 [ref=1x]
1734 E(F20F00,00,0,0,0,1,4,FV ), // #139 [ref=1x]
1735 E(F20F00,00,0,0,0,0,2,T1W), // #140 [ref=1x]
1736 E(F30F00,00,0,0,0,0,2,T1W), // #141 [ref=1x]
1737 E(660F3A,00,0,0,0,0,4,FVM), // #142 [ref=5x]
1738 E(660F38,00,0,2,0,1,4,FV ), // #143 [ref=3x]
1739 E(660F38,00,0,2,0,0,4,FV ), // #144 [ref=3x]
1740 V(660F3A,00,0,1,0,0,0,_ ), // #145 [ref=6x]
1741 E(660F3A,00,0,0,0,0,4,T4 ), // #146 [ref=4x]
1742 E(660F3A,00,0,2,0,0,5,T8 ), // #147 [ref=4x]
1743 E(660F3A,00,0,0,0,1,4,T2 ), // #148 [ref=4x]
1744 E(660F3A,00,0,2,0,1,5,T4 ), // #149 [ref=4x]
1745 V(660F3A,00,0,0,0,0,2,T1S), // #150 [ref=4x]
1746 E(660F3A,00,0,0,0,1,3,T1S), // #151 [ref=6x]
1747 E(660F3A,00,0,0,0,0,2,T1S), // #152 [ref=6x]
1748 V(660F38,00,0,0,1,1,4,FV ), // #153 [ref=20x]
1749 V(660F38,00,0,0,0,0,4,FV ), // #154 [ref=32x]
1750 V(660F38,00,0,0,1,1,3,T1S), // #155 [ref=12x]
1751 V(660F38,00,0,0,1,0,0,_ ), // #156 [ref=5x]
1752 E(660F38,00,1,2,0,1,3,T1S), // #157 [ref=2x]
1753 E(660F38,00,1,2,0,0,2,T1S), // #158 [ref=2x]
1754 E(660F38,00,2,2,0,1,3,T1S), // #159 [ref=2x]
1755 E(660F38,00,2,2,0,0,2,T1S), // #160 [ref=2x]
1756 V(660F3A,00,0,0,1,1,4,FV ), // #161 [ref=2x]
1757 V(000F00,00,2,0,0,0,0,_ ), // #162 [ref=1x]
1758 V(660F00,00,0,0,0,1,4,FVM), // #163 [ref=3x]
1759 V(000F00,00,0,0,0,0,4,FVM), // #164 [ref=3x]
1760 V(660F00,00,0,0,0,0,2,T1S), // #165 [ref=1x]
1761 V(F20F00,00,0,0,0,1,3,DUP), // #166 [ref=1x]
1762 E(660F00,00,0,0,0,0,4,FVM), // #167 [ref=1x]
1763 E(660F00,00,0,0,0,1,4,FVM), // #168 [ref=1x]
1764 V(F30F00,00,0,0,0,0,0,_ ), // #169 [ref=3x]
1765 E(F20F00,00,0,0,0,1,4,FVM), // #170 [ref=1x]
1766 E(F30F00,00,0,0,0,0,4,FVM), // #171 [ref=1x]
1767 E(F30F00,00,0,0,0,1,4,FVM), // #172 [ref=1x]
1768 E(F20F00,00,0,0,0,0,4,FVM), // #173 [ref=1x]
1769 V(000F00,00,0,0,0,0,3,T2 ), // #174 [ref=2x]
1770 V(660F00,00,0,0,0,0,4,FVM), // #175 [ref=33x]
1771 V(F30F00,00,0,0,0,0,4,FVM), // #176 [ref=3x]
1772 O(F30F00,00,6,0,0,0,0,_ ), // #177 [ref=1x]
1773 V(660F3A,00,0,0,0,0,4,FVM), // #178 [ref=2x]
1774 E(660F00,00,0,0,0,0,4,FV ), // #179 [ref=5x]
1775 V(660F38,00,0,0,0,0,0,T1S), // #180 [ref=1x]
1776 E(F30F38,00,0,0,0,1,0,_ ), // #181 [ref=5x]
1777 V(660F38,00,0,0,0,0,1,T1S), // #182 [ref=1x]
1778 V(XOP_M8,00,0,0,0,0,0,_ ), // #183 [ref=22x]
1779 V(660F38,00,0,0,0,1,4,FVM), // #184 [ref=2x]
1780 E(660F3A,00,0,0,0,1,4,FVM), // #185 [ref=2x]
1781 E(660F38,00,0,0,0,0,0,T1S), // #186 [ref=2x]
1782 E(660F38,00,0,0,0,1,1,T1S), // #187 [ref=2x]
1783 V(660F38,00,0,0,0,1,4,FV ), // #188 [ref=3x]
1784 E(660F38,00,0,0,1,1,4,FV ), // #189 [ref=1x]
1785 V(660F3A,00,0,0,0,0,0,T1S), // #190 [ref=2x]
1786 V(660F3A,00,0,0,1,1,3,T1S), // #191 [ref=2x]
1787 V(660F3A,00,0,0,0,0,1,T1S), // #192 [ref=1x]
1788 V(660F00,00,0,0,0,0,1,T1S), // #193 [ref=1x]
1789 E(F30F38,00,0,0,0,0,2,QVM), // #194 [ref=6x]
1790 E(F30F38,00,0,0,0,0,3,HVM), // #195 [ref=9x]
1791 E(F30F38,00,0,0,0,0,1,OVM), // #196 [ref=3x]
1792 V(660F38,00,0,0,0,0,2,QVM), // #197 [ref=4x]
1793 V(660F38,00,0,0,0,0,1,OVM), // #198 [ref=2x]
1794 E(660F00,00,1,0,0,0,4,FV ), // #199 [ref=1x]
1795 E(660F00,00,1,0,0,1,4,FV ), // #200 [ref=1x]
1796 V(F20F00,00,0,0,0,0,4,FVM), // #201 [ref=1x]
1797 V(660F00,00,0,0,0,0,4,128), // #202 [ref=5x]
1798 V(660F00,00,7,0,0,0,4,FVM), // #203 [ref=1x]
1799 V(660F00,00,0,0,0,1,4,128), // #204 [ref=2x]
1800 E(660F00,00,0,0,0,1,4,128), // #205 [ref=1x]
1801 V(660F00,00,3,0,0,0,4,FVM), // #206 [ref=1x]
1802 E(F30F38,00,0,0,0,0,4,FVM), // #207 [ref=1x]
1803 E(F30F38,00,0,0,0,0,4,FV ), // #208 [ref=1x]
1804 E(F30F38,00,0,0,0,1,4,FV ), // #209 [ref=1x]
1805 E(F30F38,00,0,0,0,1,4,FVM), // #210 [ref=1x]
1806 E(660F38,00,5,2,0,1,3,T1S), // #211 [ref=2x]
1807 E(660F38,00,5,2,0,0,2,T1S), // #212 [ref=2x]
1808 E(660F38,00,6,2,0,1,3,T1S), // #213 [ref=2x]
1809 E(660F38,00,6,2,0,0,2,T1S), // #214 [ref=2x]
1810 V(000F00,00,3,0,0,0,0,_ ), // #215 [ref=1x]
1811 O(F30F00,00,2,0,0,0,0,_ ), // #216 [ref=1x]
1812 O(F30F00,00,3,0,0,0,0,_ ), // #217 [ref=1x]
1813 O(000F00,00,5,0,1,0,0,_ ), // #218 [ref=2x]
1814 O(000F00,00,3,0,1,0,0,_ ), // #219 [ref=1x]
1815 O(000F00,00,4,0,1,0,0,_ ), // #220 [ref=2x]
1816 O(000F00,00,6,0,1,0,0,_ ) // #221 [ref=1x]
1817 };
1818 // ----------------------------------------------------------------------------
1819 // ${MainOpcodeTable:End}
1820
1821 // ${AltOpcodeTable:Begin}
1822 // ------------------- Automatically generated, do not edit -------------------
// Alternative opcode table. Entry #0 is zero and is shared by every
// instruction that has only a single encoding ([ref=1359x]); the remaining
// entries hold a second opcode form for instructions that need one.
// Same `#N` index / `[ref=Nx]` reference-count convention and O/V/E/O_FPU
// packing macros as _mainOpcodeTable above.
// NOTE(review): presumably each non-zero entry is selected by an alt-opcode
// index stored in the instruction rows — confirm against the table
// generator; do not hand-edit this generated region.
1823 const uint32_t InstDB::_altOpcodeTable[] = {
1824 0 , // #0 [ref=1359x]
1825 O(660F00,1B,_,_,_,_,_,_ ), // #1 [ref=1x]
1826 O(000F00,BA,4,_,x,_,_,_ ), // #2 [ref=1x]
1827 O(000F00,BA,7,_,x,_,_,_ ), // #3 [ref=1x]
1828 O(000F00,BA,6,_,x,_,_,_ ), // #4 [ref=1x]
1829 O(000F00,BA,5,_,x,_,_,_ ), // #5 [ref=1x]
1830 O(000000,48,_,_,x,_,_,_ ), // #6 [ref=1x]
1831 O(660F00,78,0,_,_,_,_,_ ), // #7 [ref=1x]
1832 O_FPU(00,00DF,5) , // #8 [ref=1x]
1833 O_FPU(00,00DF,7) , // #9 [ref=1x]
1834 O_FPU(00,00DD,1) , // #10 [ref=1x]
1835 O_FPU(00,00DB,5) , // #11 [ref=1x]
1836 O_FPU(00,DFE0,_) , // #12 [ref=1x]
1837 O(000000,DB,7,_,_,_,_,_ ), // #13 [ref=1x]
1838 O_FPU(9B,DFE0,_) , // #14 [ref=1x]
1839 O(000000,E4,_,_,_,_,_,_ ), // #15 [ref=1x]
1840 O(000000,40,_,_,x,_,_,_ ), // #16 [ref=1x]
1841 O(F20F00,78,_,_,_,_,_,_ ), // #17 [ref=1x]
1842 O(000000,77,_,_,_,_,_,_ ), // #18 [ref=2x]
1843 O(000000,73,_,_,_,_,_,_ ), // #19 [ref=3x]
1844 O(000000,72,_,_,_,_,_,_ ), // #20 [ref=3x]
1845 O(000000,76,_,_,_,_,_,_ ), // #21 [ref=2x]
1846 O(000000,74,_,_,_,_,_,_ ), // #22 [ref=2x]
1847 O(000000,E3,_,_,_,_,_,_ ), // #23 [ref=1x]
1848 O(000000,7F,_,_,_,_,_,_ ), // #24 [ref=2x]
1849 O(000000,7D,_,_,_,_,_,_ ), // #25 [ref=2x]
1850 O(000000,7C,_,_,_,_,_,_ ), // #26 [ref=2x]
1851 O(000000,7E,_,_,_,_,_,_ ), // #27 [ref=2x]
1852 O(000000,EB,_,_,_,_,_,_ ), // #28 [ref=1x]
1853 O(000000,75,_,_,_,_,_,_ ), // #29 [ref=2x]
1854 O(000000,71,_,_,_,_,_,_ ), // #30 [ref=1x]
1855 O(000000,7B,_,_,_,_,_,_ ), // #31 [ref=2x]
1856 O(000000,79,_,_,_,_,_,_ ), // #32 [ref=1x]
1857 O(000000,70,_,_,_,_,_,_ ), // #33 [ref=1x]
1858 O(000000,7A,_,_,_,_,_,_ ), // #34 [ref=2x]
1859 O(000000,78,_,_,_,_,_,_ ), // #35 [ref=1x]
1860 V(660F00,92,_,0,0,_,_,_ ), // #36 [ref=1x]
1861 V(F20F00,92,_,0,0,_,_,_ ), // #37 [ref=1x]
1862 V(F20F00,92,_,0,1,_,_,_ ), // #38 [ref=1x]
1863 V(000F00,92,_,0,0,_,_,_ ), // #39 [ref=1x]
1864 O(000000,E2,_,_,_,_,_,_ ), // #40 [ref=1x]
1865 O(000000,E1,_,_,_,_,_,_ ), // #41 [ref=1x]
1866 O(000000,E0,_,_,_,_,_,_ ), // #42 [ref=1x]
1867 O(660F00,29,_,_,_,_,_,_ ), // #43 [ref=1x]
1868 O(000F00,29,_,_,_,_,_,_ ), // #44 [ref=1x]
1869 O(000F38,F1,_,_,x,_,_,_ ), // #45 [ref=1x]
1870 O(000F00,7E,_,_,_,_,_,_ ), // #46 [ref=1x]
1871 O(660F00,7F,_,_,_,_,_,_ ), // #47 [ref=1x]
1872 O(F30F00,7F,_,_,_,_,_,_ ), // #48 [ref=1x]
1873 O(660F00,17,_,_,_,_,_,_ ), // #49 [ref=1x]
1874 O(000F00,17,_,_,_,_,_,_ ), // #50 [ref=1x]
1875 O(660F00,13,_,_,_,_,_,_ ), // #51 [ref=1x]
1876 O(000F00,13,_,_,_,_,_,_ ), // #52 [ref=1x]
1877 O(660F00,E7,_,_,_,_,_,_ ), // #53 [ref=1x]
1878 O(660F00,2B,_,_,_,_,_,_ ), // #54 [ref=1x]
1879 O(000F00,2B,_,_,_,_,_,_ ), // #55 [ref=1x]
1880 O(000F00,E7,_,_,_,_,_,_ ), // #56 [ref=1x]
1881 O(F20F00,2B,_,_,_,_,_,_ ), // #57 [ref=1x]
1882 O(F30F00,2B,_,_,_,_,_,_ ), // #58 [ref=1x]
1883 O(000F00,7E,_,_,x,_,_,_ ), // #59 [ref=1x]
1884 O(F20F00,11,_,_,_,_,_,_ ), // #60 [ref=1x]
1885 O(F30F00,11,_,_,_,_,_,_ ), // #61 [ref=1x]
1886 O(660F00,11,_,_,_,_,_,_ ), // #62 [ref=1x]
1887 O(000F00,11,_,_,_,_,_,_ ), // #63 [ref=1x]
1888 O(000000,E6,_,_,_,_,_,_ ), // #64 [ref=1x]
1889 O(000F3A,15,_,_,_,_,_,_ ), // #65 [ref=1x]
1890 O(000000,58,_,_,_,_,_,_ ), // #66 [ref=1x]
1891 O(000F00,72,6,_,_,_,_,_ ), // #67 [ref=1x]
1892 O(660F00,73,7,_,_,_,_,_ ), // #68 [ref=1x]
1893 O(000F00,73,6,_,_,_,_,_ ), // #69 [ref=1x]
1894 O(000F00,71,6,_,_,_,_,_ ), // #70 [ref=1x]
1895 O(000F00,72,4,_,_,_,_,_ ), // #71 [ref=1x]
1896 O(000F00,71,4,_,_,_,_,_ ), // #72 [ref=1x]
1897 O(000F00,72,2,_,_,_,_,_ ), // #73 [ref=1x]
1898 O(660F00,73,3,_,_,_,_,_ ), // #74 [ref=1x]
1899 O(000F00,73,2,_,_,_,_,_ ), // #75 [ref=1x]
1900 O(000F00,71,2,_,_,_,_,_ ), // #76 [ref=1x]
1901 O(000000,50,_,_,_,_,_,_ ), // #77 [ref=1x]
1902 O(000000,F6,_,_,x,_,_,_ ), // #78 [ref=1x]
1903 V(660F38,92,_,x,_,1,3,T1S), // #79 [ref=1x]
1904 V(660F38,92,_,x,_,0,2,T1S), // #80 [ref=1x]
1905 V(660F38,93,_,x,_,1,3,T1S), // #81 [ref=1x]
1906 V(660F38,93,_,x,_,0,2,T1S), // #82 [ref=1x]
1907 V(660F38,2F,_,x,0,_,_,_ ), // #83 [ref=1x]
1908 V(660F38,2E,_,x,0,_,_,_ ), // #84 [ref=1x]
1909 V(660F00,29,_,x,I,1,4,FVM), // #85 [ref=1x]
1910 V(000F00,29,_,x,I,0,4,FVM), // #86 [ref=1x]
1911 V(660F00,7E,_,0,0,0,2,T1S), // #87 [ref=1x]
1912 V(660F00,7F,_,x,I,_,_,_ ), // #88 [ref=1x]
1913 E(660F00,7F,_,x,_,0,4,FVM), // #89 [ref=1x]
1914 E(660F00,7F,_,x,_,1,4,FVM), // #90 [ref=1x]
1915 V(F30F00,7F,_,x,I,_,_,_ ), // #91 [ref=1x]
1916 E(F20F00,7F,_,x,_,1,4,FVM), // #92 [ref=1x]
1917 E(F30F00,7F,_,x,_,0,4,FVM), // #93 [ref=1x]
1918 E(F30F00,7F,_,x,_,1,4,FVM), // #94 [ref=1x]
1919 E(F20F00,7F,_,x,_,0,4,FVM), // #95 [ref=1x]
1920 V(660F00,17,_,0,I,1,3,T1S), // #96 [ref=1x]
1921 V(000F00,17,_,0,I,0,3,T2 ), // #97 [ref=1x]
1922 V(660F00,13,_,0,I,1,3,T1S), // #98 [ref=1x]
1923 V(000F00,13,_,0,I,0,3,T2 ), // #99 [ref=1x]
1924 V(660F00,7E,_,0,I,1,3,T1S), // #100 [ref=1x]
1925 V(F20F00,11,_,I,I,1,3,T1S), // #101 [ref=1x]
1926 V(F30F00,11,_,I,I,0,2,T1S), // #102 [ref=1x]
1927 V(660F00,11,_,x,I,1,4,FVM), // #103 [ref=1x]
1928 V(000F00,11,_,x,I,0,4,FVM), // #104 [ref=1x]
1929 E(660F38,7A,_,x,0,0,0,T1S), // #105 [ref=1x]
1930 E(660F38,7C,_,x,0,0,0,T1S), // #106 [ref=1x]
1931 E(660F38,7C,_,x,0,1,0,T1S), // #107 [ref=1x]
1932 E(660F38,7B,_,x,0,0,0,T1S), // #108 [ref=1x]
1933 V(660F3A,05,_,x,0,1,4,FV ), // #109 [ref=1x]
1934 V(660F3A,04,_,x,0,0,4,FV ), // #110 [ref=1x]
1935 V(660F3A,01,_,x,1,1,4,FV ), // #111 [ref=1x]
1936 V(660F3A,00,_,x,1,1,4,FV ), // #112 [ref=1x]
1937 V(660F38,90,_,x,_,0,2,T1S), // #113 [ref=1x]
1938 V(660F38,90,_,x,_,1,3,T1S), // #114 [ref=1x]
1939 V(660F38,91,_,x,_,0,2,T1S), // #115 [ref=1x]
1940 V(660F38,91,_,x,_,1,3,T1S), // #116 [ref=1x]
1941 V(660F38,8E,_,x,0,_,_,_ ), // #117 [ref=1x]
1942 V(660F38,8E,_,x,1,_,_,_ ), // #118 [ref=1x]
1943 V(XOP_M8,C0,_,0,x,_,_,_ ), // #119 [ref=1x]
1944 V(XOP_M8,C2,_,0,x,_,_,_ ), // #120 [ref=1x]
1945 V(XOP_M8,C3,_,0,x,_,_,_ ), // #121 [ref=1x]
1946 V(XOP_M8,C1,_,0,x,_,_,_ ), // #122 [ref=1x]
1947 V(660F00,72,6,x,I,0,4,FV ), // #123 [ref=1x]
1948 V(660F00,73,6,x,I,1,4,FV ), // #124 [ref=1x]
1949 V(660F00,71,6,x,I,I,4,FVM), // #125 [ref=1x]
1950 V(660F00,72,4,x,I,0,4,FV ), // #126 [ref=1x]
1951 E(660F00,72,4,x,_,1,4,FV ), // #127 [ref=1x]
1952 V(660F00,71,4,x,I,I,4,FVM), // #128 [ref=1x]
1953 V(660F00,72,2,x,I,0,4,FV ), // #129 [ref=1x]
1954 V(660F00,73,2,x,I,1,4,FV ), // #130 [ref=1x]
1955 V(660F00,71,2,x,I,I,4,FVM) // #131 [ref=1x]
1956 };
1957 // ----------------------------------------------------------------------------
1958 // ${AltOpcodeTable:End}
1959
1960 #undef O_FPU
1961 #undef O
1962 #undef V
1963 #undef E
1964
1965 // ============================================================================
1966 // [asmjit::x86::InstDB - CommonInfoTableA]
1967 // ============================================================================
1968
1969 // ${InstCommonTable:Begin}
1970 // ------------------- Automatically generated, do not edit -------------------
// Shorthand token-pasting expanders used by the generated _commonInfoTable
// entries below:
//   F(X)          -> InstDB::kFlagX      (instruction flag bits, OR-ed together)
//   CONTROL(X)    -> Inst::kControlX     (control-flow kind: None/Branch/Jump/Call/Return)
//   SINGLE_REG(X) -> InstDB::kSingleRegX (same-register operand handling: None/RO/WO)
1971 #define F(VAL) InstDB::kFlag##VAL
1972 #define CONTROL(VAL) Inst::kControl##VAL
1973 #define SINGLE_REG(VAL) InstDB::kSingleReg##VAL
1974 const InstDB::CommonInfo InstDB::_commonInfoTable[] = {
1975 { 0 , 0 , 0 , CONTROL(None) , SINGLE_REG(None), 0 }, // #0 [ref=1x]
1976 { 0 , 339, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #1 [ref=4x]
1977 { 0 , 340, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #2 [ref=2x]
1978 { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(None), 0 }, // #3 [ref=2x]
1979 { 0 , 151, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #4 [ref=2x]
1980 { F(Vec) , 70 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #5 [ref=54x]
1981 { F(Vec) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #6 [ref=19x]
1982 { F(Vec) , 222, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #7 [ref=16x]
1983 { F(Vec) , 183, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #8 [ref=20x]
1984 { F(Lock)|F(XAcquire)|F(XRelease) , 28 , 11, CONTROL(None) , SINGLE_REG(RO) , 0 }, // #9 [ref=1x]
1985 { F(Vex) , 237, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #10 [ref=3x]
1986 { F(Vec) , 70 , 1 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #11 [ref=12x]
1987 { 0 , 341, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #12 [ref=1x]
1988 { F(Vex) , 239, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #13 [ref=5x]
1989 { F(Vex) , 151, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #14 [ref=12x]
1990 { F(Vec) , 342, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #15 [ref=4x]
1991 { 0 , 241, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #16 [ref=3x]
1992 { F(Mib) , 343, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #17 [ref=1x]
1993 { 0 , 344, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #18 [ref=1x]
1994 { 0 , 243, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #19 [ref=1x]
1995 { F(Mib) , 345, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #20 [ref=1x]
1996 { 0 , 245, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #21 [ref=1x]
1997 { 0 , 150, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #22 [ref=35x]
1998 { 0 , 346, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #23 [ref=3x]
1999 { 0 , 114, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #24 [ref=1x]
2000 { F(Lock)|F(XAcquire)|F(XRelease) , 114, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #25 [ref=3x]
2001 { F(Rep)|F(RepIgnored) , 247, 2 , CONTROL(Call) , SINGLE_REG(None), 0 }, // #26 [ref=1x]
2002 { 0 , 347, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #27 [ref=1x]
2003 { 0 , 348, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #28 [ref=2x]
2004 { 0 , 322, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #29 [ref=1x]
2005 { 0 , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #30 [ref=74x]
2006 { 0 , 349, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #31 [ref=24x]
2007 { 0 , 350, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #32 [ref=1x]
2008 { 0 , 16 , 12, CONTROL(None) , SINGLE_REG(None), 0 }, // #33 [ref=1x]
2009 { F(Rep) , 351, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #34 [ref=1x]
2010 { F(Vec) , 352, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #35 [ref=2x]
2011 { F(Vec) , 353, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #36 [ref=3x]
2012 { F(Lock)|F(XAcquire)|F(XRelease) , 118, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #37 [ref=1x]
2013 { F(Lock)|F(XAcquire)|F(XRelease) , 354, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #38 [ref=1x]
2014 { F(Lock)|F(XAcquire)|F(XRelease) , 355, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #39 [ref=1x]
2015 { 0 , 356, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #40 [ref=1x]
2016 { 0 , 357, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #41 [ref=1x]
2017 { 0 , 249, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #42 [ref=1x]
2018 { F(Mmx)|F(Vec) , 358, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #43 [ref=2x]
2019 { F(Mmx)|F(Vec) , 359, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #44 [ref=2x]
2020 { F(Mmx)|F(Vec) , 360, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #45 [ref=2x]
2021 { F(Vec) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #46 [ref=2x]
2022 { F(Vec) , 362, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #47 [ref=2x]
2023 { F(Vec) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #48 [ref=2x]
2024 { 0 , 364, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #49 [ref=1x]
2025 { 0 , 365, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #50 [ref=2x]
2026 { F(Lock)|F(XAcquire)|F(XRelease) , 251, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #51 [ref=2x]
2027 { 0 , 39 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #52 [ref=3x]
2028 { F(Mmx) , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #53 [ref=1x]
2029 { 0 , 253, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #54 [ref=2x]
2030 { 0 , 366, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #55 [ref=1x]
2031 { F(Vec) , 367, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #56 [ref=2x]
2032 { F(Vec) , 255, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #57 [ref=1x]
2033 { F(FpuM32)|F(FpuM64) , 153, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #58 [ref=6x]
2034 { 0 , 257, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #59 [ref=9x]
2035 { F(FpuM80) , 368, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #60 [ref=2x]
2036 { 0 , 258, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #61 [ref=13x]
2037 { F(FpuM32)|F(FpuM64) , 259, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #62 [ref=2x]
2038 { F(FpuM16)|F(FpuM32) , 369, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #63 [ref=9x]
2039 { F(FpuM16)|F(FpuM32)|F(FpuM64) , 370, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #64 [ref=3x]
2040 { F(FpuM32)|F(FpuM64)|F(FpuM80) , 371, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #65 [ref=2x]
2041 { F(FpuM16) , 372, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #66 [ref=3x]
2042 { F(FpuM16) , 373, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #67 [ref=2x]
2043 { F(FpuM32)|F(FpuM64) , 260, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #68 [ref=1x]
2044 { 0 , 374, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #69 [ref=2x]
2045 { 0 , 39 , 10, CONTROL(None) , SINGLE_REG(None), 0 }, // #70 [ref=1x]
2046 { 0 , 375, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #71 [ref=1x]
2047 { F(Rep) , 376, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #72 [ref=1x]
2048 { F(Vec) , 261, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #73 [ref=1x]
2049 { 0 , 377, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #74 [ref=2x]
2050 { 0 , 378, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #75 [ref=8x]
2051 { 0 , 263, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #76 [ref=3x]
2052 { 0 , 265, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #77 [ref=1x]
2053 { 0 , 257, 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #78 [ref=3x]
2054 { 0 , 379, 1 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #79 [ref=1x]
2055 { F(Rep)|F(RepIgnored) , 267, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #80 [ref=30x]
2056 { F(Rep)|F(RepIgnored) , 269, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #81 [ref=1x]
2057 { F(Rep)|F(RepIgnored) , 271, 2 , CONTROL(Jump) , SINGLE_REG(None), 0 }, // #82 [ref=1x]
2058 { F(Vec)|F(Vex) , 380, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #83 [ref=27x]
2059 { F(Vec)|F(Vex) , 273, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #84 [ref=1x]
2060 { F(Vec)|F(Vex) , 275, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #85 [ref=1x]
2061 { F(Vec)|F(Vex) , 277, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #86 [ref=1x]
2062 { F(Vec)|F(Vex) , 279, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #87 [ref=1x]
2063 { F(Vec)|F(Vex) , 381, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #88 [ref=12x]
2064 { F(Vec)|F(Vex) , 382, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #89 [ref=8x]
2065 { 0 , 383, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #90 [ref=2x]
2066 { 0 , 281, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #91 [ref=1x]
2067 { F(Vec) , 192, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #92 [ref=2x]
2068 { 0 , 384, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #93 [ref=2x]
2069 { 0 , 283, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #94 [ref=2x]
2070 { 0 , 385, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #95 [ref=1x]
2071 { 0 , 156, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #96 [ref=3x]
2072 { 0 , 386, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #97 [ref=5x]
2073 { F(Vex) , 387, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #98 [ref=2x]
2074 { F(Rep) , 388, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #99 [ref=1x]
2075 { 0 , 269, 2 , CONTROL(Branch) , SINGLE_REG(None), 0 }, // #100 [ref=3x]
2076 { 0 , 285, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #101 [ref=1x]
2077 { F(Vex) , 389, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #102 [ref=2x]
2078 { F(Vec) , 390, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #103 [ref=1x]
2079 { F(Mmx) , 391, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #104 [ref=1x]
2080 { 0 , 392, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #105 [ref=2x]
2081 { F(XRelease) , 0 , 16, CONTROL(None) , SINGLE_REG(None), 0 }, // #106 [ref=1x]
2082 { F(Vec) , 70 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #107 [ref=6x]
2083 { 0 , 64 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #108 [ref=1x]
2084 { F(Mmx)|F(Vec) , 287, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #109 [ref=1x]
2085 { 0 , 393, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #110 [ref=1x]
2086 { 0 , 68 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #111 [ref=2x]
2087 { F(Mmx)|F(Vec) , 394, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #112 [ref=1x]
2088 { F(Vec) , 256, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #113 [ref=2x]
2089 { F(Vec) , 198, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #114 [ref=4x]
2090 { F(Vec) , 395, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #115 [ref=2x]
2091 { F(Vec) , 71 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #116 [ref=3x]
2092 { F(Mmx) , 396, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #117 [ref=1x]
2093 { F(Vec) , 98 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #118 [ref=1x]
2094 { F(Vec) , 201, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #119 [ref=1x]
2095 { F(Mmx)|F(Vec) , 94 , 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #120 [ref=1x]
2096 { F(Mmx)|F(Vec) , 397, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #121 [ref=1x]
2097 { F(Rep) , 398, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #122 [ref=1x]
2098 { F(Vec) , 97 , 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #123 [ref=1x]
2099 { F(Vec) , 289, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #124 [ref=1x]
2100 { 0 , 291, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #125 [ref=2x]
2101 { 0 , 399, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #126 [ref=1x]
2102 { F(Vex) , 293, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #127 [ref=1x]
2103 { 0 , 400, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #128 [ref=1x]
2104 { 0 , 401, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #129 [ref=1x]
2105 { F(Lock)|F(XAcquire)|F(XRelease) , 252, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #130 [ref=2x]
2106 { 0 , 295, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #131 [ref=1x]
2107 { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(RO) , 0 }, // #132 [ref=1x]
2108 { 0 , 402, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #133 [ref=1x]
2109 { F(Rep) , 403, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #134 [ref=1x]
2110 { F(Mmx)|F(Vec) , 297, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #135 [ref=40x]
2111 { F(Mmx)|F(Vec) , 299, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #136 [ref=1x]
2112 { F(Mmx)|F(Vec) , 297, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #137 [ref=6x]
2113 { F(Mmx)|F(Vec) , 297, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #138 [ref=16x]
2114 { F(Mmx) , 297, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #139 [ref=26x]
2115 { F(Vec) , 70 , 1 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #140 [ref=4x]
2116 { F(Vec) , 404, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #141 [ref=1x]
2117 { F(Vec) , 405, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #142 [ref=1x]
2118 { F(Vec) , 406, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #143 [ref=1x]
2119 { F(Vec) , 407, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #144 [ref=1x]
2120 { F(Vec) , 408, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #145 [ref=1x]
2121 { F(Vec) , 409, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #146 [ref=1x]
2122 { F(Mmx)|F(Vec) , 301, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #147 [ref=1x]
2123 { F(Vec) , 410, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #148 [ref=1x]
2124 { F(Vec) , 411, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #149 [ref=1x]
2125 { F(Vec) , 412, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #150 [ref=1x]
2126 { F(Mmx)|F(Vec) , 413, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #151 [ref=1x]
2127 { F(Mmx)|F(Vec) , 414, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #152 [ref=1x]
2128 { F(Vec) , 225, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #153 [ref=2x]
2129 { 0 , 122, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #154 [ref=1x]
2130 { 0 , 379, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #155 [ref=6x]
2131 { F(Mmx) , 299, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #156 [ref=1x]
2132 { F(Mmx)|F(Vec) , 303, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #157 [ref=8x]
2133 { F(Vec) , 415, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #158 [ref=2x]
2134 { 0 , 126, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #159 [ref=1x]
2135 { 0 , 416, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #160 [ref=8x]
2136 { 0 , 417, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #161 [ref=4x]
2137 { 0 , 418, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #162 [ref=6x]
2138 { 0 , 305, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #163 [ref=1x]
2139 { F(Rep)|F(RepIgnored) , 307, 2 , CONTROL(Return) , SINGLE_REG(None), 0 }, // #164 [ref=1x]
2140 { F(Vex) , 309, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #165 [ref=1x]
2141 { F(Lock)|F(XAcquire)|F(XRelease) , 16 , 12, CONTROL(None) , SINGLE_REG(WO) , 0 }, // #166 [ref=3x]
2142 { F(Rep) , 419, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #167 [ref=1x]
2143 { 0 , 420, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #168 [ref=30x]
2144 { 0 , 159, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #169 [ref=2x]
2145 { 0 , 421, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #170 [ref=3x]
2146 { F(Rep) , 422, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #171 [ref=1x]
2147 { 0 , 57 , 7 , CONTROL(None) , SINGLE_REG(None), 0 }, // #172 [ref=1x]
2148 { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 423, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #173 [ref=4x]
2149 { F(Vec)|F(Evex)|F(Avx512T4X)|F(Avx512KZ) , 424, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #174 [ref=2x]
2150 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #175 [ref=22x]
2151 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #176 [ref=22x]
2152 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #177 [ref=18x]
2153 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #178 [ref=17x]
2154 { F(Vec)|F(Vex) , 162, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #179 [ref=15x]
2155 { F(Vec)|F(Vex)|F(Evex) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #180 [ref=5x]
2156 { F(Vec)|F(Vex) , 70 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #181 [ref=17x]
2157 { F(Vec)|F(Vex) , 183, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #182 [ref=1x]
2158 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #183 [ref=4x]
2159 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #184 [ref=4x]
2160 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #185 [ref=10x]
2161 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #186 [ref=12x]
2162 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #187 [ref=2x]
2163 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #188 [ref=6x]
2164 { F(Vec)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #189 [ref=13x]
2165 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #190 [ref=16x]
2166 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #191 [ref=19x]
2167 { F(Vec)|F(Vex) , 165, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #192 [ref=6x]
2168 { F(Vec)|F(Vex) , 311, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #193 [ref=3x]
2169 { F(Vec)|F(Vex) , 427, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #194 [ref=2x]
2170 { F(Vec)|F(Evex)|F(Avx512KZ) , 428, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #195 [ref=1x]
2171 { F(Vec)|F(Evex)|F(Avx512KZ) , 429, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #196 [ref=4x]
2172 { F(Vec)|F(Evex)|F(Avx512KZ) , 430, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #197 [ref=4x]
2173 { F(Vec)|F(Evex)|F(Avx512KZ) , 431, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #198 [ref=1x]
2174 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 428, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #199 [ref=1x]
2175 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 432, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #200 [ref=1x]
2176 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 168, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #201 [ref=1x]
2177 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 168, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #202 [ref=1x]
2178 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 433, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #203 [ref=1x]
2179 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 434, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #204 [ref=1x]
2180 { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #205 [ref=2x]
2181 { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 222, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #206 [ref=2x]
2182 { F(Vec)|F(Evex)|F(Avx512KZ) , 171, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #207 [ref=6x]
2183 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #208 [ref=1x]
2184 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #209 [ref=3x]
2185 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #210 [ref=1x]
2186 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #211 [ref=2x]
2187 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #212 [ref=4x]
2188 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #213 [ref=3x]
2189 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #214 [ref=1x]
2190 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #215 [ref=1x]
2191 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 180, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #216 [ref=1x]
2192 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #217 [ref=2x]
2193 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #218 [ref=2x]
2194 { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #219 [ref=1x]
2195 { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #220 [ref=1x]
2196 { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 435, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #221 [ref=2x]
2197 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #222 [ref=3x]
2198 { F(Vec)|F(Vex)|F(Evex)|F(Avx512ER_SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #223 [ref=1x]
2199 { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #224 [ref=1x]
2200 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #225 [ref=1x]
2201 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #226 [ref=3x]
2202 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 313, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #227 [ref=1x]
2203 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #228 [ref=1x]
2204 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #229 [ref=2x]
2205 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #230 [ref=2x]
2206 { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #231 [ref=1x]
2207 { F(Vec)|F(Evex)|F(Avx512SAE) , 361, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #232 [ref=1x]
2208 { F(Vec)|F(Vex)|F(Evex)|F(Avx512SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #233 [ref=1x]
2209 { F(Vec)|F(Evex)|F(Avx512SAE) , 363, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #234 [ref=1x]
2210 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #235 [ref=1x]
2211 { F(Vec)|F(Evex)|F(Avx512ER_SAE) , 435, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #236 [ref=2x]
2212 { F(Vec)|F(Evex)|F(Avx512KZ) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #237 [ref=3x]
2213 { F(Vec)|F(Vex) , 165, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #238 [ref=9x]
2214 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 74 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #239 [ref=3x]
2215 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 74 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #240 [ref=3x]
2216 { F(Vec)|F(Evex)|F(Avx512KZ) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #241 [ref=9x]
2217 { F(Vec)|F(Vex) , 181, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #242 [ref=2x]
2218 { F(Vec)|F(Evex)|F(Avx512KZ) , 436, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #243 [ref=4x]
2219 { F(Vec)|F(Evex)|F(Avx512KZ) , 182, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #244 [ref=4x]
2220 { F(Vec)|F(Vex)|F(Evex) , 367, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #245 [ref=2x]
2221 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #246 [ref=2x]
2222 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #247 [ref=2x]
2223 { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #248 [ref=4x]
2224 { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #249 [ref=4x]
2225 { F(Vec)|F(Vex) , 130, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #250 [ref=13x]
2226 { F(Vec)|F(Vex) , 315, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #251 [ref=4x]
2227 { F(Vec)|F(Vex) , 317, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #252 [ref=4x]
2228 { F(Vec)|F(Evex)|F(Avx512K_B64) , 439, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #253 [ref=1x]
2229 { F(Vec)|F(Evex)|F(Avx512K_B32) , 439, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #254 [ref=1x]
2230 { F(Vec)|F(Evex)|F(Avx512K) , 440, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #255 [ref=1x]
2231 { F(Vec)|F(Evex)|F(Avx512K) , 441, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #256 [ref=1x]
2232 { F(Vec)|F(Vex) , 177, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #257 [ref=7x]
2233 { F(Vec)|F(Vex) , 97 , 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #258 [ref=1x]
2234 { F(Vec)|F(Vex) , 222, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #259 [ref=1x]
2235 { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 99 , 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #260 [ref=2x]
2236 { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 104, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #261 [ref=2x]
2237 { F(Vsib)|F(Evex)|F(Avx512K) , 442, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #262 [ref=4x]
2238 { F(Vsib)|F(Evex)|F(Avx512K) , 443, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #263 [ref=4x]
2239 { F(Vsib)|F(Evex)|F(Avx512K) , 444, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #264 [ref=8x]
2240 { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 109, 5 , CONTROL(None) , SINGLE_REG(None), 0 }, // #265 [ref=2x]
2241 { F(Vec)|F(Vsib)|F(Vex)|F(Evex)|F(Avx512K) , 134, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #266 [ref=2x]
2242 { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #267 [ref=3x]
2243 { F(Vec)|F(Evex)|F(Avx512KZ_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #268 [ref=3x]
2244 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B64) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #269 [ref=2x]
2245 { F(Vec)|F(Evex)|F(Avx512KZ_SAE_B32) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #270 [ref=2x]
2246 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #271 [ref=3x]
2247 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #272 [ref=22x]
2248 { F(Vec)|F(Vex) , 319, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #273 [ref=2x]
2249 { F(Vec)|F(Evex)|F(Avx512KZ) , 319, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #274 [ref=4x]
2250 { F(Vec)|F(Evex)|F(Avx512KZ) , 445, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #275 [ref=4x]
2251 { F(Vec)|F(Vex)|F(Evex) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #276 [ref=1x]
2252 { F(Vec)|F(Vex) , 192, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #277 [ref=1x]
2253 { F(Vex) , 384, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #278 [ref=2x]
2254 { F(Vec)|F(Vex) , 390, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #279 [ref=1x]
2255 { F(Vec)|F(Vex) , 138, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #280 [ref=4x]
2256 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #281 [ref=2x]
2257 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #282 [ref=2x]
2258 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #283 [ref=2x]
2259 { 0 , 446, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #284 [ref=4x]
2260 { 0 , 321, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #285 [ref=3x]
2261 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 70 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #286 [ref=4x]
2262 { F(Vec)|F(Vex)|F(Evex) , 323, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #287 [ref=1x]
2263 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 186, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #288 [ref=1x]
2264 { F(Vec)|F(Vex) , 70 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #289 [ref=2x]
2265 { F(Vec)|F(Evex)|F(Avx512KZ) , 70 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #290 [ref=6x]
2266 { F(Vec)|F(Vex)|F(Evex) , 200, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #291 [ref=2x]
2267 { F(Vec)|F(Vex)|F(Evex) , 325, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #292 [ref=4x]
2268 { F(Vec)|F(Vex) , 447, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #293 [ref=3x]
2269 { F(Vec)|F(Vex)|F(Evex) , 189, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #294 [ref=3x]
2270 { F(Vec)|F(Vex)|F(Evex) , 192, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #295 [ref=1x]
2271 { F(Vec)|F(Vex)|F(Evex) , 195, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #296 [ref=1x]
2272 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 198, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #297 [ref=1x]
2273 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #298 [ref=5x]
2274 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 201, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #299 [ref=1x]
2275 { 0 , 327, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #300 [ref=1x]
2276 { 0 , 329, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #301 [ref=1x]
2277 { F(Vec)|F(Vex) , 162, 2 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #302 [ref=2x]
2278 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #303 [ref=2x]
2279 { F(Vec)|F(Vex) , 162, 2 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #304 [ref=2x]
2280 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #305 [ref=2x]
2281 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #306 [ref=2x]
2282 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #307 [ref=2x]
2283 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 448, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #308 [ref=1x]
2284 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 449, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #309 [ref=1x]
2285 { F(Vec)|F(Evex) , 450, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #310 [ref=6x]
2286 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 204, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #311 [ref=1x]
2287 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 451, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #312 [ref=1x]
2288 { F(Vec)|F(Vex)|F(Evex) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #313 [ref=1x]
2289 { F(Vec)|F(Evex)|F(Avx512K) , 207, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #314 [ref=2x]
2290 { F(Vec)|F(Evex)|F(Avx512K_B32) , 207, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #315 [ref=2x]
2291 { F(Vec)|F(Vex)|F(Evex)|F(Avx512K) , 210, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #316 [ref=4x]
2292 { F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B32) , 210, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #317 [ref=2x]
2293 { F(Vec)|F(Vex)|F(Evex)|F(Avx512K_B64) , 210, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #318 [ref=2x]
2294 { F(Vec)|F(Vex) , 404, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #319 [ref=1x]
2295 { F(Vec)|F(Vex) , 405, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #320 [ref=1x]
2296 { F(Vec)|F(Vex) , 406, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #321 [ref=1x]
2297 { F(Vec)|F(Vex) , 407, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #322 [ref=1x]
2298 { F(Vec)|F(Evex)|F(Avx512K_B64) , 207, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #323 [ref=4x]
2299 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #324 [ref=6x]
2300 { F(Vec)|F(Vex) , 166, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #325 [ref=2x]
2301 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 163, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #326 [ref=2x]
2302 { F(Vec)|F(Vex) , 142, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #327 [ref=2x]
2303 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 76 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #328 [ref=2x]
2304 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 146, 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #329 [ref=2x]
2305 { F(Vec)|F(Vex)|F(Evex) , 408, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #330 [ref=1x]
2306 { F(Vec)|F(Vex)|F(Evex) , 409, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #331 [ref=1x]
2307 { F(Vec)|F(Vex)|F(Evex) , 452, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #332 [ref=1x]
2308 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 453, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #333 [ref=1x]
2309 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 454, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #334 [ref=1x]
2310 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 455, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #335 [ref=1x]
2311 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 456, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #336 [ref=1x]
2312 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #337 [ref=4x]
2313 { F(Vec)|F(Vex) , 311, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #338 [ref=12x]
2314 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #339 [ref=8x]
2315 { F(Vec)|F(Evex) , 457, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #340 [ref=4x]
2316 { F(Vec)|F(Evex)|F(Avx512KZ) , 213, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #341 [ref=6x]
2317 { F(Vec)|F(Evex)|F(Avx512KZ) , 216, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #342 [ref=9x]
2318 { F(Vec)|F(Evex)|F(Avx512KZ) , 219, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #343 [ref=3x]
2319 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 222, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #344 [ref=4x]
2320 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 225, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #345 [ref=2x]
2321 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 174, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #346 [ref=6x]
2322 { F(Vec)|F(Vex) , 130, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #347 [ref=1x]
2323 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #348 [ref=3x]
2324 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #349 [ref=3x]
2325 { F(Vec)|F(Vex) , 331, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #350 [ref=4x]
2326 { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 228, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #351 [ref=3x]
2327 { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 333, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #352 [ref=2x]
2328 { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 231, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #353 [ref=2x]
2329 { F(Vec)|F(Vex) , 335, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #354 [ref=8x]
2330 { F(Vec)|F(Evex)|F(Avx512K) , 234, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #355 [ref=5x]
2331 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #356 [ref=1x]
2332 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #357 [ref=2x]
2333 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #358 [ref=3x]
2334 { F(Vec)|F(Vex)|F(Evex) , 183, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #359 [ref=2x]
2335 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #360 [ref=2x]
2336 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 82 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #361 [ref=3x]
2337 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 88 , 6 , CONTROL(None) , SINGLE_REG(None), 0 }, // #362 [ref=1x]
2338 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #363 [ref=6x]
2339 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #364 [ref=2x]
2340 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(WO) , 0 }, // #365 [ref=2x]
2341 { F(Vec)|F(Evex)|F(Avx512K_B32) , 234, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #366 [ref=2x]
2342 { F(Vec)|F(Evex)|F(Avx512K_B64) , 234, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #367 [ref=2x]
2343 { F(Vec)|F(Evex)|F(Avx512KZ) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #368 [ref=2x]
2344 { F(Vec)|F(Evex)|F(Avx512KZ) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #369 [ref=2x]
2345 { F(Vec)|F(Vex) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #370 [ref=2x]
2346 { F(Vec)|F(Evex)|F(Avx512KZ) , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #371 [ref=1x]
2347 { F(Vec)|F(Evex)|F(Avx512KZ) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #372 [ref=1x]
2348 { F(Vec)|F(Vex) , 183, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #373 [ref=2x]
2349 { F(Vec)|F(Vex) , 437, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #374 [ref=1x]
2350 { F(Vec)|F(Vex) , 438, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #375 [ref=1x]
2351 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #376 [ref=1x]
2352 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE_B32) , 162, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #377 [ref=1x]
2353 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 425, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #378 [ref=1x]
2354 { F(Vec)|F(Evex)|F(Avx512KZ_ER_SAE) , 426, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #379 [ref=1x]
2355 { F(Vec)|F(Vsib)|F(Evex)|F(Avx512K) , 337, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #380 [ref=1x]
2356 { F(Vec)|F(Evex)|F(Avx512KZ_B32) , 166, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #381 [ref=2x]
2357 { F(Vec)|F(Evex)|F(Avx512KZ_B64) , 166, 2 , CONTROL(None) , SINGLE_REG(None), 0 }, // #382 [ref=2x]
2358 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B32) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #383 [ref=1x]
2359 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_B64) , 165, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #384 [ref=1x]
2360 { F(Vec)|F(Vex)|F(Evex)|F(Avx512KZ_ER_SAE_B64) , 177, 3 , CONTROL(None) , SINGLE_REG(None), 0 }, // #385 [ref=1x]
2361 { F(Vec)|F(Vex) , 257, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #386 [ref=2x]
2362 { F(Lock)|F(XAcquire)|F(XRelease) , 49 , 4 , CONTROL(None) , SINGLE_REG(None), 0 }, // #387 [ref=1x]
2363 { 0 , 458, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #388 [ref=1x]
2364 { F(Lock)|F(XAcquire) , 49 , 8 , CONTROL(None) , SINGLE_REG(RO) , 0 }, // #389 [ref=1x]
2365 { 0 , 459, 1 , CONTROL(None) , SINGLE_REG(None), 0 }, // #390 [ref=6x]
2366 { 0 , 460, 1 , CONTROL(None) , SINGLE_REG(None), 0 } // #391 [ref=6x]
2367 };
2368 #undef SINGLE_REG
2369 #undef CONTROL
2370 #undef F
2371 // ----------------------------------------------------------------------------
2372 // ${InstCommonTable:End}
2373
2374 // ============================================================================
2375 // [asmjit::x86::InstDB - CommonInfoTableB]
2376 // ============================================================================
2377
2378 // ${InstCommonInfoTableB:Begin}
2379 // ------------------- Automatically generated, do not edit -------------------
// NOTE(review): Machine-generated table (see the ${InstCommonInfoTableB}
// markers bracketing this section) — do not hand-edit; change the generator
// (tools/asmdb) instead, or edits will be lost on regeneration.
//
// EXT(VAL) expands to the numeric id of the CPU feature `Features::kVAL`.
// Each row lists the instruction-set extensions an instruction depends on,
// followed by two integers. The second-to-last field appears to be an index
// into `_rwFlagsInfoTable` below (EFLAGS read/write info) — TODO confirm
// against the `CommonInfoTableB` struct definition; the last field is always
// 0 here (reserved/padding, presumably — verify). The trailing
// `// #N [ref=Mx]` comments give the row index and how many instructions
// share that row.
2380 #define EXT(VAL) uint32_t(Features::k##VAL)
2381 const InstDB::CommonInfoTableB InstDB::_commonInfoTableB[] = {
2382 { { 0 }, 0, 0 }, // #0 [ref=144x]
2383 { { 0 }, 1, 0 }, // #1 [ref=32x]
2384 { { 0 }, 2, 0 }, // #2 [ref=2x]
2385 { { EXT(ADX) }, 3, 0 }, // #3 [ref=1x]
2386 { { EXT(SSE2) }, 0, 0 }, // #4 [ref=65x]
2387 { { EXT(SSE) }, 0, 0 }, // #5 [ref=44x]
2388 { { EXT(SSE3) }, 0, 0 }, // #6 [ref=12x]
2389 { { EXT(ADX) }, 4, 0 }, // #7 [ref=1x]
2390 { { EXT(AESNI) }, 0, 0 }, // #8 [ref=6x]
2391 { { EXT(BMI) }, 1, 0 }, // #9 [ref=6x]
2392 { { 0 }, 5, 0 }, // #10 [ref=5x]
2393 { { EXT(TBM) }, 0, 0 }, // #11 [ref=9x]
2394 { { EXT(SSE4_1) }, 0, 0 }, // #12 [ref=47x]
2395 { { EXT(MPX) }, 0, 0 }, // #13 [ref=7x]
2396 { { 0 }, 6, 0 }, // #14 [ref=4x]
2397 { { EXT(BMI2) }, 1, 0 }, // #15 [ref=1x]
2398 { { EXT(SMAP) }, 7, 0 }, // #16 [ref=2x]
2399 { { 0 }, 8, 0 }, // #17 [ref=2x]
2400 { { 0 }, 9, 0 }, // #18 [ref=2x]
2401 { { EXT(CLDEMOTE) }, 0, 0 }, // #19 [ref=1x]
2402 { { EXT(CLFLUSH) }, 0, 0 }, // #20 [ref=1x]
2403 { { EXT(CLFLUSHOPT) }, 0, 0 }, // #21 [ref=1x]
2404 { { EXT(SVM) }, 0, 0 }, // #22 [ref=6x]
2405 { { 0 }, 10, 0 }, // #23 [ref=2x]
2406 { { EXT(CLWB) }, 0, 0 }, // #24 [ref=1x]
2407 { { EXT(CLZERO) }, 0, 0 }, // #25 [ref=1x]
2408 { { 0 }, 3, 0 }, // #26 [ref=1x]
2409 { { EXT(CMOV) }, 11, 0 }, // #27 [ref=6x]
2410 { { EXT(CMOV) }, 12, 0 }, // #28 [ref=8x]
2411 { { EXT(CMOV) }, 13, 0 }, // #29 [ref=6x]
2412 { { EXT(CMOV) }, 14, 0 }, // #30 [ref=4x]
2413 { { EXT(CMOV) }, 15, 0 }, // #31 [ref=4x]
2414 { { EXT(CMOV) }, 16, 0 }, // #32 [ref=2x]
2415 { { EXT(CMOV) }, 17, 0 }, // #33 [ref=6x]
2416 { { EXT(CMOV) }, 18, 0 }, // #34 [ref=2x]
2417 { { 0 }, 19, 0 }, // #35 [ref=2x]
2418 { { EXT(I486) }, 1, 0 }, // #36 [ref=2x]
2419 { { EXT(CMPXCHG16B) }, 5, 0 }, // #37 [ref=1x]
2420 { { EXT(CMPXCHG8B) }, 5, 0 }, // #38 [ref=1x]
2421 { { EXT(SSE2) }, 1, 0 }, // #39 [ref=2x]
2422 { { EXT(SSE) }, 1, 0 }, // #40 [ref=2x]
2423 { { EXT(I486) }, 0, 0 }, // #41 [ref=4x]
2424 { { EXT(SSE4_2) }, 0, 0 }, // #42 [ref=2x]
2425 { { 0 }, 20, 0 }, // #43 [ref=2x]
2426 { { EXT(MMX) }, 0, 0 }, // #44 [ref=1x]
2427 { { EXT(ENQCMD) }, 0, 0 }, // #45 [ref=2x]
2428 { { EXT(SSE4A) }, 0, 0 }, // #46 [ref=4x]
2429 { { 0 }, 21, 0 }, // #47 [ref=4x]
2430 { { EXT(3DNOW) }, 0, 0 }, // #48 [ref=21x]
2431 { { EXT(FXSR) }, 0, 0 }, // #49 [ref=4x]
2432 { { EXT(SMX) }, 0, 0 }, // #50 [ref=1x]
2433 { { EXT(GFNI) }, 0, 0 }, // #51 [ref=3x]
2434 { { 0 }, 16, 0 }, // #52 [ref=5x]
2435 { { EXT(VMX) }, 0, 0 }, // #53 [ref=12x]
2436 { { 0 }, 11, 0 }, // #54 [ref=8x]
2437 { { 0 }, 12, 0 }, // #55 [ref=12x]
2438 { { 0 }, 13, 0 }, // #56 [ref=10x]
2439 { { 0 }, 14, 0 }, // #57 [ref=8x]
2440 { { 0 }, 15, 0 }, // #58 [ref=8x]
2441 { { 0 }, 17, 0 }, // #59 [ref=8x]
2442 { { 0 }, 18, 0 }, // #60 [ref=4x]
2443 { { EXT(AVX512_DQ) }, 0, 0 }, // #61 [ref=23x]
2444 { { EXT(AVX512_BW) }, 0, 0 }, // #62 [ref=22x]
2445 { { EXT(AVX512_F) }, 0, 0 }, // #63 [ref=37x]
2446 { { EXT(AVX512_DQ) }, 1, 0 }, // #64 [ref=3x]
2447 { { EXT(AVX512_BW) }, 1, 0 }, // #65 [ref=4x]
2448 { { EXT(AVX512_F) }, 1, 0 }, // #66 [ref=1x]
2449 { { EXT(LAHFSAHF) }, 22, 0 }, // #67 [ref=1x]
2450 { { EXT(LWP) }, 0, 0 }, // #68 [ref=4x]
2451 { { 0 }, 23, 0 }, // #69 [ref=3x]
2452 { { EXT(LZCNT) }, 1, 0 }, // #70 [ref=1x]
2453 { { EXT(MMX2) }, 0, 0 }, // #71 [ref=8x]
2454 { { EXT(MONITOR) }, 0, 0 }, // #72 [ref=2x]
2455 { { EXT(MONITORX) }, 0, 0 }, // #73 [ref=2x]
2456 { { EXT(MOVBE) }, 0, 0 }, // #74 [ref=1x]
2457 { { EXT(MMX), EXT(SSE2) }, 0, 0 }, // #75 [ref=46x]
2458 { { EXT(MOVDIR64B) }, 0, 0 }, // #76 [ref=1x]
2459 { { EXT(MOVDIRI) }, 0, 0 }, // #77 [ref=1x]
2460 { { EXT(BMI2) }, 0, 0 }, // #78 [ref=7x]
2461 { { EXT(SSSE3) }, 0, 0 }, // #79 [ref=15x]
2462 { { EXT(MMX2), EXT(SSE2) }, 0, 0 }, // #80 [ref=10x]
2463 { { EXT(PCLMULQDQ) }, 0, 0 }, // #81 [ref=1x]
2464 { { EXT(SSE4_2) }, 1, 0 }, // #82 [ref=4x]
2465 { { EXT(PCOMMIT) }, 0, 0 }, // #83 [ref=1x]
2466 { { EXT(MMX2), EXT(SSE2), EXT(SSE4_1) }, 0, 0 }, // #84 [ref=1x]
2467 { { EXT(3DNOW2) }, 0, 0 }, // #85 [ref=5x]
2468 { { EXT(GEODE) }, 0, 0 }, // #86 [ref=2x]
2469 { { EXT(POPCNT) }, 1, 0 }, // #87 [ref=1x]
2470 { { 0 }, 24, 0 }, // #88 [ref=3x]
2471 { { EXT(PREFETCHW) }, 1, 0 }, // #89 [ref=1x]
2472 { { EXT(PREFETCHWT1) }, 1, 0 }, // #90 [ref=1x]
2473 { { EXT(SSE4_1) }, 1, 0 }, // #91 [ref=1x]
2474 { { 0 }, 25, 0 }, // #92 [ref=3x]
2475 { { 0 }, 26, 0 }, // #93 [ref=2x]
2476 { { EXT(FSGSBASE) }, 0, 0 }, // #94 [ref=4x]
2477 { { EXT(MSR) }, 0, 0 }, // #95 [ref=2x]
2478 { { EXT(RDPID) }, 0, 0 }, // #96 [ref=1x]
2479 { { EXT(RDRAND) }, 1, 0 }, // #97 [ref=1x]
2480 { { EXT(RDSEED) }, 1, 0 }, // #98 [ref=1x]
2481 { { EXT(RDTSC) }, 0, 0 }, // #99 [ref=1x]
2482 { { EXT(RDTSCP) }, 0, 0 }, // #100 [ref=1x]
2483 { { 0 }, 27, 0 }, // #101 [ref=2x]
2484 { { EXT(LAHFSAHF) }, 28, 0 }, // #102 [ref=1x]
2485 { { EXT(SHA) }, 0, 0 }, // #103 [ref=7x]
2486 { { EXT(SKINIT) }, 0, 0 }, // #104 [ref=2x]
2487 { { EXT(AVX512_4FMAPS) }, 0, 0 }, // #105 [ref=4x]
2488 { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #106 [ref=46x]
2489 { { EXT(AVX), EXT(AVX512_F) }, 0, 0 }, // #107 [ref=32x]
2490 { { EXT(AVX) }, 0, 0 }, // #108 [ref=37x]
2491 { { EXT(AESNI), EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(VAES) }, 0, 0 }, // #109 [ref=4x]
2492 { { EXT(AESNI), EXT(AVX) }, 0, 0 }, // #110 [ref=2x]
2493 { { EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #111 [ref=112x]
2494 { { EXT(AVX), EXT(AVX512_DQ), EXT(AVX512_VL) }, 0, 0 }, // #112 [ref=8x]
2495 { { EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #113 [ref=26x]
2496 { { EXT(AVX512_DQ), EXT(AVX512_VL) }, 0, 0 }, // #114 [ref=30x]
2497 { { EXT(AVX2) }, 0, 0 }, // #115 [ref=7x]
2498 { { EXT(AVX), EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #116 [ref=39x]
2499 { { EXT(AVX), EXT(AVX512_F) }, 1, 0 }, // #117 [ref=4x]
2500 { { EXT(AVX512_BF16), EXT(AVX512_VL) }, 0, 0 }, // #118 [ref=3x]
2501 { { EXT(AVX512_F), EXT(AVX512_VL), EXT(F16C) }, 0, 0 }, // #119 [ref=2x]
2502 { { EXT(AVX512_ERI) }, 0, 0 }, // #120 [ref=10x]
2503 { { EXT(AVX512_F), EXT(AVX512_VL), EXT(FMA) }, 0, 0 }, // #121 [ref=36x]
2504 { { EXT(AVX512_F), EXT(FMA) }, 0, 0 }, // #122 [ref=24x]
2505 { { EXT(FMA4) }, 0, 0 }, // #123 [ref=20x]
2506 { { EXT(XOP) }, 0, 0 }, // #124 [ref=55x]
2507 { { EXT(AVX2), EXT(AVX512_F), EXT(AVX512_VL) }, 0, 0 }, // #125 [ref=19x]
2508 { { EXT(AVX512_PFI) }, 0, 0 }, // #126 [ref=16x]
2509 { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(GFNI) }, 0, 0 }, // #127 [ref=3x]
2510 { { EXT(AVX), EXT(AVX2) }, 0, 0 }, // #128 [ref=17x]
2511 { { EXT(AVX512_4VNNIW) }, 0, 0 }, // #129 [ref=2x]
2512 { { EXT(AVX), EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #130 [ref=54x]
2513 { { EXT(AVX2), EXT(AVX512_BW), EXT(AVX512_VL) }, 0, 0 }, // #131 [ref=2x]
2514 { { EXT(AVX512_CDI), EXT(AVX512_VL) }, 0, 0 }, // #132 [ref=6x]
2515 { { EXT(AVX), EXT(AVX512_F), EXT(AVX512_VL), EXT(PCLMULQDQ), EXT(VPCLMULQDQ) }, 0, 0 }, // #133 [ref=1x]
2516 { { EXT(AVX) }, 1, 0 }, // #134 [ref=7x]
2517 { { EXT(AVX512_VBMI2), EXT(AVX512_VL) }, 0, 0 }, // #135 [ref=16x]
2518 { { EXT(AVX512_VL), EXT(AVX512_VNNI) }, 0, 0 }, // #136 [ref=4x]
2519 { { EXT(AVX512_VBMI), EXT(AVX512_VL) }, 0, 0 }, // #137 [ref=4x]
2520 { { EXT(AVX), EXT(AVX512_BW) }, 0, 0 }, // #138 [ref=4x]
2521 { { EXT(AVX), EXT(AVX512_DQ) }, 0, 0 }, // #139 [ref=4x]
2522 { { EXT(AVX512_IFMA), EXT(AVX512_VL) }, 0, 0 }, // #140 [ref=2x]
2523 { { EXT(AVX512_BITALG), EXT(AVX512_VL) }, 0, 0 }, // #141 [ref=3x]
2524 { { EXT(AVX512_VL), EXT(AVX512_VPOPCNTDQ) }, 0, 0 }, // #142 [ref=2x]
2525 { { EXT(WBNOINVD) }, 0, 0 }, // #143 [ref=1x]
2526 { { EXT(RTM) }, 0, 0 }, // #144 [ref=3x]
2527 { { EXT(XSAVE) }, 0, 0 }, // #145 [ref=6x]
2528 { { EXT(XSAVES) }, 0, 0 }, // #146 [ref=4x]
2529 { { EXT(XSAVEC) }, 0, 0 }, // #147 [ref=2x]
2530 { { EXT(XSAVEOPT) }, 0, 0 }, // #148 [ref=2x]
2531 { { EXT(TSX) }, 1, 0 } // #149 [ref=1x]
2532 };
2533 #undef EXT
2534
// NOTE(review): Machine-generated table — do not hand-edit; regenerate via
// the instruction-database tooling instead.
//
// FLAG(VAL) expands to the `Status::kVAL` bit. Each row is a pair of EFLAGS
// bit masks; presumably { flags-read, flags-written } for the instructions
// whose `_commonInfoTableB` row indexes this entry — TODO confirm field
// order against the `RWFlagsInfoTable` struct definition. Row #0 (both masks
// empty) is the shared "touches no status flags" entry, and the trailing
// `// #N [ref=Mx]` comments give the row index and reference count.
2535 #define FLAG(VAL) uint32_t(Status::k##VAL)
2536 const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = {
2537 { 0, 0 }, // #0 [ref=1281x]
2538 { 0, FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #1 [ref=76x]
2539 { FLAG(CF), FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #2 [ref=2x]
2540 { FLAG(CF), FLAG(CF) }, // #3 [ref=2x]
2541 { FLAG(OF), FLAG(OF) }, // #4 [ref=1x]
2542 { 0, FLAG(ZF) }, // #5 [ref=7x]
2543 { 0, FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) }, // #6 [ref=4x]
2544 { 0, FLAG(AC) }, // #7 [ref=2x]
2545 { 0, FLAG(CF) }, // #8 [ref=2x]
2546 { 0, FLAG(DF) }, // #9 [ref=2x]
2547 { 0, FLAG(IF) }, // #10 [ref=2x]
2548 { FLAG(CF) | FLAG(ZF), 0 }, // #11 [ref=14x]
2549 { FLAG(CF), 0 }, // #12 [ref=20x]
2550 { FLAG(ZF), 0 }, // #13 [ref=16x]
2551 { FLAG(OF) | FLAG(SF) | FLAG(ZF), 0 }, // #14 [ref=12x]
2552 { FLAG(OF) | FLAG(SF), 0 }, // #15 [ref=12x]
2553 { FLAG(OF), 0 }, // #16 [ref=7x]
2554 { FLAG(PF), 0 }, // #17 [ref=14x]
2555 { FLAG(SF), 0 }, // #18 [ref=6x]
2556 { FLAG(DF), FLAG(AF) | FLAG(CF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #19 [ref=2x]
2557 { 0, FLAG(AF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #20 [ref=2x]
2558 { 0, FLAG(CF) | FLAG(PF) | FLAG(ZF) }, // #21 [ref=4x]
2559 { FLAG(AF) | FLAG(CF) | FLAG(PF) | FLAG(SF) | FLAG(ZF), 0 }, // #22 [ref=1x]
2560 { FLAG(DF), 0 }, // #23 [ref=3x]
2561 { 0, FLAG(AF) | FLAG(CF) | FLAG(DF) | FLAG(IF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) }, // #24 [ref=3x]
2562 { FLAG(AF) | FLAG(CF) | FLAG(DF) | FLAG(IF) | FLAG(OF) | FLAG(PF) | FLAG(SF) | FLAG(ZF), 0 }, // #25 [ref=3x]
2563 { FLAG(CF) | FLAG(OF), FLAG(CF) | FLAG(OF) }, // #26 [ref=2x]
2564 { 0, FLAG(CF) | FLAG(OF) }, // #27 [ref=2x]
2565 { 0, FLAG(AF) | FLAG(CF) | FLAG(PF) | FLAG(SF) | FLAG(ZF) } // #28 [ref=1x]
2566 };
2567 #undef FLAG
2568 // ----------------------------------------------------------------------------
2569 // ${InstCommonInfoTableB:End}
2570
2571 // ============================================================================
2572 // [asmjit::Inst - NameData]
2573 // ============================================================================
2574
2575 #ifndef ASMJIT_NO_TEXT
2576 // ${NameData:Begin}
2577 // ------------------- Automatically generated, do not edit -------------------
// Packed instruction-mnemonic string table: every name is stored back-to-back
// in one character array, each terminated by an embedded "\0" (the array also
// begins with "\0" so that offset 0 means "no name"). Names appear in
// ascending alphabetical order.
// NOTE(review): consumers presumably address this blob by byte offset stored
// in the instruction records rather than by pointer - confirm against the
// InstDB name-lookup code. Auto-generated (${NameData} section) - do not edit
// by hand; exact byte offsets matter.
const char InstDB::_nameData[] =
  "\0" "aaa\0" "aad\0" "aam\0" "aas\0" "adc\0" "adcx\0" "adox\0" "arpl\0" "bextr\0" "blcfill\0" "blci\0" "blcic\0"
  "blcmsk\0" "blcs\0" "blsfill\0" "blsi\0" "blsic\0" "blsmsk\0" "blsr\0" "bndcl\0" "bndcn\0" "bndcu\0" "bndldx\0"
  "bndmk\0" "bndmov\0" "bndstx\0" "bound\0" "bsf\0" "bsr\0" "bswap\0" "bt\0" "btc\0" "btr\0" "bts\0" "bzhi\0" "cbw\0"
  "cdq\0" "cdqe\0" "clac\0" "clc\0" "cld\0" "cldemote\0" "clflush\0" "clflushopt\0" "clgi\0" "cli\0" "clts\0" "clwb\0"
  "clzero\0" "cmc\0" "cmova\0" "cmovae\0" "cmovc\0" "cmovg\0" "cmovge\0" "cmovl\0" "cmovle\0" "cmovna\0" "cmovnae\0"
  "cmovnc\0" "cmovng\0" "cmovnge\0" "cmovnl\0" "cmovnle\0" "cmovno\0" "cmovnp\0" "cmovns\0" "cmovnz\0" "cmovo\0"
  "cmovp\0" "cmovpe\0" "cmovpo\0" "cmovs\0" "cmovz\0" "cmp\0" "cmps\0" "cmpxchg\0" "cmpxchg16b\0" "cmpxchg8b\0"
  "cpuid\0" "cqo\0" "crc32\0" "cvtpd2pi\0" "cvtpi2pd\0" "cvtpi2ps\0" "cvtps2pi\0" "cvttpd2pi\0" "cvttps2pi\0" "cwd\0"
  "cwde\0" "daa\0" "das\0" "enqcmd\0" "enqcmds\0" "f2xm1\0" "fabs\0" "faddp\0" "fbld\0" "fbstp\0" "fchs\0" "fclex\0"
  "fcmovb\0" "fcmovbe\0" "fcmove\0" "fcmovnb\0" "fcmovnbe\0" "fcmovne\0" "fcmovnu\0" "fcmovu\0" "fcom\0" "fcomi\0"
  "fcomip\0" "fcomp\0" "fcompp\0" "fcos\0" "fdecstp\0" "fdiv\0" "fdivp\0" "fdivr\0" "fdivrp\0" "femms\0" "ffree\0"
  "fiadd\0" "ficom\0" "ficomp\0" "fidiv\0" "fidivr\0" "fild\0" "fimul\0" "fincstp\0" "finit\0" "fist\0" "fistp\0"
  "fisttp\0" "fisub\0" "fisubr\0" "fld\0" "fld1\0" "fldcw\0" "fldenv\0" "fldl2e\0" "fldl2t\0" "fldlg2\0" "fldln2\0"
  "fldpi\0" "fldz\0" "fmulp\0" "fnclex\0" "fninit\0" "fnop\0" "fnsave\0" "fnstcw\0" "fnstenv\0" "fnstsw\0" "fpatan\0"
  "fprem\0" "fprem1\0" "fptan\0" "frndint\0" "frstor\0" "fsave\0" "fscale\0" "fsin\0" "fsincos\0" "fsqrt\0" "fst\0"
  "fstcw\0" "fstenv\0" "fstp\0" "fstsw\0" "fsubp\0" "fsubrp\0" "ftst\0" "fucom\0" "fucomi\0" "fucomip\0" "fucomp\0"
  "fucompp\0" "fwait\0" "fxam\0" "fxch\0" "fxrstor\0" "fxrstor64\0" "fxsave\0" "fxsave64\0" "fxtract\0" "fyl2x\0"
  "fyl2xp1\0" "getsec\0" "hlt\0" "inc\0" "insertq\0" "int3\0" "into\0" "invept\0" "invlpg\0" "invlpga\0" "invpcid\0"
  "invvpid\0" "iret\0" "iretd\0" "iretq\0" "iretw\0" "ja\0" "jae\0" "jb\0" "jbe\0" "jc\0" "je\0" "jecxz\0" "jg\0"
  "jge\0" "jl\0" "jle\0" "jmp\0" "jna\0" "jnae\0" "jnb\0" "jnbe\0" "jnc\0" "jne\0" "jng\0" "jnge\0" "jnl\0" "jnle\0"
  "jno\0" "jnp\0" "jns\0" "jnz\0" "jo\0" "jp\0" "jpe\0" "jpo\0" "js\0" "jz\0" "kaddb\0" "kaddd\0" "kaddq\0" "kaddw\0"
  "kandb\0" "kandd\0" "kandnb\0" "kandnd\0" "kandnq\0" "kandnw\0" "kandq\0" "kandw\0" "kmovb\0" "kmovw\0" "knotb\0"
  "knotd\0" "knotq\0" "knotw\0" "korb\0" "kord\0" "korq\0" "kortestb\0" "kortestd\0" "kortestq\0" "kortestw\0" "korw\0"
  "kshiftlb\0" "kshiftld\0" "kshiftlq\0" "kshiftlw\0" "kshiftrb\0" "kshiftrd\0" "kshiftrq\0" "kshiftrw\0" "ktestb\0"
  "ktestd\0" "ktestq\0" "ktestw\0" "kunpckbw\0" "kunpckdq\0" "kunpckwd\0" "kxnorb\0" "kxnord\0" "kxnorq\0" "kxnorw\0"
  "kxorb\0" "kxord\0" "kxorq\0" "kxorw\0" "lahf\0" "lar\0" "lds\0" "lea\0" "leave\0" "les\0" "lfence\0" "lfs\0"
  "lgdt\0" "lgs\0" "lidt\0" "lldt\0" "llwpcb\0" "lmsw\0" "lods\0" "loop\0" "loope\0" "loopne\0" "lsl\0" "ltr\0"
  "lwpins\0" "lwpval\0" "lzcnt\0" "mfence\0" "monitor\0" "monitorx\0" "movdir64b\0" "movdiri\0" "movdq2q\0" "movnti\0"
  "movntq\0" "movntsd\0" "movntss\0" "movq2dq\0" "movsx\0" "movsxd\0" "movzx\0" "mulx\0" "mwait\0" "mwaitx\0" "neg\0"
  "not\0" "out\0" "outs\0" "pause\0" "pavgusb\0" "pcommit\0" "pdep\0" "pext\0" "pf2id\0" "pf2iw\0" "pfacc\0" "pfadd\0"
  "pfcmpeq\0" "pfcmpge\0" "pfcmpgt\0" "pfmax\0" "pfmin\0" "pfmul\0" "pfnacc\0" "pfpnacc\0" "pfrcp\0" "pfrcpit1\0"
  "pfrcpit2\0" "pfrcpv\0" "pfrsqit1\0" "pfrsqrt\0" "pfrsqrtv\0" "pfsub\0" "pfsubr\0" "pi2fd\0" "pi2fw\0" "pmulhrw\0"
  "pop\0" "popa\0" "popad\0" "popcnt\0" "popf\0" "popfd\0" "popfq\0" "prefetch\0" "prefetchnta\0" "prefetcht0\0"
  "prefetcht1\0" "prefetcht2\0" "prefetchw\0" "prefetchwt1\0" "pshufw\0" "pswapd\0" "push\0" "pusha\0" "pushad\0"
  "pushf\0" "pushfd\0" "pushfq\0" "rcl\0" "rcr\0" "rdfsbase\0" "rdgsbase\0" "rdmsr\0" "rdpid\0" "rdpmc\0" "rdrand\0"
  "rdseed\0" "rdtsc\0" "rdtscp\0" "rol\0" "ror\0" "rorx\0" "rsm\0" "sahf\0" "sal\0" "sar\0" "sarx\0" "sbb\0" "scas\0"
  "seta\0" "setae\0" "setb\0" "setbe\0" "setc\0" "sete\0" "setg\0" "setge\0" "setl\0" "setle\0" "setna\0" "setnae\0"
  "setnb\0" "setnbe\0" "setnc\0" "setne\0" "setng\0" "setnge\0" "setnl\0" "setnle\0" "setno\0" "setnp\0" "setns\0"
  "setnz\0" "seto\0" "setp\0" "setpe\0" "setpo\0" "sets\0" "setz\0" "sfence\0" "sgdt\0" "sha1msg1\0" "sha1msg2\0"
  "sha1nexte\0" "sha1rnds4\0" "sha256msg1\0" "sha256msg2\0" "sha256rnds2\0" "shl\0" "shlx\0" "shr\0" "shrd\0" "shrx\0"
  "sidt\0" "skinit\0" "sldt\0" "slwpcb\0" "smsw\0" "stac\0" "stc\0" "stgi\0" "sti\0" "stos\0" "str\0" "swapgs\0"
  "syscall\0" "sysenter\0" "sysexit\0" "sysexit64\0" "sysret\0" "sysret64\0" "t1mskc\0" "tzcnt\0" "tzmsk\0" "ud2\0"
  "v4fmaddps\0" "v4fmaddss\0" "v4fnmaddps\0" "v4fnmaddss\0" "vaddpd\0" "vaddps\0" "vaddsd\0" "vaddss\0" "vaddsubpd\0"
  "vaddsubps\0" "vaesdec\0" "vaesdeclast\0" "vaesenc\0" "vaesenclast\0" "vaesimc\0" "vaeskeygenassist\0" "valignd\0"
  "valignq\0" "vandnpd\0" "vandnps\0" "vandpd\0" "vandps\0" "vblendmb\0" "vblendmd\0" "vblendmpd\0" "vblendmps\0"
  "vblendmq\0" "vblendmw\0" "vblendpd\0" "vblendps\0" "vblendvpd\0" "vblendvps\0" "vbroadcastf128\0"
  "vbroadcastf32x2\0" "vbroadcastf32x4\0" "vbroadcastf32x8\0" "vbroadcastf64x2\0" "vbroadcastf64x4\0"
  "vbroadcasti128\0" "vbroadcasti32x2\0" "vbroadcasti32x4\0" "vbroadcasti32x8\0" "vbroadcasti64x2\0"
  "vbroadcasti64x4\0" "vbroadcastsd\0" "vbroadcastss\0" "vcmppd\0" "vcmpps\0" "vcmpsd\0" "vcmpss\0" "vcomisd\0"
  "vcomiss\0" "vcompresspd\0" "vcompressps\0" "vcvtdq2pd\0" "vcvtdq2ps\0" "vcvtne2ps2bf16\0" "vcvtneps2bf16\0"
  "vcvtpd2dq\0" "vcvtpd2ps\0" "vcvtpd2qq\0" "vcvtpd2udq\0" "vcvtpd2uqq\0" "vcvtph2ps\0" "vcvtps2dq\0" "vcvtps2pd\0"
  "vcvtps2ph\0" "vcvtps2qq\0" "vcvtps2udq\0" "vcvtps2uqq\0" "vcvtqq2pd\0" "vcvtqq2ps\0" "vcvtsd2si\0" "vcvtsd2ss\0"
  "vcvtsd2usi\0" "vcvtsi2sd\0" "vcvtsi2ss\0" "vcvtss2sd\0" "vcvtss2si\0" "vcvtss2usi\0" "vcvttpd2dq\0" "vcvttpd2qq\0"
  "vcvttpd2udq\0" "vcvttpd2uqq\0" "vcvttps2dq\0" "vcvttps2qq\0" "vcvttps2udq\0" "vcvttps2uqq\0" "vcvttsd2si\0"
  "vcvttsd2usi\0" "vcvttss2si\0" "vcvttss2usi\0" "vcvtudq2pd\0" "vcvtudq2ps\0" "vcvtuqq2pd\0" "vcvtuqq2ps\0"
  "vcvtusi2sd\0" "vcvtusi2ss\0" "vdbpsadbw\0" "vdivpd\0" "vdivps\0" "vdivsd\0" "vdivss\0" "vdpbf16ps\0" "vdppd\0"
  "vdpps\0" "verr\0" "verw\0" "vexp2pd\0" "vexp2ps\0" "vexpandpd\0" "vexpandps\0" "vextractf128\0" "vextractf32x4\0"
  "vextractf32x8\0" "vextractf64x2\0" "vextractf64x4\0" "vextracti128\0" "vextracti32x4\0" "vextracti32x8\0"
  "vextracti64x2\0" "vextracti64x4\0" "vextractps\0" "vfixupimmpd\0" "vfixupimmps\0" "vfixupimmsd\0" "vfixupimmss\0"
  "vfmadd132pd\0" "vfmadd132ps\0" "vfmadd132sd\0" "vfmadd132ss\0" "vfmadd213pd\0" "vfmadd213ps\0" "vfmadd213sd\0"
  "vfmadd213ss\0" "vfmadd231pd\0" "vfmadd231ps\0" "vfmadd231sd\0" "vfmadd231ss\0" "vfmaddpd\0" "vfmaddps\0"
  "vfmaddsd\0" "vfmaddss\0" "vfmaddsub132pd\0" "vfmaddsub132ps\0" "vfmaddsub213pd\0" "vfmaddsub213ps\0"
  "vfmaddsub231pd\0" "vfmaddsub231ps\0" "vfmaddsubpd\0" "vfmaddsubps\0" "vfmsub132pd\0" "vfmsub132ps\0" "vfmsub132sd\0"
  "vfmsub132ss\0" "vfmsub213pd\0" "vfmsub213ps\0" "vfmsub213sd\0" "vfmsub213ss\0" "vfmsub231pd\0" "vfmsub231ps\0"
  "vfmsub231sd\0" "vfmsub231ss\0" "vfmsubadd132pd\0" "vfmsubadd132ps\0" "vfmsubadd213pd\0" "vfmsubadd213ps\0"
  "vfmsubadd231pd\0" "vfmsubadd231ps\0" "vfmsubaddpd\0" "vfmsubaddps\0" "vfmsubpd\0" "vfmsubps\0" "vfmsubsd\0"
  "vfmsubss\0" "vfnmadd132pd\0" "vfnmadd132ps\0" "vfnmadd132sd\0" "vfnmadd132ss\0" "vfnmadd213pd\0" "vfnmadd213ps\0"
  "vfnmadd213sd\0" "vfnmadd213ss\0" "vfnmadd231pd\0" "vfnmadd231ps\0" "vfnmadd231sd\0" "vfnmadd231ss\0" "vfnmaddpd\0"
  "vfnmaddps\0" "vfnmaddsd\0" "vfnmaddss\0" "vfnmsub132pd\0" "vfnmsub132ps\0" "vfnmsub132sd\0" "vfnmsub132ss\0"
  "vfnmsub213pd\0" "vfnmsub213ps\0" "vfnmsub213sd\0" "vfnmsub213ss\0" "vfnmsub231pd\0" "vfnmsub231ps\0"
  "vfnmsub231sd\0" "vfnmsub231ss\0" "vfnmsubpd\0" "vfnmsubps\0" "vfnmsubsd\0" "vfnmsubss\0" "vfpclasspd\0"
  "vfpclassps\0" "vfpclasssd\0" "vfpclassss\0" "vfrczpd\0" "vfrczps\0" "vfrczsd\0" "vfrczss\0" "vgatherdpd\0"
  "vgatherdps\0" "vgatherpf0dpd\0" "vgatherpf0dps\0" "vgatherpf0qpd\0" "vgatherpf0qps\0" "vgatherpf1dpd\0"
  "vgatherpf1dps\0" "vgatherpf1qpd\0" "vgatherpf1qps\0" "vgatherqpd\0" "vgatherqps\0" "vgetexppd\0" "vgetexpps\0"
  "vgetexpsd\0" "vgetexpss\0" "vgetmantpd\0" "vgetmantps\0" "vgetmantsd\0" "vgetmantss\0" "vgf2p8affineinvqb\0"
  "vgf2p8affineqb\0" "vgf2p8mulb\0" "vhaddpd\0" "vhaddps\0" "vhsubpd\0" "vhsubps\0" "vinsertf128\0" "vinsertf32x4\0"
  "vinsertf32x8\0" "vinsertf64x2\0" "vinsertf64x4\0" "vinserti128\0" "vinserti32x4\0" "vinserti32x8\0" "vinserti64x2\0"
  "vinserti64x4\0" "vinsertps\0" "vlddqu\0" "vldmxcsr\0" "vmaskmovdqu\0" "vmaskmovpd\0" "vmaskmovps\0" "vmaxpd\0"
  "vmaxps\0" "vmaxsd\0" "vmaxss\0" "vmcall\0" "vmclear\0" "vmfunc\0" "vminpd\0" "vminps\0" "vminsd\0" "vminss\0"
  "vmlaunch\0" "vmload\0" "vmmcall\0" "vmovapd\0" "vmovaps\0" "vmovd\0" "vmovddup\0" "vmovdqa\0" "vmovdqa32\0"
  "vmovdqa64\0" "vmovdqu\0" "vmovdqu16\0" "vmovdqu32\0" "vmovdqu64\0" "vmovdqu8\0" "vmovhlps\0" "vmovhpd\0" "vmovhps\0"
  "vmovlhps\0" "vmovlpd\0" "vmovlps\0" "vmovmskpd\0" "vmovmskps\0" "vmovntdq\0" "vmovntdqa\0" "vmovntpd\0" "vmovntps\0"
  "vmovq\0" "vmovsd\0" "vmovshdup\0" "vmovsldup\0" "vmovss\0" "vmovupd\0" "vmovups\0" "vmpsadbw\0" "vmptrld\0"
  "vmptrst\0" "vmread\0" "vmresume\0" "vmrun\0" "vmsave\0" "vmulpd\0" "vmulps\0" "vmulsd\0" "vmulss\0" "vmwrite\0"
  "vmxon\0" "vorpd\0" "vorps\0" "vp4dpwssd\0" "vp4dpwssds\0" "vpabsb\0" "vpabsd\0" "vpabsq\0" "vpabsw\0" "vpackssdw\0"
  "vpacksswb\0" "vpackusdw\0" "vpackuswb\0" "vpaddb\0" "vpaddd\0" "vpaddq\0" "vpaddsb\0" "vpaddsw\0" "vpaddusb\0"
  "vpaddusw\0" "vpaddw\0" "vpalignr\0" "vpand\0" "vpandd\0" "vpandn\0" "vpandnd\0" "vpandnq\0" "vpandq\0" "vpavgb\0"
  "vpavgw\0" "vpblendd\0" "vpblendvb\0" "vpblendw\0" "vpbroadcastb\0" "vpbroadcastd\0" "vpbroadcastmb2d\0"
  "vpbroadcastmb2q\0" "vpbroadcastq\0" "vpbroadcastw\0" "vpclmulqdq\0" "vpcmov\0" "vpcmpb\0" "vpcmpd\0" "vpcmpeqb\0"
  "vpcmpeqd\0" "vpcmpeqq\0" "vpcmpeqw\0" "vpcmpestri\0" "vpcmpestrm\0" "vpcmpgtb\0" "vpcmpgtd\0" "vpcmpgtq\0"
  "vpcmpgtw\0" "vpcmpistri\0" "vpcmpistrm\0" "vpcmpq\0" "vpcmpub\0" "vpcmpud\0" "vpcmpuq\0" "vpcmpuw\0" "vpcmpw\0"
  "vpcomb\0" "vpcomd\0" "vpcompressb\0" "vpcompressd\0" "vpcompressq\0" "vpcompressw\0" "vpcomq\0" "vpcomub\0"
  "vpcomud\0" "vpcomuq\0" "vpcomuw\0" "vpcomw\0" "vpconflictd\0" "vpconflictq\0" "vpdpbusd\0" "vpdpbusds\0"
  "vpdpwssd\0" "vpdpwssds\0" "vperm2f128\0" "vperm2i128\0" "vpermb\0" "vpermd\0" "vpermi2b\0" "vpermi2d\0"
  "vpermi2pd\0" "vpermi2ps\0" "vpermi2q\0" "vpermi2w\0" "vpermil2pd\0" "vpermil2ps\0" "vpermilpd\0" "vpermilps\0"
  "vpermpd\0" "vpermps\0" "vpermq\0" "vpermt2b\0" "vpermt2d\0" "vpermt2pd\0" "vpermt2ps\0" "vpermt2q\0" "vpermt2w\0"
  "vpermw\0" "vpexpandb\0" "vpexpandd\0" "vpexpandq\0" "vpexpandw\0" "vpextrb\0" "vpextrd\0" "vpextrq\0" "vpextrw\0"
  "vpgatherdd\0" "vpgatherdq\0" "vpgatherqd\0" "vpgatherqq\0" "vphaddbd\0" "vphaddbq\0" "vphaddbw\0" "vphaddd\0"
  "vphadddq\0" "vphaddsw\0" "vphaddubd\0" "vphaddubq\0" "vphaddubw\0" "vphaddudq\0" "vphadduwd\0" "vphadduwq\0"
  "vphaddw\0" "vphaddwd\0" "vphaddwq\0" "vphminposuw\0" "vphsubbw\0" "vphsubd\0" "vphsubdq\0" "vphsubsw\0" "vphsubw\0"
  "vphsubwd\0" "vpinsrb\0" "vpinsrd\0" "vpinsrq\0" "vpinsrw\0" "vplzcntd\0" "vplzcntq\0" "vpmacsdd\0" "vpmacsdqh\0"
  "vpmacsdql\0" "vpmacssdd\0" "vpmacssdqh\0" "vpmacssdql\0" "vpmacsswd\0" "vpmacssww\0" "vpmacswd\0" "vpmacsww\0"
  "vpmadcsswd\0" "vpmadcswd\0" "vpmadd52huq\0" "vpmadd52luq\0" "vpmaddubsw\0" "vpmaddwd\0" "vpmaskmovd\0"
  "vpmaskmovq\0" "vpmaxsb\0" "vpmaxsd\0" "vpmaxsq\0" "vpmaxsw\0" "vpmaxub\0" "vpmaxud\0" "vpmaxuq\0" "vpmaxuw\0"
  "vpminsb\0" "vpminsd\0" "vpminsq\0" "vpminsw\0" "vpminub\0" "vpminud\0" "vpminuq\0" "vpminuw\0" "vpmovb2m\0"
  "vpmovd2m\0" "vpmovdb\0" "vpmovdw\0" "vpmovm2b\0" "vpmovm2d\0" "vpmovm2q\0" "vpmovm2w\0" "vpmovmskb\0" "vpmovq2m\0"
  "vpmovqb\0" "vpmovqd\0" "vpmovqw\0" "vpmovsdb\0" "vpmovsdw\0" "vpmovsqb\0" "vpmovsqd\0" "vpmovsqw\0" "vpmovswb\0"
  "vpmovsxbd\0" "vpmovsxbq\0" "vpmovsxbw\0" "vpmovsxdq\0" "vpmovsxwd\0" "vpmovsxwq\0" "vpmovusdb\0" "vpmovusdw\0"
  "vpmovusqb\0" "vpmovusqd\0" "vpmovusqw\0" "vpmovuswb\0" "vpmovw2m\0" "vpmovwb\0" "vpmovzxbd\0" "vpmovzxbq\0"
  "vpmovzxbw\0" "vpmovzxdq\0" "vpmovzxwd\0" "vpmovzxwq\0" "vpmuldq\0" "vpmulhrsw\0" "vpmulhuw\0" "vpmulhw\0"
  "vpmulld\0" "vpmullq\0" "vpmullw\0" "vpmultishiftqb\0" "vpmuludq\0" "vpopcntb\0" "vpopcntd\0" "vpopcntq\0"
  "vpopcntw\0" "vpor\0" "vpord\0" "vporq\0" "vpperm\0" "vprold\0" "vprolq\0" "vprolvd\0" "vprolvq\0" "vprord\0"
  "vprorq\0" "vprorvd\0" "vprorvq\0" "vprotb\0" "vprotd\0" "vprotq\0" "vprotw\0" "vpsadbw\0" "vpscatterdd\0"
  "vpscatterdq\0" "vpscatterqd\0" "vpscatterqq\0" "vpshab\0" "vpshad\0" "vpshaq\0" "vpshaw\0" "vpshlb\0" "vpshld\0"
  "vpshldd\0" "vpshldq\0" "vpshldvd\0" "vpshldvq\0" "vpshldvw\0" "vpshldw\0" "vpshlq\0" "vpshlw\0" "vpshrdd\0"
  "vpshrdq\0" "vpshrdvd\0" "vpshrdvq\0" "vpshrdvw\0" "vpshrdw\0" "vpshufb\0" "vpshufbitqmb\0" "vpshufd\0" "vpshufhw\0"
  "vpshuflw\0" "vpsignb\0" "vpsignd\0" "vpsignw\0" "vpslld\0" "vpslldq\0" "vpsllq\0" "vpsllvd\0" "vpsllvq\0"
  "vpsllvw\0" "vpsllw\0" "vpsrad\0" "vpsraq\0" "vpsravd\0" "vpsravq\0" "vpsravw\0" "vpsraw\0" "vpsrld\0" "vpsrldq\0"
  "vpsrlq\0" "vpsrlvd\0" "vpsrlvq\0" "vpsrlvw\0" "vpsrlw\0" "vpsubb\0" "vpsubd\0" "vpsubq\0" "vpsubsb\0" "vpsubsw\0"
  "vpsubusb\0" "vpsubusw\0" "vpsubw\0" "vpternlogd\0" "vpternlogq\0" "vptest\0" "vptestmb\0" "vptestmd\0" "vptestmq\0"
  "vptestmw\0" "vptestnmb\0" "vptestnmd\0" "vptestnmq\0" "vptestnmw\0" "vpunpckhbw\0" "vpunpckhdq\0" "vpunpckhqdq\0"
  "vpunpckhwd\0" "vpunpcklbw\0" "vpunpckldq\0" "vpunpcklqdq\0" "vpunpcklwd\0" "vpxor\0" "vpxord\0" "vpxorq\0"
  "vrangepd\0" "vrangeps\0" "vrangesd\0" "vrangess\0" "vrcp14pd\0" "vrcp14ps\0" "vrcp14sd\0" "vrcp14ss\0" "vrcp28pd\0"
  "vrcp28ps\0" "vrcp28sd\0" "vrcp28ss\0" "vrcpps\0" "vrcpss\0" "vreducepd\0" "vreduceps\0" "vreducesd\0" "vreducess\0"
  "vrndscalepd\0" "vrndscaleps\0" "vrndscalesd\0" "vrndscaless\0" "vroundpd\0" "vroundps\0" "vroundsd\0" "vroundss\0"
  "vrsqrt14pd\0" "vrsqrt14ps\0" "vrsqrt14sd\0" "vrsqrt14ss\0" "vrsqrt28pd\0" "vrsqrt28ps\0" "vrsqrt28sd\0"
  "vrsqrt28ss\0" "vrsqrtps\0" "vrsqrtss\0" "vscalefpd\0" "vscalefps\0" "vscalefsd\0" "vscalefss\0" "vscatterdpd\0"
  "vscatterdps\0" "vscatterpf0dpd\0" "vscatterpf0dps\0" "vscatterpf0qpd\0" "vscatterpf0qps\0" "vscatterpf1dpd\0"
  "vscatterpf1dps\0" "vscatterpf1qpd\0" "vscatterpf1qps\0" "vscatterqpd\0" "vscatterqps\0" "vshuff32x4\0"
  "vshuff64x2\0" "vshufi32x4\0" "vshufi64x2\0" "vshufpd\0" "vshufps\0" "vsqrtpd\0" "vsqrtps\0" "vsqrtsd\0" "vsqrtss\0"
  "vstmxcsr\0" "vsubpd\0" "vsubps\0" "vsubsd\0" "vsubss\0" "vtestpd\0" "vtestps\0" "vucomisd\0" "vucomiss\0"
  "vunpckhpd\0" "vunpckhps\0" "vunpcklpd\0" "vunpcklps\0" "vxorpd\0" "vxorps\0" "vzeroall\0" "vzeroupper\0" "wbinvd\0"
  "wbnoinvd\0" "wrfsbase\0" "wrgsbase\0" "wrmsr\0" "xabort\0" "xadd\0" "xbegin\0" "xend\0" "xgetbv\0" "xlatb\0"
  "xrstors\0" "xrstors64\0" "xsavec\0" "xsavec64\0" "xsaveopt\0" "xsaveopt64\0" "xsaves\0" "xsaves64\0" "xsetbv\0"
  "xtest";
2714
// Per-letter index over the instruction-id space: entry [i] corresponds to
// mnemonics starting with letter ('a' + i) and stores a half-open id range
// [firstId, lastId + 1) - visible below as "kIdX" / "kIdY + 1" pairs.
// Letters with no instructions in the database ('q', 'y', 'z' here) carry
// the sentinel range { kIdNone, kIdNone + 1 }.
// NOTE(review): presumably used to restrict name-to-id lookup to a single
// letter bucket - confirm against the InstDB string-lookup code.
const InstDB::InstNameIndex InstDB::instNameIndex[26] = {
  { Inst::kIdAaa          , Inst::kIdArpl          + 1 },
  { Inst::kIdBextr        , Inst::kIdBzhi          + 1 },
  { Inst::kIdCall         , Inst::kIdCwde          + 1 },
  { Inst::kIdDaa          , Inst::kIdDpps          + 1 },
  { Inst::kIdEmms         , Inst::kIdExtrq         + 1 },
  { Inst::kIdF2xm1        , Inst::kIdFyl2xp1       + 1 },
  { Inst::kIdGetsec       , Inst::kIdGf2p8mulb     + 1 },
  { Inst::kIdHaddpd       , Inst::kIdHsubps        + 1 },
  { Inst::kIdIdiv         , Inst::kIdIretw         + 1 },
  { Inst::kIdJa           , Inst::kIdJz            + 1 },
  { Inst::kIdKaddb        , Inst::kIdKxorw         + 1 },
  { Inst::kIdLahf         , Inst::kIdLzcnt         + 1 },
  { Inst::kIdMaskmovdqu   , Inst::kIdMwaitx        + 1 },
  { Inst::kIdNeg          , Inst::kIdNot           + 1 },
  { Inst::kIdOr           , Inst::kIdOuts          + 1 },
  { Inst::kIdPabsb        , Inst::kIdPxor          + 1 },
  { Inst::kIdNone         , Inst::kIdNone          + 1 },
  { Inst::kIdRcl          , Inst::kIdRsqrtss       + 1 },
  { Inst::kIdSahf         , Inst::kIdSysret64      + 1 },
  { Inst::kIdT1mskc       , Inst::kIdTzmsk         + 1 },
  { Inst::kIdUcomisd      , Inst::kIdUnpcklps      + 1 },
  { Inst::kIdV4fmaddps    , Inst::kIdVzeroupper    + 1 },
  { Inst::kIdWbinvd       , Inst::kIdWrmsr         + 1 },
  { Inst::kIdXabort       , Inst::kIdXtest         + 1 },
  { Inst::kIdNone         , Inst::kIdNone          + 1 },
  { Inst::kIdNone         , Inst::kIdNone          + 1 }
};
2743 // ----------------------------------------------------------------------------
2744 // ${NameData:End}
2745 #endif // !ASMJIT_NO_TEXT
2746
2747 // ============================================================================
2748 // [asmjit::x86::InstDB - InstSignature / OpSignature]
2749 // ============================================================================
2750
2751 #ifndef ASMJIT_NO_VALIDATION
2752 // ${InstSignatureTable:Begin}
2753 // ------------------- Automatically generated, do not edit -------------------
// Builds one InstDB::InstSignature entry. Field meanings (from the rows and
// their generated comments below):
//   count    - number of operands this signature accepts.
//   x86/x64  - non-zero if the signature is valid in 32-bit / 64-bit mode;
//              folded into a kModeX86|kModeX64 bitmask.
//   implicit - count of implicit operands (rendered as <ax>, <edx>, ... in
//              the row comments) - TODO(review): confirm it is a count, not
//              a mask, against the validator that consumes this table.
//   o0..o5   - indexes into the operand-signature (OpSignature) table; 0
//              means "no operand". The fourth struct member is padding.
#define ROW(count, x86, x64, implicit, o0, o1, o2, o3, o4, o5) \
  { count, (x86 ? uint8_t(InstDB::kModeX86) : uint8_t(0)) | \
           (x64 ? uint8_t(InstDB::kModeX64) : uint8_t(0)) , \
    implicit, \
    0, \
    { o0, o1, o2, o3, o4, o5 } \
  }
2761 const InstDB::InstSignature InstDB::_instSignatureTable[] = {
2762 ROW(2, 1, 1, 0, 1 , 2 , 0 , 0 , 0 , 0 ), // #0 {r8lo|r8hi|m8|mem, r8lo|r8hi}
2763 ROW(2, 1, 1, 0, 3 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem|sreg, r16}
2764 ROW(2, 1, 1, 0, 5 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem|sreg, r32}
2765 ROW(2, 0, 1, 0, 7 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem|sreg|creg|dreg, r64}
2766 ROW(2, 1, 1, 0, 9 , 10 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi|m8, i8|u8}
2767 ROW(2, 1, 1, 0, 11 , 12 , 0 , 0 , 0 , 0 ), // {r16|m16, i16|u16}
2768 ROW(2, 1, 1, 0, 13 , 14 , 0 , 0 , 0 , 0 ), // {r32|m32, i32|u32}
2769 ROW(2, 0, 1, 0, 15 , 16 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, i32}
2770 ROW(2, 0, 1, 0, 8 , 17 , 0 , 0 , 0 , 0 ), // {r64, i64|u64|m64|mem|sreg|creg|dreg}
2771 ROW(2, 1, 1, 0, 2 , 18 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi, m8|mem}
2772 ROW(2, 1, 1, 0, 4 , 19 , 0 , 0 , 0 , 0 ), // {r16, m16|mem|sreg}
2773 ROW(2, 1, 1, 0, 6 , 20 , 0 , 0 , 0 , 0 ), // {r32, m32|mem|sreg}
2774 ROW(2, 1, 1, 0, 21 , 22 , 0 , 0 , 0 , 0 ), // {m16|mem, sreg}
2775 ROW(2, 1, 1, 0, 22 , 21 , 0 , 0 , 0 , 0 ), // {sreg, m16|mem}
2776 ROW(2, 1, 0, 0, 6 , 23 , 0 , 0 , 0 , 0 ), // {r32, creg|dreg}
2777 ROW(2, 1, 0, 0, 23 , 6 , 0 , 0 , 0 , 0 ), // {creg|dreg, r32}
2778 ROW(2, 1, 1, 0, 9 , 10 , 0 , 0 , 0 , 0 ), // #16 {r8lo|r8hi|m8, i8|u8}
2779 ROW(2, 1, 1, 0, 11 , 12 , 0 , 0 , 0 , 0 ), // {r16|m16, i16|u16}
2780 ROW(2, 1, 1, 0, 13 , 14 , 0 , 0 , 0 , 0 ), // {r32|m32, i32|u32}
2781 ROW(2, 0, 1, 0, 15 , 24 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, i32|r64}
2782 ROW(2, 1, 1, 0, 25 , 26 , 0 , 0 , 0 , 0 ), // {r16|m16|r32|m32|r64|m64|mem, i8}
2783 ROW(2, 1, 1, 0, 1 , 2 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi|m8|mem, r8lo|r8hi}
2784 ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16}
2785 ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32}
2786 ROW(2, 1, 1, 0, 2 , 18 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi, m8|mem}
2787 ROW(2, 1, 1, 0, 4 , 21 , 0 , 0 , 0 , 0 ), // {r16, m16|mem}
2788 ROW(2, 1, 1, 0, 6 , 29 , 0 , 0 , 0 , 0 ), // {r32, m32|mem}
2789 ROW(2, 0, 1, 0, 8 , 30 , 0 , 0 , 0 , 0 ), // {r64, m64|mem}
2790 ROW(2, 1, 1, 0, 31 , 10 , 0 , 0 , 0 , 0 ), // #28 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem, i8|u8}
2791 ROW(2, 1, 1, 0, 11 , 12 , 0 , 0 , 0 , 0 ), // {r16|m16, i16|u16}
2792 ROW(2, 1, 1, 0, 13 , 14 , 0 , 0 , 0 , 0 ), // {r32|m32, i32|u32}
2793 ROW(2, 0, 1, 0, 8 , 32 , 0 , 0 , 0 , 0 ), // {r64, u32|i32|r64|m64|mem}
2794 ROW(2, 0, 1, 0, 30 , 24 , 0 , 0 , 0 , 0 ), // {m64|mem, i32|r64}
2795 ROW(2, 1, 1, 0, 1 , 2 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi|m8|mem, r8lo|r8hi}
2796 ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16}
2797 ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32}
2798 ROW(2, 1, 1, 0, 2 , 18 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi, m8|mem}
2799 ROW(2, 1, 1, 0, 4 , 21 , 0 , 0 , 0 , 0 ), // {r16, m16|mem}
2800 ROW(2, 1, 1, 0, 6 , 29 , 0 , 0 , 0 , 0 ), // {r32, m32|mem}
2801 ROW(2, 1, 1, 1, 33 , 1 , 0 , 0 , 0 , 0 ), // #39 {<ax>, r8lo|r8hi|m8|mem}
2802 ROW(3, 1, 1, 2, 34 , 33 , 27 , 0 , 0 , 0 ), // {<dx>, <ax>, r16|m16|mem}
2803 ROW(3, 1, 1, 2, 35 , 36 , 28 , 0 , 0 , 0 ), // {<edx>, <eax>, r32|m32|mem}
2804 ROW(3, 0, 1, 2, 37 , 38 , 15 , 0 , 0 , 0 ), // {<rdx>, <rax>, r64|m64|mem}
2805 ROW(2, 1, 1, 0, 4 , 39 , 0 , 0 , 0 , 0 ), // {r16, r16|m16|mem|i8|i16}
2806 ROW(2, 1, 1, 0, 6 , 40 , 0 , 0 , 0 , 0 ), // {r32, r32|m32|mem|i8|i32}
2807 ROW(2, 0, 1, 0, 8 , 41 , 0 , 0 , 0 , 0 ), // {r64, r64|m64|mem|i8|i32}
2808 ROW(3, 1, 1, 0, 4 , 27 , 42 , 0 , 0 , 0 ), // {r16, r16|m16|mem, i8|i16|u16}
2809 ROW(3, 1, 1, 0, 6 , 28 , 43 , 0 , 0 , 0 ), // {r32, r32|m32|mem, i8|i32|u32}
2810 ROW(3, 0, 1, 0, 8 , 15 , 44 , 0 , 0 , 0 ), // {r64, r64|m64|mem, i8|i32}
2811 ROW(2, 1, 1, 0, 1 , 2 , 0 , 0 , 0 , 0 ), // #49 {r8lo|r8hi|m8|mem, r8lo|r8hi}
2812 ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16}
2813 ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32}
2814 ROW(2, 0, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64}
2815 ROW(2, 1, 1, 0, 2 , 18 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi, m8|mem}
2816 ROW(2, 1, 1, 0, 4 , 21 , 0 , 0 , 0 , 0 ), // {r16, m16|mem}
2817 ROW(2, 1, 1, 0, 6 , 29 , 0 , 0 , 0 , 0 ), // {r32, m32|mem}
2818 ROW(2, 0, 1, 0, 8 , 30 , 0 , 0 , 0 , 0 ), // {r64, m64|mem}
2819 ROW(2, 1, 1, 0, 9 , 10 , 0 , 0 , 0 , 0 ), // #57 {r8lo|r8hi|m8, i8|u8}
2820 ROW(2, 1, 1, 0, 11 , 12 , 0 , 0 , 0 , 0 ), // {r16|m16, i16|u16}
2821 ROW(2, 1, 1, 0, 13 , 14 , 0 , 0 , 0 , 0 ), // {r32|m32, i32|u32}
2822 ROW(2, 0, 1, 0, 15 , 24 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, i32|r64}
2823 ROW(2, 1, 1, 0, 1 , 2 , 0 , 0 , 0 , 0 ), // {r8lo|r8hi|m8|mem, r8lo|r8hi}
2824 ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16}
2825 ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32}
2826 ROW(2, 1, 1, 0, 4 , 21 , 0 , 0 , 0 , 0 ), // #64 {r16, m16|mem}
2827 ROW(2, 1, 1, 0, 6 , 29 , 0 , 0 , 0 , 0 ), // {r32, m32|mem}
2828 ROW(2, 0, 1, 0, 8 , 30 , 0 , 0 , 0 , 0 ), // {r64, m64|mem}
2829 ROW(2, 1, 1, 0, 21 , 4 , 0 , 0 , 0 , 0 ), // {m16|mem, r16}
2830 ROW(2, 1, 1, 0, 29 , 6 , 0 , 0 , 0 , 0 ), // #68 {m32|mem, r32}
2831 ROW(2, 0, 1, 0, 30 , 8 , 0 , 0 , 0 , 0 ), // {m64|mem, r64}
2832 ROW(2, 1, 1, 0, 45 , 46 , 0 , 0 , 0 , 0 ), // #70 {xmm, xmm|m128|mem}
2833 ROW(2, 1, 1, 0, 47 , 45 , 0 , 0 , 0 , 0 ), // #71 {m128|mem, xmm}
2834 ROW(2, 1, 1, 0, 48 , 49 , 0 , 0 , 0 , 0 ), // {ymm, ymm|m256|mem}
2835 ROW(2, 1, 1, 0, 50 , 48 , 0 , 0 , 0 , 0 ), // {m256|mem, ymm}
2836 ROW(2, 1, 1, 0, 51 , 52 , 0 , 0 , 0 , 0 ), // #74 {zmm, zmm|m512|mem}
2837 ROW(2, 1, 1, 0, 53 , 51 , 0 , 0 , 0 , 0 ), // {m512|mem, zmm}
2838 ROW(3, 1, 1, 0, 45 , 45 , 54 , 0 , 0 , 0 ), // #76 {xmm, xmm, xmm|m128|mem|i8|u8}
2839 ROW(3, 1, 1, 0, 45 , 47 , 10 , 0 , 0 , 0 ), // {xmm, m128|mem, i8|u8}
2840 ROW(3, 1, 1, 0, 48 , 48 , 55 , 0 , 0 , 0 ), // {ymm, ymm, ymm|m256|mem|i8|u8}
2841 ROW(3, 1, 1, 0, 48 , 50 , 10 , 0 , 0 , 0 ), // {ymm, m256|mem, i8|u8}
2842 ROW(3, 1, 1, 0, 51 , 51 , 56 , 0 , 0 , 0 ), // {zmm, zmm, zmm|m512|mem|i8|u8}
2843 ROW(3, 1, 1, 0, 51 , 53 , 10 , 0 , 0 , 0 ), // {zmm, m512|mem, i8|u8}
2844 ROW(3, 1, 1, 0, 45 , 45 , 54 , 0 , 0 , 0 ), // #82 {xmm, xmm, i8|u8|xmm|m128|mem}
2845 ROW(3, 1, 1, 0, 48 , 48 , 54 , 0 , 0 , 0 ), // {ymm, ymm, i8|u8|xmm|m128|mem}
2846 ROW(3, 1, 1, 0, 45 , 47 , 10 , 0 , 0 , 0 ), // {xmm, m128|mem, i8|u8}
2847 ROW(3, 1, 1, 0, 48 , 50 , 10 , 0 , 0 , 0 ), // {ymm, m256|mem, i8|u8}
2848 ROW(3, 1, 1, 0, 51 , 51 , 54 , 0 , 0 , 0 ), // {zmm, zmm, xmm|m128|mem|i8|u8}
2849 ROW(3, 1, 1, 0, 51 , 53 , 10 , 0 , 0 , 0 ), // {zmm, m512|mem, i8|u8}
2850 ROW(3, 1, 1, 0, 45 , 45 , 54 , 0 , 0 , 0 ), // #88 {xmm, xmm, xmm|m128|mem|i8|u8}
2851 ROW(3, 1, 1, 0, 45 , 47 , 10 , 0 , 0 , 0 ), // {xmm, m128|mem, i8|u8}
2852 ROW(3, 1, 1, 0, 48 , 48 , 54 , 0 , 0 , 0 ), // {ymm, ymm, xmm|m128|mem|i8|u8}
2853 ROW(3, 1, 1, 0, 48 , 50 , 10 , 0 , 0 , 0 ), // {ymm, m256|mem, i8|u8}
2854 ROW(3, 1, 1, 0, 51 , 51 , 54 , 0 , 0 , 0 ), // {zmm, zmm, xmm|m128|mem|i8|u8}
2855 ROW(3, 1, 1, 0, 51 , 53 , 10 , 0 , 0 , 0 ), // {zmm, m512|mem, i8|u8}
2856 ROW(2, 1, 1, 0, 57 , 58 , 0 , 0 , 0 , 0 ), // #94 {mm, mm|m64|mem|r64}
2857 ROW(2, 1, 1, 0, 15 , 59 , 0 , 0 , 0 , 0 ), // {m64|mem|r64, mm|xmm}
2858 ROW(2, 0, 1, 0, 45 , 15 , 0 , 0 , 0 , 0 ), // {xmm, r64|m64|mem}
2859 ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #97 {xmm, xmm|m64|mem}
2860 ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #98 {m64|mem, xmm}
2861 ROW(3, 1, 1, 0, 45 , 61 , 45 , 0 , 0 , 0 ), // #99 {xmm, vm32x, xmm}
2862 ROW(3, 1, 1, 0, 48 , 61 , 48 , 0 , 0 , 0 ), // {ymm, vm32x, ymm}
2863 ROW(2, 1, 1, 0, 45 , 61 , 0 , 0 , 0 , 0 ), // {xmm, vm32x}
2864 ROW(2, 1, 1, 0, 48 , 62 , 0 , 0 , 0 , 0 ), // {ymm, vm32y}
2865 ROW(2, 1, 1, 0, 51 , 63 , 0 , 0 , 0 , 0 ), // {zmm, vm32z}
2866 ROW(3, 1, 1, 0, 45 , 61 , 45 , 0 , 0 , 0 ), // #104 {xmm, vm32x, xmm}
2867 ROW(3, 1, 1, 0, 48 , 62 , 48 , 0 , 0 , 0 ), // {ymm, vm32y, ymm}
2868 ROW(2, 1, 1, 0, 45 , 61 , 0 , 0 , 0 , 0 ), // {xmm, vm32x}
2869 ROW(2, 1, 1, 0, 48 , 62 , 0 , 0 , 0 , 0 ), // {ymm, vm32y}
2870 ROW(2, 1, 1, 0, 51 , 63 , 0 , 0 , 0 , 0 ), // {zmm, vm32z}
2871 ROW(3, 1, 1, 0, 45 , 64 , 45 , 0 , 0 , 0 ), // #109 {xmm, vm64x, xmm}
2872 ROW(3, 1, 1, 0, 48 , 65 , 48 , 0 , 0 , 0 ), // {ymm, vm64y, ymm}
2873 ROW(2, 1, 1, 0, 45 , 64 , 0 , 0 , 0 , 0 ), // {xmm, vm64x}
2874 ROW(2, 1, 1, 0, 48 , 65 , 0 , 0 , 0 , 0 ), // {ymm, vm64y}
2875 ROW(2, 1, 1, 0, 51 , 66 , 0 , 0 , 0 , 0 ), // {zmm, vm64z}
2876 ROW(2, 1, 1, 0, 25 , 10 , 0 , 0 , 0 , 0 ), // #114 {r16|m16|r32|m32|r64|m64|mem, i8|u8}
2877 ROW(2, 1, 1, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // {r16|m16|mem, r16}
2878 ROW(2, 1, 1, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, r32}
2879 ROW(2, 0, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64}
2880 ROW(3, 1, 1, 1, 1 , 2 , 67 , 0 , 0 , 0 ), // #118 {r8lo|r8hi|m8|mem, r8lo|r8hi, <al>}
2881 ROW(3, 1, 1, 1, 27 , 4 , 33 , 0 , 0 , 0 ), // {r16|m16|mem, r16, <ax>}
2882 ROW(3, 1, 1, 1, 28 , 6 , 36 , 0 , 0 , 0 ), // {r32|m32|mem, r32, <eax>}
2883 ROW(3, 0, 1, 1, 15 , 8 , 38 , 0 , 0 , 0 ), // {r64|m64|mem, r64, <rax>}
2884 ROW(1, 1, 1, 0, 68 , 0 , 0 , 0 , 0 , 0 ), // #122 {r16|m16|r64|m64|mem}
2885 ROW(1, 1, 0, 0, 13 , 0 , 0 , 0 , 0 , 0 ), // {r32|m32}
2886 ROW(1, 1, 0, 0, 69 , 0 , 0 , 0 , 0 , 0 ), // {ds|es|ss}
2887 ROW(1, 1, 1, 0, 70 , 0 , 0 , 0 , 0 , 0 ), // {fs|gs}
2888 ROW(1, 1, 1, 0, 71 , 0 , 0 , 0 , 0 , 0 ), // #126 {r16|m16|r64|m64|mem|i8|i16|i32}
2889 ROW(1, 1, 0, 0, 72 , 0 , 0 , 0 , 0 , 0 ), // {r32|m32|i32|u32}
2890 ROW(1, 1, 0, 0, 73 , 0 , 0 , 0 , 0 , 0 ), // {cs|ss|ds|es}
2891 ROW(1, 1, 1, 0, 70 , 0 , 0 , 0 , 0 , 0 ), // {fs|gs}
2892 ROW(4, 1, 1, 0, 45 , 45 , 45 , 46 , 0 , 0 ), // #130 {xmm, xmm, xmm, xmm|m128|mem}
2893 ROW(4, 1, 1, 0, 45 , 45 , 47 , 45 , 0 , 0 ), // {xmm, xmm, m128|mem, xmm}
2894 ROW(4, 1, 1, 0, 48 , 48 , 48 , 49 , 0 , 0 ), // {ymm, ymm, ymm, ymm|m256|mem}
2895 ROW(4, 1, 1, 0, 48 , 48 , 50 , 48 , 0 , 0 ), // {ymm, ymm, m256|mem, ymm}
2896 ROW(3, 1, 1, 0, 45 , 74 , 45 , 0 , 0 , 0 ), // #134 {xmm, vm64x|vm64y, xmm}
2897 ROW(2, 1, 1, 0, 45 , 64 , 0 , 0 , 0 , 0 ), // {xmm, vm64x}
2898 ROW(2, 1, 1, 0, 48 , 65 , 0 , 0 , 0 , 0 ), // {ymm, vm64y}
2899 ROW(2, 1, 1, 0, 51 , 66 , 0 , 0 , 0 , 0 ), // {zmm, vm64z}
2900 ROW(3, 1, 1, 0, 47 , 45 , 45 , 0 , 0 , 0 ), // #138 {m128|mem, xmm, xmm}
2901 ROW(3, 1, 1, 0, 50 , 48 , 48 , 0 , 0 , 0 ), // {m256|mem, ymm, ymm}
2902 ROW(3, 1, 1, 0, 45 , 45 , 47 , 0 , 0 , 0 ), // {xmm, xmm, m128|mem}
2903 ROW(3, 1, 1, 0, 48 , 48 , 50 , 0 , 0 , 0 ), // {ymm, ymm, m256|mem}
2904 ROW(5, 1, 1, 0, 45 , 45 , 46 , 45 , 75 , 0 ), // #142 {xmm, xmm, xmm|m128|mem, xmm, i4|u4}
2905 ROW(5, 1, 1, 0, 45 , 45 , 45 , 47 , 75 , 0 ), // {xmm, xmm, xmm, m128|mem, i4|u4}
2906 ROW(5, 1, 1, 0, 48 , 48 , 49 , 48 , 75 , 0 ), // {ymm, ymm, ymm|m256|mem, ymm, i4|u4}
2907 ROW(5, 1, 1, 0, 48 , 48 , 48 , 50 , 75 , 0 ), // {ymm, ymm, ymm, m256|mem, i4|u4}
2908 ROW(3, 1, 1, 0, 48 , 49 , 10 , 0 , 0 , 0 ), // #146 {ymm, ymm|m256|mem, i8|u8}
2909 ROW(3, 1, 1, 0, 48 , 48 , 49 , 0 , 0 , 0 ), // {ymm, ymm, ymm|m256|mem}
2910 ROW(3, 1, 1, 0, 51 , 51 , 56 , 0 , 0 , 0 ), // {zmm, zmm, zmm|m512|mem|i8|u8}
2911 ROW(3, 1, 1, 0, 51 , 53 , 10 , 0 , 0 , 0 ), // {zmm, m512|mem, i8|u8}
2912 ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #150 {r16, r16|m16|mem}
2913 ROW(2, 1, 1, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #151 {r32, r32|m32|mem}
2914 ROW(2, 0, 1, 0, 8 , 15 , 0 , 0 , 0 , 0 ), // {r64, r64|m64|mem}
2915 ROW(1, 1, 1, 0, 76 , 0 , 0 , 0 , 0 , 0 ), // #153 {m32|m64}
2916 ROW(2, 1, 1, 0, 77 , 78 , 0 , 0 , 0 , 0 ), // {st0, st}
2917 ROW(2, 1, 1, 0, 78 , 77 , 0 , 0 , 0 , 0 ), // {st, st0}
2918 ROW(2, 1, 1, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #156 {r16, m32|mem}
2919 ROW(2, 1, 1, 0, 6 , 79 , 0 , 0 , 0 , 0 ), // {r32, m48|mem}
2920 ROW(2, 0, 1, 0, 8 , 80 , 0 , 0 , 0 , 0 ), // {r64, m80|mem}
2921 ROW(3, 1, 1, 0, 27 , 4 , 81 , 0 , 0 , 0 ), // #159 {r16|m16|mem, r16, cl|i8|u8}
2922 ROW(3, 1, 1, 0, 28 , 6 , 81 , 0 , 0 , 0 ), // {r32|m32|mem, r32, cl|i8|u8}
2923 ROW(3, 0, 1, 0, 15 , 8 , 81 , 0 , 0 , 0 ), // {r64|m64|mem, r64, cl|i8|u8}
2924 ROW(3, 1, 1, 0, 45 , 45 , 46 , 0 , 0 , 0 ), // #162 {xmm, xmm, xmm|m128|mem}
2925 ROW(3, 1, 1, 0, 48 , 48 , 49 , 0 , 0 , 0 ), // #163 {ymm, ymm, ymm|m256|mem}
2926 ROW(3, 1, 1, 0, 51 , 51 , 52 , 0 , 0 , 0 ), // {zmm, zmm, zmm|m512|mem}
2927 ROW(4, 1, 1, 0, 45 , 45 , 46 , 10 , 0 , 0 ), // #165 {xmm, xmm, xmm|m128|mem, i8|u8}
2928 ROW(4, 1, 1, 0, 48 , 48 , 49 , 10 , 0 , 0 ), // #166 {ymm, ymm, ymm|m256|mem, i8|u8}
2929 ROW(4, 1, 1, 0, 51 , 51 , 52 , 10 , 0 , 0 ), // {zmm, zmm, zmm|m512|mem, i8|u8}
2930 ROW(4, 1, 1, 0, 82 , 45 , 46 , 10 , 0 , 0 ), // #168 {xmm|k, xmm, xmm|m128|mem, i8|u8}
2931 ROW(4, 1, 1, 0, 83 , 48 , 49 , 10 , 0 , 0 ), // {ymm|k, ymm, ymm|m256|mem, i8|u8}
2932 ROW(4, 1, 1, 0, 84 , 51 , 52 , 10 , 0 , 0 ), // {k, zmm, zmm|m512|mem, i8|u8}
2933 ROW(2, 1, 1, 0, 46 , 45 , 0 , 0 , 0 , 0 ), // #171 {xmm|m128|mem, xmm}
2934 ROW(2, 1, 1, 0, 49 , 48 , 0 , 0 , 0 , 0 ), // {ymm|m256|mem, ymm}
2935 ROW(2, 1, 1, 0, 52 , 51 , 0 , 0 , 0 , 0 ), // {zmm|m512|mem, zmm}
2936 ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #174 {xmm, xmm|m64|mem}
2937 ROW(2, 1, 1, 0, 48 , 46 , 0 , 0 , 0 , 0 ), // {ymm, xmm|m128|mem}
2938 ROW(2, 1, 1, 0, 51 , 49 , 0 , 0 , 0 , 0 ), // {zmm, ymm|m256|mem}
2939 ROW(2, 1, 1, 0, 45 , 46 , 0 , 0 , 0 , 0 ), // #177 {xmm, xmm|m128|mem}
2940 ROW(2, 1, 1, 0, 48 , 49 , 0 , 0 , 0 , 0 ), // {ymm, ymm|m256|mem}
2941 ROW(2, 1, 1, 0, 51 , 52 , 0 , 0 , 0 , 0 ), // {zmm, zmm|m512|mem}
2942 ROW(3, 1, 1, 0, 60 , 45 , 10 , 0 , 0 , 0 ), // #180 {xmm|m64|mem, xmm, i8|u8}
2943 ROW(3, 1, 1, 0, 46 , 48 , 10 , 0 , 0 , 0 ), // #181 {xmm|m128|mem, ymm, i8|u8}
2944 ROW(3, 1, 1, 0, 49 , 51 , 10 , 0 , 0 , 0 ), // #182 {ymm|m256|mem, zmm, i8|u8}
2945 ROW(3, 1, 1, 0, 45 , 46 , 10 , 0 , 0 , 0 ), // #183 {xmm, xmm|m128|mem, i8|u8}
2946 ROW(3, 1, 1, 0, 48 , 49 , 10 , 0 , 0 , 0 ), // {ymm, ymm|m256|mem, i8|u8}
2947 ROW(3, 1, 1, 0, 51 , 52 , 10 , 0 , 0 , 0 ), // {zmm, zmm|m512|mem, i8|u8}
2948 ROW(2, 1, 1, 0, 45 , 60 , 0 , 0 , 0 , 0 ), // #186 {xmm, xmm|m64|mem}
2949 ROW(2, 1, 1, 0, 48 , 49 , 0 , 0 , 0 , 0 ), // {ymm, ymm|m256|mem}
2950 ROW(2, 1, 1, 0, 51 , 52 , 0 , 0 , 0 , 0 ), // {zmm, zmm|m512|mem}
2951 ROW(2, 1, 1, 0, 47 , 45 , 0 , 0 , 0 , 0 ), // #189 {m128|mem, xmm}
2952 ROW(2, 1, 1, 0, 50 , 48 , 0 , 0 , 0 , 0 ), // {m256|mem, ymm}
2953 ROW(2, 1, 1, 0, 53 , 51 , 0 , 0 , 0 , 0 ), // {m512|mem, zmm}
2954 ROW(2, 1, 1, 0, 45 , 47 , 0 , 0 , 0 , 0 ), // #192 {xmm, m128|mem}
2955 ROW(2, 1, 1, 0, 48 , 50 , 0 , 0 , 0 , 0 ), // {ymm, m256|mem}
2956 ROW(2, 1, 1, 0, 51 , 53 , 0 , 0 , 0 , 0 ), // {zmm, m512|mem}
2957 ROW(2, 0, 1, 0, 15 , 45 , 0 , 0 , 0 , 0 ), // #195 {r64|m64|mem, xmm}
2958 ROW(2, 1, 1, 0, 45 , 85 , 0 , 0 , 0 , 0 ), // {xmm, xmm|m64|mem|r64}
2959 ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // {m64|mem, xmm}
2960 ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #198 {m64|mem, xmm}
2961 ROW(2, 1, 1, 0, 45 , 30 , 0 , 0 , 0 , 0 ), // {xmm, m64|mem}
2962 ROW(3, 1, 1, 0, 45 , 45 , 45 , 0 , 0 , 0 ), // #200 {xmm, xmm, xmm}
2963 ROW(2, 1, 1, 0, 29 , 45 , 0 , 0 , 0 , 0 ), // #201 {m32|mem, xmm}
2964 ROW(2, 1, 1, 0, 45 , 29 , 0 , 0 , 0 , 0 ), // {xmm, m32|mem}
2965 ROW(3, 1, 1, 0, 45 , 45 , 45 , 0 , 0 , 0 ), // {xmm, xmm, xmm}
2966 ROW(2, 1, 1, 0, 86 , 85 , 0 , 0 , 0 , 0 ), // #204 {xmm|ymm, xmm|m64|mem|r64}
2967 ROW(2, 0, 1, 0, 51 , 8 , 0 , 0 , 0 , 0 ), // {zmm, r64}
2968 ROW(2, 1, 1, 0, 51 , 60 , 0 , 0 , 0 , 0 ), // {zmm, xmm|m64|mem}
2969 ROW(4, 1, 1, 0, 84 , 45 , 46 , 10 , 0 , 0 ), // #207 {k, xmm, xmm|m128|mem, i8|u8}
2970 ROW(4, 1, 1, 0, 84 , 48 , 49 , 10 , 0 , 0 ), // {k, ymm, ymm|m256|mem, i8|u8}
2971 ROW(4, 1, 1, 0, 84 , 51 , 52 , 10 , 0 , 0 ), // {k, zmm, zmm|m512|mem, i8|u8}
2972 ROW(3, 1, 1, 0, 82 , 45 , 46 , 0 , 0 , 0 ), // #210 {xmm|k, xmm, xmm|m128|mem}
2973 ROW(3, 1, 1, 0, 83 , 48 , 49 , 0 , 0 , 0 ), // {ymm|k, ymm, ymm|m256|mem}
2974 ROW(3, 1, 1, 0, 84 , 51 , 52 , 0 , 0 , 0 ), // {k, zmm, zmm|m512|mem}
2975 ROW(2, 1, 1, 0, 87 , 45 , 0 , 0 , 0 , 0 ), // #213 {xmm|m32|mem, xmm}
2976 ROW(2, 1, 1, 0, 60 , 48 , 0 , 0 , 0 , 0 ), // {xmm|m64|mem, ymm}
2977 ROW(2, 1, 1, 0, 46 , 51 , 0 , 0 , 0 , 0 ), // {xmm|m128|mem, zmm}
2978 ROW(2, 1, 1, 0, 60 , 45 , 0 , 0 , 0 , 0 ), // #216 {xmm|m64|mem, xmm}
2979 ROW(2, 1, 1, 0, 46 , 48 , 0 , 0 , 0 , 0 ), // {xmm|m128|mem, ymm}
2980 ROW(2, 1, 1, 0, 49 , 51 , 0 , 0 , 0 , 0 ), // {ymm|m256|mem, zmm}
2981 ROW(2, 1, 1, 0, 88 , 45 , 0 , 0 , 0 , 0 ), // #219 {xmm|m16|mem, xmm}
2982 ROW(2, 1, 1, 0, 87 , 48 , 0 , 0 , 0 , 0 ), // {xmm|m32|mem, ymm}
2983 ROW(2, 1, 1, 0, 60 , 51 , 0 , 0 , 0 , 0 ), // {xmm|m64|mem, zmm}
2984 ROW(2, 1, 1, 0, 45 , 87 , 0 , 0 , 0 , 0 ), // #222 {xmm, xmm|m32|mem}
2985 ROW(2, 1, 1, 0, 48 , 60 , 0 , 0 , 0 , 0 ), // {ymm, xmm|m64|mem}
2986 ROW(2, 1, 1, 0, 51 , 46 , 0 , 0 , 0 , 0 ), // {zmm, xmm|m128|mem}
2987 ROW(2, 1, 1, 0, 45 , 88 , 0 , 0 , 0 , 0 ), // #225 {xmm, xmm|m16|mem}
2988 ROW(2, 1, 1, 0, 48 , 87 , 0 , 0 , 0 , 0 ), // {ymm, xmm|m32|mem}
2989 ROW(2, 1, 1, 0, 51 , 60 , 0 , 0 , 0 , 0 ), // {zmm, xmm|m64|mem}
2990 ROW(2, 1, 1, 0, 61 , 45 , 0 , 0 , 0 , 0 ), // #228 {vm32x, xmm}
2991 ROW(2, 1, 1, 0, 62 , 48 , 0 , 0 , 0 , 0 ), // {vm32y, ymm}
2992 ROW(2, 1, 1, 0, 63 , 51 , 0 , 0 , 0 , 0 ), // {vm32z, zmm}
2993 ROW(2, 1, 1, 0, 64 , 45 , 0 , 0 , 0 , 0 ), // #231 {vm64x, xmm}
2994 ROW(2, 1, 1, 0, 65 , 48 , 0 , 0 , 0 , 0 ), // {vm64y, ymm}
2995 ROW(2, 1, 1, 0, 66 , 51 , 0 , 0 , 0 , 0 ), // {vm64z, zmm}
2996 ROW(3, 1, 1, 0, 84 , 45 , 46 , 0 , 0 , 0 ), // #234 {k, xmm, xmm|m128|mem}
2997 ROW(3, 1, 1, 0, 84 , 48 , 49 , 0 , 0 , 0 ), // {k, ymm, ymm|m256|mem}
2998 ROW(3, 1, 1, 0, 84 , 51 , 52 , 0 , 0 , 0 ), // {k, zmm, zmm|m512|mem}
2999 ROW(3, 1, 1, 0, 6 , 6 , 28 , 0 , 0 , 0 ), // #237 {r32, r32, r32|m32|mem}
3000 ROW(3, 0, 1, 0, 8 , 8 , 15 , 0 , 0 , 0 ), // {r64, r64, r64|m64|mem}
3001 ROW(3, 1, 1, 0, 6 , 28 , 6 , 0 , 0 , 0 ), // #239 {r32, r32|m32|mem, r32}
3002 ROW(3, 0, 1, 0, 8 , 15 , 8 , 0 , 0 , 0 ), // {r64, r64|m64|mem, r64}
3003 ROW(2, 1, 0, 0, 89 , 28 , 0 , 0 , 0 , 0 ), // #241 {bnd, r32|m32|mem}
3004 ROW(2, 0, 1, 0, 89 , 15 , 0 , 0 , 0 , 0 ), // {bnd, r64|m64|mem}
3005 ROW(2, 1, 1, 0, 89 , 90 , 0 , 0 , 0 , 0 ), // #243 {bnd, bnd|mem}
3006 ROW(2, 1, 1, 0, 91 , 89 , 0 , 0 , 0 , 0 ), // {mem, bnd}
3007 ROW(2, 1, 0, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #245 {r16, m32|mem}
3008 ROW(2, 1, 0, 0, 6 , 30 , 0 , 0 , 0 , 0 ), // {r32, m64|mem}
3009 ROW(1, 1, 0, 0, 92 , 0 , 0 , 0 , 0 , 0 ), // #247 {rel16|r16|m16|r32|m32}
3010 ROW(1, 1, 1, 0, 93 , 0 , 0 , 0 , 0 , 0 ), // {rel32|r64|m64|mem}
3011 ROW(2, 1, 1, 0, 6 , 94 , 0 , 0 , 0 , 0 ), // #249 {r32, r8lo|r8hi|m8|r16|m16|r32|m32}
3012 ROW(2, 0, 1, 0, 8 , 95 , 0 , 0 , 0 , 0 ), // {r64, r8lo|r8hi|m8|r64|m64}
3013 ROW(1, 1, 0, 0, 96 , 0 , 0 , 0 , 0 , 0 ), // #251 {r16|r32}
3014 ROW(1, 1, 1, 0, 31 , 0 , 0 , 0 , 0 , 0 ), // #252 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem}
3015 ROW(2, 1, 0, 0, 97 , 53 , 0 , 0 , 0 , 0 ), // #253 {es:[memBase], m512|mem}
3016 ROW(2, 0, 1, 0, 97 , 53 , 0 , 0 , 0 , 0 ), // {es:[memBase], m512|mem}
3017 ROW(3, 1, 1, 0, 45 , 10 , 10 , 0 , 0 , 0 ), // #255 {xmm, i8|u8, i8|u8}
3018 ROW(2, 1, 1, 0, 45 , 45 , 0 , 0 , 0 , 0 ), // #256 {xmm, xmm}
3019 ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #257 {}
3020 ROW(1, 1, 1, 0, 78 , 0 , 0 , 0 , 0 , 0 ), // #258 {st}
3021 ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #259 {}
3022 ROW(1, 1, 1, 0, 98 , 0 , 0 , 0 , 0 , 0 ), // #260 {m32|m64|st}
3023 ROW(2, 1, 1, 0, 45 , 45 , 0 , 0 , 0 , 0 ), // #261 {xmm, xmm}
3024 ROW(4, 1, 1, 0, 45 , 45 , 10 , 10 , 0 , 0 ), // {xmm, xmm, i8|u8, i8|u8}
3025 ROW(2, 1, 0, 0, 6 , 47 , 0 , 0 , 0 , 0 ), // #263 {r32, m128|mem}
3026 ROW(2, 0, 1, 0, 8 , 47 , 0 , 0 , 0 , 0 ), // {r64, m128|mem}
3027 ROW(2, 1, 0, 2, 36 , 99 , 0 , 0 , 0 , 0 ), // #265 {<eax>, <ecx>}
3028 ROW(2, 0, 1, 2, 100, 99 , 0 , 0 , 0 , 0 ), // {<eax|rax>, <ecx>}
3029 ROW(1, 1, 1, 0, 101, 0 , 0 , 0 , 0 , 0 ), // #267 {rel8|rel32}
3030 ROW(1, 1, 0, 0, 102, 0 , 0 , 0 , 0 , 0 ), // {rel16}
3031 ROW(2, 1, 0, 1, 103, 104, 0 , 0 , 0 , 0 ), // #269 {<cx|ecx>, rel8}
3032 ROW(2, 0, 1, 1, 105, 104, 0 , 0 , 0 , 0 ), // {<ecx|rcx>, rel8}
3033 ROW(1, 1, 1, 0, 106, 0 , 0 , 0 , 0 , 0 ), // #271 {rel8|rel32|r64|m64|mem}
3034 ROW(1, 1, 0, 0, 107, 0 , 0 , 0 , 0 , 0 ), // {rel16|r32|m32|mem}
3035 ROW(2, 1, 1, 0, 84 , 108, 0 , 0 , 0 , 0 ), // #273 {k, k|m8|mem|r32|r8lo|r8hi|r16}
3036 ROW(2, 1, 1, 0, 109, 84 , 0 , 0 , 0 , 0 ), // {m8|mem|r32|r8lo|r8hi|r16, k}
3037 ROW(2, 1, 1, 0, 84 , 110, 0 , 0 , 0 , 0 ), // #275 {k, k|m32|mem|r32}
3038 ROW(2, 1, 1, 0, 28 , 84 , 0 , 0 , 0 , 0 ), // {m32|mem|r32, k}
3039 ROW(2, 1, 1, 0, 84 , 111, 0 , 0 , 0 , 0 ), // #277 {k, k|m64|mem|r64}
3040 ROW(2, 1, 1, 0, 15 , 84 , 0 , 0 , 0 , 0 ), // {m64|mem|r64, k}
3041 ROW(2, 1, 1, 0, 84 , 112, 0 , 0 , 0 , 0 ), // #279 {k, k|m16|mem|r32|r16}
3042 ROW(2, 1, 1, 0, 113, 84 , 0 , 0 , 0 , 0 ), // {m16|mem|r32|r16, k}
3043 ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #281 {r16, r16|m16|mem}
3044 ROW(2, 1, 1, 0, 6 , 113, 0 , 0 , 0 , 0 ), // {r32, r32|m16|mem|r16}
3045 ROW(2, 1, 0, 0, 4 , 29 , 0 , 0 , 0 , 0 ), // #283 {r16, m32|mem}
3046 ROW(2, 1, 0, 0, 6 , 79 , 0 , 0 , 0 , 0 ), // {r32, m48|mem}
3047 ROW(2, 1, 1, 0, 4 , 27 , 0 , 0 , 0 , 0 ), // #285 {r16, r16|m16|mem}
3048 ROW(2, 1, 1, 0, 114, 113, 0 , 0 , 0 , 0 ), // {r32|r64, r32|m16|mem|r16}
3049 ROW(2, 1, 1, 0, 59 , 28 , 0 , 0 , 0 , 0 ), // #287 {mm|xmm, r32|m32|mem}
3050 ROW(2, 1, 1, 0, 28 , 59 , 0 , 0 , 0 , 0 ), // {r32|m32|mem, mm|xmm}
3051 ROW(2, 1, 1, 0, 45 , 87 , 0 , 0 , 0 , 0 ), // #289 {xmm, xmm|m32|mem}
3052 ROW(2, 1, 1, 0, 29 , 45 , 0 , 0 , 0 , 0 ), // {m32|mem, xmm}
3053 ROW(2, 1, 1, 0, 4 , 9 , 0 , 0 , 0 , 0 ), // #291 {r16, r8lo|r8hi|m8}
3054 ROW(2, 1, 1, 0, 114, 115, 0 , 0 , 0 , 0 ), // {r32|r64, r8lo|r8hi|m8|r16|m16}
3055 ROW(4, 1, 1, 1, 6 , 6 , 28 , 35 , 0 , 0 ), // #293 {r32, r32, r32|m32|mem, <edx>}
3056 ROW(4, 0, 1, 1, 8 , 8 , 15 , 37 , 0 , 0 ), // {r64, r64, r64|m64|mem, <rdx>}
3057 ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #295 {}
3058 ROW(1, 1, 1, 0, 116, 0 , 0 , 0 , 0 , 0 ), // {r16|m16|r32|m32}
3059 ROW(2, 1, 1, 0, 57 , 117, 0 , 0 , 0 , 0 ), // #297 {mm, mm|m64|mem}
3060 ROW(2, 1, 1, 0, 45 , 46 , 0 , 0 , 0 , 0 ), // {xmm, xmm|m128|mem}
3061 ROW(3, 1, 1, 0, 57 , 117, 10 , 0 , 0 , 0 ), // #299 {mm, mm|m64|mem, i8|u8}
3062 ROW(3, 1, 1, 0, 45 , 46 , 10 , 0 , 0 , 0 ), // {xmm, xmm|m128|mem, i8|u8}
3063 ROW(3, 1, 1, 0, 6 , 59 , 10 , 0 , 0 , 0 ), // #301 {r32, mm|xmm, i8|u8}
3064 ROW(3, 1, 1, 0, 21 , 45 , 10 , 0 , 0 , 0 ), // {m16|mem, xmm, i8|u8}
3065 ROW(2, 1, 1, 0, 57 , 118, 0 , 0 , 0 , 0 ), // #303 {mm, i8|u8|mm|m64|mem}
3066 ROW(2, 1, 1, 0, 45 , 54 , 0 , 0 , 0 , 0 ), // {xmm, i8|u8|xmm|m128|mem}
3067 ROW(1, 1, 0, 0, 6 , 0 , 0 , 0 , 0 , 0 ), // #305 {r32}
3068 ROW(1, 0, 1, 0, 8 , 0 , 0 , 0 , 0 , 0 ), // {r64}
3069 ROW(0, 1, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #307 {}
3070 ROW(1, 1, 1, 0, 119, 0 , 0 , 0 , 0 , 0 ), // {u16}
3071 ROW(3, 1, 1, 0, 6 , 28 , 10 , 0 , 0 , 0 ), // #309 {r32, r32|m32|mem, i8|u8}
3072 ROW(3, 0, 1, 0, 8 , 15 , 10 , 0 , 0 , 0 ), // {r64, r64|m64|mem, i8|u8}
3073 ROW(4, 1, 1, 0, 45 , 45 , 46 , 45 , 0 , 0 ), // #311 {xmm, xmm, xmm|m128|mem, xmm}
3074 ROW(4, 1, 1, 0, 48 , 48 , 49 , 48 , 0 , 0 ), // {ymm, ymm, ymm|m256|mem, ymm}
3075 ROW(2, 1, 1, 0, 45 , 120, 0 , 0 , 0 , 0 ), // #313 {xmm, xmm|m128|ymm|m256}
3076 ROW(2, 1, 1, 0, 48 , 52 , 0 , 0 , 0 , 0 ), // {ymm, zmm|m512|mem}
3077 ROW(4, 1, 1, 0, 45 , 45 , 45 , 60 , 0 , 0 ), // #315 {xmm, xmm, xmm, xmm|m64|mem}
3078 ROW(4, 1, 1, 0, 45 , 45 , 30 , 45 , 0 , 0 ), // {xmm, xmm, m64|mem, xmm}
3079 ROW(4, 1, 1, 0, 45 , 45 , 45 , 87 , 0 , 0 ), // #317 {xmm, xmm, xmm, xmm|m32|mem}
3080 ROW(4, 1, 1, 0, 45 , 45 , 29 , 45 , 0 , 0 ), // {xmm, xmm, m32|mem, xmm}
3081 ROW(4, 1, 1, 0, 48 , 48 , 46 , 10 , 0 , 0 ), // #319 {ymm, ymm, xmm|m128|mem, i8|u8}
3082 ROW(4, 1, 1, 0, 51 , 51 , 46 , 10 , 0 , 0 ), // {zmm, zmm, xmm|m128|mem, i8|u8}
3083 ROW(1, 1, 0, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #321 {<eax>}
3084 ROW(1, 0, 1, 1, 38 , 0 , 0 , 0 , 0 , 0 ), // #322 {<rax>}
3085 ROW(2, 1, 1, 0, 28 , 45 , 0 , 0 , 0 , 0 ), // #323 {r32|m32|mem, xmm}
3086 ROW(2, 1, 1, 0, 45 , 28 , 0 , 0 , 0 , 0 ), // {xmm, r32|m32|mem}
3087 ROW(2, 1, 1, 0, 30 , 45 , 0 , 0 , 0 , 0 ), // #325 {m64|mem, xmm}
3088 ROW(3, 1, 1, 0, 45 , 45 , 30 , 0 , 0 , 0 ), // {xmm, xmm, m64|mem}
3089 ROW(2, 1, 0, 0, 28 , 6 , 0 , 0 , 0 , 0 ), // #327 {r32|m32|mem, r32}
3090 ROW(2, 0, 1, 0, 15 , 8 , 0 , 0 , 0 , 0 ), // {r64|m64|mem, r64}
3091 ROW(2, 1, 0, 0, 6 , 28 , 0 , 0 , 0 , 0 ), // #329 {r32, r32|m32|mem}
3092 ROW(2, 0, 1, 0, 8 , 15 , 0 , 0 , 0 , 0 ), // {r64, r64|m64|mem}
3093 ROW(3, 1, 1, 0, 45 , 45 , 54 , 0 , 0 , 0 ), // #331 {xmm, xmm, xmm|m128|mem|i8|u8}
3094 ROW(3, 1, 1, 0, 45 , 47 , 121, 0 , 0 , 0 ), // {xmm, m128|mem, i8|u8|xmm}
3095 ROW(2, 1, 1, 0, 74 , 45 , 0 , 0 , 0 , 0 ), // #333 {vm64x|vm64y, xmm}
3096 ROW(2, 1, 1, 0, 66 , 48 , 0 , 0 , 0 , 0 ), // {vm64z, ymm}
3097 ROW(3, 1, 1, 0, 45 , 45 , 46 , 0 , 0 , 0 ), // #335 {xmm, xmm, xmm|m128|mem}
3098 ROW(3, 1, 1, 0, 45 , 47 , 45 , 0 , 0 , 0 ), // {xmm, m128|mem, xmm}
3099 ROW(2, 1, 1, 0, 61 , 86 , 0 , 0 , 0 , 0 ), // #337 {vm32x, xmm|ymm}
3100 ROW(2, 1, 1, 0, 62 , 51 , 0 , 0 , 0 , 0 ), // {vm32y, zmm}
3101 ROW(1, 1, 0, 1, 33 , 0 , 0 , 0 , 0 , 0 ), // #339 {<ax>}
3102 ROW(2, 1, 0, 1, 33 , 10 , 0 , 0 , 0 , 0 ), // #340 {<ax>, i8|u8}
3103 ROW(2, 1, 0, 0, 27 , 4 , 0 , 0 , 0 , 0 ), // #341 {r16|m16|mem, r16}
3104 ROW(3, 1, 1, 1, 45 , 46 , 122, 0 , 0 , 0 ), // #342 {xmm, xmm|m128|mem, <xmm0>}
3105 ROW(2, 1, 1, 0, 89 , 123, 0 , 0 , 0 , 0 ), // #343 {bnd, mib}
3106 ROW(2, 1, 1, 0, 89 , 91 , 0 , 0 , 0 , 0 ), // #344 {bnd, mem}
3107 ROW(2, 1, 1, 0, 123, 89 , 0 , 0 , 0 , 0 ), // #345 {mib, bnd}
3108 ROW(1, 1, 1, 0, 124, 0 , 0 , 0 , 0 , 0 ), // #346 {r16|r32|r64}
3109 ROW(1, 1, 1, 1, 33 , 0 , 0 , 0 , 0 , 0 ), // #347 {<ax>}
3110 ROW(2, 1, 1, 2, 35 , 36 , 0 , 0 , 0 , 0 ), // #348 {<edx>, <eax>}
3111 ROW(1, 1, 1, 0, 91 , 0 , 0 , 0 , 0 , 0 ), // #349 {mem}
3112 ROW(1, 1, 1, 1, 125, 0 , 0 , 0 , 0 , 0 ), // #350 {<ds:[memBase|zax]>}
3113 ROW(2, 1, 1, 2, 126, 127, 0 , 0 , 0 , 0 ), // #351 {<ds:[memBase|zsi]>, <es:[memBase|zdi]>}
3114 ROW(3, 1, 1, 0, 45 , 60 , 10 , 0 , 0 , 0 ), // #352 {xmm, xmm|m64|mem, i8|u8}
3115 ROW(3, 1, 1, 0, 45 , 87 , 10 , 0 , 0 , 0 ), // #353 {xmm, xmm|m32|mem, i8|u8}
3116 ROW(5, 0, 1, 4, 47 , 37 , 38 , 128, 129, 0 ), // #354 {m128|mem, <rdx>, <rax>, <rcx>, <rbx>}
3117 ROW(5, 1, 1, 4, 30 , 35 , 36 , 99 , 130, 0 ), // #355 {m64|mem, <edx>, <eax>, <ecx>, <ebx>}
3118 ROW(4, 1, 1, 4, 36 , 130, 99 , 35 , 0 , 0 ), // #356 {<eax>, <ebx>, <ecx>, <edx>}
3119 ROW(2, 0, 1, 2, 37 , 38 , 0 , 0 , 0 , 0 ), // #357 {<rdx>, <rax>}
3120 ROW(2, 1, 1, 0, 57 , 46 , 0 , 0 , 0 , 0 ), // #358 {mm, xmm|m128|mem}
3121 ROW(2, 1, 1, 0, 45 , 117, 0 , 0 , 0 , 0 ), // #359 {xmm, mm|m64|mem}
3122 ROW(2, 1, 1, 0, 57 , 60 , 0 , 0 , 0 , 0 ), // #360 {mm, xmm|m64|mem}
3123 ROW(2, 1, 1, 0, 114, 60 , 0 , 0 , 0 , 0 ), // #361 {r32|r64, xmm|m64|mem}
3124 ROW(2, 1, 1, 0, 45 , 131, 0 , 0 , 0 , 0 ), // #362 {xmm, r32|m32|mem|r64|m64}
3125 ROW(2, 1, 1, 0, 114, 87 , 0 , 0 , 0 , 0 ), // #363 {r32|r64, xmm|m32|mem}
3126 ROW(2, 1, 1, 2, 34 , 33 , 0 , 0 , 0 , 0 ), // #364 {<dx>, <ax>}
3127 ROW(1, 1, 1, 1, 36 , 0 , 0 , 0 , 0 , 0 ), // #365 {<eax>}
3128 ROW(2, 1, 1, 0, 12 , 10 , 0 , 0 , 0 , 0 ), // #366 {i16|u16, i8|u8}
3129 ROW(3, 1, 1, 0, 28 , 45 , 10 , 0 , 0 , 0 ), // #367 {r32|m32|mem, xmm, i8|u8}
3130 ROW(1, 1, 1, 0, 80 , 0 , 0 , 0 , 0 , 0 ), // #368 {m80|mem}
3131 ROW(1, 1, 1, 0, 132, 0 , 0 , 0 , 0 , 0 ), // #369 {m16|m32}
3132 ROW(1, 1, 1, 0, 133, 0 , 0 , 0 , 0 , 0 ), // #370 {m16|m32|m64}
3133 ROW(1, 1, 1, 0, 134, 0 , 0 , 0 , 0 , 0 ), // #371 {m32|m64|m80|st}
3134 ROW(1, 1, 1, 0, 21 , 0 , 0 , 0 , 0 , 0 ), // #372 {m16|mem}
3135 ROW(1, 1, 1, 0, 135, 0 , 0 , 0 , 0 , 0 ), // #373 {ax|m16|mem}
3136 ROW(1, 0, 1, 0, 91 , 0 , 0 , 0 , 0 , 0 ), // #374 {mem}
3137 ROW(2, 1, 1, 0, 136, 137, 0 , 0 , 0 , 0 ), // #375 {al|ax|eax, i8|u8|dx}
3138 ROW(2, 1, 1, 0, 138, 139, 0 , 0 , 0 , 0 ), // #376 {es:[memBase|zdi], dx}
3139 ROW(1, 1, 1, 0, 10 , 0 , 0 , 0 , 0 , 0 ), // #377 {i8|u8}
3140 ROW(0, 1, 0, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #378 {}
3141 ROW(0, 0, 1, 0, 0 , 0 , 0 , 0 , 0 , 0 ), // #379 {}
3142 ROW(3, 1, 1, 0, 84 , 84 , 84 , 0 , 0 , 0 ), // #380 {k, k, k}
3143 ROW(2, 1, 1, 0, 84 , 84 , 0 , 0 , 0 , 0 ), // #381 {k, k}
3144 ROW(3, 1, 1, 0, 84 , 84 , 10 , 0 , 0 , 0 ), // #382 {k, k, i8|u8}
3145 ROW(1, 1, 1, 1, 140, 0 , 0 , 0 , 0 , 0 ), // #383 {<ah>}
3146 ROW(1, 1, 1, 0, 29 , 0 , 0 , 0 , 0 , 0 ), // #384 {m32|mem}
3147 ROW(2, 1, 1, 0, 124, 141, 0 , 0 , 0 , 0 ), // #385 {r16|r32|r64, mem|m8|m16|m32|m48|m64|m80|m128|m256|m512|m1024}
3148 ROW(1, 1, 1, 0, 27 , 0 , 0 , 0 , 0 , 0 ), // #386 {r16|m16|mem}
3149 ROW(1, 1, 1, 0, 114, 0 , 0 , 0 , 0 , 0 ), // #387 {r32|r64}
3150 ROW(2, 1, 1, 2, 142, 126, 0 , 0 , 0 , 0 ), // #388 {<al|ax|eax|rax>, <ds:[memBase|zsi]>}
3151 ROW(3, 1, 1, 0, 114, 28 , 14 , 0 , 0 , 0 ), // #389 {r32|r64, r32|m32|mem, i32|u32}
3152 ROW(3, 1, 1, 1, 45 , 45 , 143, 0 , 0 , 0 ), // #390 {xmm, xmm, <ds:[memBase|zdi]>}
3153 ROW(3, 1, 1, 1, 57 , 57 , 143, 0 , 0 , 0 ), // #391 {mm, mm, <ds:[memBase|zdi]>}
3154 ROW(3, 1, 1, 3, 125, 99 , 35 , 0 , 0 , 0 ), // #392 {<ds:[memBase|zax]>, <ecx>, <edx>}
3155 ROW(2, 1, 1, 0, 97 , 53 , 0 , 0 , 0 , 0 ), // #393 {es:[memBase], m512|mem}
3156 ROW(2, 1, 1, 0, 57 , 45 , 0 , 0 , 0 , 0 ), // #394 {mm, xmm}
3157 ROW(2, 1, 1, 0, 6 , 45 , 0 , 0 , 0 , 0 ), // #395 {r32, xmm}
3158 ROW(2, 1, 1, 0, 30 , 57 , 0 , 0 , 0 , 0 ), // #396 {m64|mem, mm}
3159 ROW(2, 1, 1, 0, 45 , 57 , 0 , 0 , 0 , 0 ), // #397 {xmm, mm}
3160 ROW(2, 1, 1, 2, 127, 126, 0 , 0 , 0 , 0 ), // #398 {<es:[memBase|zdi]>, <ds:[memBase|zsi]>}
3161 ROW(2, 0, 1, 0, 8 , 28 , 0 , 0 , 0 , 0 ), // #399 {r64, r32|m32|mem}
3162 ROW(2, 1, 1, 2, 36 , 99 , 0 , 0 , 0 , 0 ), // #400 {<eax>, <ecx>}
3163 ROW(3, 1, 1, 3, 36 , 99 , 130, 0 , 0 , 0 ), // #401 {<eax>, <ecx>, <ebx>}
3164 ROW(2, 1, 1, 0, 144, 136, 0 , 0 , 0 , 0 ), // #402 {u8|dx, al|ax|eax}
3165 ROW(2, 1, 1, 0, 139, 145, 0 , 0 , 0 , 0 ), // #403 {dx, ds:[memBase|zsi]}
3166 ROW(6, 1, 1, 3, 45 , 46 , 10 , 99 , 36 , 35 ), // #404 {xmm, xmm|m128|mem, i8|u8, <ecx>, <eax>, <edx>}
3167 ROW(6, 1, 1, 3, 45 , 46 , 10 , 122, 36 , 35 ), // #405 {xmm, xmm|m128|mem, i8|u8, <xmm0>, <eax>, <edx>}
3168 ROW(4, 1, 1, 1, 45 , 46 , 10 , 99 , 0 , 0 ), // #406 {xmm, xmm|m128|mem, i8|u8, <ecx>}
3169 ROW(4, 1, 1, 1, 45 , 46 , 10 , 122, 0 , 0 ), // #407 {xmm, xmm|m128|mem, i8|u8, <xmm0>}
3170 ROW(3, 1, 1, 0, 109, 45 , 10 , 0 , 0 , 0 ), // #408 {r32|m8|mem|r8lo|r8hi|r16, xmm, i8|u8}
3171 ROW(3, 0, 1, 0, 15 , 45 , 10 , 0 , 0 , 0 ), // #409 {r64|m64|mem, xmm, i8|u8}
3172 ROW(3, 1, 1, 0, 45 , 109, 10 , 0 , 0 , 0 ), // #410 {xmm, r32|m8|mem|r8lo|r8hi|r16, i8|u8}
3173 ROW(3, 1, 1, 0, 45 , 28 , 10 , 0 , 0 , 0 ), // #411 {xmm, r32|m32|mem, i8|u8}
3174 ROW(3, 0, 1, 0, 45 , 15 , 10 , 0 , 0 , 0 ), // #412 {xmm, r64|m64|mem, i8|u8}
3175 ROW(3, 1, 1, 0, 59 , 113, 10 , 0 , 0 , 0 ), // #413 {mm|xmm, r32|m16|mem|r16, i8|u8}
3176 ROW(2, 1, 1, 0, 6 , 59 , 0 , 0 , 0 , 0 ), // #414 {r32, mm|xmm}
3177 ROW(2, 1, 1, 0, 45 , 10 , 0 , 0 , 0 , 0 ), // #415 {xmm, i8|u8}
3178 ROW(2, 1, 1, 0, 31 , 81 , 0 , 0 , 0 , 0 ), // #416 {r8lo|r8hi|m8|r16|m16|r32|m32|r64|m64|mem, cl|i8|u8}
3179 ROW(1, 0, 1, 0, 114, 0 , 0 , 0 , 0 , 0 ), // #417 {r32|r64}
3180 ROW(3, 1, 1, 3, 35 , 36 , 99 , 0 , 0 , 0 ), // #418 {<edx>, <eax>, <ecx>}
3181 ROW(2, 1, 1, 2, 142, 127, 0 , 0 , 0 , 0 ), // #419 {<al|ax|eax|rax>, <es:[memBase|zdi]>}
3182 ROW(1, 1, 1, 0, 1 , 0 , 0 , 0 , 0 , 0 ), // #420 {r8lo|r8hi|m8|mem}
3183 ROW(1, 1, 1, 0, 146, 0 , 0 , 0 , 0 , 0 ), // #421 {r16|m16|mem|r32|r64}
3184 ROW(2, 1, 1, 2, 127, 142, 0 , 0 , 0 , 0 ), // #422 {<es:[memBase|zdi]>, <al|ax|eax|rax>}
3185 ROW(6, 1, 1, 0, 51 , 51 , 51 , 51 , 51 , 47 ), // #423 {zmm, zmm, zmm, zmm, zmm, m128|mem}
3186 ROW(6, 1, 1, 0, 45 , 45 , 45 , 45 , 45 , 47 ), // #424 {xmm, xmm, xmm, xmm, xmm, m128|mem}
3187 ROW(3, 1, 1, 0, 45 , 45 , 60 , 0 , 0 , 0 ), // #425 {xmm, xmm, xmm|m64|mem}
3188 ROW(3, 1, 1, 0, 45 , 45 , 87 , 0 , 0 , 0 ), // #426 {xmm, xmm, xmm|m32|mem}
3189 ROW(2, 1, 1, 0, 48 , 47 , 0 , 0 , 0 , 0 ), // #427 {ymm, m128|mem}
3190 ROW(2, 1, 1, 0, 147, 60 , 0 , 0 , 0 , 0 ), // #428 {ymm|zmm, xmm|m64|mem}
3191 ROW(2, 1, 1, 0, 147, 47 , 0 , 0 , 0 , 0 ), // #429 {ymm|zmm, m128|mem}
3192 ROW(2, 1, 1, 0, 51 , 50 , 0 , 0 , 0 , 0 ), // #430 {zmm, m256|mem}
3193 ROW(2, 1, 1, 0, 148, 60 , 0 , 0 , 0 , 0 ), // #431 {xmm|ymm|zmm, xmm|m64|mem}
3194 ROW(2, 1, 1, 0, 148, 87 , 0 , 0 , 0 , 0 ), // #432 {xmm|ymm|zmm, m32|mem|xmm}
3195 ROW(4, 1, 1, 0, 82 , 45 , 60 , 10 , 0 , 0 ), // #433 {xmm|k, xmm, xmm|m64|mem, i8|u8}
3196 ROW(4, 1, 1, 0, 82 , 45 , 87 , 10 , 0 , 0 ), // #434 {xmm|k, xmm, xmm|m32|mem, i8|u8}
3197 ROW(3, 1, 1, 0, 45 , 45 , 131, 0 , 0 , 0 ), // #435 {xmm, xmm, r32|m32|mem|r64|m64}
3198 ROW(3, 1, 1, 0, 46 , 147, 10 , 0 , 0 , 0 ), // #436 {xmm|m128|mem, ymm|zmm, i8|u8}
3199 ROW(4, 1, 1, 0, 45 , 45 , 60 , 10 , 0 , 0 ), // #437 {xmm, xmm, xmm|m64|mem, i8|u8}
3200 ROW(4, 1, 1, 0, 45 , 45 , 87 , 10 , 0 , 0 ), // #438 {xmm, xmm, xmm|m32|mem, i8|u8}
3201 ROW(3, 1, 1, 0, 84 , 149, 10 , 0 , 0 , 0 ), // #439 {k, xmm|m128|ymm|m256|zmm|m512, i8|u8}
3202 ROW(3, 1, 1, 0, 84 , 60 , 10 , 0 , 0 , 0 ), // #440 {k, xmm|m64|mem, i8|u8}
3203 ROW(3, 1, 1, 0, 84 , 87 , 10 , 0 , 0 , 0 ), // #441 {k, xmm|m32|mem, i8|u8}
3204 ROW(1, 1, 1, 0, 62 , 0 , 0 , 0 , 0 , 0 ), // #442 {vm32y}
3205 ROW(1, 1, 1, 0, 63 , 0 , 0 , 0 , 0 , 0 ), // #443 {vm32z}
3206 ROW(1, 1, 1, 0, 66 , 0 , 0 , 0 , 0 , 0 ), // #444 {vm64z}
3207 ROW(4, 1, 1, 0, 51 , 51 , 49 , 10 , 0 , 0 ), // #445 {zmm, zmm, ymm|m256|mem, i8|u8}
3208 ROW(1, 1, 1, 0, 30 , 0 , 0 , 0 , 0 , 0 ), // #446 {m64|mem}
3209 ROW(2, 1, 1, 0, 6 , 86 , 0 , 0 , 0 , 0 ), // #447 {r32, xmm|ymm}
3210 ROW(2, 1, 1, 0, 148, 150, 0 , 0 , 0 , 0 ), // #448 {xmm|ymm|zmm, xmm|m8|mem|r32|r8lo|r8hi|r16}
3211 ROW(2, 1, 1, 0, 148, 151, 0 , 0 , 0 , 0 ), // #449 {xmm|ymm|zmm, xmm|m32|mem|r32}
3212 ROW(2, 1, 1, 0, 148, 84 , 0 , 0 , 0 , 0 ), // #450 {xmm|ymm|zmm, k}
3213 ROW(2, 1, 1, 0, 148, 152, 0 , 0 , 0 , 0 ), // #451 {xmm|ymm|zmm, xmm|m16|mem|r32|r16}
3214 ROW(3, 1, 1, 0, 113, 45 , 10 , 0 , 0 , 0 ), // #452 {r32|m16|mem|r16, xmm, i8|u8}
3215 ROW(4, 1, 1, 0, 45 , 45 , 109, 10 , 0 , 0 ), // #453 {xmm, xmm, r32|m8|mem|r8lo|r8hi|r16, i8|u8}
3216 ROW(4, 1, 1, 0, 45 , 45 , 28 , 10 , 0 , 0 ), // #454 {xmm, xmm, r32|m32|mem, i8|u8}
3217 ROW(4, 0, 1, 0, 45 , 45 , 15 , 10 , 0 , 0 ), // #455 {xmm, xmm, r64|m64|mem, i8|u8}
3218 ROW(4, 1, 1, 0, 45 , 45 , 113, 10 , 0 , 0 ), // #456 {xmm, xmm, r32|m16|mem|r16, i8|u8}
3219 ROW(2, 1, 1, 0, 84 , 148, 0 , 0 , 0 , 0 ), // #457 {k, xmm|ymm|zmm}
3220 ROW(1, 1, 1, 0, 102, 0 , 0 , 0 , 0 , 0 ), // #458 {rel16|rel32}
3221 ROW(3, 1, 1, 2, 91 , 35 , 36 , 0 , 0 , 0 ), // #459 {mem, <edx>, <eax>}
3222 ROW(3, 0, 1, 2, 91 , 35 , 36 , 0 , 0 , 0 ) // #460 {mem, <edx>, <eax>}
3223 };
3224 #undef ROW
3225
3226 #define ROW(flags, mFlags, extFlags, regId) { uint32_t(flags), uint16_t(mFlags), uint8_t(extFlags), uint8_t(regId) }
3227 #define F(VAL) InstDB::kOp##VAL
3228 #define M(VAL) InstDB::kMemOp##VAL
3229 const InstDB::OpSignature InstDB::_opSignatureTable[] = {
3230 ROW(0, 0, 0, 0xFF),
3231 ROW(F(GpbLo) | F(GpbHi) | F(Mem), M(M8) | M(Any), 0, 0x00),
3232 ROW(F(GpbLo) | F(GpbHi), 0, 0, 0x00),
3233 ROW(F(Gpw) | F(SReg) | F(Mem), M(M16) | M(Any), 0, 0x00),
3234 ROW(F(Gpw), 0, 0, 0x00),
3235 ROW(F(Gpd) | F(SReg) | F(Mem), M(M32) | M(Any), 0, 0x00),
3236 ROW(F(Gpd), 0, 0, 0x00),
3237 ROW(F(Gpq) | F(SReg) | F(CReg) | F(DReg) | F(Mem), M(M64) | M(Any), 0, 0x00),
3238 ROW(F(Gpq), 0, 0, 0x00),
3239 ROW(F(GpbLo) | F(GpbHi) | F(Mem), M(M8), 0, 0x00),
3240 ROW(F(I8) | F(U8), 0, 0, 0x00),
3241 ROW(F(Gpw) | F(Mem), M(M16), 0, 0x00),
3242 ROW(F(I16) | F(U16), 0, 0, 0x00),
3243 ROW(F(Gpd) | F(Mem), M(M32), 0, 0x00),
3244 ROW(F(I32) | F(U32), 0, 0, 0x00),
3245 ROW(F(Gpq) | F(Mem), M(M64) | M(Any), 0, 0x00),
3246 ROW(F(I32), 0, 0, 0x00),
3247 ROW(F(SReg) | F(CReg) | F(DReg) | F(Mem) | F(I64) | F(U64), M(M64) | M(Any), 0, 0x00),
3248 ROW(F(Mem), M(M8) | M(Any), 0, 0x00),
3249 ROW(F(SReg) | F(Mem), M(M16) | M(Any), 0, 0x00),
3250 ROW(F(SReg) | F(Mem), M(M32) | M(Any), 0, 0x00),
3251 ROW(F(Mem), M(M16) | M(Any), 0, 0x00),
3252 ROW(F(SReg), 0, 0, 0x00),
3253 ROW(F(CReg) | F(DReg), 0, 0, 0x00),
3254 ROW(F(Gpq) | F(I32), 0, 0, 0x00),
3255 ROW(F(Gpw) | F(Gpd) | F(Gpq) | F(Mem), M(M16) | M(M32) | M(M64) | M(Any), 0, 0x00),
3256 ROW(F(I8), 0, 0, 0x00),
3257 ROW(F(Gpw) | F(Mem), M(M16) | M(Any), 0, 0x00),
3258 ROW(F(Gpd) | F(Mem), M(M32) | M(Any), 0, 0x00),
3259 ROW(F(Mem), M(M32) | M(Any), 0, 0x00),
3260 ROW(F(Mem), M(M64) | M(Any), 0, 0x00),
3261 ROW(F(GpbLo) | F(GpbHi) | F(Gpw) | F(Gpd) | F(Gpq) | F(Mem), M(M8) | M(M16) | M(M32) | M(M64) | M(Any), 0, 0x00),
3262 ROW(F(Gpq) | F(Mem) | F(I32) | F(U32), M(M64) | M(Any), 0, 0x00),
3263 ROW(F(Gpw) | F(Implicit), 0, 0, 0x01),
3264 ROW(F(Gpw) | F(Implicit), 0, 0, 0x04),
3265 ROW(F(Gpd) | F(Implicit), 0, 0, 0x04),
3266 ROW(F(Gpd) | F(Implicit), 0, 0, 0x01),
3267 ROW(F(Gpq) | F(Implicit), 0, 0, 0x04),
3268 ROW(F(Gpq) | F(Implicit), 0, 0, 0x01),
3269 ROW(F(Gpw) | F(Mem) | F(I8) | F(I16), M(M16) | M(Any), 0, 0x00),
3270 ROW(F(Gpd) | F(Mem) | F(I8) | F(I32), M(M32) | M(Any), 0, 0x00),
3271 ROW(F(Gpq) | F(Mem) | F(I8) | F(I32), M(M64) | M(Any), 0, 0x00),
3272 ROW(F(I8) | F(I16) | F(U16), 0, 0, 0x00),
3273 ROW(F(I8) | F(I32) | F(U32), 0, 0, 0x00),
3274 ROW(F(I8) | F(I32), 0, 0, 0x00),
3275 ROW(F(Xmm), 0, 0, 0x00),
3276 ROW(F(Xmm) | F(Mem), M(M128) | M(Any), 0, 0x00),
3277 ROW(F(Mem), M(M128) | M(Any), 0, 0x00),
3278 ROW(F(Ymm), 0, 0, 0x00),
3279 ROW(F(Ymm) | F(Mem), M(M256) | M(Any), 0, 0x00),
3280 ROW(F(Mem), M(M256) | M(Any), 0, 0x00),
3281 ROW(F(Zmm), 0, 0, 0x00),
3282 ROW(F(Zmm) | F(Mem), M(M512) | M(Any), 0, 0x00),
3283 ROW(F(Mem), M(M512) | M(Any), 0, 0x00),
3284 ROW(F(Xmm) | F(Mem) | F(I8) | F(U8), M(M128) | M(Any), 0, 0x00),
3285 ROW(F(Ymm) | F(Mem) | F(I8) | F(U8), M(M256) | M(Any), 0, 0x00),
3286 ROW(F(Zmm) | F(Mem) | F(I8) | F(U8), M(M512) | M(Any), 0, 0x00),
3287 ROW(F(Mm), 0, 0, 0x00),
3288 ROW(F(Gpq) | F(Mm) | F(Mem), M(M64) | M(Any), 0, 0x00),
3289 ROW(F(Xmm) | F(Mm), 0, 0, 0x00),
3290 ROW(F(Xmm) | F(Mem), M(M64) | M(Any), 0, 0x00),
3291 ROW(F(Vm), M(Vm32x), 0, 0x00),
3292 ROW(F(Vm), M(Vm32y), 0, 0x00),
3293 ROW(F(Vm), M(Vm32z), 0, 0x00),
3294 ROW(F(Vm), M(Vm64x), 0, 0x00),
3295 ROW(F(Vm), M(Vm64y), 0, 0x00),
3296 ROW(F(Vm), M(Vm64z), 0, 0x00),
3297 ROW(F(GpbLo) | F(Implicit), 0, 0, 0x01),
3298 ROW(F(Gpw) | F(Gpq) | F(Mem), M(M16) | M(M64) | M(Any), 0, 0x00),
3299 ROW(F(SReg), 0, 0, 0x1A),
3300 ROW(F(SReg), 0, 0, 0x60),
3301 ROW(F(Gpw) | F(Gpq) | F(Mem) | F(I8) | F(I16) | F(I32), M(M16) | M(M64) | M(Any), 0, 0x00),
3302 ROW(F(Gpd) | F(Mem) | F(I32) | F(U32), M(M32), 0, 0x00),
3303 ROW(F(SReg), 0, 0, 0x1E),
3304 ROW(F(Vm), M(Vm64x) | M(Vm64y), 0, 0x00),
3305 ROW(F(I4) | F(U4), 0, 0, 0x00),
3306 ROW(F(Mem), M(M32) | M(M64), 0, 0x00),
3307 ROW(F(St), 0, 0, 0x01),
3308 ROW(F(St), 0, 0, 0x00),
3309 ROW(F(Mem), M(M48) | M(Any), 0, 0x00),
3310 ROW(F(Mem), M(M80) | M(Any), 0, 0x00),
3311 ROW(F(GpbLo) | F(I8) | F(U8), 0, 0, 0x02),
3312 ROW(F(Xmm) | F(KReg), 0, 0, 0x00),
3313 ROW(F(Ymm) | F(KReg), 0, 0, 0x00),
3314 ROW(F(KReg), 0, 0, 0x00),
3315 ROW(F(Gpq) | F(Xmm) | F(Mem), M(M64) | M(Any), 0, 0x00),
3316 ROW(F(Xmm) | F(Ymm), 0, 0, 0x00),
3317 ROW(F(Xmm) | F(Mem), M(M32) | M(Any), 0, 0x00),
3318 ROW(F(Xmm) | F(Mem), M(M16) | M(Any), 0, 0x00),
3319 ROW(F(Bnd), 0, 0, 0x00),
3320 ROW(F(Bnd) | F(Mem), M(Any), 0, 0x00),
3321 ROW(F(Mem), M(Any), 0, 0x00),
3322 ROW(F(Gpw) | F(Gpd) | F(Mem) | F(I32) | F(I64) | F(Rel32), M(M16) | M(M32), 0, 0x00),
3323 ROW(F(Gpq) | F(Mem) | F(I32) | F(I64) | F(Rel32), M(M64) | M(Any), 0, 0x00),
3324 ROW(F(GpbLo) | F(GpbHi) | F(Gpw) | F(Gpd) | F(Mem), M(M8) | M(M16) | M(M32), 0, 0x00),
3325 ROW(F(GpbLo) | F(GpbHi) | F(Gpq) | F(Mem), M(M8) | M(M64), 0, 0x00),
3326 ROW(F(Gpw) | F(Gpd), 0, 0, 0x00),
3327 ROW(F(Mem), M(BaseOnly) | M(Es), 0, 0x00),
3328 ROW(F(St) | F(Mem), M(M32) | M(M64), 0, 0x00),
3329 ROW(F(Gpd) | F(Implicit), 0, 0, 0x02),
3330 ROW(F(Gpd) | F(Gpq) | F(Implicit), 0, 0, 0x01),
3331 ROW(F(I32) | F(I64) | F(Rel8) | F(Rel32), 0, 0, 0x00),
3332 ROW(F(I32) | F(I64) | F(Rel32), 0, 0, 0x00),
3333 ROW(F(Gpw) | F(Gpd) | F(Implicit), 0, 0, 0x02),
3334 ROW(F(I32) | F(I64) | F(Rel8), 0, 0, 0x00),
3335 ROW(F(Gpd) | F(Gpq) | F(Implicit), 0, 0, 0x02),
3336 ROW(F(Gpq) | F(Mem) | F(I32) | F(I64) | F(Rel8) | F(Rel32), M(M64) | M(Any), 0, 0x00),
3337 ROW(F(Gpd) | F(Mem) | F(I32) | F(I64) | F(Rel32), M(M32) | M(Any), 0, 0x00),
3338 ROW(F(GpbLo) | F(GpbHi) | F(Gpw) | F(Gpd) | F(KReg) | F(Mem), M(M8) | M(Any), 0, 0x00),
3339 ROW(F(GpbLo) | F(GpbHi) | F(Gpw) | F(Gpd) | F(Mem), M(M8) | M(Any), 0, 0x00),
3340 ROW(F(Gpd) | F(KReg) | F(Mem), M(M32) | M(Any), 0, 0x00),
3341 ROW(F(Gpq) | F(KReg) | F(Mem), M(M64) | M(Any), 0, 0x00),
3342 ROW(F(Gpw) | F(Gpd) | F(KReg) | F(Mem), M(M16) | M(Any), 0, 0x00),
3343 ROW(F(Gpw) | F(Gpd) | F(Mem), M(M16) | M(Any), 0, 0x00),
3344 ROW(F(Gpd) | F(Gpq), 0, 0, 0x00),
3345 ROW(F(GpbLo) | F(GpbHi) | F(Gpw) | F(Mem), M(M8) | M(M16), 0, 0x00),
3346 ROW(F(Gpw) | F(Gpd) | F(Mem), M(M16) | M(M32), 0, 0x00),
3347 ROW(F(Mm) | F(Mem), M(M64) | M(Any), 0, 0x00),
3348 ROW(F(Mm) | F(Mem) | F(I8) | F(U8), M(M64) | M(Any), 0, 0x00),
3349 ROW(F(U16), 0, 0, 0x00),
3350 ROW(F(Xmm) | F(Ymm) | F(Mem), M(M128) | M(M256), 0, 0x00),
3351 ROW(F(Xmm) | F(I8) | F(U8), 0, 0, 0x00),
3352 ROW(F(Xmm) | F(Implicit), 0, 0, 0x01),
3353 ROW(F(Mem), M(Mib), 0, 0x00),
3354 ROW(F(Gpw) | F(Gpd) | F(Gpq), 0, 0, 0x00),
3355 ROW(F(Mem) | F(Implicit), M(BaseOnly) | M(Ds), 0, 0x01),
3356 ROW(F(Mem) | F(Implicit), M(BaseOnly) | M(Ds), 0, 0x40),
3357 ROW(F(Mem) | F(Implicit), M(BaseOnly) | M(Es), 0, 0x80),
3358 ROW(F(Gpq) | F(Implicit), 0, 0, 0x02),
3359 ROW(F(Gpq) | F(Implicit), 0, 0, 0x08),
3360 ROW(F(Gpd) | F(Implicit), 0, 0, 0x08),
3361 ROW(F(Gpd) | F(Gpq) | F(Mem), M(M32) | M(M64) | M(Any), 0, 0x00),
3362 ROW(F(Mem), M(M16) | M(M32), 0, 0x00),
3363 ROW(F(Mem), M(M16) | M(M32) | M(M64), 0, 0x00),
3364 ROW(F(St) | F(Mem), M(M32) | M(M64) | M(M80), 0, 0x00),
3365 ROW(F(Gpw) | F(Mem), M(M16) | M(Any), 0, 0x01),
3366 ROW(F(GpbLo) | F(Gpw) | F(Gpd), 0, 0, 0x01),
3367 ROW(F(Gpw) | F(I8) | F(U8), 0, 0, 0x04),
3368 ROW(F(Mem), M(BaseOnly) | M(Es), 0, 0x80),
3369 ROW(F(Gpw), 0, 0, 0x04),
3370 ROW(F(GpbHi) | F(Implicit), 0, 0, 0x01),
3371 ROW(F(Mem), M(M8) | M(M16) | M(M32) | M(M48) | M(M64) | M(M80) | M(M128) | M(M256) | M(M512) | M(M1024) | M(Any), 0, 0x00),
3372 ROW(F(GpbLo) | F(Gpw) | F(Gpd) | F(Gpq) | F(Implicit), 0, 0, 0x01),
3373 ROW(F(Mem) | F(Implicit), M(BaseOnly) | M(Ds), 0, 0x80),
3374 ROW(F(Gpw) | F(U8), 0, 0, 0x04),
3375 ROW(F(Mem), M(BaseOnly) | M(Ds), 0, 0x40),
3376 ROW(F(Gpw) | F(Gpd) | F(Gpq) | F(Mem), M(M16) | M(Any), 0, 0x00),
3377 ROW(F(Ymm) | F(Zmm), 0, 0, 0x00),
3378 ROW(F(Xmm) | F(Ymm) | F(Zmm), 0, 0, 0x00),
3379 ROW(F(Xmm) | F(Ymm) | F(Zmm) | F(Mem), M(M128) | M(M256) | M(M512), 0, 0x00),
3380 ROW(F(GpbLo) | F(GpbHi) | F(Gpw) | F(Gpd) | F(Xmm) | F(Mem), M(M8) | M(Any), 0, 0x00),
3381 ROW(F(Gpd) | F(Xmm) | F(Mem), M(M32) | M(Any), 0, 0x00),
3382 ROW(F(Gpw) | F(Gpd) | F(Xmm) | F(Mem), M(M16) | M(Any), 0, 0x00)
3383 };
3384 #undef M
3385 #undef F
3386 #undef ROW
3387 // ----------------------------------------------------------------------------
3388 // ${InstSignatureTable:End}
3389 #endif // !ASMJIT_NO_VALIDATION
3390
3391 // ============================================================================
3392 // [asmjit::x86::InstInternal - QueryRWInfo]
3393 // ============================================================================
3394
3395 // ${InstRWInfoTable:Begin}
3396 // ------------------- Automatically generated, do not edit -------------------
// Maps instruction ids to records in the `rwInfo` table that follows. Two
// uint8_t indexes are stored per instruction id (hence the
// `Inst::_kIdCount * 2` size), so each instruction has a primary and an
// alternative read/write profile.
// NOTE(review): the selector that picks which entry of the pair is used
// (presumably keyed on the instruction's operand signature) lives in the
// RW-query code outside this view - confirm in InstInternal::queryRWInfo()
// before relying on it.
// This table sits inside a "${InstRWInfoTable}" generated region: do not
// hand-edit the data, regenerate it instead.
const uint8_t InstDB::rwInfoIndex[Inst::_kIdCount * 2] = {
  0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 2, 0, 3, 0, 2, 0, 4, 0, 4, 0, 5, 0, 6, 0, 4, 0,
  4, 0, 3, 0, 4, 0, 4, 0, 4, 0, 4, 0, 7, 0, 0, 7, 2, 0, 0, 8, 4, 0, 4, 0, 4, 0,
  4, 0, 9, 0, 0, 10, 11, 0, 11, 0, 11, 0, 11, 0, 11, 0, 0, 4, 0, 4, 0, 12, 0, 12,
  11, 0, 11, 0, 11, 0, 11, 0, 11, 0, 13, 0, 13, 0, 13, 0, 14, 0, 14, 0, 15, 0,
  16, 0, 17, 0, 11, 0, 11, 0, 0, 18, 19, 0, 20, 0, 20, 0, 20, 0, 0, 10, 0, 21,
  0, 1, 22, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 24, 0, 24, 0, 24, 0, 0, 0, 0, 0, 0, 0,
  24, 0, 25, 0, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0,
  3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0,
  3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 26, 0, 0, 4, 0, 4, 27, 0, 0, 5, 0,
  6, 0, 28, 0, 29, 0, 30, 31, 0, 32, 0, 0, 33, 34, 0, 35, 0, 36, 0, 7, 0, 37, 0,
  37, 0, 37, 0, 36, 0, 38, 0, 7, 0, 36, 0, 39, 0, 40, 0, 41, 0, 42, 0, 43, 0, 44,
  0, 45, 0, 37, 0, 37, 0, 7, 0, 39, 0, 40, 0, 45, 0, 46, 0, 0, 47, 0, 1, 0, 1,
  0, 48, 49, 50, 4, 0, 4, 0, 5, 0, 6, 0, 0, 4, 0, 4, 0, 0, 51, 0, 51, 0, 0, 0,
  0, 52, 53, 54, 0, 0, 0, 0, 55, 56, 0, 57, 0, 58, 0, 59, 0, 0, 0, 0, 0, 57, 0,
  57, 0, 57, 0, 57, 0, 57, 0, 57, 0, 57, 0, 57, 0, 60, 0, 61, 0, 61, 0, 60, 0,
  0, 0, 0, 0, 0, 55, 56, 0, 57, 55, 56, 0, 57, 0, 0, 0, 57, 0, 56, 0, 56, 0, 56,
  0, 56, 0, 56, 0, 56, 0, 56, 0, 0, 0, 0, 0, 62, 0, 62, 0, 62, 0, 56, 0, 56, 0,
  60, 0, 0, 0, 63, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 56, 0, 57, 0,
  0, 0, 0, 0, 0, 0, 64, 0, 65, 0, 64, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24,
  0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 65, 0, 64, 0, 67, 0, 66, 55, 56, 0,
  57, 55, 56, 0, 57, 0, 0, 0, 61, 0, 61, 0, 61, 0, 61, 0, 0, 0, 0, 0, 0, 0, 57,
  0, 24, 0, 24, 0, 64, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 4, 4, 0, 4, 0,
  4, 0, 0, 0, 4, 0, 4, 0, 49, 50, 68, 69, 70, 0, 0, 48, 71, 0, 0, 72, 53, 53, 0,
  0, 0, 0, 0, 0, 0, 0, 73, 0, 0, 24, 74, 0, 73, 0, 73, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 0, 77, 0, 78, 0, 79, 0, 76, 0,
  77, 0, 76, 0, 77, 0, 78, 0, 79, 0, 78, 0, 79, 80, 0, 81, 0, 82, 0, 83, 0, 84,
  0, 85, 0, 86, 0, 87, 0, 0, 76, 0, 77, 0, 78, 88, 0, 89, 0, 90, 0, 91, 0, 0, 79,
  0, 84, 0, 85, 0, 86, 0, 87, 0, 84, 0, 85, 0, 86, 0, 87, 88, 0, 89, 0, 90, 0,
  91, 0, 0, 92, 0, 93, 0, 94, 0, 76, 0, 77, 0, 78, 0, 79, 0, 76, 0, 77, 0, 78,
  0, 79, 0, 95, 96, 0, 97, 0, 0, 98, 99, 0, 100, 0, 0, 0, 99, 0, 0, 0, 99, 0, 0,
  24, 99, 0, 0, 24, 0, 101, 0, 102, 0, 101, 103, 0, 104, 0, 104, 0, 104, 0, 96,
  0, 99, 0, 0, 101, 0, 105, 0, 105, 11, 0, 0, 106, 0, 107, 4, 0, 4, 0, 5, 0, 6,
  0, 0, 0, 4, 0, 4, 0, 5, 0, 6, 0, 0, 108, 0, 108, 109, 0, 110, 0, 110, 0, 111,
  0, 81, 0, 36, 0, 112, 0, 111, 0, 86, 0, 110, 0, 110, 0, 113, 0, 114, 0, 114,
  0, 115, 0, 116, 0, 116, 0, 117, 0, 117, 0, 97, 0, 97, 0, 111, 0, 97, 0, 97, 0,
  116, 0, 116, 0, 118, 0, 82, 0, 86, 0, 119, 0, 82, 0, 7, 0, 7, 0, 81, 0, 120,
  0, 121, 0, 110, 0, 110, 0, 120, 0, 0, 4, 49, 122, 4, 0, 4, 0, 5, 0, 6, 0, 0,
  123, 124, 0, 0, 125, 0, 48, 0, 126, 0, 48, 2, 0, 4, 0, 4, 0, 127, 0, 128, 0, 11,
  0, 11, 0, 11, 0, 3, 0, 3, 0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0,
  3, 0, 3, 0, 0, 3, 3, 0, 3, 0, 0, 0, 3, 0, 129, 0, 3, 0, 0, 12, 0, 4, 0, 4, 3,
  0, 3, 0, 4, 0, 3, 0, 0, 130, 0, 131, 3, 0, 3, 0, 4, 0, 3, 0, 0, 132, 0, 133,
  0, 0, 0, 8, 0, 8, 0, 134, 0, 52, 0, 135, 0, 136, 39, 0, 39, 0, 129, 0, 129, 0,
  129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 129, 0, 39, 0, 129,
  0, 129, 0, 129, 0, 39, 0, 39, 0, 129, 0, 129, 0, 129, 0, 3, 0, 3, 0, 3, 0, 137,
  0, 3, 0, 3, 0, 3, 0, 39, 0, 39, 0, 0, 138, 0, 72, 0, 139, 0, 140, 3, 0, 3, 0,
  4, 0, 4, 0, 3, 0, 3, 0, 4, 0, 4, 0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 0, 4, 0, 141,
  0, 142, 0, 143, 0, 36, 0, 36, 0, 36, 0, 142, 0, 142, 0, 143, 0, 36, 0, 36, 0,
  36, 0, 142, 0, 4, 0, 3, 0, 129, 0, 3, 0, 3, 0, 4, 0, 3, 0, 3, 0, 0, 144, 0, 0,
  0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24,
  0, 24, 3, 0, 3, 0, 0, 7, 0, 7, 0, 7, 0, 39, 3, 0, 3, 0, 3, 0, 3, 0, 54, 0,
  3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 54, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 0,
  3, 0, 3, 0, 3, 0, 39, 0, 145, 0, 3, 0, 3, 0, 4, 0, 3, 0, 3, 0, 3, 0, 4, 0, 3,
  0, 0, 146, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 147, 0, 7, 0, 148, 0, 147, 0,
  0, 149, 0, 149, 0, 150, 0, 149, 0, 150, 0, 149, 0, 149, 151, 0, 0, 152, 0, 0,
  147, 0, 147, 0, 0, 11, 0, 7, 0, 7, 0, 38, 0, 148, 0, 0, 7, 0, 148, 0, 0, 153,
  147, 0, 147, 0, 0, 10, 2, 0, 154, 0, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0,
  155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155,
  0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 155,
  0, 155, 0, 155, 0, 155, 0, 155, 0, 155, 0, 0, 0, 64, 4, 0, 4, 0, 4, 0, 0, 4,
  4, 0, 4, 0, 0, 12, 147, 0, 0, 156, 0, 10, 147, 0, 0, 156, 0, 10, 0, 4, 0, 4,
  0, 64, 0, 47, 0, 157, 0, 149, 0, 157, 7, 0, 7, 0, 38, 0, 148, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 158, 159, 0, 0, 157, 2, 0, 4, 0, 4, 0, 5, 0, 6, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 19, 0, 11, 0, 11, 0, 31, 0, 32, 0,
  0, 0, 4, 0, 4, 0, 4, 0, 4, 0, 0, 160, 0, 161, 0, 160, 0, 161, 0, 8, 0, 8, 0, 162,
  0, 163, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 7, 0, 0, 7, 0, 8, 0, 8, 0, 8,
  0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 164, 0, 164,
  165, 0, 40, 0, 166, 0, 167, 0, 166, 0, 167, 0, 165, 0, 40, 0, 166, 0, 167,
  0, 166, 0, 167, 0, 168, 0, 169, 0, 0, 8, 0, 8, 0, 170, 0, 171, 31, 0, 32, 0,
  172, 0, 172, 0, 173, 0, 11, 0, 0, 8, 120, 0, 174, 0, 174, 0, 11, 0, 174, 0, 11,
  0, 173, 0, 11, 0, 173, 0, 0, 175, 173, 0, 11, 0, 173, 0, 11, 0, 174, 0, 40,
  0, 0, 176, 40, 0, 0, 177, 0, 178, 0, 179, 45, 0, 45, 0, 174, 0, 11, 0, 174, 0,
  11, 0, 11, 0, 173, 0, 11, 0, 173, 0, 40, 0, 40, 0, 45, 0, 45, 0, 173, 0, 11,
  0, 11, 0, 174, 0, 0, 177, 0, 178, 0, 8, 0, 8, 0, 8, 0, 162, 0, 163, 0, 8, 0, 180,
  0, 8, 0, 101, 0, 101, 181, 0, 181, 0, 11, 0, 11, 0, 0, 182, 0, 183, 0, 184,
  0, 183, 0, 184, 0, 182, 0, 183, 0, 184, 0, 183, 0, 184, 0, 52, 0, 185, 0, 185,
  0, 186, 0, 187, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0,
  189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 190, 0, 190, 0, 191, 0, 192, 0, 185, 0,
  185, 0, 185, 0, 185, 0, 185, 0, 185, 0, 190, 0, 190, 0, 185, 0, 185, 0, 188,
  0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185,
  0, 185, 0, 185, 0, 185, 0, 185, 0, 185, 0, 190, 0, 190, 0, 190, 0, 190, 0,
  191, 0, 192, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0,
  185, 0, 185, 0, 188, 0, 189, 0, 190, 0, 190, 0, 191, 0, 192, 0, 185, 0, 185,
  0, 188, 0, 189, 0, 185, 0, 185, 0, 188, 0, 189, 0, 185, 0, 185, 0, 193, 0, 194,
  0, 190, 0, 190, 0, 191, 0, 192, 0, 195, 0, 195, 0, 39, 0, 121, 11, 0, 11, 0,
  39, 0, 196, 0, 99, 197, 99, 198, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0,
  24, 0, 24, 99, 198, 99, 199, 11, 0, 11, 0, 0, 200, 0, 201, 0, 11, 0, 11, 0,
  200, 0, 201, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 202, 0, 203, 0, 204,
  0, 203, 0, 204, 0, 202, 0, 203, 0, 204, 0, 203, 0, 204, 0, 163, 111, 0, 0, 98,
  0, 106, 0, 205, 0, 205, 0, 8, 0, 8, 0, 162, 0, 163, 0, 0, 0, 206, 0, 0, 0, 8,
  0, 8, 0, 162, 0, 163, 0, 0, 0, 207, 0, 0, 208, 0, 208, 0, 81, 0, 209, 0, 208,
  0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 208, 0, 0, 210, 211, 212,
  211, 212, 0, 213, 116, 214, 116, 214, 215, 0, 216, 0, 111, 0, 111, 0, 111, 0,
  111, 0, 217, 0, 116, 218, 11, 0, 11, 0, 118, 219, 208, 0, 208, 0, 0, 8, 0, 220,
  0, 206, 172, 0, 0, 0, 0, 221, 0, 207, 0, 8, 0, 8, 0, 162, 0, 163, 222, 0, 0,
  220, 0, 8, 0, 8, 0, 223, 0, 223, 11, 0, 11, 0, 11, 0, 11, 0, 0, 8, 0, 8, 0,
  8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0,
  8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 164, 0, 8, 224, 0, 45, 0, 225, 0, 225,
  0, 40, 0, 226, 0, 0, 8, 0, 190, 0, 227, 0, 227, 0, 8, 0, 8, 0, 8, 0, 8, 0,
  130, 0, 131, 0, 8, 0, 8, 0, 8, 0, 8, 0, 132, 0, 133, 0, 227, 0, 227, 0, 227, 0,
  227, 0, 227, 0, 227, 0, 180, 0, 180, 172, 0, 172, 0, 172, 0, 172, 0, 0, 180,
  0, 180, 0, 180, 0, 180, 0, 180, 0, 180, 11, 0, 11, 0, 0, 185, 0, 185, 0, 185,
  0, 185, 0, 228, 0, 228, 0, 8, 0, 8, 0, 8, 0, 185, 0, 8, 0, 8, 0, 185, 0, 185,
  0, 190, 0, 190, 0, 229, 0, 229, 0, 229, 0, 8, 0, 229, 0, 8, 0, 185, 0, 185, 0,
  185, 0, 185, 0, 185, 0, 8, 11, 0, 11, 0, 11, 0, 11, 0, 0, 134, 0, 52, 0, 135,
  0, 230, 99, 198, 99, 197, 99, 199, 99, 198, 7, 0, 7, 0, 7, 0, 0, 8, 7, 0, 0,
  8, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 0, 8, 7, 0, 7, 0, 137, 0, 7, 0, 0, 8,
  7, 0, 0, 8, 0, 8, 7, 0, 0, 231, 0, 163, 0, 162, 0, 232, 11, 0, 11, 0, 0, 233,
  0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233, 0, 233,
  0, 233, 0, 185, 0, 185, 0, 8, 0, 8, 0, 205, 0, 205, 0, 8, 0, 8, 0, 8, 0, 8,
  0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 234, 0,
  234, 0, 235, 0, 175, 0, 225, 0, 225, 0, 225, 0, 225, 0, 141, 0, 234, 0, 236,
  0, 175, 0, 235, 0, 235, 0, 175, 0, 236, 0, 175, 0, 235, 0, 175, 0, 237, 0, 238,
  0, 173, 0, 173, 0, 173, 0, 237, 0, 235, 0, 175, 0, 236, 0, 175, 0, 235, 0,
  175, 0, 234, 0, 175, 0, 237, 0, 238, 0, 173, 0, 173, 0, 173, 0, 237, 0, 0, 8,
  0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 11, 0, 11, 0, 11, 0, 11, 0, 0,
  8, 0, 8, 0, 8, 0, 239, 0, 11, 0, 11, 0, 8, 0, 8, 0, 11, 0, 11, 0, 8, 0, 8, 0,
  240, 0, 240, 0, 240, 0, 240, 0, 8, 111, 0, 111, 0, 241, 0, 111, 0, 0, 240, 0,
  240, 0, 240, 0, 240, 0, 240, 0, 240, 0, 8, 0, 8, 0, 185, 0, 185, 0, 185, 0, 8,
  0, 240, 0, 240, 0, 8, 0, 8, 0, 185, 0, 185, 0, 185, 0, 8, 0, 8, 0, 227, 0, 11,
  0, 11, 0, 11, 0, 8, 0, 8, 0, 8, 0, 242, 0, 243, 0, 242, 0, 8, 0, 8, 0, 8, 0,
  242, 0, 242, 0, 242, 0, 8, 0, 8, 0, 8, 0, 242, 0, 242, 0, 243, 0, 242, 0, 8,
  0, 8, 0, 8, 0, 242, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 185, 0,
  185, 222, 0, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227, 0, 227,
  0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8,
  0, 200, 0, 201, 11, 0, 11, 0, 0, 200, 0, 201, 181, 0, 181, 0, 0, 200, 0, 201,
  11, 0, 0, 201, 0, 11, 0, 11, 0, 200, 0, 201, 0, 11, 0, 11, 0, 200, 0, 201, 0,
  11, 0, 11, 0, 200, 0, 201, 11, 0, 11, 0, 0, 200, 0, 201, 181, 0, 181, 0, 0, 200,
  0, 201, 11, 0, 0, 201, 0, 8, 0, 8, 0, 162, 0, 163, 111, 0, 111, 0, 0, 24,
  0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 0, 24, 111, 0, 241, 0, 0, 8, 0, 8, 0,
  8, 0, 8, 0, 8, 0, 8, 11, 0, 11, 0, 0, 200, 0, 201, 0, 158, 0, 8, 0, 8, 0, 162,
  0, 163, 222, 0, 222, 0, 31, 0, 32, 0, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 102, 0, 102, 0, 244, 0, 0, 245, 0, 0, 0, 246, 0, 0,
  0, 0, 150, 0, 0, 2, 0, 4, 0, 4, 0, 0, 247, 0, 247, 0, 247, 0, 247, 0, 248, 0,
  248, 0, 248, 0, 248, 0, 248, 0, 248, 0, 248, 0, 248, 0, 244, 0, 0
};
3535
// Read/write metadata records referenced by `rwInfoIndex` above. Each record
// carries a category tag (kCategoryGeneric for the common case, or a
// special-cased category such as kCategoryImul / kCategoryMov / kCategoryVmov*
// / kCategoryVmaskmov for instructions whose RW semantics need dedicated
// handling), a second uint8_t field, and six uint8_t indexes into the
// `rwInfoOp` table describing per-operand access.
// NOTE(review): the meaning of the second field (it looks like an index into
// an RM/memory-info table) is defined by the RWInfo struct declared outside
// this view - confirm before relying on it.
// The trailing `// #N [ref=Mx]` comments are emitted by the generator: N is
// the record's index in this array and M counts how many `rwInfoIndex`
// entries reference it. Part of a generated region - do not hand-edit.
const InstDB::RWInfo InstDB::rwInfo[] = {
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #0 [ref=1609x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 1 , 0 , 0 , 0 , 0 , 0 } }, // #1 [ref=7x]
  { InstDB::RWInfo::kCategoryGeneric , 1 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #2 [ref=7x]
  { InstDB::RWInfo::kCategoryGeneric , 2 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #3 [ref=100x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #4 [ref=69x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 6 , 7 , 0 , 0 , 0 , 0 } }, // #5 [ref=7x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 8 , 9 , 0 , 0 , 0 , 0 } }, // #6 [ref=7x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 10, 5 , 0 , 0 , 0 , 0 } }, // #7 [ref=33x]
  { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #8 [ref=186x]
  { InstDB::RWInfo::kCategoryGeneric , 7 , { 12, 13, 0 , 0 , 0 , 0 } }, // #9 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 3 , 0 , 0 , 0 } }, // #10 [ref=5x]
  { InstDB::RWInfo::kCategoryGeneric , 2 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #11 [ref=80x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 4 , 5 , 14, 0 , 0 , 0 } }, // #12 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 2 , { 5 , 3 , 0 , 0 , 0 , 0 } }, // #13 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 10, 3 , 0 , 0 , 0 , 0 } }, // #14 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 9 , { 10, 5 , 0 , 0 , 0 , 0 } }, // #15 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 5 , 0 , 0 , 0 , 0 } }, // #16 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #17 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 0 , 0 , 0 , 0 , 0 } }, // #18 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 10, { 3 , 3 , 0 , 0 , 0 , 0 } }, // #19 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 3 , 0 , 0 , 0 , 0 } }, // #20 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 11, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #21 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 16, 0 , 0 , 0 , 0 } }, // #22 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 17, 0 , 0 , 0 , 0 , 0 } }, // #23 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 3 , 0 , 0 , 0 , 0 , 0 } }, // #24 [ref=34x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 18, 0 , 0 , 0 , 0 , 0 } }, // #25 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 1 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #26 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 19, 20, 0 , 0 , 0 , 0 } }, // #27 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 3 , 21, 0 , 0 , 0 } }, // #28 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 4 , 22, 17, 23, 24, 0 } }, // #29 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 13, { 25, 26, 27, 28, 29, 0 } }, // #30 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 7 , 7 , 0 , 0 , 0 , 0 } }, // #31 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 9 , 9 , 0 , 0 , 0 , 0 } }, // #32 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 27, 30, 31, 15, 0 , 0 } }, // #33 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 32, 33, 0 , 0 , 0 , 0 } }, // #34 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 14, { 2 , 3 , 0 , 0 , 0 , 0 } }, // #35 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 10, 7 , 0 , 0 , 0 , 0 } }, // #36 [ref=10x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 34, 5 , 0 , 0 , 0 , 0 } }, // #37 [ref=5x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 35, 7 , 0 , 0 , 0 , 0 } }, // #38 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 34, 7 , 0 , 0 , 0 , 0 } }, // #39 [ref=13x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 11, 7 , 0 , 0 , 0 , 0 } }, // #40 [ref=9x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 36, 7 , 0 , 0 , 0 , 0 } }, // #41 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 14, { 35, 3 , 0 , 0 , 0 , 0 } }, // #42 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 14, { 36, 3 , 0 , 0 , 0 , 0 } }, // #43 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 35, 9 , 0 , 0 , 0 , 0 } }, // #44 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 11, 9 , 0 , 0 , 0 , 0 } }, // #45 [ref=7x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 37, 38, 0 , 0 , 0 , 0 } }, // #46 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 27, 0 , 0 , 0 , 0 , 0 } }, // #47 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 0 , 0 , 0 , 0 , 0 } }, // #48 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 15, { 1 , 39, 0 , 0 , 0 , 0 } }, // #49 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 6 , { 40, 41, 3 , 0 , 0 , 0 } }, // #50 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 16, { 42, 43, 0 , 0 , 0 , 0 } }, // #51 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 17, { 42, 5 , 0 , 0 , 0 , 0 } }, // #52 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 5 , 0 , 0 , 0 , 0 } }, // #53 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 4 , 0 , 0 , 0 , 0 , 0 } }, // #54 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 45, 0 , 0 , 0 , 0 } }, // #55 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 18, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #56 [ref=15x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 44, 0 , 0 , 0 , 0 , 0 } }, // #57 [ref=16x]
  { InstDB::RWInfo::kCategoryGeneric , 19, { 45, 0 , 0 , 0 , 0 , 0 } }, // #58 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 19, { 46, 0 , 0 , 0 , 0 , 0 } }, // #59 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 20, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #60 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 45, 0 , 0 , 0 , 0 , 0 } }, // #61 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 18, { 11, 0 , 0 , 0 , 0 , 0 } }, // #62 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 21, { 13, 0 , 0 , 0 , 0 , 0 } }, // #63 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 0 , 0 , 0 , 0 , 0 } }, // #64 [ref=8x]
  { InstDB::RWInfo::kCategoryGeneric , 21, { 47, 0 , 0 , 0 , 0 , 0 } }, // #65 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 7 , { 48, 0 , 0 , 0 , 0 , 0 } }, // #66 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 20, { 11, 0 , 0 , 0 , 0 , 0 } }, // #67 [ref=2x]
  { InstDB::RWInfo::kCategoryImul , 2 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #68 [ref=1x]
  { InstDB::RWInfo::kCategoryImul , 22, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #69 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 49, 50, 0 , 0 , 0 , 0 } }, // #70 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 51, 50, 0 , 0 , 0 , 0 } }, // #71 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 4 , 9 , 0 , 0 , 0 , 0 } }, // #72 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 3 , 5 , 0 , 0 , 0 , 0 } }, // #73 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 21, 28, 0 , 0 , 0 , 0 } }, // #74 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 52, 0 , 0 , 0 , 0 , 0 } }, // #75 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 53, 39, 39, 0 , 0 , 0 } }, // #76 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 42, 9 , 9 , 0 , 0 , 0 } }, // #77 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 7 , 7 , 0 , 0 , 0 } }, // #78 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 47, 13, 13, 0 , 0 , 0 } }, // #79 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 23, { 53, 39, 0 , 0 , 0 , 0 } }, // #80 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 24, { 42, 9 , 0 , 0 , 0 , 0 } }, // #81 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 25, { 34, 7 , 0 , 0 , 0 , 0 } }, // #82 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 26, { 47, 13, 0 , 0 , 0 , 0 } }, // #83 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 53, 39, 0 , 0 , 0 , 0 } }, // #84 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 42, 9 , 0 , 0 , 0 , 0 } }, // #85 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 7 , 0 , 0 , 0 , 0 } }, // #86 [ref=5x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 47, 13, 0 , 0 , 0 , 0 } }, // #87 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 39, 39, 0 , 0 , 0 , 0 } }, // #88 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 9 , 9 , 0 , 0 , 0 , 0 } }, // #89 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 7 , 7 , 0 , 0 , 0 , 0 } }, // #90 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 13, 13, 0 , 0 , 0 , 0 } }, // #91 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 47, 39, 39, 0 , 0 , 0 } }, // #92 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 9 , 9 , 0 , 0 , 0 } }, // #93 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 42, 13, 13, 0 , 0 , 0 } }, // #94 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 54, 0 , 0 , 0 , 0 , 0 } }, // #95 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 27, { 11, 3 , 0 , 0 , 0 , 0 } }, // #96 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 10, 5 , 0 , 0 , 0 , 0 } }, // #97 [ref=5x]
  { InstDB::RWInfo::kCategoryGeneric , 28, { 9 , 0 , 0 , 0 , 0 , 0 } }, // #98 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 3 , 0 , 0 , 0 , 0 } }, // #99 [ref=13x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #100 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 7 , { 13, 0 , 0 , 0 , 0 , 0 } }, // #101 [ref=5x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 3 , 0 , 0 , 0 , 0 , 0 } }, // #102 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 49, 19, 0 , 0 , 0 , 0 } }, // #103 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 55, 0 , 0 , 0 , 0 , 0 } }, // #104 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 3 , 9 , 0 , 0 , 0 , 0 } }, // #105 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 5 , 5 , 20, 0 , 0 , 0 } }, // #106 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 7 , 7 , 20, 0 , 0 , 0 } }, // #107 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 18, 28, 56, 0 , 0 , 0 } }, // #108 [ref=2x]
  { InstDB::RWInfo::kCategoryMov , 29, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #109 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 30, { 10, 5 , 0 , 0 , 0 , 0 } }, // #110 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 3 , 0 , 0 , 0 , 0 } }, // #111 [ref=14x]
  { InstDB::RWInfo::kCategoryGeneric , 16, { 11, 43, 0 , 0 , 0 , 0 } }, // #112 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 35, 57, 0 , 0 , 0 , 0 } }, // #113 [ref=1x]
  { InstDB::RWInfo::kCategoryMovh64 , 13, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #114 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 58, 7 , 0 , 0 , 0 , 0 } }, // #115 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 13, { 34, 7 , 0 , 0 , 0 , 0 } }, // #116 [ref=7x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 53, 5 , 0 , 0 , 0 , 0 } }, // #117 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 28, { 42, 9 , 0 , 0 , 0 , 0 } }, // #118 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 20, 19, 0 , 0 , 0 , 0 } }, // #119 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 14, { 11, 3 , 0 , 0 , 0 , 0 } }, // #120 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 34, 9 , 0 , 0 , 0 , 0 } }, // #121 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 6 , { 59, 41, 3 , 0 , 0 , 0 } }, // #122 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 11, 3 , 60, 0 , 0 } }, // #123 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 28, 0 , 0 , 0 , 0 } }, // #124 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 16, 28, 29, 0 , 0 , 0 } }, // #125 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 10, { 3 , 0 , 0 , 0 , 0 , 0 } }, // #126 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 50, 21, 0 , 0 , 0 , 0 } }, // #127 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 50, 61, 0 , 0 , 0 , 0 } }, // #128 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 25, 7 , 0 , 0 , 0 , 0 } }, // #129 [ref=18x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 62, 16, 56 } }, // #130 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 63, 16, 56 } }, // #131 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 62, 0 , 0 } }, // #132 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 63, 0 , 0 } }, // #133 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 31, { 53, 5 , 0 , 0 , 0 , 0 } }, // #134 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 32, { 34, 5 , 0 , 0 , 0 , 0 } }, // #135 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 33, { 47, 3 , 0 , 0 , 0 , 0 } }, // #136 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 64, 5 , 0 , 0 , 0 , 0 } }, // #137 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 15, { 4 , 39, 0 , 0 , 0 , 0 } }, // #138 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 4 , { 4 , 7 , 0 , 0 , 0 , 0 } }, // #139 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 27, { 2 , 13, 0 , 0 , 0 , 0 } }, // #140 [ref=1x]
  { InstDB::RWInfo::kCategoryVmov1_8 , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #141 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 10, 9 , 0 , 0 , 0 , 0 } }, // #142 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 27, { 10, 13, 0 , 0 , 0 , 0 } }, // #143 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 10, { 65, 0 , 0 , 0 , 0 , 0 } }, // #144 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 3 , { 5 , 5 , 0 , 0 , 0 , 0 } }, // #145 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 10, { 60, 0 , 0 , 0 , 0 , 0 } }, // #146 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 10, { 2 , 66, 0 , 0 , 0 , 0 } }, // #147 [ref=8x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 36, 9 , 0 , 0 , 0 , 0 } }, // #148 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 0 , 0 , 0 , 0 , 0 } }, // #149 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 67, 28, 0 , 0 , 0 } }, // #150 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 67, 0 , 0 , 0 , 0 } }, // #151 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 15, 67, 62, 0 , 0 , 0 } }, // #152 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 68, 0 , 0 , 0 , 0 , 0 } }, // #153 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 21, 20, 0 , 0 , 0 , 0 } }, // #154 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 31, { 69, 0 , 0 , 0 , 0 , 0 } }, // #155 [ref=30x]
  { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 3 , 66, 0 , 0 , 0 } }, // #156 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 34, { 11, 0 , 0 , 0 , 0 , 0 } }, // #157 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 28, { 42, 0 , 0 , 0 , 0 , 0 } }, // #158 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 20, 21, 0 , 0 , 0 , 0 } }, // #159 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 70, 43, 43, 43, 43, 5 } }, // #160 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 4 , 5 , 5 , 5 , 5 , 5 } }, // #161 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 5 , 7 , 0 , 0 , 0 } }, // #162 [ref=8x]
  { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 5 , 9 , 0 , 0 , 0 } }, // #163 [ref=9x]
  { InstDB::RWInfo::kCategoryGeneric , 6 , { 11, 3 , 3 , 3 , 0 , 0 } }, // #164 [ref=3x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 71, 5 , 0 , 0 , 0 , 0 } }, // #165 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 11, 5 , 0 , 0 , 0 , 0 } }, // #166 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 37, { 72, 73, 0 , 0 , 0 , 0 } }, // #167 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 38, { 11, 7 , 0 , 0 , 0 , 0 } }, // #168 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 39, { 11, 9 , 0 , 0 , 0 , 0 } }, // #169 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 35, { 11, 5 , 7 , 0 , 0 , 0 } }, // #170 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 36, { 11, 5 , 9 , 0 , 0 , 0 } }, // #171 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 11, { 11, 3 , 0 , 0 , 0 , 0 } }, // #172 [ref=7x]
  { InstDB::RWInfo::kCategoryVmov2_1 , 40, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #173 [ref=14x]
  { InstDB::RWInfo::kCategoryVmov1_2 , 14, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #174 [ref=7x]
  { InstDB::RWInfo::kCategoryVmov1_2 , 41, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #175 [ref=10x]
  { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 74, 7 , 0 , 0 , 0 } }, // #176 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 42, { 10, 57, 3 , 0 , 0 , 0 } }, // #177 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 42, { 10, 74, 3 , 0 , 0 , 0 } }, // #178 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 57, 9 , 0 , 0 , 0 } }, // #179 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 43, { 10, 5 , 5 , 0 , 0 , 0 } }, // #180 [ref=9x]
  { InstDB::RWInfo::kCategoryGeneric , 44, { 72, 43, 0 , 0 , 0 , 0 } }, // #181 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 45, { 10, 73, 0 , 0 , 0 , 0 } }, // #182 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 45, { 10, 3 , 0 , 0 , 0 , 0 } }, // #183 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 46, { 71, 43, 0 , 0 , 0 , 0 } }, // #184 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 6 , { 2 , 3 , 3 , 0 , 0 , 0 } }, // #185 [ref=60x]
  { InstDB::RWInfo::kCategoryGeneric , 35, { 4 , 57, 7 , 0 , 0 , 0 } }, // #186 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 36, { 4 , 74, 9 , 0 , 0 , 0 } }, // #187 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 35, { 6 , 7 , 7 , 0 , 0 , 0 } }, // #188 [ref=11x]
  { InstDB::RWInfo::kCategoryGeneric , 36, { 8 , 9 , 9 , 0 , 0 , 0 } }, // #189 [ref=11x]
  { InstDB::RWInfo::kCategoryGeneric , 47, { 11, 3 , 3 , 3 , 0 , 0 } }, // #190 [ref=15x]
  { InstDB::RWInfo::kCategoryGeneric , 48, { 34, 7 , 7 , 7 , 0 , 0 } }, // #191 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 49, { 42, 9 , 9 , 9 , 0 , 0 } }, // #192 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 35, { 25, 7 , 7 , 0 , 0 , 0 } }, // #193 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 36, { 75, 9 , 9 , 0 , 0 , 0 } }, // #194 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 14, { 34, 3 , 0 , 0 , 0 , 0 } }, // #195 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 5 , { 42, 9 , 0 , 0 , 0 , 0 } }, // #196 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 2 , 3 , 2 , 0 , 0 , 0 } }, // #197 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 2 , 3 , 2 , 0 , 0 , 0 } }, // #198 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 18, { 4 , 3 , 4 , 0 , 0 , 0 } }, // #199 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 35, { 10, 57, 7 , 0 , 0 , 0 } }, // #200 [ref=11x]
  { InstDB::RWInfo::kCategoryGeneric , 36, { 10, 74, 9 , 0 , 0 , 0 } }, // #201 [ref=13x]
  { InstDB::RWInfo::kCategoryGeneric , 43, { 71, 73, 5 , 0 , 0 , 0 } }, // #202 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 43, { 11, 3 , 5 , 0 , 0 , 0 } }, // #203 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 50, { 72, 43, 73, 0 , 0 , 0 } }, // #204 [ref=4x]
  { InstDB::RWInfo::kCategoryVmaskmov , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #205 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 13, { 34, 0 , 0 , 0 , 0 , 0 } }, // #206 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 21, 0 , 0 , 0 , 0 , 0 } }, // #207 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 51, { 11, 3 , 0 , 0 , 0 , 0 } }, // #208 [ref=12x]
  { InstDB::RWInfo::kCategoryVmovddup , 52, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #209 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 57, 57, 0 , 0 , 0 } }, // #210 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 13, { 34, 57, 0 , 0 , 0 , 0 } }, // #211 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 13, { 10, 7 , 7 , 0 , 0 , 0 } }, // #212 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 7 , 7 , 0 , 0 , 0 } }, // #213 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 13, { 10, 57, 7 , 0 , 0 , 0 } }, // #214 [ref=2x]
  { InstDB::RWInfo::kCategoryVmovmskpd , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #215 [ref=1x]
  { InstDB::RWInfo::kCategoryVmovmskps , 0 , { 0 , 0 , 0 , 0 , 0 , 0 } }, // #216 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 53, { 34, 7 , 0 , 0 , 0 , 0 } }, // #217 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 57, 7 , 0 , 0 , 0 } }, // #218 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 10, 74, 9 , 0 , 0 , 0 } }, // #219 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 13, { 7 , 0 , 0 , 0 , 0 , 0 } }, // #220 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 76, 0 , 0 , 0 , 0 , 0 } }, // #221 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 2 , { 3 , 3 , 0 , 0 , 0 , 0 } }, // #222 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 12, { 72, 43, 43, 43, 43, 5 } }, // #223 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 15, { 11, 39, 0 , 0 , 0 , 0 } }, // #224 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 11, 7 , 0 , 0 , 0 , 0 } }, // #225 [ref=6x]
  { InstDB::RWInfo::kCategoryGeneric , 27, { 11, 13, 0 , 0 , 0 , 0 } }, // #226 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 6 , { 34, 3 , 3 , 0 , 0 , 0 } }, // #227 [ref=17x]
  { InstDB::RWInfo::kCategoryGeneric , 50, { 71, 73, 73, 0 , 0 , 0 } }, // #228 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 22, { 11, 3 , 3 , 0 , 0 , 0 } }, // #229 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 7 , { 47, 5 , 0 , 0 , 0 , 0 } }, // #230 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 54, { 10, 5 , 39, 0 , 0 , 0 } }, // #231 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 55, { 10, 5 , 13, 0 , 0 , 0 } }, // #232 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 43, { 10, 5 , 5 , 5 , 0 , 0 } }, // #233 [ref=12x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 34, 3 , 0 , 0 , 0 , 0 } }, // #234 [ref=4x]
  { InstDB::RWInfo::kCategoryVmov1_4 , 56, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #235 [ref=6x]
  { InstDB::RWInfo::kCategoryVmov1_8 , 57, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #236 [ref=3x]
  { InstDB::RWInfo::kCategoryVmov4_1 , 58, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #237 [ref=4x]
  { InstDB::RWInfo::kCategoryVmov8_1 , 59, { 0 , 0 , 0 , 0 , 0 , 0 } }, // #238 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 60, { 10, 5 , 5 , 5 , 0 , 0 } }, // #239 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 61, { 10, 5 , 5 , 0 , 0 , 0 } }, // #240 [ref=12x]
  { InstDB::RWInfo::kCategoryGeneric , 18, { 11, 3 , 0 , 0 , 0 , 0 } }, // #241 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 22, { 11, 3 , 5 , 0 , 0 , 0 } }, // #242 [ref=9x]
  { InstDB::RWInfo::kCategoryGeneric , 62, { 11, 3 , 0 , 0 , 0 , 0 } }, // #243 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 0 , { 56, 16, 28, 0 , 0 , 0 } }, // #244 [ref=2x]
  { InstDB::RWInfo::kCategoryGeneric , 11, { 2 , 2 , 0 , 0 , 0 , 0 } }, // #245 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 51, { 2 , 2 , 0 , 0 , 0 , 0 } }, // #246 [ref=1x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 3 , 56, 16, 0 , 0 , 0 } }, // #247 [ref=4x]
  { InstDB::RWInfo::kCategoryGeneric , 8 , { 11, 56, 16, 0 , 0 , 0 } }  // #248 [ref=8x]
};
3787
// Operand read/write information rows, referenced by index from the RWInfo
// tables above. This table is generated by tools — regenerate it instead of
// editing rows by hand.
//
// Each row is { readMask, writeMask, physId, reserved, flags }:
//   - the two 64-bit masks describe which bytes of the operand are read and
//     written (e.g. 0xFFFF selects the low 16 bytes, a full XMM register),
//   - physId is a fixed physical id, 0xFF when the operand is not fixed;
//     it is only meaningful together with kRegPhysId or kMemPhysId,
//   - flags combine the access kind (OpRWInfo::kRead / kWrite / kRW) with
//     modifiers such as kZExt (the write zero-extends) and the physical-id
//     markers mentioned above.
// NOTE(review): field names above are inferred from how the values are used
// in this file — confirm against the RWInfoOp declaration in the header.
const InstDB::RWInfoOp InstDB::rwInfoOp[] = {
  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, 0 }, // #0 [ref=14957x]
  { 0x0000000000000003u, 0x0000000000000003u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId }, // #1 [ref=10x]
  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #2 [ref=217x]
  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #3 [ref=978x]
  { 0x000000000000FFFFu, 0x000000000000FFFFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #4 [ref=92x]
  { 0x000000000000FFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #5 [ref=305x]
  { 0x00000000000000FFu, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kRW }, // #6 [ref=18x]
  { 0x00000000000000FFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #7 [ref=181x]
  { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kRW }, // #8 [ref=18x]
  { 0x000000000000000Fu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #9 [ref=130x]
  { 0x0000000000000000u, 0x000000000000FFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #10 [ref=160x]
  { 0x0000000000000000u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #11 [ref=415x]
  { 0x0000000000000003u, 0x0000000000000003u, 0xFF, { 0 }, OpRWInfo::kRW }, // #12 [ref=1x]
  { 0x0000000000000003u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #13 [ref=34x]
  { 0x000000000000FFFFu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #14 [ref=4x]
  { 0x0000000000000000u, 0x000000000000000Fu, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #15 [ref=7x]
  { 0x000000000000000Fu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #16 [ref=21x]
  { 0x00000000000000FFu, 0x00000000000000FFu, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #17 [ref=2x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kMemPhysId }, // #18 [ref=3x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x06, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #19 [ref=3x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #20 [ref=7x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #21 [ref=7x]
  { 0x00000000000000FFu, 0x00000000000000FFu, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #22 [ref=1x]
  { 0x00000000000000FFu, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #23 [ref=1x]
  { 0x00000000000000FFu, 0x0000000000000000u, 0x03, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #24 [ref=1x]
  { 0x00000000000000FFu, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #25 [ref=20x]
  { 0x000000000000000Fu, 0x000000000000000Fu, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #26 [ref=1x]
  { 0x000000000000000Fu, 0x000000000000000Fu, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #27 [ref=4x]
  { 0x000000000000000Fu, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #28 [ref=11x]
  { 0x000000000000000Fu, 0x0000000000000000u, 0x03, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #29 [ref=2x]
  { 0x0000000000000000u, 0x000000000000000Fu, 0x03, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #30 [ref=1x]
  { 0x000000000000000Fu, 0x000000000000000Fu, 0x01, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #31 [ref=1x]
  { 0x0000000000000000u, 0x00000000000000FFu, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #32 [ref=1x]
  { 0x00000000000000FFu, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #33 [ref=1x]
  { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #34 [ref=76x]
  { 0x0000000000000000u, 0x00000000000000FFu, 0xFF, { 0 }, OpRWInfo::kWrite }, // #35 [ref=6x]
  { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kWrite }, // #36 [ref=6x]
  { 0x0000000000000000u, 0x0000000000000003u, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId }, // #37 [ref=1x]
  { 0x0000000000000003u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #38 [ref=1x]
  { 0x0000000000000001u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #39 [ref=28x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #40 [ref=2x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #41 [ref=3x]
  { 0x0000000000000000u, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #42 [ref=29x]
  { 0xFFFFFFFFFFFFFFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #43 [ref=33x]
  { 0x00000000000003FFu, 0x00000000000003FFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #44 [ref=22x]
  { 0x00000000000003FFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #45 [ref=13x]
  { 0x0000000000000000u, 0x00000000000003FFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #46 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000003u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #47 [ref=15x]
  { 0x0000000000000000u, 0x0000000000000003u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #48 [ref=2x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #49 [ref=2x]
  { 0x0000000000000003u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #50 [ref=4x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x07, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kMemPhysId }, // #51 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #52 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000001u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #53 [ref=14x]
  { 0x0000000000000000u, 0x0000000000000001u, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId }, // #54 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRW | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #55 [ref=3x]
  { 0x000000000000000Fu, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #56 [ref=20x]
  { 0x000000000000FF00u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #57 [ref=23x]
  { 0x0000000000000000u, 0x000000000000FF00u, 0xFF, { 0 }, OpRWInfo::kWrite }, // #58 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kWrite | OpRWInfo::kRegPhysId | OpRWInfo::kZExt }, // #59 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x02, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #60 [ref=2x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x06, { 0 }, OpRWInfo::kRead | OpRWInfo::kMemPhysId }, // #61 [ref=1x]
  { 0x0000000000000000u, 0x000000000000000Fu, 0x01, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #62 [ref=5x]
  { 0x0000000000000000u, 0x000000000000FFFFu, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #63 [ref=4x]
  { 0x0000000000000000u, 0x0000000000000007u, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #64 [ref=2x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x04, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #65 [ref=1x]
  { 0x0000000000000001u, 0x0000000000000000u, 0x01, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #66 [ref=10x]
  { 0x0000000000000000u, 0x000000000000000Fu, 0x00, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt | OpRWInfo::kRegPhysId }, // #67 [ref=5x]
  { 0x0000000000000001u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRead | OpRWInfo::kRegPhysId }, // #68 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000001u, 0xFF, { 0 }, OpRWInfo::kWrite }, // #69 [ref=30x]
  { 0xFFFFFFFFFFFFFFFFu, 0xFFFFFFFFFFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #70 [ref=2x]
  { 0x0000000000000000u, 0x00000000FFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #71 [ref=10x]
  { 0x0000000000000000u, 0xFFFFFFFFFFFFFFFFu, 0xFF, { 0 }, OpRWInfo::kWrite | OpRWInfo::kZExt }, // #72 [ref=16x]
  { 0x00000000FFFFFFFFu, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #73 [ref=16x]
  { 0x000000000000FFF0u, 0x0000000000000000u, 0xFF, { 0 }, OpRWInfo::kRead }, // #74 [ref=18x]
  { 0x000000000000000Fu, 0x000000000000000Fu, 0xFF, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt }, // #75 [ref=1x]
  { 0x0000000000000000u, 0x0000000000000000u, 0x00, { 0 }, OpRWInfo::kRW | OpRWInfo::kZExt | OpRWInfo::kRegPhysId } // #76 [ref=1x]
};
3867
// Read/write information describing how the R/M (register-or-memory) operand
// of an instruction behaves. This table is generated by tools — regenerate it
// instead of editing rows by hand.
//
// Each row is { category, rmOpsMask, fixedSize, flags, feature }:
//   - category selects how the memory-operand size is derived (kCategoryNone,
//     kCategoryFixed, kCategoryConsistent, kCategoryHalf, kCategoryQuarter,
//     kCategoryEighth),
//   - the second field is a bit-mask of operand indexes the entry applies to,
//   - the third field is a fixed size in bytes (used with kCategoryFixed),
//   - flags may contain kFlagAmbiguous,
//   - the last field names a CPU feature (e.g. Features::kSSE4_1) that alters
//     the R/M behavior when present, 0 when not applicable.
// NOTE(review): field meanings above are inferred from the data — confirm
// against the RWInfoRm declaration in the header.
const InstDB::RWInfoRm InstDB::rwInfoRm[] = {
  { InstDB::RWInfoRm::kCategoryNone , 0x00, 0 , 0, 0 }, // #0 [ref=1809x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x03, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #1 [ref=8x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , 0, 0 }, // #2 [ref=193x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 16, 0, 0 }, // #3 [ref=122x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 8 , 0, 0 }, // #4 [ref=66x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 4 , 0, 0 }, // #5 [ref=34x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x04, 0 , 0, 0 }, // #6 [ref=270x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 2 , 0, 0 }, // #7 [ref=9x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 0 , 0, 0 }, // #8 [ref=60x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x03, 0 , 0, 0 }, // #9 [ref=1x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #10 [ref=20x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x01, 0 , 0, 0 }, // #11 [ref=13x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 16, 0, 0 }, // #12 [ref=21x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 8 , 0, 0 }, // #13 [ref=20x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #14 [ref=15x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 1 , 0, 0 }, // #15 [ref=5x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 64, 0, 0 }, // #16 [ref=3x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 4 , 0, 0 }, // #17 [ref=4x]
  { InstDB::RWInfoRm::kCategoryNone , 0x00, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #18 [ref=22x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 10, 0, 0 }, // #19 [ref=2x]
  { InstDB::RWInfoRm::kCategoryNone , 0x01, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #20 [ref=5x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 2 , 0, 0 }, // #21 [ref=3x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x06, 0 , 0, 0 }, // #22 [ref=14x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x03, 1 , 0, 0 }, // #23 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x03, 4 , 0, 0 }, // #24 [ref=4x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x03, 8 , 0, 0 }, // #25 [ref=3x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x03, 2 , 0, 0 }, // #26 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 2 , 0, 0 }, // #27 [ref=6x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 4 , 0, 0 }, // #28 [ref=6x]
  { InstDB::RWInfoRm::kCategoryNone , 0x03, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #29 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x03, 16, 0, 0 }, // #30 [ref=6x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 1 , 0, 0 }, // #31 [ref=32x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 8 , 0, 0 }, // #32 [ref=2x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 2 , 0, Features::kSSE4_1 }, // #33 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 2 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #34 [ref=3x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x04, 8 , 0, 0 }, // #35 [ref=34x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x04, 4 , 0, 0 }, // #36 [ref=37x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x00, 32, 0, 0 }, // #37 [ref=4x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 8 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #38 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 4 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #39 [ref=1x]
  { InstDB::RWInfoRm::kCategoryHalf , 0x02, 0 , 0, 0 }, // #40 [ref=14x]
  { InstDB::RWInfoRm::kCategoryHalf , 0x01, 0 , 0, 0 }, // #41 [ref=10x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x04, 0 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #42 [ref=4x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x04, 16, 0, 0 }, // #43 [ref=27x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x02, 64, 0, 0 }, // #44 [ref=6x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 16, 0, 0 }, // #45 [ref=6x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x01, 32, 0, 0 }, // #46 [ref=4x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x0C, 0 , 0, 0 }, // #47 [ref=15x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x0C, 8 , 0, 0 }, // #48 [ref=4x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x0C, 4 , 0, 0 }, // #49 [ref=4x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x04, 32, 0, 0 }, // #50 [ref=6x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x03, 0 , 0, 0 }, // #51 [ref=13x]
  { InstDB::RWInfoRm::kCategoryNone , 0x02, 0 , 0, 0 }, // #52 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x03, 8 , InstDB::RWInfoRm::kFlagAmbiguous, 0 }, // #53 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x04, 1 , 0, 0 }, // #54 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x04, 2 , 0, 0 }, // #55 [ref=1x]
  { InstDB::RWInfoRm::kCategoryQuarter , 0x01, 0 , 0, 0 }, // #56 [ref=6x]
  { InstDB::RWInfoRm::kCategoryEighth , 0x01, 0 , 0, 0 }, // #57 [ref=3x]
  { InstDB::RWInfoRm::kCategoryQuarter , 0x02, 0 , 0, 0 }, // #58 [ref=4x]
  { InstDB::RWInfoRm::kCategoryEighth , 0x02, 0 , 0, 0 }, // #59 [ref=2x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x0C, 16, 0, 0 }, // #60 [ref=1x]
  { InstDB::RWInfoRm::kCategoryFixed , 0x06, 16, 0, 0 }, // #61 [ref=12x]
  { InstDB::RWInfoRm::kCategoryConsistent, 0x02, 0 , 0, Features::kAVX512_BW } // #62 [ref=2x]
};
3933 // ----------------------------------------------------------------------------
3934 // ${InstRWInfoTable:End}
3935
3936 // ============================================================================
3937 // [asmjit::x86::InstDB - Unit]
3938 // ============================================================================
3939
#if defined(ASMJIT_TEST)
UNIT(x86_inst_db) {
  INFO("Checking validity of Inst enums");

  // Instruction option bits that mirror encoding prefixes must stay at their
  // documented bit positions.
  EXPECT(Inst::kOptionRex == 0x40000000u, "REX prefix must be at 0x40000000");
  EXPECT(Inst::kOptionVex3 == 0x00000400u, "VEX3 prefix must be at 0x00000400");
  EXPECT(Inst::kOptionEvex == 0x00001000u, "EVEX prefix must be at 0x00001000");

  // Each `kOptionOpCode?` bit must alias the matching `Opcode::k?` bit so the
  // two value spaces can be OR'ed together to build a REX prefix.
  EXPECT(uint32_t(Inst::kOptionOpCodeB) == uint32_t(Opcode::kB), "Opcode::kB must match Inst::kOptionOpCodeB");
  EXPECT(uint32_t(Inst::kOptionOpCodeX) == uint32_t(Opcode::kX), "Opcode::kX must match Inst::kOptionOpCodeX");
  EXPECT(uint32_t(Inst::kOptionOpCodeR) == uint32_t(Opcode::kR), "Opcode::kR must match Inst::kOptionOpCodeR");
  EXPECT(uint32_t(Inst::kOptionOpCodeW) == uint32_t(Opcode::kW), "Opcode::kW must match Inst::kOptionOpCodeW");

  // Shifting the opcode bits into REX position and adding the fixed 0x40 base
  // must produce the canonical REX.RB / REX.RW byte values.
  uint32_t rexRB = 0x40 | (Opcode::kR >> Opcode::kREX_Shift) | (Opcode::kB >> Opcode::kREX_Shift);
  uint32_t rexRW = 0x40 | (Opcode::kR >> Opcode::kREX_Shift) | (Opcode::kW >> Opcode::kREX_Shift);

  EXPECT(rexRB == 0x45, "Opcode::kR|B must form a valid REX prefix (0x45) if combined with 0x40");
  EXPECT(rexRW == 0x4C, "Opcode::kR|W must form a valid REX prefix (0x4C) if combined with 0x40");
}
#endif
3962
3963 ASMJIT_END_SUB_NAMESPACE
3964
3965 #endif // ASMJIT_BUILD_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86INSTDB_H
7 #define _ASMJIT_X86_X86INSTDB_H
8
9 #include "../x86/x86globals.h"
10
11 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
12
13 //! \addtogroup asmjit_x86
14 //! \{
15
16 //! Instruction database (X86).
17 namespace InstDB {
18
19 // ============================================================================
20 // [asmjit::x86::InstDB::Mode]
21 // ============================================================================
22
//! Describes which mode is supported by an instruction or instruction signature.
//!
//! The values form a bit-mask: `kModeAny` is `kModeX86 | kModeX64`.
enum Mode : uint32_t {
  kModeNone = 0x00u, //!< Invalid (neither mode).
  kModeX86 = 0x01u, //!< X86 mode supported.
  kModeX64 = 0x02u, //!< X64 mode supported.
  kModeAny = 0x03u //!< Both X86 and X64 modes supported.
};
30
31 static constexpr uint32_t modeFromArchId(uint32_t archId) noexcept {
32 return archId == ArchInfo::kIdX86 ? kModeX86 :
33 archId == ArchInfo::kIdX64 ? kModeX64 : kModeNone;
34 }
35
36 // ============================================================================
37 // [asmjit::x86::InstDB::OpFlags]
38 // ============================================================================
39
//! Operand flags (X86).
enum OpFlags : uint32_t {
  kOpNone = 0x00000000u, //!< No flags.

  kOpGpbLo = 0x00000001u, //!< Operand can be low 8-bit GPB register.
  kOpGpbHi = 0x00000002u, //!< Operand can be high 8-bit GPB register.
  kOpGpw = 0x00000004u, //!< Operand can be 16-bit GPW register.
  kOpGpd = 0x00000008u, //!< Operand can be 32-bit GPD register.
  kOpGpq = 0x00000010u, //!< Operand can be 64-bit GPQ register.
  kOpXmm = 0x00000020u, //!< Operand can be 128-bit XMM register.
  kOpYmm = 0x00000040u, //!< Operand can be 256-bit YMM register.
  kOpZmm = 0x00000080u, //!< Operand can be 512-bit ZMM register.
  kOpMm = 0x00000100u, //!< Operand can be 64-bit MM register.
  kOpKReg = 0x00000200u, //!< Operand can be 64-bit K register.
  kOpSReg = 0x00000400u, //!< Operand can be SReg (segment register).
  kOpCReg = 0x00000800u, //!< Operand can be CReg (control register).
  kOpDReg = 0x00001000u, //!< Operand can be DReg (debug register).
  kOpSt = 0x00002000u, //!< Operand can be 80-bit ST register (X87).
  kOpBnd = 0x00004000u, //!< Operand can be 128-bit BND register.
  kOpAllRegs = 0x00007FFFu, //!< Combination of all possible registers.

  kOpI4 = 0x00010000u, //!< Operand can be signed 4-bit immediate.
  kOpU4 = 0x00020000u, //!< Operand can be unsigned 4-bit immediate.
  kOpI8 = 0x00040000u, //!< Operand can be signed 8-bit immediate.
  kOpU8 = 0x00080000u, //!< Operand can be unsigned 8-bit immediate.
  kOpI16 = 0x00100000u, //!< Operand can be signed 16-bit immediate.
  kOpU16 = 0x00200000u, //!< Operand can be unsigned 16-bit immediate.
  kOpI32 = 0x00400000u, //!< Operand can be signed 32-bit immediate.
  kOpU32 = 0x00800000u, //!< Operand can be unsigned 32-bit immediate.
  kOpI64 = 0x01000000u, //!< Operand can be signed 64-bit immediate.
  kOpU64 = 0x02000000u, //!< Operand can be unsigned 64-bit immediate.
  kOpAllImm = 0x03FF0000u, //!< Operand can be any immediate.

  kOpMem = 0x04000000u, //!< Operand can be a scalar memory pointer.
  kOpVm = 0x08000000u, //!< Operand can be a vector memory pointer.

  kOpRel8 = 0x10000000u, //!< Operand can be relative 8-bit displacement.
  kOpRel32 = 0x20000000u, //!< Operand can be relative 32-bit displacement.

  kOpImplicit = 0x80000000u //!< Operand is implicit.
};
81
82 // ============================================================================
83 // [asmjit::x86::InstDB::MemFlags]
84 // ============================================================================
85
//! Memory operand flags (X86).
enum MemFlags : uint32_t {
  // NOTE: An instruction uses either scalar or vector memory operands, never
  // both, so the same bit values are shared between the scalar "M" flags and
  // the vector "Vm" flags below.

  kMemOpAny = 0x0001u, //!< Operand can be any scalar memory pointer.
  kMemOpM8 = 0x0002u, //!< Operand can be an 8-bit memory pointer.
  kMemOpM16 = 0x0004u, //!< Operand can be a 16-bit memory pointer.
  kMemOpM32 = 0x0008u, //!< Operand can be a 32-bit memory pointer.
  kMemOpM48 = 0x0010u, //!< Operand can be a 48-bit memory pointer (FAR pointers only).
  kMemOpM64 = 0x0020u, //!< Operand can be a 64-bit memory pointer.
  kMemOpM80 = 0x0040u, //!< Operand can be an 80-bit memory pointer.
  kMemOpM128 = 0x0080u, //!< Operand can be a 128-bit memory pointer.
  kMemOpM256 = 0x0100u, //!< Operand can be a 256-bit memory pointer.
  kMemOpM512 = 0x0200u, //!< Operand can be a 512-bit memory pointer.
  kMemOpM1024 = 0x0400u, //!< Operand can be a 1024-bit memory pointer.

  kMemOpVm32x = 0x0002u, //!< Operand can be a vm32x (vector) pointer.
  kMemOpVm32y = 0x0004u, //!< Operand can be a vm32y (vector) pointer.
  kMemOpVm32z = 0x0008u, //!< Operand can be a vm32z (vector) pointer.
  kMemOpVm64x = 0x0020u, //!< Operand can be a vm64x (vector) pointer.
  kMemOpVm64y = 0x0040u, //!< Operand can be a vm64y (vector) pointer.
  kMemOpVm64z = 0x0080u, //!< Operand can be a vm64z (vector) pointer.

  kMemOpBaseOnly = 0x0800u, //!< Only memory base is allowed (no index, no offset).
  kMemOpDs = 0x1000u, //!< Implicit memory operand's DS segment.
  kMemOpEs = 0x2000u, //!< Implicit memory operand's ES segment.

  kMemOpMib = 0x4000u //!< Operand must be MIB (base+index) pointer.
};
116
117 // ============================================================================
118 // [asmjit::x86::InstDB::Flags]
119 // ============================================================================
120
//! Instruction flags (X86).
//!
//! Details about instruction encoding, operation, features, and some limitations.
enum Flags : uint32_t {
  kFlagNone = 0x00000000u, //!< No flags.

  // TODO: Deprecated
  // ----------------

  kFlagVolatile = 0x00000040u,
  kFlagPrivileged = 0x00000080u, //!< This is a privileged operation that cannot run in user mode.

  // Instruction Family
  // ------------------
  //
  // Instruction family information.

  kFlagFpu = 0x00000100u, //!< Instruction that accesses FPU registers.
  kFlagMmx = 0x00000200u, //!< Instruction that accesses MMX registers (including 3DNOW and GEODE) and EMMS.
  kFlagVec = 0x00000400u, //!< Instruction that accesses XMM registers (SSE, AVX, AVX512).

  // Prefixes and Encoding Flags
  // ---------------------------
  //
  // These describe optional X86 prefixes that can be used to change the instruction's operation.

  kFlagRep = 0x00001000u, //!< Instruction can be prefixed with the REP (REPE) or REPNE prefix.
  kFlagRepIgnored = 0x00002000u, //!< Instruction ignores REP|REPNE prefixes, but they are accepted.
  kFlagLock = 0x00004000u, //!< Instruction can be prefixed with the LOCK prefix.
  kFlagXAcquire = 0x00008000u, //!< Instruction can be prefixed with the XACQUIRE prefix.
  kFlagXRelease = 0x00010000u, //!< Instruction can be prefixed with the XRELEASE prefix.
  kFlagMib = 0x00020000u, //!< Instruction uses MIB (BNDLDX|BNDSTX) to encode two registers.
  kFlagVsib = 0x00040000u, //!< Instruction uses VSIB instead of legacy SIB.
  kFlagVex = 0x00080000u, //!< Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...).
  kFlagEvex = 0x00100000u, //!< Instruction can be encoded by EVEX (AVX512).

  // FPU Flags
  // ---------
  //
  // Used to tell the encoder which memory operand sizes are encodable. Note
  // that M16 and M80 intentionally share the same bit.

  kFlagFpuM16 = 0x00200000u, //!< FPU instruction can address `word_ptr` (shared bit with M80).
  kFlagFpuM32 = 0x00400000u, //!< FPU instruction can address `dword_ptr`.
  kFlagFpuM64 = 0x00800000u, //!< FPU instruction can address `qword_ptr`.
  kFlagFpuM80 = 0x00200000u, //!< FPU instruction can address `tword_ptr` (shared bit with M16).

  // AVX and AVX512 Flags
  // --------------------
  //
  // If both `kFlagVex` and `kFlagEvex` flags are specified it means that the
  // instructions can be encoded by either VEX or EVEX prefix. In that case
  // AsmJit checks global options and also instruction options to decide
  // whether to emit VEX or EVEX prefix.

  kFlagAvx512_ = 0x00000000u, //!< Internally used in tables, has no meaning.
  kFlagAvx512K = 0x01000000u, //!< Supports masking {k1..k7}.
  kFlagAvx512Z = 0x02000000u, //!< Supports zeroing {z}, must be used together with `kFlagAvx512K`.
  kFlagAvx512ER = 0x04000000u, //!< Supports 'embedded-rounding' {er} with implicit {sae}.
  kFlagAvx512SAE = 0x08000000u, //!< Supports 'suppress-all-exceptions' {sae}.
  kFlagAvx512B32 = 0x10000000u, //!< Supports 32-bit broadcast 'b32'.
  kFlagAvx512B64 = 0x20000000u, //!< Supports 64-bit broadcast 'b64'.
  kFlagAvx512T4X = 0x80000000u, //!< Operates on a vector of consecutive registers (AVX512_4FMAPS and AVX512_4VNNIW).

  // Combinations used by instruction tables to make AVX512 definitions more compact.
  kFlagAvx512KZ = kFlagAvx512K | kFlagAvx512Z,
  kFlagAvx512ER_SAE = kFlagAvx512ER | kFlagAvx512SAE,
  kFlagAvx512KZ_SAE = kFlagAvx512KZ | kFlagAvx512SAE,
  kFlagAvx512KZ_SAE_B32 = kFlagAvx512KZ_SAE | kFlagAvx512B32,
  kFlagAvx512KZ_SAE_B64 = kFlagAvx512KZ_SAE | kFlagAvx512B64,

  kFlagAvx512KZ_ER_SAE = kFlagAvx512KZ | kFlagAvx512ER_SAE,
  kFlagAvx512KZ_ER_SAE_B32 = kFlagAvx512KZ_ER_SAE | kFlagAvx512B32,
  kFlagAvx512KZ_ER_SAE_B64 = kFlagAvx512KZ_ER_SAE | kFlagAvx512B64,

  kFlagAvx512K_B32 = kFlagAvx512K | kFlagAvx512B32,
  kFlagAvx512K_B64 = kFlagAvx512K | kFlagAvx512B64,
  kFlagAvx512KZ_B32 = kFlagAvx512KZ | kFlagAvx512B32,
  kFlagAvx512KZ_B64 = kFlagAvx512KZ | kFlagAvx512B64
};
200
201 // ============================================================================
202 // [asmjit::x86::InstDB::SingleRegCase]
203 // ============================================================================
204
//! Describes how operand access changes when all source operands of an
//! instruction are the same register (e.g. `REG ^ REG` only ever writes).
enum SingleRegCase : uint32_t {
  //! No special handling.
  kSingleRegNone = 0,
  //! Operands become read-only - `REG & REG` and similar.
  kSingleRegRO = 1,
  //! Operands become write-only - `REG ^ REG` and similar.
  kSingleRegWO = 2
};
213
214 // ============================================================================
215 // [asmjit::x86::InstDB::InstSignature / OpSignature]
216 // ============================================================================
217
//! Operand signature (X86).
//!
//! Contains all possible operand combinations, memory size information, and
//! a fixed register id (or `BaseReg::kIdBad` if fixed id isn't required).
struct OpSignature {
  //! Operand flags, see `OpFlags`.
  uint32_t opFlags;
  //! Memory flags, see `MemFlags`.
  uint16_t memFlags;
  //! Extra flags.
  uint8_t extFlags;
  //! Mask of possible register IDs.
  //! NOTE(review): presumably bit `N` corresponds to physical id `N` — confirm
  //! against the instruction validator that consumes this table.
  uint8_t regMask;
};
232
//! Operand signature table; rows are referenced by index from
//! `InstSignature::operands`.
ASMJIT_VARAPI const OpSignature _opSignatureTable[];
234
//! Instruction signature (X86).
//!
//! Contains a sequence of operands' combinations and other metadata that defines
//! a single instruction. This data is used by instruction validator.
struct InstSignature {
  //! Count of operands in `opIndex` (0..6).
  uint8_t opCount : 3;
  //! Architecture modes supported (X86 / X64), see `Mode`.
  uint8_t modes : 2;
  //! Number of implicit operands.
  uint8_t implicit : 3;
  //! Reserved for future use.
  uint8_t reserved;
  //! Indexes to the `_opSignatureTable`, one per operand.
  uint8_t operands[Globals::kMaxOpCount];
};
251
//! Instruction signature table; `CommonInfo` refers to a slice of this table
//! via `_iSignatureIndex` / `_iSignatureCount`.
ASMJIT_VARAPI const InstSignature _instSignatureTable[];
253
254 // ============================================================================
255 // [asmjit::x86::InstDB::CommonInfo]
256 // ============================================================================
257
258 //! Instruction common information (X86)
259 //!
260 //! Aggregated information shared across one or more instruction.
261 struct CommonInfo {
262 //! Instruction flags.
263 uint32_t _flags;
264 //! First `InstSignature` entry in the database.
265 uint32_t _iSignatureIndex : 11;
266 //! Number of relevant `ISignature` entries.
267 uint32_t _iSignatureCount : 5;
268 //! Control type, see `ControlType`.
269 uint32_t _controlType : 3;
270 //! Specifies what happens if all source operands share the same register.
271 uint32_t _singleRegCase : 2;
272 //! Reserved for future use.
273 uint32_t _reserved : 11;
274
275 // --------------------------------------------------------------------------
276 // [Accessors]
277 // --------------------------------------------------------------------------
278
  //! Returns instruction flags, see `InstInfo::Flags`.
  inline uint32_t flags() const noexcept { return _flags; }
  //! Tests whether the instruction has a `flag`, see `InstInfo::Flags`.
  inline bool hasFlag(uint32_t flag) const noexcept { return (_flags & flag) != 0; }

  //! Tests whether the instruction is FPU instruction.
  inline bool isFpu() const noexcept { return hasFlag(kFlagFpu); }
  //! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
  inline bool isMmx() const noexcept { return hasFlag(kFlagMmx); }
  //! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
  inline bool isVec() const noexcept { return hasFlag(kFlagVec); }
  //! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers
  //! (a vector instruction that is neither VEX nor EVEX encodable).
  inline bool isSse() const noexcept { return (flags() & (kFlagVec | kFlagVex | kFlagEvex)) == kFlagVec; }
  //! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
  inline bool isAvx() const noexcept { return isVec() && isVexOrEvex(); }

  //! Tests whether the instruction can be prefixed with LOCK prefix.
  inline bool hasLockPrefix() const noexcept { return hasFlag(kFlagLock); }
  //! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
  inline bool hasRepPrefix() const noexcept { return hasFlag(kFlagRep); }
  //! Tests whether the instruction can be prefixed with XACQUIRE prefix.
  inline bool hasXAcquirePrefix() const noexcept { return hasFlag(kFlagXAcquire); }
  //! Tests whether the instruction can be prefixed with XRELEASE prefix.
  inline bool hasXReleasePrefix() const noexcept { return hasFlag(kFlagXRelease); }

  //! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
  inline bool isRepIgnored() const noexcept { return hasFlag(kFlagRepIgnored); }
  //! Tests whether the instruction uses MIB.
  inline bool isMibOp() const noexcept { return hasFlag(kFlagMib); }
  //! Tests whether the instruction uses VSIB.
  inline bool isVsibOp() const noexcept { return hasFlag(kFlagVsib); }
  //! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
  inline bool isVex() const noexcept { return hasFlag(kFlagVex); }
  //! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
  inline bool isEvex() const noexcept { return hasFlag(kFlagEvex); }
  //! Tests whether the instruction uses either VEX or EVEX encoding (at least one of the two flags is set).
  inline bool isVexOrEvex() const noexcept { return hasFlag(kFlagVex | kFlagEvex); }

  //! Tests whether the instruction supports AVX512 masking {k}.
  inline bool hasAvx512K() const noexcept { return hasFlag(kFlagAvx512K); }
  //! Tests whether the instruction supports AVX512 zeroing {k}{z}.
  inline bool hasAvx512Z() const noexcept { return hasFlag(kFlagAvx512Z); }
  //! Tests whether the instruction supports AVX512 embedded-rounding {er}.
  inline bool hasAvx512ER() const noexcept { return hasFlag(kFlagAvx512ER); }
  //! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
  inline bool hasAvx512SAE() const noexcept { return hasFlag(kFlagAvx512SAE); }
  //! Tests whether the instruction supports AVX512 broadcast (either 32-bit or 64-bit).
  inline bool hasAvx512B() const noexcept { return hasFlag(kFlagAvx512B32 | kFlagAvx512B64); }
  //! Tests whether the instruction supports AVX512 broadcast (32-bit).
  inline bool hasAvx512B32() const noexcept { return hasFlag(kFlagAvx512B32); }
  //! Tests whether the instruction supports AVX512 broadcast (64-bit).
  inline bool hasAvx512B64() const noexcept { return hasFlag(kFlagAvx512B64); }

  //! Returns the start index of this instruction's signatures in `_instSignatureTable`.
  inline uint32_t signatureIndex() const noexcept { return _iSignatureIndex; }
  //! Returns the number of signatures this instruction has in `_instSignatureTable`.
  inline uint32_t signatureCount() const noexcept { return _iSignatureCount; }

  //! Returns a pointer to the first signature of this instruction.
  inline const InstSignature* signatureData() const noexcept { return _instSignatureTable + _iSignatureIndex; }
  //! Returns the end (one past the last) of this instruction's signatures.
  inline const InstSignature* signatureEnd() const noexcept { return _instSignatureTable + _iSignatureIndex + _iSignatureCount; }

  //! Returns the control-flow type of the instruction.
  inline uint32_t controlType() const noexcept { return _controlType; }

  //! Returns the single-reg case of the instruction.
  //! NOTE(review): the enum describing possible values is not visible in this chunk - confirm.
  inline uint32_t singleRegCase() const noexcept { return _singleRegCase; }
342 };
343
344 ASMJIT_VARAPI const CommonInfo _commonInfoTable[];
345
346 // ============================================================================
347 // [asmjit::x86::InstDB::InstInfo]
348 // ============================================================================
349
//! Instruction information (X86).
//!
//! A compact record describing a single instruction. Most queries forward to
//! the shared `CommonInfo` record selected by `_commonInfoIndex`.
struct InstInfo {
  //! Index to `_nameData`.
  uint32_t _nameDataIndex : 14;
  //! Index to `_commonInfoTable`.
  uint32_t _commonInfoIndex : 10;
  //! Index to `InstDB::_commonInfoTableB`.
  uint32_t _commonInfoIndexB : 8;

  //! Instruction encoding, see `InstDB::EncodingId`.
  uint8_t _encoding;
  //! Main opcode value (0..255).
  uint8_t _mainOpcodeValue;
  //! Index to `InstDB::_mainOpcodeTable` that is combined with `_mainOpcodeValue`
  //! to form the final opcode.
  uint8_t _mainOpcodeIndex;
  //! Index to `InstDB::_altOpcodeTable` that contains a full alternative opcode.
  uint8_t _altOpcodeIndex;

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Returns common information, see `CommonInfo`.
  inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; }

  //! Tests whether the instruction has flag `flag`, see `Flags`.
  inline bool hasFlag(uint32_t flag) const noexcept { return commonInfo().hasFlag(flag); }
  //! Returns instruction flags, see `Flags`.
  inline uint32_t flags() const noexcept { return commonInfo().flags(); }

  //! Tests whether the instruction is FPU instruction.
  inline bool isFpu() const noexcept { return commonInfo().isFpu(); }
  //! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
  inline bool isMmx() const noexcept { return commonInfo().isMmx(); }
  //! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
  inline bool isVec() const noexcept { return commonInfo().isVec(); }
  //! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers.
  inline bool isSse() const noexcept { return commonInfo().isSse(); }
  //! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
  inline bool isAvx() const noexcept { return commonInfo().isAvx(); }

  //! Tests whether the instruction can be prefixed with LOCK prefix.
  inline bool hasLockPrefix() const noexcept { return commonInfo().hasLockPrefix(); }
  //! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
  inline bool hasRepPrefix() const noexcept { return commonInfo().hasRepPrefix(); }
  //! Tests whether the instruction can be prefixed with XACQUIRE prefix.
  inline bool hasXAcquirePrefix() const noexcept { return commonInfo().hasXAcquirePrefix(); }
  //! Tests whether the instruction can be prefixed with XRELEASE prefix.
  inline bool hasXReleasePrefix() const noexcept { return commonInfo().hasXReleasePrefix(); }

  //! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
  inline bool isRepIgnored() const noexcept { return commonInfo().isRepIgnored(); }
  //! Tests whether the instruction uses MIB.
  inline bool isMibOp() const noexcept { return hasFlag(kFlagMib); }
  //! Tests whether the instruction uses VSIB.
  inline bool isVsibOp() const noexcept { return hasFlag(kFlagVsib); }
  //! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
  inline bool isVex() const noexcept { return hasFlag(kFlagVex); }
  //! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
  inline bool isEvex() const noexcept { return hasFlag(kFlagEvex); }
  //! Tests whether the instruction uses either VEX or EVEX encoding (at least one of the two flags is set).
  inline bool isVexOrEvex() const noexcept { return hasFlag(kFlagVex | kFlagEvex); }

  //! Tests whether the instruction supports AVX512 masking {k}.
  inline bool hasAvx512K() const noexcept { return hasFlag(kFlagAvx512K); }
  //! Tests whether the instruction supports AVX512 zeroing {k}{z}.
  inline bool hasAvx512Z() const noexcept { return hasFlag(kFlagAvx512Z); }
  //! Tests whether the instruction supports AVX512 embedded-rounding {er}.
  inline bool hasAvx512ER() const noexcept { return hasFlag(kFlagAvx512ER); }
  //! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
  inline bool hasAvx512SAE() const noexcept { return hasFlag(kFlagAvx512SAE); }
  //! Tests whether the instruction supports AVX512 broadcast (either 32-bit or 64-bit).
  inline bool hasAvx512B() const noexcept { return hasFlag(kFlagAvx512B32 | kFlagAvx512B64); }
  //! Tests whether the instruction supports AVX512 broadcast (32-bit).
  inline bool hasAvx512B32() const noexcept { return hasFlag(kFlagAvx512B32); }
  //! Tests whether the instruction supports AVX512 broadcast (64-bit).
  inline bool hasAvx512B64() const noexcept { return hasFlag(kFlagAvx512B64); }

  //! Gets the control-flow type of the instruction.
  inline uint32_t controlType() const noexcept { return commonInfo().controlType(); }
  //! Gets the single-reg case of the instruction, see `CommonInfo::singleRegCase()`.
  inline uint32_t singleRegCase() const noexcept { return commonInfo().singleRegCase(); }

  //! Returns the start index of this instruction's signatures, see `CommonInfo::signatureIndex()`.
  inline uint32_t signatureIndex() const noexcept { return commonInfo().signatureIndex(); }
  //! Returns the number of signatures, see `CommonInfo::signatureCount()`.
  inline uint32_t signatureCount() const noexcept { return commonInfo().signatureCount(); }

  //! Returns a pointer to the first signature of this instruction.
  inline const InstSignature* signatureData() const noexcept { return commonInfo().signatureData(); }
  //! Returns the end (one past the last) of this instruction's signatures.
  inline const InstSignature* signatureEnd() const noexcept { return commonInfo().signatureEnd(); }
};
439
440 ASMJIT_VARAPI const InstInfo _instInfoTable[];
441
//! Returns instruction information for the given `instId`.
//!
//! The `instId` must be a valid instruction id (asserted in debug builds via
//! `Inst::isDefinedId()`); passing an invalid id in release builds reads out
//! of the table's bounds.
inline const InstInfo& infoById(uint32_t instId) noexcept {
  ASMJIT_ASSERT(Inst::isDefinedId(instId));
  return _instInfoTable[instId];
}
446
447 } // {InstDB}
448
449 //! \}
450
451 ASMJIT_END_SUB_NAMESPACE
452
453 #endif // _ASMJIT_X86_X86INSTDB_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86INSTDB_P_H
7 #define _ASMJIT_X86_X86INSTDB_P_H
8
9 #include "../x86/x86instdb.h"
10
11 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
12
13 //! \cond INTERNAL
14 //! \addtogroup asmjit_x86
15 //! \{
16
17 namespace InstDB {
18
19 // ============================================================================
20 // [asmjit::x86::InstDB::Encoding]
21 // ============================================================================
22
//! Instruction encoding (X86).
//!
//! This is a specific identifier that is used by AsmJit to describe the way
//! each instruction is encoded. Some encodings are special only for a single
//! instruction as X86 instruction set contains a lot of legacy encodings, and
//! some encodings describe a group of instructions that share some commonalities,
//! like MMX, SSE, AVX, AVX512 instructions, etc...
enum EncodingId : uint32_t {
  kEncodingNone = 0,                     //!< Never used.
  kEncodingX86Op,                        //!< X86 [OP].
  kEncodingX86Op_O,                      //!< X86 [OP] (opcode and /0-7).
  kEncodingX86Op_O_I8,                   //!< X86 [OP] (opcode and /0-7 + 8-bit immediate).
  kEncodingX86Op_xAddr,                  //!< X86 [OP] (implicit address in the first register operand).
  kEncodingX86Op_xAX,                    //!< X86 [OP] (implicit or explicit '?AX' form).
  kEncodingX86Op_xDX_xAX,                //!< X86 [OP] (implicit or explicit '?DX, ?AX' form).
  kEncodingX86Op_MemZAX,                 //!< X86 [OP] (implicit or explicit '[EAX|RAX]' form).
  kEncodingX86I_xAX,                     //!< X86 [I] (implicit or explicit '?AX' form).
  kEncodingX86M,                         //!< X86 [M] (handles 2|4|8-bytes size).
  kEncodingX86M_NoSize,                  //!< X86 [M] (doesn't handle any size).
  kEncodingX86M_GPB,                     //!< X86 [M] (handles single-byte size).
  kEncodingX86M_GPB_MulDiv,              //!< X86 [M] (like GPB, handles implicit|explicit MUL|DIV|IDIV).
  kEncodingX86M_Only,                    //!< X86 [M] (restricted to memory operand of any size).
  kEncodingX86M_Nop,                     //!< X86 [M] (special case of NOP instruction).
  kEncodingX86R_Native,                  //!< X86 [R] (register must be either 32-bit or 64-bit depending on arch).
  kEncodingX86Rm,                        //!< X86 [RM] (doesn't handle single-byte size).
  kEncodingX86Rm_Raw66H,                 //!< X86 [RM] (used by LZCNT, POPCNT, and TZCNT).
  kEncodingX86Rm_NoSize,                 //!< X86 [RM] (doesn't add REX.W prefix if 64-bit reg is used).
  kEncodingX86Mr,                        //!< X86 [MR] (doesn't handle single-byte size).
  kEncodingX86Mr_NoSize,                 //!< X86 [MR] (doesn't handle any size).
  kEncodingX86Arith,                     //!< X86 adc, add, and, cmp, or, sbb, sub, xor.
  kEncodingX86Bswap,                     //!< X86 bswap.
  kEncodingX86Bt,                        //!< X86 bt, btc, btr, bts.
  kEncodingX86Call,                      //!< X86 call.
  kEncodingX86Cmpxchg,                   //!< X86 [MR] cmpxchg.
  kEncodingX86Cmpxchg8b_16b,             //!< X86 [MR] cmpxchg8b, cmpxchg16b.
  kEncodingX86Crc,                       //!< X86 crc32.
  kEncodingX86Enter,                     //!< X86 enter.
  kEncodingX86Imul,                      //!< X86 imul.
  kEncodingX86In,                        //!< X86 in.
  kEncodingX86Ins,                       //!< X86 ins[b|w|d].
  kEncodingX86IncDec,                    //!< X86 inc, dec.
  kEncodingX86Int,                       //!< X86 int (interrupt).
  kEncodingX86Jcc,                       //!< X86 jcc.
  kEncodingX86JecxzLoop,                 //!< X86 jcxz, jecxz, jrcxz, loop, loope, loopne.
  kEncodingX86Jmp,                       //!< X86 jmp.
  kEncodingX86JmpRel,                    //!< X86 xbegin.
  kEncodingX86Lea,                       //!< X86 lea.
  kEncodingX86Mov,                       //!< X86 mov (all possible cases).
  kEncodingX86MovsxMovzx,                //!< X86 movsx, movzx.
  kEncodingX86MovntiMovdiri,             //!< X86 movnti/movdiri.
  kEncodingX86EnqcmdMovdir64b,           //!< X86 enqcmd/enqcmds/movdir64b.
  kEncodingX86Out,                       //!< X86 out.
  kEncodingX86Outs,                      //!< X86 outs[b|w|d].
  kEncodingX86Push,                      //!< X86 push.
  kEncodingX86Pop,                       //!< X86 pop.
  kEncodingX86Ret,                       //!< X86 ret.
  kEncodingX86Rot,                       //!< X86 rcl, rcr, rol, ror, sal, sar, shl, shr.
  kEncodingX86Set,                       //!< X86 setcc.
  kEncodingX86ShldShrd,                  //!< X86 shld, shrd.
  kEncodingX86StrRm,                     //!< X86 lods.
  kEncodingX86StrMr,                     //!< X86 scas, stos.
  kEncodingX86StrMm,                     //!< X86 cmps, movs.
  kEncodingX86Test,                      //!< X86 test.
  kEncodingX86Xadd,                      //!< X86 xadd.
  kEncodingX86Xchg,                      //!< X86 xchg.
  kEncodingX86Fence,                     //!< X86 lfence, mfence, sfence.
  kEncodingX86Bndmov,                    //!< X86 [RM|MR] (used by BNDMOV).
  kEncodingFpuOp,                        //!< FPU [OP].
  kEncodingFpuArith,                     //!< FPU fadd, fdiv, fdivr, fmul, fsub, fsubr.
  kEncodingFpuCom,                       //!< FPU fcom, fcomp.
  kEncodingFpuFldFst,                    //!< FPU fld, fst, fstp.
  kEncodingFpuM,                         //!< FPU fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fist, fistp, fisttp, fisub, fisubr.
  kEncodingFpuR,                         //!< FPU fcmov, fcomi, fcomip, ffree, fucom, fucomi, fucomip, fucomp, fxch.
  kEncodingFpuRDef,                      //!< FPU faddp, fdivp, fdivrp, fmulp, fsubp, fsubrp.
  kEncodingFpuStsw,                      //!< FPU fnstsw, fstsw.
  kEncodingExtRm,                        //!< EXT [RM].
  kEncodingExtRm_XMM0,                   //!< EXT [RM<XMM0>].
  kEncodingExtRm_ZDI,                    //!< EXT [RM<ZDI>].
  kEncodingExtRm_P,                      //!< EXT [RM] (propagates 66H if the instruction uses XMM register).
  kEncodingExtRm_Wx,                     //!< EXT [RM] (propagates REX.W if GPQ is used).
  kEncodingExtRmRi,                      //!< EXT [RM|RI].
  kEncodingExtRmRi_P,                    //!< EXT [RM|RI] (propagates 66H if the instruction uses XMM register).
  kEncodingExtRmi,                       //!< EXT [RMI].
  kEncodingExtRmi_P,                     //!< EXT [RMI] (propagates 66H if the instruction uses XMM register).
  kEncodingExtPextrw,                    //!< EXT pextrw.
  kEncodingExtExtract,                   //!< EXT pextrb, pextrd, pextrq, extractps.
  kEncodingExtMov,                       //!< EXT mov?? - #1:[MM|XMM, MM|XMM|Mem] #2:[MM|XMM|Mem, MM|XMM].
  kEncodingExtMovbe,                     //!< EXT movbe.
  kEncodingExtMovd,                      //!< EXT movd.
  kEncodingExtMovq,                      //!< EXT movq.
  kEncodingExtExtrq,                     //!< EXT extrq (SSE4A).
  kEncodingExtInsertq,                   //!< EXT insertq (SSE4A).
  kEncodingExt3dNow,                     //!< EXT [RMI] (3DNOW specific).
  kEncodingVexOp,                        //!< VEX [OP].
  kEncodingVexKmov,                      //!< VEX [RM|MR] (used by kmov[b|w|d|q]).
  kEncodingVexR_Wx,                      //!< VEX|EVEX [R] (propagates VEX.W if GPQ used).
  kEncodingVexM,                         //!< VEX|EVEX [M].
  kEncodingVexM_VM,                      //!< VEX|EVEX [M] (propagates VEX|EVEX.L, VSIB support).
  kEncodingVexMr_Lx,                     //!< VEX|EVEX [MR] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexMr_VM,                     //!< VEX|EVEX [MR] (propagates VEX|EVEX.L, VSIB support).
  kEncodingVexMri,                       //!< VEX|EVEX [MRI].
  kEncodingVexMri_Lx,                    //!< VEX|EVEX [MRI] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRm,                        //!< VEX|EVEX [RM].
  kEncodingVexRm_ZDI,                    //!< VEX|EVEX [RM<ZDI>].
  kEncodingVexRm_Wx,                     //!< VEX|EVEX [RM] (propagates VEX|EVEX.W if GPQ used).
  kEncodingVexRm_Lx,                     //!< VEX|EVEX [RM] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRm_Lx_Bcst,                //!< VEX|EVEX [RM] (can handle broadcast r32/r64).
  kEncodingVexRm_VM,                     //!< VEX|EVEX [RM] (propagates VEX|EVEX.L, VSIB support).
  kEncodingVexRm_T1_4X,                  //!< EVEX [RM] (used by NN instructions that use RM-T1_4X encoding).
  kEncodingVexRmi,                       //!< VEX|EVEX [RMI].
  kEncodingVexRmi_Wx,                    //!< VEX|EVEX [RMI] (propagates VEX|EVEX.W if GPQ used).
  kEncodingVexRmi_Lx,                    //!< VEX|EVEX [RMI] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRvm,                       //!< VEX|EVEX [RVM].
  kEncodingVexRvm_Wx,                    //!< VEX|EVEX [RVM] (propagates VEX|EVEX.W if GPQ used).
  kEncodingVexRvm_ZDX_Wx,                //!< VEX|EVEX [RVM<ZDX>] (propagates VEX|EVEX.W if GPQ used).
  kEncodingVexRvm_Lx,                    //!< VEX|EVEX [RVM] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRvmr,                      //!< VEX|EVEX [RVMR].
  kEncodingVexRvmr_Lx,                   //!< VEX|EVEX [RVMR] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRvmi,                      //!< VEX|EVEX [RVMI].
  kEncodingVexRvmi_Lx,                   //!< VEX|EVEX [RVMI] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRmv,                       //!< VEX|EVEX [RMV].
  kEncodingVexRmv_Wx,                    //!< VEX|EVEX [RMV] (propagates VEX|EVEX.W if GPQ used).
  kEncodingVexRmv_VM,                    //!< VEX|EVEX [RMV] (propagates VEX|EVEX.L, VSIB support).
  kEncodingVexRmvRm_VM,                  //!< VEX|EVEX [RMV|RM] (propagates VEX|EVEX.L, VSIB support).
  kEncodingVexRmvi,                      //!< VEX|EVEX [RMVI].
  kEncodingVexRmMr,                      //!< VEX|EVEX [RM|MR].
  kEncodingVexRmMr_Lx,                   //!< VEX|EVEX [RM|MR] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRvmRmv,                    //!< VEX|EVEX [RVM|RMV].
  kEncodingVexRvmRmi,                    //!< VEX|EVEX [RVM|RMI].
  kEncodingVexRvmRmi_Lx,                 //!< VEX|EVEX [RVM|RMI] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRvmRmvRmi,                 //!< VEX|EVEX [RVM|RMV|RMI].
  kEncodingVexRvmMr,                     //!< VEX|EVEX [RVM|MR].
  kEncodingVexRvmMvr,                    //!< VEX|EVEX [RVM|MVR].
  kEncodingVexRvmMvr_Lx,                 //!< VEX|EVEX [RVM|MVR] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRvmVmi,                    //!< VEX|EVEX [RVM|VMI].
  kEncodingVexRvmVmi_Lx,                 //!< VEX|EVEX [RVM|VMI] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexVm,                        //!< VEX|EVEX [VM].
  kEncodingVexVm_Wx,                     //!< VEX|EVEX [VM] (propagates VEX|EVEX.W if GPQ used).
  kEncodingVexVmi,                       //!< VEX|EVEX [VMI].
  kEncodingVexVmi_Lx,                    //!< VEX|EVEX [VMI] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexVmi4_Wx,                   //!< VEX|EVEX [VMI] (propagates VEX|EVEX.W if GPQ used, DWORD Immediate).
  kEncodingVexEvexVmi_Lx,                //!< VEX|EVEX [VMI] (special, used by vpsrldq and vpslldq)
  kEncodingVexRvrmRvmr,                  //!< VEX|EVEX [RVRM|RVMR].
  kEncodingVexRvrmRvmr_Lx,               //!< VEX|EVEX [RVRM|RVMR] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexRvrmiRvmri_Lx,             //!< VEX|EVEX [RVRMI|RVMRI] (propagates VEX|EVEX.L if YMM used).
  kEncodingVexMovdMovq,                  //!< VEX|EVEX vmovd, vmovq.
  kEncodingVexMovssMovsd,                //!< VEX|EVEX vmovss, vmovsd.
  kEncodingFma4,                         //!< FMA4 [R, R, R/M, R/M].
  kEncodingFma4_Lx,                      //!< FMA4 [R, R, R/M, R/M] (propagates AVX.L if YMM used).
  kEncodingCount                         //!< Count of instruction encodings.
};
174
175 // ============================================================================
176 // [asmjit::x86::InstDB - CommonInfoTableB]
177 // ============================================================================
178
//! CPU extensions required to execute instruction.
struct CommonInfoTableB {
  //! Features vector.
  uint8_t _features[6];
  //! Index to `_rwFlagsTable`.
  uint8_t _rwFlagsIndex;
  //! Reserved for future use.
  uint8_t _reserved;

  //! Returns a pointer to the first element of the packed features vector.
  inline const uint8_t* featuresBegin() const noexcept { return _features; }
  //! Returns the end (one past the last element) of the packed features vector.
  inline const uint8_t* featuresEnd() const noexcept { return _features + ASMJIT_ARRAY_SIZE(_features); }
};
191
192 // ============================================================================
193 // [asmjit::x86::InstDB - InstNameIndex]
194 // ============================================================================
195
196 // ${NameLimits:Begin}
197 // ------------------- Automatically generated, do not edit -------------------
198 enum : uint32_t { kMaxNameSize = 17 };
199 // ----------------------------------------------------------------------------
200 // ${NameLimits:End}
201
//! A range of indexes used to narrow instruction-name lookups.
//!
//! NOTE(review): `instNameIndex` has 26 entries, presumably one per initial
//! letter 'a'..'z', and `[start, end)` presumably bounds instruction ids whose
//! names start with that letter - confirm at the lookup site.
struct InstNameIndex {
  uint16_t start; //!< First index in the range.
  uint16_t end;   //!< End of the range.
};
206
207 // ============================================================================
208 // [asmjit::x86::InstDB - RWInfo]
209 // ============================================================================
210
//! Read/write information of a single instruction.
struct RWInfo {
  //! Category that selects how the read/write information is computed.
  //!
  //! NOTE(review): non-generic categories are special cases named after the
  //! instruction (group) they handle; exact semantics live at the use-site.
  enum Category : uint8_t {
    kCategoryGeneric,
    kCategoryMov,
    kCategoryImul,
    kCategoryMovh64,
    kCategoryVmaskmov,
    kCategoryVmovddup,
    kCategoryVmovmskpd,
    kCategoryVmovmskps,
    kCategoryVmov1_2,
    kCategoryVmov1_4,
    kCategoryVmov1_8,
    kCategoryVmov2_1,
    kCategoryVmov4_1,
    kCategoryVmov8_1
  };

  //! Category, see `Category`.
  uint8_t category;
  //! R/M information - presumably an index into `rwInfoRm` - confirm.
  uint8_t rmInfo;
  //! Per-operand information - presumably indexes into `rwInfoOp` - confirm.
  uint8_t opInfoIndex[6];
};
233
//! Read/write information of a single operand.
struct RWInfoOp {
  //! Bytes read by the operand - presumably one bit per byte - confirm.
  uint64_t rByteMask;
  //! Bytes written by the operand - presumably one bit per byte - confirm.
  uint64_t wByteMask;
  //! Physical register id the operand is bound to, if any - confirm sentinel.
  uint8_t physId;
  //! Reserved for future use (padding).
  uint8_t reserved[3];
  //! Operand flags.
  uint32_t flags;
};
241
//! R/M information.
//!
//! This data is used to replace register operand by a memory operand reliably.
struct RWInfoRm {
  //! How the size of the memory operand is determined.
  //!
  //! NOTE(review): semantics inferred from names (Half/Quarter/Eighth
  //! presumably relate the memory operand size to the register size) -
  //! confirm at the use-site.
  enum Category : uint8_t {
    kCategoryNone = 0,
    kCategoryFixed,
    kCategoryConsistent,
    kCategoryHalf,
    kCategoryQuarter,
    kCategoryEighth
  };

  enum Flags : uint8_t {
    kFlagAmbiguous = 0x01
  };

  //! Category, see `Category`.
  uint8_t category;
  //! Operands that can use a memory operand - presumably one bit per operand index - confirm.
  uint8_t rmOpsMask;
  //! Memory operand size - presumably used when `category == kCategoryFixed` - confirm.
  uint8_t fixedSize;
  //! Flags, see `Flags`.
  uint8_t flags;
  //! CPU feature related to the register-to-memory replacement - confirm semantics.
  uint8_t rmFeature;
};
265
//! CPU/FPU status flags read and written by an instruction.
struct RWFlagsInfoTable {
  //! CPU/FPU flags read.
  uint32_t readFlags;
  //! CPU/FPU flags written or undefined.
  uint32_t writeFlags;
};
272
273 extern const uint8_t rwInfoIndex[Inst::_kIdCount * 2];
274 extern const RWInfo rwInfo[];
275 extern const RWInfoOp rwInfoOp[];
276 extern const RWInfoRm rwInfoRm[];
277 extern const RWFlagsInfoTable _rwFlagsInfoTable[];
278
279 // ============================================================================
280 // [asmjit::x86::InstDB::Tables]
281 // ============================================================================
282
283 extern const uint32_t _mainOpcodeTable[];
284 extern const uint32_t _altOpcodeTable[];
285
286 #ifndef ASMJIT_NO_TEXT
287 extern const char _nameData[];
288 extern const InstNameIndex instNameIndex[26];
289 #endif // !ASMJIT_NO_TEXT
290
291 extern const CommonInfoTableB _commonInfoTableB[];
292
293 } // {InstDB}
294
295 //! \}
296 //! \endcond
297
298 ASMJIT_END_SUB_NAMESPACE
299
300 #endif // _ASMJIT_X86_X86INSTDB_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifdef ASMJIT_BUILD_X86
8
9 #include "../core/logging.h"
10 #include "../core/string.h"
11 #include "../core/support.h"
12 #include "../core/type.h"
13 #include "../x86/x86internal_p.h"
14
15 // Can be used for debugging...
16 // #define ASMJIT_DUMP_ARGS_ASSIGNMENT
17
18 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
19
20 // ============================================================================
21 // [asmjit::X86Internal - Helpers]
22 // ============================================================================
23
24 static ASMJIT_INLINE uint32_t x86GetXmmMovInst(const FuncFrame& frame) {
25 bool avx = frame.isAvxEnabled();
26 bool aligned = frame.hasAlignedVecSR();
27
28 return aligned ? (avx ? Inst::kIdVmovaps : Inst::kIdMovaps)
29 : (avx ? Inst::kIdVmovups : Inst::kIdMovups);
30 }
31
32 static ASMJIT_INLINE uint32_t x86VecTypeIdToRegType(uint32_t typeId) noexcept {
33 return typeId <= Type::_kIdVec128End ? Reg::kTypeXmm :
34 typeId <= Type::_kIdVec256End ? Reg::kTypeYmm : Reg::kTypeZmm;
35 }
36
37 //! Converts `size` to a 'kmov?' instructio.
38 static inline uint32_t x86KmovFromSize(uint32_t size) noexcept {
39 switch (size) {
40 case 1: return Inst::kIdKmovb;
41 case 2: return Inst::kIdKmovw;
42 case 4: return Inst::kIdKmovd;
43 case 8: return Inst::kIdKmovq;
44 default: return Inst::kIdNone;
45 }
46 }
47
48 // ============================================================================
49 // [asmjit::X86Internal - FuncDetail]
50 // ============================================================================
51
//! Initializes `FuncDetail` - assigns a register or a stack offset to the
//! return value(s) and to every argument according to the calling convention
//! already initialized in `func.callConv()`.
ASMJIT_FAVOR_SIZE Error X86Internal::initFuncDetail(FuncDetail& func, const FuncSignature& sign, uint32_t gpSize) noexcept {
  ASMJIT_UNUSED(sign);

  const CallConv& cc = func.callConv();
  uint32_t archId = cc.archId();
  // Arguments passed on stack start right after the callee's spill zone.
  uint32_t stackOffset = cc._spillZoneSize;

  uint32_t i;
  uint32_t argCount = func.argCount();

  // Assign register(s) to the return value first.
  if (func.retCount() != 0) {
    uint32_t typeId = func._rets[0].typeId();
    switch (typeId) {
      case Type::kIdI64:
      case Type::kIdU64: {
        if (archId == ArchInfo::kIdX86) {
          // Convert a 64-bit return value to two 32-bit return values.
          // NOTE(review): `typeId -= 2` relies on Type id ordering mapping
          // kIdI64/kIdU64 to their 32-bit counterparts - confirm.
          func._retCount = 2;
          typeId -= 2;

          // 64-bit value is returned in EDX:EAX on X86.
          func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, typeId);
          func._rets[1].initReg(Reg::kTypeGpd, Gp::kIdDx, typeId);
          break;
        }
        else {
          // 64-bit value is returned in RAX on X64.
          func._rets[0].initReg(Reg::kTypeGpq, Gp::kIdAx, typeId);
        }
        break;
      }

      case Type::kIdI8:
      case Type::kIdI16:
      case Type::kIdI32: {
        // Sub-32-bit signed integers are widened and returned in EAX.
        func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, Type::kIdI32);
        break;
      }

      case Type::kIdU8:
      case Type::kIdU16:
      case Type::kIdU32: {
        // Sub-32-bit unsigned integers are widened and returned in EAX.
        func._rets[0].initReg(Reg::kTypeGpd, Gp::kIdAx, Type::kIdU32);
        break;
      }

      case Type::kIdF32:
      case Type::kIdF64: {
        // Floats are returned on the x87 stack (ST0) on X86 and in XMM0 on X64.
        uint32_t regType = (archId == ArchInfo::kIdX86) ? Reg::kTypeSt : Reg::kTypeXmm;
        func._rets[0].initReg(regType, 0, typeId);
        break;
      }

      case Type::kIdF80: {
        // 80-bit floats are always returned by FP0.
        func._rets[0].initReg(Reg::kTypeSt, 0, typeId);
        break;
      }

      case Type::kIdMmx32:
      case Type::kIdMmx64: {
        // MM registers are returned through XMM or GPQ (Win64).
        uint32_t regType = Reg::kTypeMm;
        if (archId != ArchInfo::kIdX86)
          regType = cc.strategy() == CallConv::kStrategyDefault ? Reg::kTypeXmm : Reg::kTypeGpq;

        func._rets[0].initReg(regType, 0, typeId);
        break;
      }

      default: {
        // Everything else is a vector type returned in XMM|YMM|ZMM.
        func._rets[0].initReg(x86VecTypeIdToRegType(typeId), 0, typeId);
        break;
      }
    }
  }

  // Default strategy - each register group (GP / VEC) consumes its passing
  // order independently; out-of-register arguments go to stack.
  if (cc.strategy() == CallConv::kStrategyDefault) {
    uint32_t gpzPos = 0;
    uint32_t vecPos = 0;

    for (i = 0; i < argCount; i++) {
      FuncValue& arg = func._args[i];
      uint32_t typeId = arg.typeId();

      if (Type::isInt(typeId)) {
        uint32_t regId = gpzPos < CallConv::kMaxRegArgsPerGroup ? cc._passedOrder[Reg::kGroupGp].id[gpzPos] : uint8_t(BaseReg::kIdBad);
        if (regId != BaseReg::kIdBad) {
          uint32_t regType = (typeId <= Type::kIdU32) ? Reg::kTypeGpd : Reg::kTypeGpq;
          arg.assignRegData(regType, regId);
          func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId));
          gpzPos++;
        }
        else {
          // Out of GP argument registers - pass on stack; each slot occupies
          // at least `gpSize` bytes.
          uint32_t size = Support::max<uint32_t>(Type::sizeOf(typeId), gpSize);
          arg.assignStackOffset(int32_t(stackOffset));
          stackOffset += size;
        }
        continue;
      }

      if (Type::isFloat(typeId) || Type::isVec(typeId)) {
        uint32_t regId = vecPos < CallConv::kMaxRegArgsPerGroup ? cc._passedOrder[Reg::kGroupVec].id[vecPos] : uint8_t(BaseReg::kIdBad);

        // If this is a float, but `floatByVec` is false, we have to pass by stack.
        if (Type::isFloat(typeId) && !cc.hasFlag(CallConv::kFlagPassFloatsByVec))
          regId = BaseReg::kIdBad;

        if (regId != BaseReg::kIdBad) {
          arg.initTypeId(typeId);
          arg.assignRegData(x86VecTypeIdToRegType(typeId), regId);
          func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId));
          vecPos++;
        }
        else {
          uint32_t size = Type::sizeOf(typeId);
          arg.assignStackOffset(int32_t(stackOffset));
          stackOffset += size;
        }
        continue;
      }
    }
  }

  // Win64 strategy - the argument's index selects the register in both
  // groups (a register is "burned" even when the other group uses it).
  if (cc.strategy() == CallConv::kStrategyWin64) {
    for (i = 0; i < argCount; i++) {
      FuncValue& arg = func._args[i];

      uint32_t typeId = arg.typeId();
      uint32_t size = Type::sizeOf(typeId);

      if (Type::isInt(typeId) || Type::isMmx(typeId)) {
        uint32_t regId = i < CallConv::kMaxRegArgsPerGroup ? cc._passedOrder[Reg::kGroupGp].id[i] : uint8_t(BaseReg::kIdBad);
        if (regId != BaseReg::kIdBad) {
          uint32_t regType = (size <= 4 && !Type::isMmx(typeId)) ? Reg::kTypeGpd : Reg::kTypeGpq;
          arg.assignRegData(regType, regId);
          func.addUsedRegs(Reg::kGroupGp, Support::bitMask(regId));
        }
        else {
          // Every stack slot is `gpSize` bytes on Win64.
          arg.assignStackOffset(int32_t(stackOffset));
          stackOffset += gpSize;
        }
        continue;
      }

      if (Type::isFloat(typeId) || Type::isVec(typeId)) {
        uint32_t regId = BaseReg::kIdBad;
        if (i < CallConv::kMaxRegArgsPerGroup)
          regId = cc._passedOrder[Reg::kGroupVec].id[i];

        // Non-float vector types are passed in a register only when the
        // convention is __vectorcall.
        if (regId != BaseReg::kIdBad && (Type::isFloat(typeId) || cc.hasFlag(CallConv::kFlagVectorCall))) {
          uint32_t regType = x86VecTypeIdToRegType(typeId);
          arg.assignRegData(regType, regId);
          func.addUsedRegs(Reg::kGroupVec, Support::bitMask(regId));
        }
        else {
          arg.assignStackOffset(int32_t(stackOffset));
          stackOffset += 8; // Always 8 bytes (float/double).
        }
        continue;
      }
    }
  }

  // Total stack size consumed by arguments.
  func._argStackSize = stackOffset;
  return kErrorOk;
}
218
219 // ============================================================================
220 // [asmjit::X86FuncArgsContext]
221 // ============================================================================
222
223 static RegInfo x86GetRegForMemToMemMove(uint32_t archId, uint32_t dstTypeId, uint32_t srcTypeId) noexcept {
224 uint32_t dstSize = Type::sizeOf(dstTypeId);
225 uint32_t srcSize = Type::sizeOf(srcTypeId);
226 uint32_t maxSize = Support::max<uint32_t>(dstSize, srcSize);
227 uint32_t gpSize = archId == ArchInfo::kIdX86 ? 4 : 8;
228
229 uint32_t signature = 0;
230 if (maxSize <= gpSize || (Type::isInt(dstTypeId) && Type::isInt(srcTypeId)))
231 signature = maxSize <= 4 ? Gpd::kSignature : Gpq::kSignature;
232 else if (maxSize <= 16)
233 signature = Xmm::kSignature;
234 else if (maxSize <= 32)
235 signature = Ymm::kSignature;
236 else if (maxSize <= 64)
237 signature = Zmm::kSignature;
238
239 return RegInfo { signature };
240 }
241
242 // Used by both `argsToFuncFrame()` and `emitArgsAssignment()`.
243 class X86FuncArgsContext {
244 public:
245 enum VarId : uint32_t {
246 kVarIdNone = 0xFF
247 };
248
  //! Contains information about a single argument or SA register that may need shuffling.
  struct Var {
    //! Initializes the variable with its current (`cur_`) and target (`out_`) locations.
    inline void init(const FuncValue& cur_, const FuncValue& out_) noexcept {
      cur = cur_;
      out = out_;
    }

    //! Reset the value to its unassigned state.
    inline void reset() noexcept {
      cur.reset();
      out.reset();
    }

    //! Tests whether the variable has already been moved to its target location.
    inline bool isDone() const noexcept { return cur.isDone(); }
    //! Marks the variable as moved to its target location.
    inline void markDone() noexcept { cur.addFlags(FuncValue::kFlagIsDone); }

    //! Current location of the value.
    FuncValue cur;
    //! Target location the value must be moved to.
    FuncValue out;
  };
268
  //! Per register-group state used by the argument shuffler.
  struct WorkData {
    //! Clears all register masks and the physical-id to variable-id mapping.
    inline void reset() noexcept {
      _archRegs = 0;
      _workRegs = 0;
      _usedRegs = 0;
      _assignedRegs = 0;
      _dstRegs = 0;
      _dstShuf = 0;
      _numSwaps = 0;
      _numStackArgs = 0;
      memset(_reserved, 0, sizeof(_reserved));
      // kVarIdNone is 0xFF, so a byte-wise memset marks every slot unassigned.
      memset(_physToVarId, kVarIdNone, 32);
    }

    //! Tests whether the physical register `regId` has a variable assigned.
    inline bool isAssigned(uint32_t regId) const noexcept {
      ASMJIT_ASSERT(regId < 32);
      return Support::bitTest(_assignedRegs, regId);
    }

    //! Assigns variable `varId` to the (currently unassigned) register `regId`.
    inline void assign(uint32_t varId, uint32_t regId) noexcept {
      ASMJIT_ASSERT(!isAssigned(regId));
      ASMJIT_ASSERT(_physToVarId[regId] == kVarIdNone);

      _physToVarId[regId] = uint8_t(varId);
      // XOR acts as OR here - the bit is guaranteed clear (asserted above).
      _assignedRegs ^= Support::bitMask(regId);
    }

    //! Moves the assignment of `varId` from register `oldId` to register `newId`.
    inline void reassign(uint32_t varId, uint32_t newId, uint32_t oldId) noexcept {
      ASMJIT_ASSERT( isAssigned(oldId));
      ASMJIT_ASSERT(!isAssigned(newId));
      ASMJIT_ASSERT(_physToVarId[oldId] == varId);
      ASMJIT_ASSERT(_physToVarId[newId] == kVarIdNone);

      _physToVarId[oldId] = uint8_t(kVarIdNone);
      _physToVarId[newId] = uint8_t(varId);
      // Clears the old bit and sets the new one (both transitions asserted above).
      _assignedRegs ^= Support::bitMask(newId) ^ Support::bitMask(oldId);
    }

    //! Swaps the variables assigned to registers `aRegId` and `bRegId`.
    //! The assigned-set doesn't change - both registers stay assigned.
    inline void swap(uint32_t aVarId, uint32_t aRegId, uint32_t bVarId, uint32_t bRegId) noexcept {
      ASMJIT_ASSERT(isAssigned(aRegId));
      ASMJIT_ASSERT(isAssigned(bRegId));
      ASMJIT_ASSERT(_physToVarId[aRegId] == aVarId);
      ASMJIT_ASSERT(_physToVarId[bRegId] == bVarId);

      _physToVarId[aRegId] = uint8_t(bVarId);
      _physToVarId[bRegId] = uint8_t(aVarId);
    }

    //! Removes the assignment of variable `varId` from register `regId`.
    inline void unassign(uint32_t varId, uint32_t regId) noexcept {
      ASMJIT_UNUSED(varId);
      ASMJIT_ASSERT(isAssigned(regId));
      ASMJIT_ASSERT(_physToVarId[regId] == varId);

      _physToVarId[regId] = uint8_t(kVarIdNone);
      // XOR acts as AND-NOT here - the bit is guaranteed set (asserted above).
      _assignedRegs ^= Support::bitMask(regId);
    }

    //! Returns all allocable registers provided by the architecture.
    inline uint32_t archRegs() const noexcept { return _archRegs; }
    //! Returns all registers that can be used by the shuffler.
    inline uint32_t workRegs() const noexcept { return _workRegs; }
    //! Returns all registers used by the shuffler.
    inline uint32_t usedRegs() const noexcept { return _usedRegs; }
    //! Returns registers that currently have a variable assigned.
    inline uint32_t assignedRegs() const noexcept { return _assignedRegs; }
    //! Returns destination registers assigned to arguments or SA.
    inline uint32_t dstRegs() const noexcept { return _dstRegs; }
    //! Returns work registers that are currently unassigned.
    inline uint32_t availableRegs() const noexcept { return _workRegs & ~_assignedRegs; }

    uint32_t _archRegs; //!< All allocable registers provided by the architecture.
    uint32_t _workRegs; //!< All registers that can be used by the shuffler.
    uint32_t _usedRegs; //!< Registers used by the shuffler (all).
    uint32_t _assignedRegs; //!< Assigned registers.
    uint32_t _dstRegs; //!< Destination registers assigned to arguments or SA.
    uint32_t _dstShuf; //!< Destination registers that require shuffling.
    uint8_t _numSwaps; //!< Number of register swaps.
    uint8_t _numStackArgs; //!< Number of stack loads.
    uint8_t _reserved[6]; //!< Reserved (only used as padding).
    uint8_t _physToVarId[32]; //!< Physical ID to variable ID mapping.
  };
344
  uint8_t _archId;                      //!< Architecture id (see `ArchInfo::kId...`).
  bool _hasStackSrc;                    //!< Has arguments passed via stack (SRC).
  bool _hasPreservedFP;                 //!< Has preserved frame-pointer (FP).
  uint8_t _stackDstMask;                //!< Has arguments assigned to stack (DST).
  uint8_t _regSwapsMask;                //!< Register swap groups (bit-mask).
  uint8_t _saVarId;                     //!< Variable id of the stack-arguments base register (SA), `kVarIdNone` if none.
  uint32_t _varCount;                   //!< Number of valid entries in `_vars`.
  WorkData _workData[BaseReg::kGroupVirt]; //!< Per-group work data.
  Var _vars[kFuncArgCountLoHi + 1];     //!< All tracked arguments (+1 slot reserved for the SA variable).

  X86FuncArgsContext() noexcept;

  inline uint32_t archId() const noexcept { return _archId; }
  inline uint32_t varCount() const noexcept { return _varCount; }

  //! Returns the variable at `varId` (must be less than `varCount()`).
  inline Var& var(uint32_t varId) noexcept { return _vars[varId]; }
  inline const Var& var(uint32_t varId) const noexcept { return _vars[varId]; }
  //! Returns the index of `var` within `_vars` (must point into `_vars`).
  inline uint32_t indexOf(const Var* var) const noexcept { return uint32_t((size_t)(var - _vars)); }

  //! Builds `_vars` and per-group `WorkData` from `frame` and `args`.
  Error initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept;
  //! Reserves one scratch register per group that needs one.
  Error markScratchRegs(FuncFrame& frame) noexcept;
  //! Marks all destination/shuffled registers as dirty in `frame`.
  Error markDstRegsDirty(FuncFrame& frame) noexcept;
  //! Propagates the stack-arguments base register into `frame`.
  Error markStackArgsReg(FuncFrame& frame) noexcept;
368 };
369
370 X86FuncArgsContext::X86FuncArgsContext() noexcept {
371 _archId = ArchInfo::kIdNone;
372 _varCount = 0;
373 _hasStackSrc = false;
374 _hasPreservedFP = false;
375 _stackDstMask = 0;
376 _regSwapsMask = 0;
377 _saVarId = kVarIdNone;
378
379 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++)
380 _workData[group].reset();
381 }
382
// Initializes the context from the function frame and the requested argument
// assignments - builds the `_vars[]` array, records which registers are
// assigned (SRC) and destined (DST) per group, creates an extra variable for
// the stack-arguments base register (SA) when required, and finally detects
// register swaps. Returns an error if an assignment is invalid or overlaps.
ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::initWorkData(const FuncFrame& frame, const FuncArgsAssignment& args) noexcept {
  // The code has to be updated if this changes.
  ASMJIT_ASSERT(BaseReg::kGroupVirt == 4);

  uint32_t i;
  const FuncDetail& func = *args.funcDetail();

  // Initialize ArchType.
  uint32_t archId = func.callConv().archId();
  uint32_t archRegCount = (archId == ArchInfo::kIdX86) ? 8 : 16;

  _archId = uint8_t(archId);

  // Initialize `_archRegs` - ESP|RSP is never allocable.
  _workData[Reg::kGroupGp  ]._archRegs = Support::lsbMask<uint32_t>(archRegCount) & ~Support::bitMask(Gp::kIdSp);
  _workData[Reg::kGroupVec ]._archRegs = Support::lsbMask<uint32_t>(archRegCount);
  _workData[Reg::kGroupMm  ]._archRegs = Support::lsbMask<uint32_t>(8);
  _workData[Reg::kGroupKReg]._archRegs = Support::lsbMask<uint32_t>(8);

  // EBP|RBP cannot be allocated when it's used as a frame-pointer.
  if (frame.hasPreservedFP())
    _workData[Reg::kGroupGp]._archRegs &= ~Support::bitMask(Gp::kIdBp);

  // Extract information from all function arguments/assignments and build Var[] array.
  uint32_t varId = 0;
  for (i = 0; i < kFuncArgCountLoHi; i++) {
    const FuncValue& dst_ = args.arg(i);
    if (!dst_.isAssigned()) continue;

    // An assigned destination requires an assigned source (calling convention).
    const FuncValue& src_ = func.arg(i);
    if (ASMJIT_UNLIKELY(!src_.isAssigned()))
      return DebugUtils::errored(kErrorInvalidState);

    Var& var = _vars[varId];
    var.init(src_, dst_);

    FuncValue& src = var.cur;
    FuncValue& dst = var.out;

    uint32_t dstGroup = 0xFFFFFFFFu;
    uint32_t dstId = BaseReg::kIdBad;
    WorkData* dstWd = nullptr;

    if (dst.isReg()) {
      uint32_t dstType = dst.regType();
      if (ASMJIT_UNLIKELY(dstType >= Reg::kTypeCount))
        return DebugUtils::errored(kErrorInvalidRegType);

      // Copy TypeId from source if the destination doesn't have it. The RA
      // used by BaseCompiler would never leave TypeId undefined, but users
      // of FuncAPI can just assign phys regs without specifying the type.
      if (!dst.hasTypeId())
        dst.setTypeId(Reg::typeIdOf(dst.regType()));

      dstGroup = Reg::groupOf(dstType);
      if (ASMJIT_UNLIKELY(dstGroup >= BaseReg::kGroupVirt))
        return DebugUtils::errored(kErrorInvalidRegGroup);

      // The destination must be a valid, allocable, and not yet taken phys reg.
      dstWd = &_workData[dstGroup];
      dstId = dst.regId();
      if (ASMJIT_UNLIKELY(dstId >= 32 || !Support::bitTest(dstWd->archRegs(), dstId)))
        return DebugUtils::errored(kErrorInvalidPhysId);

      if (ASMJIT_UNLIKELY(Support::bitTest(dstWd->dstRegs(), dstId)))
        return DebugUtils::errored(kErrorOverlappedRegs);

      dstWd->_dstRegs  |= Support::bitMask(dstId);
      dstWd->_dstShuf  |= Support::bitMask(dstId);
      dstWd->_usedRegs |= Support::bitMask(dstId);
    }
    else {
      if (!dst.hasTypeId())
        dst.setTypeId(src.typeId());

      // Stack destination - remember which register group is needed to
      // perform the mem-to-mem move through a temporary register.
      RegInfo regInfo = x86GetRegForMemToMemMove(archId, dst.typeId(), src.typeId());
      if (ASMJIT_UNLIKELY(!regInfo.isValid()))
        return DebugUtils::errored(kErrorInvalidState);
      _stackDstMask = uint8_t(_stackDstMask | Support::bitMask(regInfo.group()));
    }

    if (src.isReg()) {
      uint32_t srcId = src.regId();
      uint32_t srcGroup = Reg::groupOf(src.regType());

      if (dstGroup == srcGroup) {
        dstWd->assign(varId, srcId);

        // The best case, register is allocated where it is expected to be.
        if (dstId == srcId)
          var.markDone();
      }
      else {
        if (ASMJIT_UNLIKELY(srcGroup >= BaseReg::kGroupVirt))
          return DebugUtils::errored(kErrorInvalidState);

        WorkData& srcData = _workData[srcGroup];
        srcData.assign(varId, srcId);
      }
    }
    else {
      if (dstWd)
        dstWd->_numStackArgs++;
      _hasStackSrc = true;
    }

    varId++;
  }

  // Initialize WorkData::workRegs - registers the shuffler may touch: dirty
  // or non-preserved allocable registers plus all dst/assigned registers.
  for (i = 0; i < BaseReg::kGroupVirt; i++)
    _workData[i]._workRegs = (_workData[i].archRegs() & (frame.dirtyRegs(i) | ~frame.preservedRegs(i))) | _workData[i].dstRegs() | _workData[i].assignedRegs();

  // Create a variable that represents `SARegId` if necessary.
  bool saRegRequired = _hasStackSrc && frame.hasDynamicAlignment() && !frame.hasPreservedFP();

  WorkData& gpRegs = _workData[BaseReg::kGroupGp];
  uint32_t saCurRegId = frame.saRegId();
  uint32_t saOutRegId = args.saRegId();

  if (saCurRegId != BaseReg::kIdBad) {
    // Check if the provided `SARegId` doesn't collide with input registers.
    if (ASMJIT_UNLIKELY(gpRegs.isAssigned(saCurRegId)))
      return DebugUtils::errored(kErrorOverlappedRegs);
  }

  if (saOutRegId != BaseReg::kIdBad) {
    // Check if the provided `SARegId` doesn't collide with argument assignments.
    if (ASMJIT_UNLIKELY(Support::bitTest(gpRegs.dstRegs(), saOutRegId)))
      return DebugUtils::errored(kErrorOverlappedRegs);
    saRegRequired = true;
  }

  if (saRegRequired) {
    uint32_t ptrTypeId = (archId == ArchInfo::kIdX86) ? Type::kIdU32 : Type::kIdU64;
    uint32_t ptrRegType = (archId == ArchInfo::kIdX86) ? BaseReg::kTypeGp32 : BaseReg::kTypeGp64;

    _saVarId = uint8_t(varId);
    _hasPreservedFP = frame.hasPreservedFP();

    Var& var = _vars[varId];
    var.reset();

    // Pick a register for SA if the frame didn't provide one: prefer the
    // requested output register, then any available work register, then any
    // other allocable register as a last resort.
    if (saCurRegId == BaseReg::kIdBad) {
      if (saOutRegId != BaseReg::kIdBad && !gpRegs.isAssigned(saOutRegId)) {
        saCurRegId = saOutRegId;
      }
      else {
        uint32_t availableRegs = gpRegs.availableRegs();
        if (!availableRegs)
          availableRegs = gpRegs.archRegs() & ~gpRegs.workRegs();

        if (ASMJIT_UNLIKELY(!availableRegs))
          return DebugUtils::errored(kErrorNoMorePhysRegs);

        saCurRegId = Support::ctz(availableRegs);
      }
    }

    var.cur.initReg(ptrRegType, saCurRegId, ptrTypeId);
    gpRegs.assign(varId, saCurRegId);
    gpRegs._workRegs |= Support::bitMask(saCurRegId);

    if (saOutRegId != BaseReg::kIdBad) {
      var.out.initReg(ptrRegType, saOutRegId, ptrTypeId);
      gpRegs._dstRegs |= Support::bitMask(saOutRegId);
      gpRegs._workRegs |= Support::bitMask(saOutRegId);
    }
    else {
      var.markDone();
    }

    varId++;
  }

  _varCount = varId;

  // Detect register swaps - a pair of variables where each one's destination
  // is the other's current register within the same group.
  for (varId = 0; varId < _varCount; varId++) {
    Var& var = _vars[varId];
    if (var.cur.isReg() && var.out.isReg()) {
      uint32_t srcId = var.cur.regId();
      uint32_t dstId = var.out.regId();

      uint32_t group = Reg::groupOf(var.cur.regType());
      if (group != Reg::groupOf(var.out.regType()))
        continue;

      WorkData& wd = _workData[group];
      if (wd.isAssigned(dstId)) {
        Var& other = _vars[wd._physToVarId[dstId]];
        if (Reg::groupOf(other.out.regType()) == group && other.out.regId() == srcId) {
          wd._numSwaps++;
          _regSwapsMask = uint8_t(_regSwapsMask | Support::bitMask(group));
        }
      }
    }
  }

  return kErrorOk;
}
582
583 ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markDstRegsDirty(FuncFrame& frame) noexcept {
584 for (uint32_t i = 0; i < BaseReg::kGroupVirt; i++) {
585 WorkData& wd = _workData[i];
586 uint32_t regs = wd.usedRegs() | wd._dstShuf;
587
588 wd._workRegs |= regs;
589 frame.addDirtyRegs(i, regs);
590 }
591
592 return kErrorOk;
593 }
594
595 ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markScratchRegs(FuncFrame& frame) noexcept {
596 uint32_t groupMask = 0;
597
598 // Handle stack to stack moves.
599 groupMask |= _stackDstMask;
600
601 // Handle register swaps.
602 groupMask |= _regSwapsMask & ~Support::bitMask(BaseReg::kGroupGp);
603
604 if (!groupMask)
605 return kErrorOk;
606
607 // selects one dirty register per affected group that can be used as a scratch register.
608 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
609 if (Support::bitTest(groupMask, group)) {
610 WorkData& wd = _workData[group];
611
612 // Initially, pick some clobbered or dirty register.
613 uint32_t workRegs = wd.workRegs();
614 uint32_t regs = workRegs & ~(wd.usedRegs() | wd._dstShuf);
615
616 // If that didn't work out pick some register which is not in 'used'.
617 if (!regs) regs = workRegs & ~wd.usedRegs();
618
619 // If that didn't work out pick any other register that is allocable.
620 // This last resort case will, however, result in marking one more
621 // register dirty.
622 if (!regs) regs = wd.archRegs() & ~workRegs;
623
624 // If that didn't work out we will have to use XORs instead of MOVs.
625 if (!regs) continue;
626
627 uint32_t regMask = Support::blsi(regs);
628 wd._workRegs |= regMask;
629 frame.addDirtyRegs(group, regMask);
630 }
631 }
632
633 return kErrorOk;
634 }
635
636 ASMJIT_FAVOR_SIZE Error X86FuncArgsContext::markStackArgsReg(FuncFrame& frame) noexcept {
637 // TODO: Validate, improve...
638 if (_saVarId != kVarIdNone) {
639 const Var& var = _vars[_saVarId];
640 frame.setSARegId(var.cur.regId());
641 }
642 else if (frame.hasPreservedFP()) {
643 // Always EBP|RBP if the frame-pointer isn't omitted.
644 frame.setSARegId(Gp::kIdBp);
645 }
646
647 return kErrorOk;
648 }
649
650 // ============================================================================
651 // [asmjit::X86Internal - FrameLayout]
652 // ============================================================================
653
654 ASMJIT_FAVOR_SIZE Error X86Internal::initFuncFrame(FuncFrame& frame, const FuncDetail& func) noexcept {
655 uint32_t archId = func.callConv().archId();
656
657 // Initializing FuncFrame means making a copy of some properties of `func`.
658 // Properties like `_localStackSize` will be set by the user before the frame
659 // is finalized.
660 frame.reset();
661
662 frame._archId = uint8_t(archId);
663 frame._spRegId = Gp::kIdSp;
664 frame._saRegId = Gp::kIdBad;
665
666 uint32_t naturalStackAlignment = func.callConv().naturalStackAlignment();
667 uint32_t minDynamicAlignment = Support::max<uint32_t>(naturalStackAlignment, 16);
668
669 if (minDynamicAlignment == naturalStackAlignment)
670 minDynamicAlignment <<= 1;
671
672 frame._naturalStackAlignment = uint8_t(naturalStackAlignment);
673 frame._minDynamicAlignment = uint8_t(minDynamicAlignment);
674 frame._redZoneSize = uint8_t(func.redZoneSize());
675 frame._spillZoneSize = uint8_t(func.spillZoneSize());
676 frame._finalStackAlignment = uint8_t(frame._naturalStackAlignment);
677
678 if (func.hasFlag(CallConv::kFlagCalleePopsStack)) {
679 frame._calleeStackCleanup = uint16_t(func.argStackSize());
680 }
681
682 // Initial masks of dirty and preserved registers.
683 for (uint32_t group = 0; group < BaseReg::kGroupVirt; group++) {
684 frame._dirtyRegs[group] = func.usedRegs(group);
685 frame._preservedRegs[group] = func.preservedRegs(group);
686 }
687
688 // Exclude ESP/RSP - this register is never included in saved GP regs.
689 frame._preservedRegs[BaseReg::kGroupGp] &= ~Support::bitMask(Gp::kIdSp);
690
691 return kErrorOk;
692 }
693
// Finalizes `frame` - lays out all frame areas relative to SP after prolog
// (call stack, local stack, non-GP save area, dynamic-alignment slot, GP save
// area) and computes the final stack adjustment and SA offsets. All registers
// and sizes must have been accumulated in `frame` before this is called.
ASMJIT_FAVOR_SIZE Error X86Internal::finalizeFuncFrame(FuncFrame& frame) noexcept {
  uint32_t gpSize = frame.archId() == ArchInfo::kIdX86 ? 4 : 8;

  // The final stack alignment must be updated accordingly to call and local stack alignments.
  uint32_t stackAlignment = frame._finalStackAlignment;
  ASMJIT_ASSERT(stackAlignment == Support::max(frame._naturalStackAlignment,
                                               frame._callStackAlignment,
                                               frame._localStackAlignment));

  // Size of a single slot in the non-GP save area. TODO: Must be configurable.
  uint32_t vecSize = 16;

  bool hasFP = frame.hasPreservedFP();
  bool hasDA = frame.hasDynamicAlignment();

  // Include EBP|RBP if the function preserves the frame-pointer.
  if (hasFP)
    frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(Gp::kIdBp);

  // These two are identical if the function doesn't align its stack dynamically.
  uint32_t saRegId = frame.saRegId();
  if (saRegId == BaseReg::kIdBad)
    saRegId = Gp::kIdSp;

  // Fix stack arguments base-register from ESP|RSP to EBP|RBP in case it was
  // not picked before and the function performs dynamic stack alignment.
  if (hasDA && saRegId == Gp::kIdSp)
    saRegId = Gp::kIdBp;

  // Mark as dirty any register but ESP|RSP if used as SA pointer.
  if (saRegId != Gp::kIdSp)
    frame._dirtyRegs[Reg::kGroupGp] |= Support::bitMask(saRegId);

  frame._spRegId = uint8_t(Gp::kIdSp);
  frame._saRegId = uint8_t(saRegId);

  // Setup stack size used to save preserved registers. GP registers take
  // `gpSize` each; vector slots take `vecSize`; MM and K registers take 8.
  frame._gpSaveSize    = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupGp )) * gpSize);
  frame._nonGpSaveSize = uint16_t(Support::popcnt(frame.savedRegs(Reg::kGroupVec )) * vecSize +
                                  Support::popcnt(frame.savedRegs(Reg::kGroupMm  )) * 8 +
                                  Support::popcnt(frame.savedRegs(Reg::kGroupKReg)) * 8);

  uint32_t v = 0;                             // The beginning of the stack frame relative to SP after prolog.
  v += frame.callStackSize();                 // Count 'callStackSize'      <- This is used to call functions.
  v  = Support::alignUp(v, stackAlignment);   // Align to function's stack alignment.

  frame._localStackOffset = v;                // Store 'localStackOffset'   <- Function's local stack starts here.
  v += frame.localStackSize();                // Count 'localStackSize'     <- Function's local stack ends here.

  // If the function's stack must be aligned, calculate the alignment necessary
  // to store vector registers, and set `FuncFrame::kAttrAlignedVecSR` to inform
  // PEI that it can use instructions that perform aligned stores/loads.
  if (stackAlignment >= vecSize && frame._nonGpSaveSize) {
    frame.addAttributes(FuncFrame::kAttrAlignedVecSR);
    v = Support::alignUp(v, vecSize);         // Align '_nonGpSaveOffset'.
  }

  frame._nonGpSaveOffset = v;                 // Store '_nonGpSaveOffset'   <- Non-GP Save/Restore starts here.
  v += frame._nonGpSaveSize;                  // Count '_nonGpSaveSize'     <- Non-GP Save/Restore ends here.

  // Calculate if dynamic alignment (DA) slot (stored as offset relative to SP) is required and its offset.
  if (hasDA && !hasFP) {
    frame._daOffset = v;                      // Store 'daOffset'           <- DA pointer would be stored here.
    v += gpSize;                              // Count 'daOffset'.
  }
  else {
    frame._daOffset = FuncFrame::kTagInvalidOffset;
  }

  // The return address should be stored after GP save/restore regs. It has
  // the same size as `gpSize` (basically the native register/pointer size).
  // We don't adjust it now as `v` now contains the exact size that the
  // function requires to adjust (call frame + stack frame, vec stack size).
  // The stack (if we consider this size) is misaligned now, as it's always
  // aligned before the function call - when `call()` is executed it pushes
  // the current EIP|RIP onto the stack, and misaligns it by 12 or 8 bytes
  // (depending on the architecture). So count number of bytes needed to align
  // it up to the function's CallFrame (the beginning).
  if (v || frame.hasFuncCalls())
    v += Support::alignUpDiff(v + frame.gpSaveSize() + gpSize, stackAlignment);

  frame._gpSaveOffset = v;                    // Store 'gpSaveOffset'       <- Function's GP Save/Restore starts here.
  frame._stackAdjustment = v;                 // Store 'stackAdjustment'    <- SA used by 'add zsp, SA' and 'sub zsp, SA'.

  v += frame._gpSaveSize;                     // Count 'gpSaveSize'         <- Function's GP Save/Restore ends here.
  v += gpSize;                                // Count 'ReturnAddress'      <- As CALL pushes onto stack.

  // If the function performs dynamic stack alignment then the stack-adjustment must be aligned.
  if (hasDA)
    frame._stackAdjustment = Support::alignUp(frame._stackAdjustment, stackAlignment);

  uint32_t saInvOff = FuncFrame::kTagInvalidOffset;
  uint32_t saTmpOff = gpSize + frame._gpSaveSize;  // Return address + all saved GP regs.

  // Calculate where the function arguments start relative to SP.
  frame._saOffsetFromSP = hasDA ? saInvOff : v;

  // Calculate where the function arguments start relative to FP or user-provided register.
  frame._saOffsetFromSA = hasFP ? gpSize * 2  // Return address + frame pointer.
                                : saTmpOff;   // Return address + all saved GP regs.

  return kErrorOk;
}
797
798 // ============================================================================
799 // [asmjit::X86Internal - ArgsToFrameInfo]
800 // ============================================================================
801
802 ASMJIT_FAVOR_SIZE Error X86Internal::argsToFuncFrame(const FuncArgsAssignment& args, FuncFrame& frame) noexcept {
803 X86FuncArgsContext ctx;
804 ASMJIT_PROPAGATE(ctx.initWorkData(frame, args));
805 ASMJIT_PROPAGATE(ctx.markDstRegsDirty(frame));
806 ASMJIT_PROPAGATE(ctx.markScratchRegs(frame));
807 ASMJIT_PROPAGATE(ctx.markStackArgsReg(frame));
808 return kErrorOk;
809 }
810
811 // ============================================================================
812 // [asmjit::X86Internal - Emit Helpers]
813 // ============================================================================
814
// Emits a single move of a homogeneous value of `typeId` between `dst_` and
// `src_` (reg<->reg, reg<->mem). Selects a concrete move instruction - 'mov',
// 'movzx', 'movd'/'movq', 'kmov?', or an SSE/AVX move - based on the type and
// on which operands are memory. Returns kErrorInvalidState when no suitable
// instruction exists for the combination.
ASMJIT_FAVOR_SIZE Error X86Internal::emitRegMove(Emitter* emitter,
  const Operand_& dst_,
  const Operand_& src_, uint32_t typeId, bool avxEnabled, const char* comment) {

  // Invalid or abstract TypeIds are not allowed.
  ASMJIT_ASSERT(Type::isValid(typeId) && !Type::isAbstract(typeId));

  Operand dst(dst_);
  Operand src(src_);

  uint32_t instId = Inst::kIdNone;
  uint32_t memFlags = 0;
  uint32_t overrideMemSize = 0;

  enum MemFlags : uint32_t {
    kDstMem = 0x1,
    kSrcMem = 0x2
  };

  // Detect memory operands and patch them to have the same size as the register.
  // BaseCompiler always sets memory size of allocs and spills, so it shouldn't
  // be really necessary, however, after this function was separated from Compiler
  // it's better to make sure that the size is always specified, as we can use
  // 'movzx' and 'movsx' that rely on it.
  if (dst.isMem()) { memFlags |= kDstMem; dst.as<Mem>().setSize(src.size()); }
  if (src.isMem()) { memFlags |= kSrcMem; src.as<Mem>().setSize(dst.size()); }

  switch (typeId) {
    case Type::kIdI8:
    case Type::kIdU8:
    case Type::kIdI16:
    case Type::kIdU16:
      // Special case - 'movzx' load.
      if (memFlags & kSrcMem) {
        instId = Inst::kIdMovzx;
        dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
      }
      else if (!memFlags) {
        // Change both destination and source registers to GPD (safer, no dependencies).
        dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
        src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
      }
      ASMJIT_FALLTHROUGH;

    case Type::kIdI32:
    case Type::kIdU32:
    case Type::kIdI64:
    case Type::kIdU64:
      instId = Inst::kIdMov;
      break;

    // 32-bit MMX uses 'movd' only when memory is involved; a reg-to-reg MMX
    // move falls through below and becomes 'movq'.
    case Type::kIdMmx32:
      instId = Inst::kIdMovd;
      if (memFlags) break;
      ASMJIT_FALLTHROUGH;

    case Type::kIdMmx64 : instId = Inst::kIdMovq ; break;
    case Type::kIdMask8 : instId = Inst::kIdKmovb; break;
    case Type::kIdMask16: instId = Inst::kIdKmovw; break;
    case Type::kIdMask32: instId = Inst::kIdKmovd; break;
    case Type::kIdMask64: instId = Inst::kIdKmovq; break;

    default: {
      // Vector types - pick a scalar move for 32/64-bit elements touching
      // memory, otherwise a full-width (aligned) vector move.
      uint32_t elementTypeId = Type::baseOf(typeId);
      if (Type::isVec32(typeId) && memFlags) {
        overrideMemSize = 4;
        if (elementTypeId == Type::kIdF32)
          instId = avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss;
        else
          instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
        break;
      }

      if (Type::isVec64(typeId) && memFlags) {
        overrideMemSize = 8;
        if (elementTypeId == Type::kIdF64)
          instId = avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd;
        else
          instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
        break;
      }

      // Full-width moves - 512-bit vectors require the EVEX-only
      // 'vmovdqa32/64' forms, smaller vectors can use legacy/VEX forms.
      if (elementTypeId == Type::kIdF32)
        instId = avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
      else if (elementTypeId == Type::kIdF64)
        instId = avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd;
      else if (typeId <= Type::_kIdVec256End)
        instId = avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa;
      else if (elementTypeId <= Type::kIdU32)
        instId = Inst::kIdVmovdqa32;
      else
        instId = Inst::kIdVmovdqa64;
      break;
    }
  }

  if (!instId)
    return DebugUtils::errored(kErrorInvalidState);

  if (overrideMemSize) {
    if (dst.isMem()) dst.as<Mem>().setSize(overrideMemSize);
    if (src.isMem()) src.as<Mem>().setSize(overrideMemSize);
  }

  emitter->setInlineComment(comment);
  return emitter->emit(instId, dst, src);
}
922
923 ASMJIT_FAVOR_SIZE Error X86Internal::emitArgMove(Emitter* emitter,
924 const Reg& dst_, uint32_t dstTypeId,
925 const Operand_& src_, uint32_t srcTypeId, bool avxEnabled, const char* comment) {
926
927 // Deduce optional `dstTypeId`, which may be `Type::kIdVoid` in some cases.
928 if (!dstTypeId) dstTypeId = opData.archRegs.regTypeToTypeId[dst_.type()];
929
930 // Invalid or abstract TypeIds are not allowed.
931 ASMJIT_ASSERT(Type::isValid(dstTypeId) && !Type::isAbstract(dstTypeId));
932 ASMJIT_ASSERT(Type::isValid(srcTypeId) && !Type::isAbstract(srcTypeId));
933
934 Reg dst(dst_);
935 Operand src(src_);
936
937 uint32_t dstSize = Type::sizeOf(dstTypeId);
938 uint32_t srcSize = Type::sizeOf(srcTypeId);
939
940 uint32_t instId = Inst::kIdNone;
941
942 // Not a real loop, just 'break' is nicer than 'goto'.
943 for (;;) {
944 if (Type::isInt(dstTypeId)) {
945 if (Type::isInt(srcTypeId)) {
946 instId = Inst::kIdMovsx;
947 uint32_t typeOp = (dstTypeId << 8) | srcTypeId;
948
949 // Sign extend by using 'movsx'.
950 if (typeOp == ((Type::kIdI16 << 8) | Type::kIdI8 ) ||
951 typeOp == ((Type::kIdI32 << 8) | Type::kIdI8 ) ||
952 typeOp == ((Type::kIdI32 << 8) | Type::kIdI16) ||
953 typeOp == ((Type::kIdI64 << 8) | Type::kIdI8 ) ||
954 typeOp == ((Type::kIdI64 << 8) | Type::kIdI16)) break;
955
956 // Sign extend by using 'movsxd'.
957 instId = Inst::kIdMovsxd;
958 if (typeOp == ((Type::kIdI64 << 8) | Type::kIdI32)) break;
959 }
960
961 if (Type::isInt(srcTypeId) || src_.isMem()) {
962 // Zero extend by using 'movzx' or 'mov'.
963 if (dstSize <= 4 && srcSize < 4) {
964 instId = Inst::kIdMovzx;
965 dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
966 }
967 else {
968 // We should have caught all possibilities where `srcSize` is less
969 // than 4, so we don't have to worry about 'movzx' anymore. Minimum
970 // size is enough to determine if we want 32-bit or 64-bit move.
971 instId = Inst::kIdMov;
972 srcSize = Support::min(srcSize, dstSize);
973
974 dst.setSignature(srcSize == 4 ? Reg::signatureOfT<Reg::kTypeGpd>()
975 : Reg::signatureOfT<Reg::kTypeGpq>());
976 if (src.isReg()) src.setSignature(dst.signature());
977 }
978 break;
979 }
980
981 // NOTE: The previous branch caught all memory sources, from here it's
982 // always register to register conversion, so catch the remaining cases.
983 srcSize = Support::min(srcSize, dstSize);
984
985 if (Type::isMmx(srcTypeId)) {
986 // 64-bit move.
987 instId = Inst::kIdMovq;
988 if (srcSize == 8) break;
989
990 // 32-bit move.
991 instId = Inst::kIdMovd;
992 dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
993 break;
994 }
995
996 if (Type::isMask(srcTypeId)) {
997 instId = x86KmovFromSize(srcSize);
998 dst.setSignature(srcSize <= 4 ? Reg::signatureOfT<Reg::kTypeGpd>()
999 : Reg::signatureOfT<Reg::kTypeGpq>());
1000 break;
1001 }
1002
1003 if (Type::isVec(srcTypeId)) {
1004 // 64-bit move.
1005 instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
1006 if (srcSize == 8) break;
1007
1008 // 32-bit move.
1009 instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
1010 dst.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
1011 break;
1012 }
1013 }
1014
1015 if (Type::isMmx(dstTypeId)) {
1016 instId = Inst::kIdMovq;
1017 srcSize = Support::min(srcSize, dstSize);
1018
1019 if (Type::isInt(srcTypeId) || src.isMem()) {
1020 // 64-bit move.
1021 if (srcSize == 8) break;
1022
1023 // 32-bit move.
1024 instId = Inst::kIdMovd;
1025 if (src.isReg()) src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
1026 break;
1027 }
1028
1029 if (Type::isMmx(srcTypeId)) break;
1030
1031 // This will hurt if `avxEnabled`.
1032 instId = Inst::kIdMovdq2q;
1033 if (Type::isVec(srcTypeId)) break;
1034 }
1035
1036 if (Type::isMask(dstTypeId)) {
1037 srcSize = Support::min(srcSize, dstSize);
1038
1039 if (Type::isInt(srcTypeId) || Type::isMask(srcTypeId) || src.isMem()) {
1040 instId = x86KmovFromSize(srcSize);
1041 if (Reg::isGp(src) && srcSize <= 4) src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
1042 break;
1043 }
1044 }
1045
1046 if (Type::isVec(dstTypeId)) {
1047 // By default set destination to XMM, will be set to YMM|ZMM if needed.
1048 dst.setSignature(Reg::signatureOfT<Reg::kTypeXmm>());
1049
1050 // This will hurt if `avxEnabled`.
1051 if (Reg::isMm(src)) {
1052 // 64-bit move.
1053 instId = Inst::kIdMovq2dq;
1054 break;
1055 }
1056
1057 // Argument conversion.
1058 uint32_t dstElement = Type::baseOf(dstTypeId);
1059 uint32_t srcElement = Type::baseOf(srcTypeId);
1060
1061 if (dstElement == Type::kIdF32 && srcElement == Type::kIdF64) {
1062 srcSize = Support::min(dstSize * 2, srcSize);
1063 dstSize = srcSize / 2;
1064
1065 if (srcSize <= 8)
1066 instId = avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd;
1067 else
1068 instId = avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd;
1069
1070 if (dstSize == 32)
1071 dst.setSignature(Reg::signatureOfT<Reg::kTypeYmm>());
1072 if (src.isReg())
1073 src.setSignature(Reg::signatureOfVecBySize(srcSize));
1074 break;
1075 }
1076
1077 if (dstElement == Type::kIdF64 && srcElement == Type::kIdF32) {
1078 srcSize = Support::min(dstSize, srcSize * 2) / 2;
1079 dstSize = srcSize * 2;
1080
1081 if (srcSize <= 4)
1082 instId = avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss;
1083 else
1084 instId = avxEnabled ? Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps;
1085
1086 dst.setSignature(Reg::signatureOfVecBySize(dstSize));
1087 if (src.isReg() && srcSize >= 32)
1088 src.setSignature(Reg::signatureOfT<Reg::kTypeYmm>());
1089 break;
1090 }
1091
1092 srcSize = Support::min(srcSize, dstSize);
1093 if (Reg::isGp(src) || src.isMem()) {
1094 // 32-bit move.
1095 if (srcSize <= 4) {
1096 instId = avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
1097 if (src.isReg()) src.setSignature(Reg::signatureOfT<Reg::kTypeGpd>());
1098 break;
1099 }
1100
1101 // 64-bit move.
1102 if (srcSize == 8) {
1103 instId = avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
1104 break;
1105 }
1106 }
1107
1108 if (Reg::isVec(src) || src.isMem()) {
1109 instId = avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
1110 uint32_t sign = Reg::signatureOfVecBySize(srcSize);
1111
1112 dst.setSignature(sign);
1113 if (src.isReg()) src.setSignature(sign);
1114 break;
1115 }
1116 }
1117
1118 return DebugUtils::errored(kErrorInvalidState);
1119 }
1120
1121 if (src.isMem())
1122 src.as<Mem>().setSize(srcSize);
1123
1124 emitter->setInlineComment(comment);
1125 return emitter->emit(instId, dst, src);
1126 }
1127
1128 // ============================================================================
1129 // [asmjit::X86Internal - Emit Prolog & Epilog]
1130 // ============================================================================
1131
1132 static ASMJIT_INLINE void X86Internal_setupSaveRestoreInfo(uint32_t group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept {
1133 switch (group) {
1134 case Reg::kGroupVec:
1135 xReg = xmm(0);
1136 xInst = x86GetXmmMovInst(frame);
1137 xSize = xReg.size();
1138 break;
1139 case Reg::kGroupMm:
1140 xReg = mm(0);
1141 xInst = Inst::kIdMovq;
1142 xSize = xReg.size();
1143 break;
1144 case Reg::kGroupKReg:
1145 xReg = k(0);
1146 xInst = Inst::kIdKmovq;
1147 xSize = xReg.size();
1148 break;
1149 }
1150 }
1151
// Emits a function prolog described by `frame`. The emitted sequence is:
//   1. 'push zbp' + 'mov zbp, zsp'     (only with preserved frame-pointer),
//   2. 'push' of every saved GP register,
//   3. 'mov saReg, zsp|zbp'            (stack-arguments base, if needed),
//   4. 'and zsp, -Alignment'           (dynamic stack alignment),
//   5. 'sub zsp, StackAdjustment',
//   6. 'mov [zsp + daOffset], saReg'   (spill of the pre-alignment SP),
//   7. stores of all saved non-GP registers (vec/mm/k).
ASMJIT_FAVOR_SIZE Error X86Internal::emitProlog(Emitter* emitter, const FuncFrame& frame) {
  uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp);

  Gp zsp = emitter->zsp();   // ESP|RSP register.
  Gp zbp = emitter->zbp();   // EBP|RBP register.
  Gp gpReg = zsp;            // General purpose register (temporary).
  Gp saReg = zsp;            // Stack-arguments base pointer.

  // Emit: 'push zbp'
  //       'mov  zbp, zsp'.
  if (frame.hasPreservedFP()) {
    // EBP|RBP is pushed here, so remove it from the push sequence below.
    gpSaved &= ~Support::bitMask(Gp::kIdBp);
    ASMJIT_PROPAGATE(emitter->push(zbp));
    ASMJIT_PROPAGATE(emitter->mov(zbp, zsp));
  }

  // Emit: 'push gp' sequence.
  {
    Support::BitWordIterator<uint32_t> it(gpSaved);
    while (it.hasNext()) {
      gpReg.setId(it.next());
      ASMJIT_PROPAGATE(emitter->push(gpReg));
    }
  }

  // Emit: 'mov saReg, zsp' - must happen before SP is adjusted/aligned so
  // `saReg` still points at the stack arguments.
  uint32_t saRegId = frame.saRegId();
  if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) {
    saReg.setId(saRegId);
    if (frame.hasPreservedFP()) {
      if (saRegId != Gp::kIdBp)
        ASMJIT_PROPAGATE(emitter->mov(saReg, zbp));
    }
    else {
      ASMJIT_PROPAGATE(emitter->mov(saReg, zsp));
    }
  }

  // Emit: 'and zsp, StackAlignment'.
  if (frame.hasDynamicAlignment()) {
    ASMJIT_PROPAGATE(emitter->and_(zsp, -int32_t(frame.finalStackAlignment())));
  }

  // Emit: 'sub zsp, StackAdjustment'.
  if (frame.hasStackAdjustment()) {
    ASMJIT_PROPAGATE(emitter->sub(zsp, frame.stackAdjustment()));
  }

  // Emit: 'mov [zsp + DAOffset], saReg'.
  if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
    Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
    ASMJIT_PROPAGATE(emitter->mov(saMem, saReg));
  }

  // Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'. Group 0 (GP) is skipped as GP
  // registers were saved with 'push' above.
  {
    Reg xReg;
    Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset()));

    uint32_t xInst;
    uint32_t xSize;

    for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) {
      Support::BitWordIterator<uint32_t> it(frame.savedRegs(group));
      if (it.hasNext()) {
        X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
        do {
          xReg.setId(it.next());
          ASMJIT_PROPAGATE(emitter->emit(xInst, xBase, xReg));
          xBase.addOffsetLo32(int32_t(xSize));
        } while (it.hasNext());
      }
    }
  }

  return kErrorOk;
}
1229
// Emits a function epilog that reverts `emitProlog` for the same `frame`:
// restores non-GP (vector / k) registers, performs MMX/AVX cleanup, recovers
// the stack pointer, pops callee-saved GP registers in the reverse order of
// the prolog's pushes, pops the frame pointer (when preserved), and returns.
//
// \param emitter Emitter that receives the instruction stream.
// \param frame   Finalized function frame describing what must be restored.
// \return `kErrorOk` on success, otherwise the first emission error.
ASMJIT_FAVOR_SIZE Error X86Internal::emitEpilog(Emitter* emitter, const FuncFrame& frame) {
  uint32_t i;
  uint32_t regId;

  uint32_t gpSize = emitter->gpSize();
  uint32_t gpSaved = frame.savedRegs(Reg::kGroupGp);

  Gp zsp = emitter->zsp(); // ESP|RSP register.
  Gp zbp = emitter->zbp(); // EBP|RBP register.
  Gp gpReg = emitter->zsp(); // General purpose register (temporary).

  // Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
  if (frame.hasPreservedFP())
    gpSaved &= ~Support::bitMask(Gp::kIdBp);

  // Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'.
  {
    Reg xReg;
    Mem xBase = ptr(zsp, int32_t(frame.nonGpSaveOffset()));

    uint32_t xInst;
    uint32_t xSize;

    // Group 0 is GP (restored by the pop sequence below); the remaining
    // groups are restored with moves relative to ZSP.
    for (uint32_t group = 1; group < BaseReg::kGroupVirt; group++) {
      Support::BitWordIterator<uint32_t> it(frame.savedRegs(group));
      if (it.hasNext()) {
        X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
        do {
          xReg.setId(it.next());
          ASMJIT_PROPAGATE(emitter->emit(xInst, xReg, xBase));
          xBase.addOffsetLo32(int32_t(xSize));
        } while (it.hasNext());
      }
    }
  }

  // Emit 'emms' and/or 'vzeroupper'.
  if (frame.hasMmxCleanup()) ASMJIT_PROPAGATE(emitter->emms());
  if (frame.hasAvxCleanup()) ASMJIT_PROPAGATE(emitter->vzeroupper());

  if (frame.hasPreservedFP()) {
    // Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]'
    // `count` is the size of the 'push gp' area that sits below the saved ZBP.
    int32_t count = int32_t(frame.gpSaveSize() - gpSize);
    if (!count)
      ASMJIT_PROPAGATE(emitter->mov(zsp, zbp));
    else
      ASMJIT_PROPAGATE(emitter->lea(zsp, ptr(zbp, -count)));
  }
  else {
    if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
      // Emit 'mov zsp, [zsp + DsaSlot]'.
      Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
      ASMJIT_PROPAGATE(emitter->mov(zsp, saMem));
    }
    else if (frame.hasStackAdjustment()) {
      // Emit 'add zsp, StackAdjustment'.
      ASMJIT_PROPAGATE(emitter->add(zsp, int32_t(frame.stackAdjustment())));
    }
  }

  // Emit 'pop gp' sequence.
  if (gpSaved) {
    i = gpSaved;
    regId = 16;

    // Walk register ids from 15 down to 0 (bit 15 of `i` selects the current
    // id) so registers are popped in the exact reverse order of the pushes.
    do {
      regId--;
      if (i & 0x8000) {
        gpReg.setId(regId);
        ASMJIT_PROPAGATE(emitter->pop(gpReg));
      }
      i <<= 1;
    } while (regId != 0);
  }

  // Emit 'pop zbp'.
  if (frame.hasPreservedFP())
    ASMJIT_PROPAGATE(emitter->pop(zbp));

  // Emit 'ret' or 'ret x'.
  if (frame.hasCalleeStackCleanup())
    ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup())));
  else
    ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet));

  return kErrorOk;
}
1317
1318 // ============================================================================
1319 // [asmjit::X86Internal - Emit Arguments Assignment]
1320 // ============================================================================
1321
1322 #ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
1323 static void dumpFuncValue(String& sb, uint32_t archId, const FuncValue& value) noexcept {
1324 Logging::formatTypeId(sb, value.typeId());
1325 sb.appendChar('@');
1326 if (value.isReg()) {
1327 Logging::formatRegister(sb, 0, nullptr, archId, value.regType(), value.regId());
1328 }
1329 else if (value.isStack()) {
1330 sb.appendFormat("[%d]", value.stackOffset());
1331 }
1332 else {
1333 sb.appendString("<none>");
1334 }
1335 }
1336
1337 static void dumpAssignment(String& sb, const X86FuncArgsContext& ctx) noexcept {
1338 typedef X86FuncArgsContext::Var Var;
1339
1340 uint32_t archId = ctx.archId();
1341 uint32_t varCount = ctx.varCount();
1342
1343 for (uint32_t i = 0; i < varCount; i++) {
1344 const Var& var = ctx.var(i);
1345 const FuncValue& dst = var.out;
1346 const FuncValue& cur = var.cur;
1347
1348 sb.appendFormat("Var%u: ", i);
1349 dumpFuncValue(sb, archId, dst);
1350 sb.appendString(" <- ");
1351 dumpFuncValue(sb, archId, cur);
1352
1353 if (var.isDone())
1354 sb.appendString(" {Done}");
1355
1356 sb.appendChar('\n');
1357 }
1358 }
1359 #endif
1360
// Emits all moves required to shuffle function arguments from their current
// locations (tracked by `X86FuncArgsContext`) into the locations requested by
// `args`. The work runs in three phases:
//   1. Moves that target stack slots (register->stack and stack->stack),
//      done first while the most registers are still unassigned.
//   2. Register-to-register shuffling (moves and GP swaps), iterated until no
//      pending work remains; two consecutive iterations without progress are
//      an error.
//   3. Loads of stack-passed arguments into registers; the register holding
//      the stack-arguments base is intentionally loaded last.
//
// \param emitter Emitter that receives the instruction stream.
// \param frame   Finalized function frame.
// \param args    Requested argument assignment.
// \return `kErrorOk` on success, otherwise an error code.
ASMJIT_FAVOR_SIZE Error X86Internal::emitArgsAssignment(Emitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
  typedef X86FuncArgsContext::Var Var;
  typedef X86FuncArgsContext::WorkData WorkData;

  // Flags that track the progress of one shuffling iteration (phase 2).
  enum WorkFlags : uint32_t {
    kWorkNone = 0x00,      // Nothing happened yet.
    kWorkDidSome = 0x01,   // At least one move/swap was emitted.
    kWorkPending = 0x02,   // Some variables still need processing.
    kWorkPostponed = 0x04  // The previous iteration made no progress.
  };

  X86FuncArgsContext ctx;
  ASMJIT_PROPAGATE(ctx.initWorkData(frame, args));

#ifdef ASMJIT_DUMP_ARGS_ASSIGNMENT
  {
    String sb;
    dumpAssignment(sb, ctx);
    printf("%s\n", sb.data());
  }
#endif

  uint32_t archId = ctx.archId();
  uint32_t varCount = ctx._varCount;
  WorkData* workData = ctx._workData;

  // Use AVX if it's enabled.
  bool avxEnabled = frame.isAvxEnabled();

  uint32_t saVarId = ctx._saVarId;
  uint32_t saRegId = Gp::kIdSp;

  if (frame.hasDynamicAlignment()) {
    if (frame.hasPreservedFP())
      saRegId = Gp::kIdBp;
    else
      saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId();
  }

  // --------------------------------------------------------------------------
  // Register to stack and stack to stack moves must be first as now we have
  // the biggest chance of having as many as possible unassigned registers.
  // --------------------------------------------------------------------------

  if (ctx._stackDstMask) {
    // Base address of all arguments passed by stack.
    Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId)));
    Mem baseStackPtr = ptr(emitter->gpz(Gp::kIdSp), int32_t(0));

    for (uint32_t varId = 0; varId < varCount; varId++) {
      Var& var = ctx._vars[varId];
      if (!var.out.isStack()) continue;

      ASMJIT_ASSERT(var.cur.isReg() || var.cur.isStack());
      Reg reg;

      if (var.cur.isReg()) {
        WorkData& wd = workData[Reg::groupOf(var.cur.regType())];
        uint32_t rId = var.cur.regId();

        reg.setSignatureAndId(Reg::signatureOf(var.cur.regType()), rId);
        wd.unassign(varId, rId);
      }
      else {
        // Stack to reg move - tricky since we move stack to stack we can decide which
        // register to use. In general we follow the rule that IntToInt moves will use
        // GP regs with possibility to sign or zero extend, and all other moves will
        // either use GP or VEC regs depending on the size of the move.
        RegInfo rInfo = x86GetRegForMemToMemMove(archId, var.out.typeId(), var.cur.typeId());
        if (ASMJIT_UNLIKELY(!rInfo.isValid()))
          return DebugUtils::errored(kErrorInvalidState);

        WorkData& wd = workData[rInfo.group()];
        uint32_t availableRegs = wd.availableRegs();
        if (ASMJIT_UNLIKELY(!availableRegs))
          return DebugUtils::errored(kErrorInvalidState);

        // Use the lowest available register of the chosen group as a scratch.
        uint32_t rId = Support::ctz(availableRegs);
        reg.setSignatureAndId(rInfo.signature(), rId);

        ASMJIT_PROPAGATE(
          emitArgMove(emitter,
            reg,
            var.out.typeId(),
            baseArgPtr.cloneAdjusted(var.cur.stackOffset()),
            var.cur.typeId(),
            avxEnabled));
      }

      // Register to stack move.
      ASMJIT_PROPAGATE(
        emitRegMove(emitter, baseStackPtr.cloneAdjusted(var.out.stackOffset()), reg, var.cur.typeId(), avxEnabled));

      var.markDone();
    }
  }

  // --------------------------------------------------------------------------
  // Shuffle all registers that are currently assigned accordingly to the assignment.
  // --------------------------------------------------------------------------

  uint32_t workFlags = kWorkNone;
  for (;;) {
    for (uint32_t varId = 0; varId < varCount; varId++) {
      Var& var = ctx._vars[varId];
      if (var.isDone() || !var.cur.isReg()) continue;

      uint32_t curType = var.cur.regType();
      uint32_t outType = var.out.regType();

      uint32_t curGroup = Reg::groupOf(curType);
      uint32_t outGroup = Reg::groupOf(outType);

      uint32_t curId = var.cur.regId();
      uint32_t outId = var.out.regId();

      if (curGroup != outGroup) {
        // Cross-group moves are not implemented; this should not be reachable.
        ASMJIT_ASSERT(false);

        // Requires a conversion between two register groups.
        if (workData[outGroup]._numSwaps) {
          // TODO: Postponed
          workFlags |= kWorkPending;
        }
        else {
          // TODO:
          workFlags |= kWorkPending;
        }
      }
      else {
        WorkData& wd = workData[outGroup];
        if (!wd.isAssigned(outId)) {
EmitMove:
          ASMJIT_PROPAGATE(
            emitArgMove(emitter,
              Reg::fromTypeAndId(outType, outId), var.out.typeId(),
              Reg::fromTypeAndId(curType, curId), var.cur.typeId(), avxEnabled));

          wd.reassign(varId, outId, curId);
          var.cur.initReg(outType, outId, var.out.typeId());

          // `outId` can be a scratch register (goto below), so only mark done
          // when the variable landed in its final destination.
          if (outId == var.out.regId())
            var.markDone();
          workFlags |= kWorkDidSome | kWorkPending;
        }
        else {
          // Destination register is occupied by another variable.
          uint32_t altId = wd._physToVarId[outId];
          Var& altVar = ctx._vars[altId];

          if (!altVar.out.isInitialized() || (altVar.out.isReg() && altVar.out.regId() == curId)) {
            // Swap operation is possible only between two GP registers.
            if (curGroup == Reg::kGroupGp) {
              uint32_t highestType = Support::max(var.cur.regType(), altVar.cur.regType());
              uint32_t signature = highestType == Reg::kTypeGpq ? Reg::signatureOfT<Reg::kTypeGpq>()
                                                                : Reg::signatureOfT<Reg::kTypeGpd>();

              ASMJIT_PROPAGATE(emitter->emit(Inst::kIdXchg, Reg(signature, outId), Reg(signature, curId)));
              wd.swap(varId, curId, altId, outId);
              var.cur.setRegId(outId);
              var.markDone();
              altVar.cur.setRegId(curId);

              if (altVar.out.isInitialized())
                altVar.markDone();
              workFlags |= kWorkDidSome;
            }
            else {
              // If there is a scratch register it can be used to perform the swap.
              uint32_t availableRegs = wd.availableRegs();
              if (availableRegs) {
                uint32_t inOutRegs = wd.dstRegs();
                // Prefer a scratch that is not itself a destination register.
                if (availableRegs & ~inOutRegs)
                  availableRegs &= ~inOutRegs;
                outId = Support::ctz(availableRegs);
                goto EmitMove;
              }
              else {
                workFlags |= kWorkPending;
              }
            }
          }
          else {
            workFlags |= kWorkPending;
          }
        }
      }
    }

    if (!(workFlags & kWorkPending))
      break;

    // If we did nothing twice it means that something is really broken.
    if ((workFlags & (kWorkDidSome | kWorkPostponed)) == kWorkPostponed)
      return DebugUtils::errored(kErrorInvalidState);

    workFlags = (workFlags & kWorkDidSome) ? kWorkNone : kWorkPostponed;
  }

  // --------------------------------------------------------------------------
  // Load arguments passed by stack into registers. This is pretty simple and
  // it never requires multiple iterations like the previous phase.
  // --------------------------------------------------------------------------

  if (ctx._hasStackSrc) {
    uint32_t iterCount = 1;
    if (frame.hasDynamicAlignment() && !frame.hasPreservedFP())
      saRegId = saVarId < varCount ? ctx._vars[saVarId].cur.regId() : frame.saRegId();

    // Base address of all arguments passed by stack.
    Mem baseArgPtr = ptr(emitter->gpz(saRegId), int32_t(frame.saOffset(saRegId)));

    for (uint32_t iter = 0; iter < iterCount; iter++) {
      for (uint32_t varId = 0; varId < varCount; varId++) {
        Var& var = ctx._vars[varId];
        if (var.isDone()) continue;

        if (var.cur.isStack()) {
          ASMJIT_ASSERT(var.out.isReg());

          uint32_t outId = var.out.regId();
          uint32_t outType = var.out.regType();

          uint32_t group = Reg::groupOf(outType);
          WorkData& wd = ctx._workData[group];

          if (outId == saRegId && group == BaseReg::kGroupGp) {
            // This register will be processed last as we still need `saRegId`.
            if (iterCount == 1) {
              iterCount++;
              continue;
            }
            wd.unassign(wd._physToVarId[outId], outId);
          }

          Reg dstReg = Reg::fromTypeAndId(outType, outId);
          Mem srcMem = baseArgPtr.cloneAdjusted(var.cur.stackOffset());

          ASMJIT_PROPAGATE(
            emitArgMove(emitter,
              dstReg, var.out.typeId(),
              srcMem, var.cur.typeId(), avxEnabled));

          wd.assign(varId, outId);
          var.cur.initReg(outType, outId, var.cur.typeId(), FuncValue::kFlagIsDone);
        }
      }
    }
  }

  return kErrorOk;
}
1612
1613 ASMJIT_END_SUB_NAMESPACE
1614
1615 #endif // ASMJIT_BUILD_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86INTERNAL_P_H
7 #define _ASMJIT_X86_X86INTERNAL_P_H
8
9 #include "../core/api-config.h"
10
11 #include "../core/func.h"
12 #include "../x86/x86emitter.h"
13 #include "../x86/x86operand.h"
14
15 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
16
17 //! \cond INTERNAL
18 //! \addtogroup asmjit_x86
19 //! \{
20
21 // ============================================================================
22 // [asmjit::X86Internal]
23 // ============================================================================
24
//! X86 utilities used at multiple places, not part of public API, not exported.
struct X86Internal {
  //! Initialize `FuncDetail` (X86 specific).
  static Error initFuncDetail(FuncDetail& func, const FuncSignature& sign, uint32_t gpSize) noexcept;

  //! Initialize `FuncFrame` (X86 specific).
  static Error initFuncFrame(FuncFrame& frame, const FuncDetail& func) noexcept;

  //! Finalize `FuncFrame` (X86 specific).
  static Error finalizeFuncFrame(FuncFrame& frame) noexcept;

  //! Update `frame` accordingly to the given arguments assignment `args`
  //! (X86 specific).
  static Error argsToFuncFrame(const FuncArgsAssignment& args, FuncFrame& frame) noexcept;

  //! Emit function prolog.
  static Error emitProlog(Emitter* emitter, const FuncFrame& frame);

  //! Emit function epilog.
  static Error emitEpilog(Emitter* emitter, const FuncFrame& frame);

  //! Emit a pure move operation between two registers or the same type or
  //! between a register and its home slot. This function does not handle
  //! register conversion.
  static Error emitRegMove(Emitter* emitter,
    const Operand_& dst_,
    const Operand_& src_, uint32_t typeId, bool avxEnabled, const char* comment = nullptr);

  //! Emit move from a function argument (either register or stack) to a register.
  //!
  //! This function can handle the necessary conversion from one argument to
  //! another, and from one register type to another, if it's possible. Any
  //! attempt of conversion that requires third register of a different group
  //! (for example conversion from K to MMX) will fail.
  static Error emitArgMove(Emitter* emitter,
    const Reg& dst_, uint32_t dstTypeId,
    const Operand_& src_, uint32_t srcTypeId, bool avxEnabled, const char* comment = nullptr);

  //! Emit all moves required to assign function arguments (described by
  //! `args`) to their requested registers and/or stack slots.
  static Error emitArgsAssignment(Emitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args);
};
63
64 //! \}
65 //! \endcond
66
67 ASMJIT_END_SUB_NAMESPACE
68
69 #endif // _ASMJIT_X86_X86INTERNAL_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifndef ASMJIT_NO_LOGGING
8
9 #include "../core/misc_p.h"
10 #include "../core/support.h"
11 #include "../x86/x86instdb_p.h"
12 #include "../x86/x86logging_p.h"
13 #include "../x86/x86operand.h"
14
15 #ifndef ASMJIT_NO_COMPILER
16 #include "../core/compiler.h"
17 #endif
18
19 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
20
21 // ============================================================================
22 // [asmjit::x86::LoggingInternal - Constants]
23 // ============================================================================
24
// Describes how X86 register types and names are formatted. The `*Strings`
// members are packed NUL-separated string tables; the entry structs refer to
// them by byte offset (see `RegFormatInfo_T` and `x86RegFormatInfo` below).
struct RegFormatInfo {
  struct TypeEntry {
    uint8_t index;        // Byte offset of the type name in `typeStrings`.
  };

  struct NameEntry {
    uint8_t count;        // Number of registers of this type.
    uint8_t formatIndex;  // Offset of the printf-like name format in `nameStrings`.
    uint8_t specialIndex; // Offset of fixed (special) names in `nameStrings`.
    uint8_t specialCount; // Number of fixed names (0 when names are formatted).
  };

  TypeEntry typeEntries[BaseReg::kTypeMax + 1];
  char typeStrings[128 - 32];

  NameEntry nameEntries[BaseReg::kTypeMax + 1];
  char nameStrings[280];
};
43
// Computes, at compile time, the `RegFormatInfo` entries for register type
// `X`. Each constant is a byte offset into (or a count within) the packed
// string tables defined by `x86RegFormatInfo` below; the offsets must match
// the `// #N` markers in those tables.
template<uint32_t X>
struct RegFormatInfo_T {
  enum {
    // Offset of the type name (e.g. "gpd") in `typeStrings`.
    kTypeIndex = X == Reg::kTypeGpbLo ? 1 :
                 X == Reg::kTypeGpbHi ? 8 :
                 X == Reg::kTypeGpw ? 15 :
                 X == Reg::kTypeGpd ? 19 :
                 X == Reg::kTypeGpq ? 23 :
                 X == Reg::kTypeXmm ? 27 :
                 X == Reg::kTypeYmm ? 31 :
                 X == Reg::kTypeZmm ? 35 :
                 X == Reg::kTypeMm ? 50 :
                 X == Reg::kTypeKReg ? 53 :
                 X == Reg::kTypeSReg ? 43 :
                 X == Reg::kTypeCReg ? 59 :
                 X == Reg::kTypeDReg ? 62 :
                 X == Reg::kTypeSt ? 47 :
                 X == Reg::kTypeBnd ? 55 :
                 X == Reg::kTypeRip ? 39 : 0,

    // Offset of the printf-like name format (e.g. "r%ud") in `nameStrings`.
    kFormatIndex = X == Reg::kTypeGpbLo ? 1 :
                   X == Reg::kTypeGpbHi ? 6 :
                   X == Reg::kTypeGpw ? 11 :
                   X == Reg::kTypeGpd ? 16 :
                   X == Reg::kTypeGpq ? 21 :
                   X == Reg::kTypeXmm ? 25 :
                   X == Reg::kTypeYmm ? 31 :
                   X == Reg::kTypeZmm ? 37 :
                   X == Reg::kTypeMm ? 60 :
                   X == Reg::kTypeKReg ? 65 :
                   X == Reg::kTypeSReg ? 49 :
                   X == Reg::kTypeCReg ? 75 :
                   X == Reg::kTypeDReg ? 80 :
                   X == Reg::kTypeSt ? 55 :
                   X == Reg::kTypeBnd ? 69 :
                   X == Reg::kTypeRip ? 43 : 0,

    // Offset of the fixed-name table (e.g. "eax\0ecx\0...") in `nameStrings`.
    kSpecialIndex = X == Reg::kTypeGpbLo ? 96 :
                    X == Reg::kTypeGpbHi ? 128 :
                    X == Reg::kTypeGpw ? 161 :
                    X == Reg::kTypeGpd ? 160 :
                    X == Reg::kTypeGpq ? 192 :
                    X == Reg::kTypeSReg ? 224 :
                    X == Reg::kTypeRip ? 85 : 0,

    // Number of fixed names for this type (0 means names are formatted).
    kSpecialCount = X == Reg::kTypeGpbLo ? 8 :
                    X == Reg::kTypeGpbHi ? 4 :
                    X == Reg::kTypeGpw ? 8 :
                    X == Reg::kTypeGpd ? 8 :
                    X == Reg::kTypeGpq ? 8 :
                    X == Reg::kTypeSReg ? 7 :
                    X == Reg::kTypeRip ? 1 : 0
  };
};
98
// Builds one `RegFormatInfo::TypeEntry` for register type `TYPE`.
#define ASMJIT_REG_TYPE_ENTRY(TYPE) { \
  RegFormatInfo_T<TYPE>::kTypeIndex \
}

// Builds one `RegFormatInfo::NameEntry` for register type `TYPE`.
#define ASMJIT_REG_NAME_ENTRY(TYPE) { \
  RegTraits<TYPE>::kCount, \
  RegFormatInfo_T<TYPE>::kFormatIndex, \
  RegFormatInfo_T<TYPE>::kSpecialIndex, \
  RegFormatInfo_T<TYPE>::kSpecialCount \
}

// Formatting tables for all X86 register types. The `// #N` comments are the
// byte offsets that `RegFormatInfo_T` indexes must match.
static const RegFormatInfo x86RegFormatInfo = {
  // Register type entries and strings.
  { ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_TYPE_ENTRY, 0) },

  "\0"             // #0
  "gpb\0\0\0\0"    // #1
  "gpb.hi\0"       // #8
  "gpw\0"          // #15
  "gpd\0"          // #19
  "gpq\0"          // #23
  "xmm\0"          // #27
  "ymm\0"          // #31
  "zmm\0"          // #35
  "rip\0"          // #39
  "seg\0"          // #43
  "st\0"           // #47
  "mm\0"           // #50
  "k\0"            // #53
  "bnd\0"          // #55
  "cr\0"           // #59
  "dr\0",          // #62

  // Register name entries and strings.
  { ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_NAME_ENTRY, 0) },

  "\0"
  "r%ub\0"         // #1
  "r%uh\0"         // #6
  "r%uw\0"         // #11
  "r%ud\0"         // #16
  "r%u\0"          // #21
  "xmm%u\0"        // #25
  "ymm%u\0"        // #31
  "zmm%u\0"        // #37
  "rip%u\0"        // #43
  "seg%u\0"        // #49
  "st%u\0"         // #55
  "mm%u\0"         // #60
  "k%u\0"          // #65
  "bnd%u\0"        // #69
  "cr%u\0"         // #75
  "dr%u\0"         // #80

  "rip\0"          // #85
  "\0\0\0\0\0\0\0" // #89

  "al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0"  "bpl\0"  "sil\0"  "dil\0"  // #96
  "ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "n/a\0"  "n/a\0"  "n/a\0"  "n/a\0"  // #128
  "eax\0"  "ecx\0"  "edx\0"  "ebx\0"  "esp\0"  "ebp\0"  "esi\0"  "edi\0"  // #160
  "rax\0"  "rcx\0"  "rdx\0"  "rbx\0"  "rsp\0"  "rbp\0"  "rsi\0"  "rdi\0"  // #192
  "n/a\0"  "es\0\0" "cs\0\0" "ss\0\0" "ds\0\0" "fs\0\0" "gs\0\0" "n/a\0"  // #224
};
#undef ASMJIT_REG_NAME_ENTRY
#undef ASMJIT_REG_TYPE_ENTRY
164
// Returns the size-qualifier keyword ("byte ", "word ", ...) used to prefix a
// memory operand of `size` bytes, or an empty string when the size has no
// x86 qualifier (including size 0, i.e. unknown).
static const char* x86GetAddressSizeString(uint32_t size) noexcept {
  static const struct {
    uint32_t size;
    char name[8];
  } table[] = {
    { 1 , "byte " },
    { 2 , "word " },
    { 4 , "dword " },
    { 6 , "fword " },
    { 8 , "qword " },
    { 10, "tword " },
    { 16, "oword " },
    { 32, "yword " },
    { 64, "zword " }
  };

  for (const auto& entry : table)
    if (entry.size == size)
      return entry.name;

  return "";
}
179
180 // ============================================================================
181 // [asmjit::x86::LoggingInternal - Format Operand]
182 // ============================================================================
183
// Formats operand `op` (register, memory, immediate, or label) into `sb`
// using X86/X64 syntax.
//
// \param sb      Output string.
// \param flags   `FormatOptions::kFlag*` flags controlling the output.
// \param emitter Emitter the operand belongs to (may be null).
// \param archId  Architecture id used to select register naming.
// \param op      Operand to format.
// \return `kErrorOk` on success, otherwise a `String` append error.
ASMJIT_FAVOR_SIZE Error LoggingInternal::formatOperand(
  String& sb,
  uint32_t flags,
  const BaseEmitter* emitter,
  uint32_t archId,
  const Operand_& op) noexcept {

  if (op.isReg())
    return formatRegister(sb, flags, emitter, archId, op.as<BaseReg>().type(), op.as<BaseReg>().id());

  if (op.isMem()) {
    const Mem& m = op.as<Mem>();
    ASMJIT_PROPAGATE(sb.appendString(x86GetAddressSizeString(m.size())));

    // Segment override prefix.
    uint32_t seg = m.segmentId();
    if (seg != SReg::kIdNone && seg < SReg::kIdCount)
      ASMJIT_PROPAGATE(sb.appendFormat("%s:", x86RegFormatInfo.nameStrings + 224 + seg * 4));

    ASMJIT_PROPAGATE(sb.appendChar('['));
    switch (m.addrType()) {
      case BaseMem::kAddrTypeAbs: ASMJIT_PROPAGATE(sb.appendString("abs ")); break;
      case BaseMem::kAddrTypeRel: ASMJIT_PROPAGATE(sb.appendString("rel ")); break;
    }

    // `opSign` is the separator to print before the NEXT component; '\0'
    // means no component has been printed yet.
    char opSign = '\0';
    if (m.hasBase()) {
      opSign = '+';
      if (m.hasBaseLabel()) {
        ASMJIT_PROPAGATE(Logging::formatLabel(sb, flags, emitter, m.baseId()));
      }
      else {
        uint32_t modifiedFlags = flags;
        if (m.isRegHome()) {
          // '&' marks a memory operand that is a register's home slot.
          ASMJIT_PROPAGATE(sb.appendString("&"));
          modifiedFlags &= ~FormatOptions::kFlagRegCasts;
        }
        ASMJIT_PROPAGATE(formatRegister(sb, modifiedFlags, emitter, archId, m.baseType(), m.baseId()));
      }
    }

    if (m.hasIndex()) {
      if (opSign)
        ASMJIT_PROPAGATE(sb.appendChar(opSign));

      opSign = '+';
      ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, archId, m.indexType(), m.indexId()));
      if (m.hasShift())
        ASMJIT_PROPAGATE(sb.appendFormat("*%u", 1 << m.shift()));
    }

    uint64_t off = uint64_t(m.offset());
    if (off || !m.hasBaseOrIndex()) {
      if (int64_t(off) < 0) {
        // Negative displacement - print '-' followed by the absolute value.
        opSign = '-';
        off = ~off + 1;
      }

      if (opSign)
        ASMJIT_PROPAGATE(sb.appendChar(opSign));

      uint32_t base = 10;
      if ((flags & FormatOptions::kFlagHexOffsets) != 0 && off > 9) {
        ASMJIT_PROPAGATE(sb.appendString("0x", 2));
        base = 16;
      }

      ASMJIT_PROPAGATE(sb.appendUInt(off, base));
    }

    return sb.appendChar(']');
  }

  if (op.isImm()) {
    const Imm& i = op.as<Imm>();
    int64_t val = i.i64();

    if ((flags & FormatOptions::kFlagHexImms) != 0 && uint64_t(val) > 9) {
      ASMJIT_PROPAGATE(sb.appendString("0x", 2));
      return sb.appendUInt(uint64_t(val), 16);
    }
    else {
      return sb.appendInt(val, 10);
    }
  }

  if (op.isLabel()) {
    return Logging::formatLabel(sb, flags, emitter, op.id());
  }

  return sb.appendString("<None>");
}
276
277 // ============================================================================
278 // [asmjit::x86::LoggingInternal - Format Immediate (Extension)]
279 // ============================================================================
280
// Delimiters used when formatting the decoded meaning of an instruction
// immediate, e.g. "{EQ_OQ|SAE}".
static constexpr char kImmCharStart = '{';
static constexpr char kImmCharEnd = '}';
static constexpr char kImmCharOr = '|';

// Describes one bit-field of an instruction immediate and how to render it.
struct ImmBits {
  enum Mode : uint32_t {
    kModeLookup = 0,  // `text` is a packed string table indexed by the field value.
    kModeFormat = 1   // `text` is a snprintf-like format string.
  };

  uint8_t mask;       // Bits of the immediate covered by this field.
  uint8_t shift;      // Right shift applied after masking.
  uint8_t mode;       // One of `Mode`.
  char text[48 - 3];  // Lookup table or format string (see `mode`).
};
296
297 ASMJIT_FAVOR_SIZE static Error LoggingInternal_formatImmShuf(String& sb, uint32_t u8, uint32_t bits, uint32_t count) noexcept {
298 uint32_t mask = (1 << bits) - 1;
299
300 for (uint32_t i = 0; i < count; i++, u8 >>= bits) {
301 uint32_t value = u8 & mask;
302 ASMJIT_PROPAGATE(sb.appendChar(i == 0 ? kImmCharStart : kImmCharOr));
303 ASMJIT_PROPAGATE(sb.appendUInt(value));
304 }
305
306 if (kImmCharEnd)
307 ASMJIT_PROPAGATE(sb.appendChar(kImmCharEnd));
308
309 return kErrorOk;
310 }
311
312 ASMJIT_FAVOR_SIZE static Error LoggingInternal_formatImmBits(String& sb, uint32_t u8, const ImmBits* bits, uint32_t count) noexcept {
313 uint32_t n = 0;
314 char buf[64];
315
316 for (uint32_t i = 0; i < count; i++) {
317 const ImmBits& spec = bits[i];
318
319 uint32_t value = (u8 & uint32_t(spec.mask)) >> spec.shift;
320 const char* str = nullptr;
321
322 switch (spec.mode) {
323 case ImmBits::kModeLookup:
324 str = Support::findPackedString(spec.text, value);
325 break;
326
327 case ImmBits::kModeFormat:
328 snprintf(buf, sizeof(buf), spec.text, unsigned(value));
329 str = buf;
330 break;
331
332 default:
333 return DebugUtils::errored(kErrorInvalidState);
334 }
335
336 if (!str[0])
337 continue;
338
339 ASMJIT_PROPAGATE(sb.appendChar(++n == 1 ? kImmCharStart : kImmCharOr));
340 ASMJIT_PROPAGATE(sb.appendString(str));
341 }
342
343 if (n && kImmCharEnd)
344 ASMJIT_PROPAGATE(sb.appendChar(kImmCharEnd));
345
346 return kErrorOk;
347 }
348
349 ASMJIT_FAVOR_SIZE static Error LoggingInternal_formatImmText(String& sb, uint32_t u8, uint32_t bits, uint32_t advance, const char* text, uint32_t count = 1) noexcept {
350 uint32_t mask = (1u << bits) - 1;
351 uint32_t pos = 0;
352
353 for (uint32_t i = 0; i < count; i++, u8 >>= bits, pos += advance) {
354 uint32_t value = (u8 & mask) + pos;
355 ASMJIT_PROPAGATE(sb.appendChar(i == 0 ? kImmCharStart : kImmCharOr));
356 ASMJIT_PROPAGATE(sb.appendString(Support::findPackedString(text, value)));
357 }
358
359 if (kImmCharEnd)
360 ASMJIT_PROPAGATE(sb.appendChar(kImmCharEnd));
361
362 return kErrorOk;
363 }
364
365 ASMJIT_FAVOR_SIZE static Error LoggingInternal_explainConst(
366 String& sb,
367 uint32_t flags,
368 uint32_t instId,
369 uint32_t vecSize,
370 const Imm& imm) noexcept {
371
372 ASMJIT_UNUSED(flags);
373
374 static const char vcmpx[] =
375 "EQ_OQ\0" "LT_OS\0" "LE_OS\0" "UNORD_Q\0" "NEQ_UQ\0" "NLT_US\0" "NLE_US\0" "ORD_Q\0"
376 "EQ_UQ\0" "NGE_US\0" "NGT_US\0" "FALSE_OQ\0" "NEQ_OQ\0" "GE_OS\0" "GT_OS\0" "TRUE_UQ\0"
377 "EQ_OS\0" "LT_OQ\0" "LE_OQ\0" "UNORD_S\0" "NEQ_US\0" "NLT_UQ\0" "NLE_UQ\0" "ORD_S\0"
378 "EQ_US\0" "NGE_UQ\0" "NGT_UQ\0" "FALSE_OS\0" "NEQ_OS\0" "GE_OQ\0" "GT_OQ\0" "TRUE_US\0";
379
380 // Why to make it compatible...
381 static const char vpcmpx[] = "EQ\0" "LT\0" "LE\0" "FALSE\0" "NEQ\0" "GE\0" "GT\0" "TRUE\0";
382 static const char vpcomx[] = "LT\0" "LE\0" "GT\0" "GE\0" "EQ\0" "NEQ\0" "FALSE\0" "TRUE\0";
383
384 static const char vshufpd[] = "A0\0A1\0B0\0B1\0A2\0A3\0B2\0B3\0A4\0A5\0B4\0B5\0A6\0A7\0B6\0B7\0";
385 static const char vshufps[] = "A0\0A1\0A2\0A3\0A0\0A1\0A2\0A3\0B0\0B1\0B2\0B3\0B0\0B1\0B2\0B3\0";
386
387 static const ImmBits vfpclassxx[] = {
388 { 0x07u, 0, ImmBits::kModeLookup, "QNAN\0" "+0\0" "-0\0" "+INF\0" "-INF\0" "DENORMAL\0" "-FINITE\0" "SNAN\0" }
389 };
390
391 static const ImmBits vfixupimmxx[] = {
392 { 0x01u, 0, ImmBits::kModeLookup, "\0" "+INF_IE\0" },
393 { 0x02u, 1, ImmBits::kModeLookup, "\0" "-VE_IE\0" },
394 { 0x04u, 2, ImmBits::kModeLookup, "\0" "-INF_IE\0" },
395 { 0x08u, 3, ImmBits::kModeLookup, "\0" "SNAN_IE\0" },
396 { 0x10u, 4, ImmBits::kModeLookup, "\0" "ONE_IE\0" },
397 { 0x20u, 5, ImmBits::kModeLookup, "\0" "ONE_ZE\0" },
398 { 0x40u, 6, ImmBits::kModeLookup, "\0" "ZERO_IE\0" },
399 { 0x80u, 7, ImmBits::kModeLookup, "\0" "ZERO_ZE\0" }
400 };
401
402 static const ImmBits vgetmantxx[] = {
403 { 0x03u, 0, ImmBits::kModeLookup, "[1, 2)\0" "[.5, 2)\0" "[.5, 1)\0" "[.75, 1.5)\0" },
404 { 0x04u, 2, ImmBits::kModeLookup, "\0" "NO_SIGN\0" },
405 { 0x08u, 3, ImmBits::kModeLookup, "\0" "QNAN_IF_SIGN\0" }
406 };
407
408 static const ImmBits vmpsadbw[] = {
409 { 0x04u, 2, ImmBits::kModeLookup, "BLK1[0]\0" "BLK1[1]\0" },
410 { 0x03u, 0, ImmBits::kModeLookup, "BLK2[0]\0" "BLK2[1]\0" "BLK2[2]\0" "BLK2[3]\0" },
411 { 0x40u, 6, ImmBits::kModeLookup, "BLK1[4]\0" "BLK1[5]\0" },
412 { 0x30u, 4, ImmBits::kModeLookup, "BLK2[4]\0" "BLK2[5]\0" "BLK2[6]\0" "BLK2[7]\0" }
413 };
414
415 static const ImmBits vpclmulqdq[] = {
416 { 0x01u, 0, ImmBits::kModeLookup, "LQ\0" "HQ\0" },
417 { 0x10u, 4, ImmBits::kModeLookup, "LQ\0" "HQ\0" }
418 };
419
420 static const ImmBits vperm2x128[] = {
421 { 0x0Bu, 0, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" },
422 { 0xB0u, 4, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" }
423 };
424
425 static const ImmBits vrangexx[] = {
426 { 0x03u, 0, ImmBits::kModeLookup, "MIN\0" "MAX\0" "MIN_ABS\0" "MAX_ABS\0" },
427 { 0x0Cu, 2, ImmBits::kModeLookup, "SIGN_A\0" "SIGN_B\0" "SIGN_0\0" "SIGN_1\0" }
428 };
429
430 static const ImmBits vreducexx_vrndscalexx[] = {
431 { 0x07u, 0, ImmBits::kModeLookup, "\0" "\0" "\0" "\0" "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" },
432 { 0x08u, 3, ImmBits::kModeLookup, "\0" "SAE\0" },
433 { 0xF0u, 4, ImmBits::kModeFormat, "LEN=%d" }
434 };
435
436 static const ImmBits vroundxx[] = {
437 { 0x07u, 0, ImmBits::kModeLookup, "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" "\0" "\0" "\0" "\0" },
438 { 0x08u, 3, ImmBits::kModeLookup, "\0" "INEXACT\0" }
439 };
440
441 uint32_t u8 = imm.u8();
442 switch (instId) {
443 case Inst::kIdVblendpd:
444 case Inst::kIdBlendpd:
445 return LoggingInternal_formatImmShuf(sb, u8, 1, vecSize / 8);
446
447 case Inst::kIdVblendps:
448 case Inst::kIdBlendps:
449 return LoggingInternal_formatImmShuf(sb, u8, 1, vecSize / 4);
450
451 case Inst::kIdVcmppd:
452 case Inst::kIdVcmpps:
453 case Inst::kIdVcmpsd:
454 case Inst::kIdVcmpss:
455 return LoggingInternal_formatImmText(sb, u8, 5, 0, vcmpx);
456
457 case Inst::kIdCmppd:
458 case Inst::kIdCmpps:
459 case Inst::kIdCmpsd:
460 case Inst::kIdCmpss:
461 return LoggingInternal_formatImmText(sb, u8, 3, 0, vcmpx);
462
463 case Inst::kIdVdbpsadbw:
464 return LoggingInternal_formatImmShuf(sb, u8, 2, 4);
465
466 case Inst::kIdVdppd:
467 case Inst::kIdVdpps:
468 case Inst::kIdDppd:
469 case Inst::kIdDpps:
470 return LoggingInternal_formatImmShuf(sb, u8, 1, 8);
471
472 case Inst::kIdVmpsadbw:
473 case Inst::kIdMpsadbw:
474 return LoggingInternal_formatImmBits(sb, u8, vmpsadbw, Support::min<uint32_t>(vecSize / 8, 4));
475
476 case Inst::kIdVpblendw:
477 case Inst::kIdPblendw:
478 return LoggingInternal_formatImmShuf(sb, u8, 1, 8);
479
480 case Inst::kIdVpblendd:
481 return LoggingInternal_formatImmShuf(sb, u8, 1, Support::min<uint32_t>(vecSize / 4, 8));
482
483 case Inst::kIdVpclmulqdq:
484 case Inst::kIdPclmulqdq:
485 return LoggingInternal_formatImmBits(sb, u8, vpclmulqdq, ASMJIT_ARRAY_SIZE(vpclmulqdq));
486
487 case Inst::kIdVroundpd:
488 case Inst::kIdVroundps:
489 case Inst::kIdVroundsd:
490 case Inst::kIdVroundss:
491 case Inst::kIdRoundpd:
492 case Inst::kIdRoundps:
493 case Inst::kIdRoundsd:
494 case Inst::kIdRoundss:
495 return LoggingInternal_formatImmBits(sb, u8, vroundxx, ASMJIT_ARRAY_SIZE(vroundxx));
496
497 case Inst::kIdVshufpd:
498 case Inst::kIdShufpd:
499 return LoggingInternal_formatImmText(sb, u8, 1, 2, vshufpd, Support::min<uint32_t>(vecSize / 8, 8));
500
501 case Inst::kIdVshufps:
502 case Inst::kIdShufps:
503 return LoggingInternal_formatImmText(sb, u8, 2, 4, vshufps, 4);
504
505 case Inst::kIdVcvtps2ph:
506 return LoggingInternal_formatImmBits(sb, u8, vroundxx, 1);
507
508 case Inst::kIdVperm2f128:
509 case Inst::kIdVperm2i128:
510 return LoggingInternal_formatImmBits(sb, u8, vperm2x128, ASMJIT_ARRAY_SIZE(vperm2x128));
511
512 case Inst::kIdVpermilpd:
513 return LoggingInternal_formatImmShuf(sb, u8, 1, vecSize / 8);
514
515 case Inst::kIdVpermilps:
516 return LoggingInternal_formatImmShuf(sb, u8, 2, 4);
517
518 case Inst::kIdVpshufd:
519 case Inst::kIdPshufd:
520 return LoggingInternal_formatImmShuf(sb, u8, 2, 4);
521
522 case Inst::kIdVpshufhw:
523 case Inst::kIdVpshuflw:
524 case Inst::kIdPshufhw:
525 case Inst::kIdPshuflw:
526 case Inst::kIdPshufw:
527 return LoggingInternal_formatImmShuf(sb, u8, 2, 4);
528
529 case Inst::kIdVfixupimmpd:
530 case Inst::kIdVfixupimmps:
531 case Inst::kIdVfixupimmsd:
532 case Inst::kIdVfixupimmss:
533 return LoggingInternal_formatImmBits(sb, u8, vfixupimmxx, ASMJIT_ARRAY_SIZE(vfixupimmxx));
534
535 case Inst::kIdVfpclasspd:
536 case Inst::kIdVfpclassps:
537 case Inst::kIdVfpclasssd:
538 case Inst::kIdVfpclassss:
539 return LoggingInternal_formatImmBits(sb, u8, vfpclassxx, ASMJIT_ARRAY_SIZE(vfpclassxx));
540
541 case Inst::kIdVgetmantpd:
542 case Inst::kIdVgetmantps:
543 case Inst::kIdVgetmantsd:
544 case Inst::kIdVgetmantss:
545 return LoggingInternal_formatImmBits(sb, u8, vgetmantxx, ASMJIT_ARRAY_SIZE(vgetmantxx));
546
547 case Inst::kIdVpcmpb:
548 case Inst::kIdVpcmpd:
549 case Inst::kIdVpcmpq:
550 case Inst::kIdVpcmpw:
551 case Inst::kIdVpcmpub:
552 case Inst::kIdVpcmpud:
553 case Inst::kIdVpcmpuq:
554 case Inst::kIdVpcmpuw:
555 return LoggingInternal_formatImmText(sb, u8, 3, 0, vpcmpx);
556
557 case Inst::kIdVpcomb:
558 case Inst::kIdVpcomd:
559 case Inst::kIdVpcomq:
560 case Inst::kIdVpcomw:
561 case Inst::kIdVpcomub:
562 case Inst::kIdVpcomud:
563 case Inst::kIdVpcomuq:
564 case Inst::kIdVpcomuw:
565 return LoggingInternal_formatImmText(sb, u8, 3, 0, vpcomx);
566
567 case Inst::kIdVpermq:
568 case Inst::kIdVpermpd:
569 return LoggingInternal_formatImmShuf(sb, u8, 2, 4);
570
571 case Inst::kIdVpternlogd:
572 case Inst::kIdVpternlogq:
573 return LoggingInternal_formatImmShuf(sb, u8, 1, 8);
574
575 case Inst::kIdVrangepd:
576 case Inst::kIdVrangeps:
577 case Inst::kIdVrangesd:
578 case Inst::kIdVrangess:
579 return LoggingInternal_formatImmBits(sb, u8, vrangexx, ASMJIT_ARRAY_SIZE(vrangexx));
580
581 case Inst::kIdVreducepd:
582 case Inst::kIdVreduceps:
583 case Inst::kIdVreducesd:
584 case Inst::kIdVreducess:
585 case Inst::kIdVrndscalepd:
586 case Inst::kIdVrndscaleps:
587 case Inst::kIdVrndscalesd:
588 case Inst::kIdVrndscaless:
589 return LoggingInternal_formatImmBits(sb, u8, vreducexx_vrndscalexx, ASMJIT_ARRAY_SIZE(vreducexx_vrndscalexx));
590
591 case Inst::kIdVshuff32x4:
592 case Inst::kIdVshuff64x2:
593 case Inst::kIdVshufi32x4:
594 case Inst::kIdVshufi64x2: {
595 uint32_t count = Support::max<uint32_t>(vecSize / 16, 2u);
596 uint32_t bits = count <= 2 ? 1u : 2u;
597 return LoggingInternal_formatImmShuf(sb, u8, bits, count);
598 }
599
600 default:
601 return kErrorOk;
602 }
603 }
604
605 // ============================================================================
606 // [asmjit::x86::LoggingInternal - Format Register]
607 // ============================================================================
608
//! Formats a register of type `rType` and id `rId` into `sb`.
//!
//! When `emitter` is a Compiler and `rId` is a virtual register id, the
//! virtual register's assigned name (or a "%<index>" placeholder) is printed
//! instead of a physical register name; a "@<type>" cast suffix is appended
//! when the register is referenced as a different type than it was created
//! with and `FormatOptions::kFlagRegCasts` is enabled.
ASMJIT_FAVOR_SIZE Error LoggingInternal::formatRegister(String& sb, uint32_t flags, const BaseEmitter* emitter, uint32_t archId, uint32_t rType, uint32_t rId) noexcept {
  // X86/X64 register names don't depend on the architecture id.
  ASMJIT_UNUSED(archId);
  const RegFormatInfo& info = x86RegFormatInfo;

#ifndef ASMJIT_NO_COMPILER
  if (Operand::isVirtId(rId)) {
    if (emitter && emitter->emitterType() == BaseEmitter::kTypeCompiler) {
      const BaseCompiler* cc = static_cast<const BaseCompiler*>(emitter);
      if (cc->isVirtIdValid(rId)) {
        VirtReg* vReg = cc->virtRegById(rId);
        ASMJIT_ASSERT(vReg != nullptr);

        // Prefer the user-assigned name, fall back to "%<index>".
        const char* name = vReg->name();
        if (name && name[0] != '\0')
          ASMJIT_PROPAGATE(sb.appendString(name));
        else
          ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(rId))));

        // Append a "@<type>" cast if the register is used as a different type
        // than the virtual register was created with (opt-in via flags).
        if (vReg->type() != rType && rType <= BaseReg::kTypeMax && (flags & FormatOptions::kFlagRegCasts) != 0) {
          const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[rType];
          if (typeEntry.index)
            ASMJIT_PROPAGATE(sb.appendFormat("@%s", info.typeStrings + typeEntry.index));
        }

        return kErrorOk;
      }
    }
  }
#else
  ASMJIT_UNUSED(flags);
#endif

  if (ASMJIT_LIKELY(rType <= BaseReg::kTypeMax)) {
    const RegFormatInfo::NameEntry& nameEntry = info.nameEntries[rType];

    // Special (fixed) names are stored with a stride of 4 bytes per entry.
    if (rId < nameEntry.specialCount)
      return sb.appendString(info.nameStrings + nameEntry.specialIndex + rId * 4);

    // Formattable names (printf-style pattern indexed by register id).
    if (rId < nameEntry.count)
      return sb.appendFormat(info.nameStrings + nameEntry.formatIndex, unsigned(rId));

    // Id out of range for this type - print "<type>@<id>" instead.
    const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[rType];
    if (typeEntry.index)
      return sb.appendFormat("%s@%u", info.typeStrings + typeEntry.index, rId);
  }

  // Unknown register type - last-resort formatting.
  return sb.appendFormat("Reg?%u@%u", rType, rId);
}
657
658 // ============================================================================
659 // [asmjit::x86::LoggingInternal - Format Instruction]
660 // ============================================================================
661
662 ASMJIT_FAVOR_SIZE Error LoggingInternal::formatInstruction(
663 String& sb,
664 uint32_t flags,
665 const BaseEmitter* emitter,
666 uint32_t archId,
667 const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept {
668
669 uint32_t instId = inst.id();
670 uint32_t options = inst.options();
671
672 // Format instruction options and instruction mnemonic.
673 if (instId < Inst::_kIdCount) {
674 // SHORT|LONG options.
675 if (options & Inst::kOptionShortForm) ASMJIT_PROPAGATE(sb.appendString("short "));
676 if (options & Inst::kOptionLongForm) ASMJIT_PROPAGATE(sb.appendString("long "));
677
678 // LOCK|XACQUIRE|XRELEASE options.
679 if (options & Inst::kOptionXAcquire) ASMJIT_PROPAGATE(sb.appendString("xacquire "));
680 if (options & Inst::kOptionXRelease) ASMJIT_PROPAGATE(sb.appendString("xrelease "));
681 if (options & Inst::kOptionLock) ASMJIT_PROPAGATE(sb.appendString("lock "));
682
683 // REP|REPNE options.
684 if (options & (Inst::kOptionRep | Inst::kOptionRepne)) {
685 sb.appendString((options & Inst::kOptionRep) ? "rep " : "repnz ");
686 if (inst.hasExtraReg()) {
687 ASMJIT_PROPAGATE(sb.appendString("{"));
688 ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, archId, inst.extraReg().toReg<BaseReg>()));
689 ASMJIT_PROPAGATE(sb.appendString("} "));
690 }
691 }
692
693 // REX options.
694 if (options & Inst::kOptionRex) {
695 const uint32_t kRXBWMask = Inst::kOptionOpCodeR |
696 Inst::kOptionOpCodeX |
697 Inst::kOptionOpCodeB |
698 Inst::kOptionOpCodeW ;
699 if (options & kRXBWMask) {
700 sb.appendString("rex.");
701 if (options & Inst::kOptionOpCodeR) sb.appendChar('r');
702 if (options & Inst::kOptionOpCodeX) sb.appendChar('x');
703 if (options & Inst::kOptionOpCodeB) sb.appendChar('b');
704 if (options & Inst::kOptionOpCodeW) sb.appendChar('w');
705 sb.appendChar(' ');
706 }
707 else {
708 ASMJIT_PROPAGATE(sb.appendString("rex "));
709 }
710 }
711
712 // VEX|EVEX options.
713 if (options & Inst::kOptionVex3) ASMJIT_PROPAGATE(sb.appendString("vex3 "));
714 if (options & Inst::kOptionEvex) ASMJIT_PROPAGATE(sb.appendString("evex "));
715
716 ASMJIT_PROPAGATE(InstAPI::instIdToString(archId, instId, sb));
717 }
718 else {
719 ASMJIT_PROPAGATE(sb.appendFormat("[InstId=#%u]", unsigned(instId)));
720 }
721
722 for (uint32_t i = 0; i < opCount; i++) {
723 const Operand_& op = operands[i];
724 if (op.isNone()) break;
725
726 ASMJIT_PROPAGATE(sb.appendString(i == 0 ? " " : ", "));
727 ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, archId, op));
728
729 if (op.isImm() && (flags & FormatOptions::kFlagExplainImms)) {
730 uint32_t vecSize = 16;
731 for (uint32_t j = 0; j < opCount; j++)
732 if (operands[j].isReg())
733 vecSize = Support::max<uint32_t>(vecSize, operands[j].size());
734 ASMJIT_PROPAGATE(LoggingInternal_explainConst(sb, flags, instId, vecSize, op.as<Imm>()));
735 }
736
737 // Support AVX-512 masking - {k}{z}.
738 if (i == 0) {
739 if (inst.extraReg().group() == Reg::kGroupKReg) {
740 ASMJIT_PROPAGATE(sb.appendString(" {"));
741 ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, archId, inst.extraReg().type(), inst.extraReg().id()));
742 ASMJIT_PROPAGATE(sb.appendChar('}'));
743
744 if (options & Inst::kOptionZMask)
745 ASMJIT_PROPAGATE(sb.appendString("{z}"));
746 }
747 else if (options & Inst::kOptionZMask) {
748 ASMJIT_PROPAGATE(sb.appendString(" {z}"));
749 }
750 }
751
752 // Support AVX-512 broadcast - {1tox}.
753 if (op.isMem() && op.as<Mem>().hasBroadcast()) {
754 ASMJIT_PROPAGATE(sb.appendFormat(" {1to%u}", Support::bitMask(op.as<Mem>().getBroadcast())));
755 }
756 }
757
758 return kErrorOk;
759 }
760
761 ASMJIT_END_SUB_NAMESPACE
762
763 #endif // !ASMJIT_NO_LOGGING
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86LOGGING_P_H
7 #define _ASMJIT_X86_X86LOGGING_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_LOGGING
11
12 #include "../core/logging.h"
13 #include "../core/string.h"
14 #include "../x86/x86globals.h"
15
16 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
17
18 //! \addtogroup asmjit_x86
19 //! \{
20
21 // ============================================================================
22 // [asmjit::x86::LoggingInternal]
23 // ============================================================================
24
namespace LoggingInternal {
  //! Formats a register of `regType` and `regId` into `sb`. Virtual register
  //! ids are resolved through the Compiler when `emitter` is one.
  Error formatRegister(
    String& sb,
    uint32_t flags,
    const BaseEmitter* emitter,
    uint32_t archId,
    uint32_t regType,
    uint32_t regId) noexcept;

  //! Formats a single operand (register, memory, immediate, label) into `sb`.
  Error formatOperand(
    String& sb,
    uint32_t flags,
    const BaseEmitter* emitter,
    uint32_t archId,
    const Operand_& op) noexcept;

  //! Formats a whole instruction (options, mnemonic, operands, and AVX-512
  //! decorators) into `sb`.
  Error formatInstruction(
    String& sb,
    uint32_t flags,
    const BaseEmitter* emitter,
    uint32_t archId,
    const BaseInst& inst, const Operand_* operands, uint32_t opCount) noexcept;
};
48
49 //! \}
50
51 ASMJIT_END_SUB_NAMESPACE
52
53 #endif // !ASMJIT_NO_LOGGING
54 #endif // _ASMJIT_X86_X86LOGGING_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86OPCODE_P_H
7 #define _ASMJIT_X86_X86OPCODE_P_H
8
9 #include "../core/logging.h"
10 #include "../core/string.h"
11 #include "../x86/x86globals.h"
12
13 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
14
15 //! \cond INTERNAL
16 //! \addtogroup asmjit_x86
17 //! \{
18
19 // ============================================================================
20 // [asmjit::x86::Opcode]
21 // ============================================================================
22
23 //! Helper class to store and manipulate X86 opcodes.
24 //!
25 //! The first 8 least significant bits describe the opcode byte as defined in
26 //! ISA manuals, all other bits describe other properties like prefixes, see
27 //! `Opcode::Bits` for more information.
struct Opcode {
  //! Packed opcode value including all prefix/field bits, see `Opcode::Bits`.
  uint32_t v;

  //! Describes a meaning of all bits of AsmJit's 32-bit opcode value.
  //!
  //! This schema is AsmJit specific and has been designed to allow encoding of
  //! all X86 instructions available. X86, MMX, and SSE+ instructions always use
  //! `MM` and `PP` fields, which are encoded to corresponding prefixes needed
  //! by X86 or SIMD instructions. AVX+ instructions embed `MMMMM` and `PP` fields
  //! in a VEX prefix, and AVX-512 instructions embed `MM` and `PP` in EVEX prefix.
  //!
  //! The instruction opcode definition uses 1 or 2 bytes as an opcode value. 1
  //! byte is needed by most of the instructions, 2 bytes are only used by legacy
  //! X87-FPU instructions. This means that a second byte is free to be used by
  //! instructions encoded by using VEX and/or EVEX prefix.
  //!
  //! The fields description:
  //!
  //! - `MM` field is used to encode prefixes needed by the instruction or as
  //!   a part of VEX/EVEX prefix. Described as `mm` and `mmmmm` in instruction
  //!   manuals.
  //!
  //!   NOTE: Since `MM` field is defined as `mmmmm` (5 bits), but only 2 least
  //!   significant bits are used by VEX and EVEX prefixes, and additional 4th
  //!   bit is used by XOP prefix, AsmJit uses the 3rd and 5th bit for its own
  //!   purposes. These bits will probably never be used in future encodings as
  //!   AVX512 uses only `000mm` from `mmmmm`.
  //!
  //! - `PP` field is used to encode prefixes needed by the instruction or as a
  //!   part of VEX/EVEX prefix. Described as `pp` in instruction manuals.
  //!
  //! - `LL` field is used exclusively by AVX+ and AVX512+ instruction sets. It
  //!   describes vector size, which is `L.128` for XMM register, `L.256` for
  //!   YMM register, and `L.512` for ZMM register. The `LL` field is omitted
  //!   in case that instruction supports multiple vector lengths, however, if the
  //!   instruction requires specific `L` value it must be specified as a part of
  //!   the opcode.
  //!
  //!   NOTE: `LL` having value `11` is not defined yet.
  //!
  //! - `W` field is the most complicated. It was added by 64-bit architecture
  //!   to promote default operation width (instructions that perform 32-bit
  //!   operation by default require to override the width to 64-bit explicitly).
  //!   There is nothing wrong on this, however, some instructions introduced
  //!   implicit `W` override, for example a `cdqe` instruction is basically a
  //!   `cwde` instruction with overridden `W` (set to 1). There are some others
  //!   in the base X86 instruction set. More recent instruction sets started
  //!   using `W` field more often:
  //!
  //!   - AVX instructions started using `W` field as an extended opcode for FMA,
  //!     GATHER, PERM, and other instructions. It also uses `W` field to override
  //!     the default operation width in instructions like `vmovq`.
  //!
  //!   - AVX-512 instructions started using `W` field as an extended opcode for
  //!     all new instructions. This wouldn't have been an issue if the `W` field
  //!     of AVX-512 have matched AVX, but this is not always the case.
  //!
  //! - `O` field is an extended opcode field (3 bits) embedded in ModR/M BYTE.
  //!
  //! - `CDSHL` and `CDTT` fields describe 'compressed-displacement'. `CDSHL` is
  //!   defined for each instruction that is AVX-512 encodable (EVEX) and contains
  //!   a base N shift (base shift to perform the calculation). The `CDTT` field
  //!   is derived from instruction specification and describes additional shift
  //!   to calculate the final `CDSHL` that will be used in SIB byte.
  //!
  //! \note Don't reorder any fields here, the shifts and masks were defined
  //! carefully to make encoding of X86 instructions fast, especially to construct
  //! REX, VEX, and EVEX prefixes in the most efficient way. Changing values defined
  //! by these enums may cause AsmJit to emit invalid binary representations of
  //! instructions passed to `x86::Assembler::_emit`.
  enum Bits : uint32_t {
    // MM & VEX & EVEX & XOP
    // ---------------------
    //
    // Two meanings:
    //  * Part of a legacy opcode (prefixes emitted before the main opcode byte).
    //  * `MMMMM` field in VEX|EVEX|XOP instruction.
    //
    // AVX reserves 5 bits for `MMMMM` field, however AVX instructions only use
    // 2 bits and XOP 3 bits. AVX-512 shrinks `MMMMM` field into `MM` so it's
    // safe to assume that bits [4:2] of `MM` field won't be used in future
    // extensions, which will most probably use EVEX encoding. AsmJit divides
    // MM field into this layout:
    //
    // [1:0] - Used to describe 0F, 0F38 and 0F3A legacy prefix bytes and
    //         2 bits of MM field.
    // [2]   - Used to force 3-BYTE VEX prefix, but then cleared to zero before
    //         the prefix is emitted. This bit is not used by any instruction
    //         so it can be used for any purpose by AsmJit. Also, this bit is
    //         used as an extension to `MM` field describing 0F|0F38|0F3A to also
    //         describe 0F01 as used by some legacy instructions (instructions
    //         not using VEX/EVEX prefix).
    // [3]   - Required by XOP instructions, so we use this bit also to indicate
    //         that this is a XOP opcode.
    kMM_Shift      = 8,
    kMM_Mask       = 0x1Fu << kMM_Shift,
    kMM_00         = 0x00u << kMM_Shift,
    kMM_0F         = 0x01u << kMM_Shift,
    kMM_0F38       = 0x02u << kMM_Shift,
    kMM_0F3A       = 0x03u << kMM_Shift,   // Described also as XOP.M3 in AMD manuals.
    kMM_0F01       = 0x04u << kMM_Shift,   // AsmJit way to describe 0F01 (never VEX/EVEX).

    // `XOP` field is only used to force XOP prefix instead of VEX3 prefix. We
    // know that only XOP encoding uses bit 0b1000 of MM field and that no VEX
    // and EVEX instruction uses such bit, so we can use this bit to force XOP
    // prefix to be emitted instead of VEX3 prefix. See `x86VEXPrefix` defined
    // in `x86assembler.cpp`.
    kMM_XOP08      = 0x08u << kMM_Shift,   // XOP.M8.
    kMM_XOP09      = 0x09u << kMM_Shift,   // XOP.M9.
    kMM_XOP0A      = 0x0Au << kMM_Shift,   // XOP.MA.

    kMM_IsXOP_Shift= kMM_Shift + 3,
    kMM_IsXOP      = kMM_XOP08,

    // NOTE: Force VEX3 allows to force to emit VEX3 instead of VEX2 in some
    // cases (similar to forcing REX prefix). Force EVEX will force emitting
    // EVEX prefix instead of VEX2|VEX3. EVEX-only instructions will have
    // ForceEvex always set; however, instructions that can be encoded by
    // either VEX or EVEX prefix should not have ForceEvex set.

    kMM_ForceVex3  = 0x04u << kMM_Shift,   // Force 3-BYTE VEX prefix.
    kMM_ForceEvex  = 0x10u << kMM_Shift,   // Force 4-BYTE EVEX prefix.

    // FPU_2B - Second-Byte of the Opcode used by FPU
    // ----------------------------------------------
    //
    // Second byte opcode. This BYTE is ONLY used by FPU instructions and
    // collides with 3 bits from `MM` and 5 bits from 'CDSHL' and 'CDTT'.
    // It's fine as FPU and AVX512 flags are never used at the same time.
    kFPU_2B_Shift  = 10,
    kFPU_2B_Mask   = 0xFF << kFPU_2B_Shift,

    // CDSHL & CDTT
    // ------------
    //
    // Compressed displacement bits.
    //
    // Each opcode defines the base size (N) shift:
    //   [0]: BYTE  (1 byte).
    //   [1]: WORD  (2 bytes).
    //   [2]: DWORD (4 bytes - float/int32).
    //   [3]: QWORD (8 bytes - double/int64).
    //   [4]: OWORD (16 bytes - used by FV|FVM|M128).
    //
    // Which is then scaled by the instruction's TT (TupleType) into possible:
    //   [5]: YWORD (32 bytes)
    //   [6]: ZWORD (64 bytes)
    //
    // These bits are then adjusted before calling EmitModSib or EmitModVSib.
    kCDSHL_Shift   = 13,
    kCDSHL_Mask    = 0x7u << kCDSHL_Shift,

    kCDSHL__       = 0x0u << kCDSHL_Shift, // Base element size not used.
    kCDSHL_0       = 0x0u << kCDSHL_Shift, // N << 0.
    kCDSHL_1       = 0x1u << kCDSHL_Shift, // N << 1.
    kCDSHL_2       = 0x2u << kCDSHL_Shift, // N << 2.
    kCDSHL_3       = 0x3u << kCDSHL_Shift, // N << 3.
    kCDSHL_4       = 0x4u << kCDSHL_Shift, // N << 4.
    kCDSHL_5       = 0x5u << kCDSHL_Shift, // N << 5.

    // Compressed displacement tuple-type (specific to AsmJit).
    //
    // Since we store the base offset independently of CDTT we can simplify the
    // number of 'TUPLE_TYPE' groups significantly and just handle special cases.
    kCDTT_Shift    = 16,
    kCDTT_Mask     = 0x3u << kCDTT_Shift,
    kCDTT_None     = 0x0u << kCDTT_Shift,  // Does nothing.
    kCDTT_ByLL     = 0x1u << kCDTT_Shift,  // Scales by LL (1x 2x 4x).
    kCDTT_T1W      = 0x2u << kCDTT_Shift,  // Used to add 'W' to the shift.
    kCDTT_DUP      = 0x3u << kCDTT_Shift,  // Special 'VMOVDDUP' case.

    // Aliases that match names used in instruction manuals.
    kCDTT__        = kCDTT_None,
    kCDTT_FV       = kCDTT_ByLL,
    kCDTT_HV       = kCDTT_ByLL,
    kCDTT_FVM      = kCDTT_ByLL,
    kCDTT_T1S      = kCDTT_None,
    kCDTT_T1F      = kCDTT_None,
    kCDTT_T1_4X    = kCDTT_None,
    kCDTT_T2       = kCDTT_None,
    kCDTT_T4       = kCDTT_None,
    kCDTT_T8       = kCDTT_None,
    kCDTT_HVM      = kCDTT_ByLL,
    kCDTT_QVM      = kCDTT_ByLL,
    kCDTT_OVM      = kCDTT_ByLL,
    kCDTT_128      = kCDTT_None,

    kCDTT_T4X      = kCDTT_T1_4X,          // Alias to have only 3 letters.

    // `O` Field in ModR/M
    // -------------------

    kO_Shift       = 18,
    kO_Mask        = 0x7u << kO_Shift,

    kO__           = 0x0u,
    kO_0           = 0x0u << kO_Shift,
    kO_1           = 0x1u << kO_Shift,
    kO_2           = 0x2u << kO_Shift,
    kO_3           = 0x3u << kO_Shift,
    kO_4           = 0x4u << kO_Shift,
    kO_5           = 0x5u << kO_Shift,
    kO_6           = 0x6u << kO_Shift,
    kO_7           = 0x7u << kO_Shift,

    // `PP` Field
    // ----------
    //
    // These fields are stored deliberately right after each other as it makes
    // it easier to construct VEX prefix from the opcode value stored in the
    // instruction database.
    //
    // Two meanings:
    //   * "PP" field in AVX/XOP/AVX-512 instruction.
    //   * Mandatory Prefix in legacy encoding.
    //
    // AVX reserves 2 bits for `PP` field, but AsmJit extends the storage by 1
    // more bit that is used to emit 9B prefix for some X87-FPU instructions.

    kPP_Shift      = 21,
    kPP_VEXMask    = 0x03u << kPP_Shift,   // PP field mask used by VEX/EVEX.
    kPP_FPUMask    = 0x07u << kPP_Shift,   // Mask used by EMIT_PP, also includes '0x9B'.
    kPP_00         = 0x00u << kPP_Shift,
    kPP_66         = 0x01u << kPP_Shift,
    kPP_F3         = 0x02u << kPP_Shift,
    kPP_F2         = 0x03u << kPP_Shift,

    kPP_9B         = 0x07u << kPP_Shift,   // AsmJit specific to emit FPU's '9B' byte.

    // REX|VEX|EVEX B|X|R|W Bits
    // -------------------------
    //
    // NOTE: REX.[B|X|R] are never stored within the opcode itself, they are
    // reserved by AsmJit and are added dynamically to the opcode to represent
    // [REX|VEX|EVEX].[B|X|R] bits. REX.W can be stored in DB as it's sometimes
    // part of the opcode itself.

    // These must be binary compatible with instruction options.
    kREX_Shift     = 24,
    kREX_Mask      = 0x0Fu << kREX_Shift,
    kB             = 0x01u << kREX_Shift,  // Never stored in DB, used by encoder.
    kX             = 0x02u << kREX_Shift,  // Never stored in DB, used by encoder.
    kR             = 0x04u << kREX_Shift,  // Never stored in DB, used by encoder.
    kW             = 0x08u << kREX_Shift,
    kW_Shift       = kREX_Shift + 3,

    kW__           = 0u << kW_Shift,       // REX.W/VEX.W is unspecified.
    kW_x           = 0u << kW_Shift,       // REX.W/VEX.W is based on instruction operands.
    kW_I           = 0u << kW_Shift,       // REX.W/VEX.W is ignored (WIG).
    kW_0           = 0u << kW_Shift,       // REX.W/VEX.W is 0 (W0).
    kW_1           = 1u << kW_Shift,       // REX.W/VEX.W is 1 (W1).

    // EVEX.W Field
    // ------------
    //
    // `W` field used by EVEX instruction encoding.

    kEvex_W_Shift  = 28,
    kEvex_W_Mask   = 1u << kEvex_W_Shift,

    kEvex_W__      = 0u << kEvex_W_Shift,  // EVEX.W is unspecified (not EVEX instruction).
    kEvex_W_x      = 0u << kEvex_W_Shift,  // EVEX.W is based on instruction operands.
    kEvex_W_I      = 0u << kEvex_W_Shift,  // EVEX.W is ignored (WIG).
    kEvex_W_0      = 0u << kEvex_W_Shift,  // EVEX.W is 0 (W0).
    kEvex_W_1      = 1u << kEvex_W_Shift,  // EVEX.W is 1 (W1).

    // `L` or `LL` field in AVX/XOP/AVX-512
    // ------------------------------------
    //
    // VEX/XOP prefix can only use the first bit `L.128` or `L.256`. EVEX prefix
    // prefix makes it possible to use also `L.512`.
    //
    // If the instruction set manual describes an instruction by `LIG` it means
    // that the `L` field is ignored and AsmJit defaults to `0` in such case.
    kLL_Shift      = 29,
    kLL_Mask       = 0x3u << kLL_Shift,

    kLL__          = 0x0u << kLL_Shift,    // LL is unspecified.
    kLL_x          = 0x0u << kLL_Shift,    // LL is based on instruction operands.
    kLL_I          = 0x0u << kLL_Shift,    // LL is ignored (LIG).
    kLL_0          = 0x0u << kLL_Shift,    // LL is 0 (L.128).
    kLL_1          = 0x1u << kLL_Shift,    // LL is 1 (L.256).
    kLL_2          = 0x2u << kLL_Shift,    // LL is 2 (L.512).

    // Opcode Combinations
    // -------------------

    k0      = 0,                           // '__' (no prefix, used internally).
    k000000 = kPP_00 | kMM_00,             // '__' (no prefix, to be the same width as others).
    k000F00 = kPP_00 | kMM_0F,             // '0F'
    k000F01 = kPP_00 | kMM_0F01,           // '0F01'
    k000F0F = kPP_00 | kMM_0F,             // '0F0F' - 3DNOW, equal to 0x0F, must have special encoding to take effect.
    k000F38 = kPP_00 | kMM_0F38,           // '0F38'
    k000F3A = kPP_00 | kMM_0F3A,           // '0F3A'
    k660000 = kPP_66 | kMM_00,             // '66'
    k660F00 = kPP_66 | kMM_0F,             // '660F'
    k660F38 = kPP_66 | kMM_0F38,           // '660F38'
    k660F3A = kPP_66 | kMM_0F3A,           // '660F3A'
    kF20000 = kPP_F2 | kMM_00,             // 'F2'
    kF20F00 = kPP_F2 | kMM_0F,             // 'F20F'
    kF20F38 = kPP_F2 | kMM_0F38,           // 'F20F38'
    kF20F3A = kPP_F2 | kMM_0F3A,           // 'F20F3A'
    kF30000 = kPP_F3 | kMM_00,             // 'F3'
    kF30F00 = kPP_F3 | kMM_0F,             // 'F30F'
    kF30F38 = kPP_F3 | kMM_0F38,           // 'F30F38'
    kF30F3A = kPP_F3 | kMM_0F3A,           // 'F30F3A'
    kFPU_00 = kPP_00 | kMM_00,             // '__' (FPU)
    kFPU_9B = kPP_9B | kMM_00,             // '9B' (FPU)
    kXOP_M8 = kPP_00 | kMM_XOP08,          // 'M8' (XOP)
    kXOP_M9 = kPP_00 | kMM_XOP09,          // 'M9' (XOP)
    kXOP_MA = kPP_00 | kMM_XOP0A           // 'MA' (XOP)
  };

  // --------------------------------------------------------------------------
  // [Opcode Builder]
  // --------------------------------------------------------------------------

  //! Returns the raw packed opcode value.
  ASMJIT_INLINE uint32_t get() const noexcept { return v; }

  //! Tests whether the REX.W / VEX.W bit is set.
  ASMJIT_INLINE bool hasW() const noexcept { return (v & kW) != 0; }
  //! Tests whether the 66H mandatory prefix (kPP_66) is set.
  ASMJIT_INLINE bool has66h() const noexcept { return (v & kPP_66) != 0; }

  //! Adds `x` to the opcode value (arithmetic add, not OR).
  ASMJIT_INLINE Opcode& add(uint32_t x) noexcept { return operator+=(x); }

  //! ORs in the 66H mandatory prefix.
  ASMJIT_INLINE Opcode& add66h() noexcept { return operator|=(kPP_66); }
  //! ORs in the 66H prefix if `exp` is non-zero (exp is shifted into PP).
  template<typename T>
  ASMJIT_INLINE Opcode& add66hIf(T exp) noexcept { return operator|=(uint32_t(exp) << kPP_Shift); }
  //! ORs in the 66H prefix for 16-bit (WORD) operand size.
  template<typename T>
  ASMJIT_INLINE Opcode& add66hBySize(T size) noexcept { return add66hIf(size == 2); }

  //! ORs in REX.W / VEX.W.
  ASMJIT_INLINE Opcode& addW() noexcept { return operator|=(kW); }
  //! ORs in REX.W / VEX.W if `exp` is non-zero (exp is shifted into W).
  template<typename T>
  ASMJIT_INLINE Opcode& addWIf(T exp) noexcept { return operator|=(uint32_t(exp) << kW_Shift); }
  //! ORs in REX.W / VEX.W for 64-bit (QWORD) operand size.
  template<typename T>
  ASMJIT_INLINE Opcode& addWBySize(T size) noexcept { return addWIf(size == 8); }

  //! ORs in the size-dependent prefix: 66H for WORD, REX.W for QWORD.
  template<typename T>
  ASMJIT_INLINE Opcode& addPrefixBySize(T size) noexcept {
    static const uint32_t mask[16] = {
      0,          // #0
      0,          // #1 -> nothing (already handled or not possible)
      kPP_66,     // #2 -> 66H
      0,          // #3
      0,          // #4 -> nothing
      0,          // #5
      0,          // #6
      0,          // #7
      kW          // #8 -> REX.W
    };
    return operator|=(mask[size & 0xF]);
  }

  //! Like `addPrefixBySize()`, but also adjusts the main opcode byte (bit 0)
  //! to select the non-BYTE form of an arithmetic instruction.
  template<typename T>
  ASMJIT_INLINE Opcode& addArithBySize(T size) noexcept {
    static const uint32_t mask[16] = {
      0,          // #0
      0,          // #1 -> nothing
      1 | kPP_66, // #2 -> NOT_BYTE_OP(1) and 66H
      0,          // #3
      1,          // #4 -> NOT_BYTE_OP(1)
      0,          // #5
      0,          // #6
      0,          // #7
      1 | kW      // #8 -> NOT_BYTE_OP(1) and REX.W
    };
    return operator|=(mask[size & 0xF]);
  }

  //! Extract `O` field from the opcode.
  ASMJIT_INLINE uint32_t extractO() const noexcept {
    return (v >> kO_Shift) & 0x07;
  }

  //! Extract `REX` prefix from opcode combined with `options`.
  ASMJIT_INLINE uint32_t extractRex(uint32_t options) const noexcept {
    // kREX was designed in a way that when shifted there will be no bytes
    // set except REX.[B|X|R|W]. The returned value forms a real REX prefix byte.
    // This case should be unit-tested as well.
    return (v | options) >> kREX_Shift;
  }

  //! Extracts the combined LL+MM fields, merged with the Vex3/Evex option bits.
  ASMJIT_INLINE uint32_t extractLLMM(uint32_t options) const noexcept {
    uint32_t x = v & (kLL_Mask | kMM_Mask);
    uint32_t y = options & (Inst::kOptionVex3 | Inst::kOptionEvex);
    return (x | y) >> kMM_Shift;
  }

  // Assignment and compound bitwise/arithmetic operators mutating `v` in place.
  ASMJIT_INLINE Opcode& operator=(uint32_t x) noexcept { v = x; return *this; }
  ASMJIT_INLINE Opcode& operator+=(uint32_t x) noexcept { v += x; return *this; }
  ASMJIT_INLINE Opcode& operator-=(uint32_t x) noexcept { v -= x; return *this; }
  ASMJIT_INLINE Opcode& operator&=(uint32_t x) noexcept { v &= x; return *this; }
  ASMJIT_INLINE Opcode& operator|=(uint32_t x) noexcept { v |= x; return *this; }
  ASMJIT_INLINE Opcode& operator^=(uint32_t x) noexcept { v ^= x; return *this; }

  // Non-mutating operators returning the combined raw value.
  ASMJIT_INLINE uint32_t operator&(uint32_t x) const noexcept { return v & x; }
  ASMJIT_INLINE uint32_t operator|(uint32_t x) const noexcept { return v | x; }
  ASMJIT_INLINE uint32_t operator^(uint32_t x) const noexcept { return v ^ x; }
  ASMJIT_INLINE uint32_t operator<<(uint32_t x) const noexcept { return v << x; }
  ASMJIT_INLINE uint32_t operator>>(uint32_t x) const noexcept { return v >> x; }
};
428
429 //! \}
430 //! \endcond
431
432 ASMJIT_END_SUB_NAMESPACE
433
434 #endif // _ASMJIT_X86_X86OPCODE_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #ifdef ASMJIT_BUILD_X86
8
9 #include "../core/misc_p.h"
10 #include "../x86/x86operand.h"
11
12 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
13
14 // ============================================================================
15 // [asmjit::x86::OpData]
16 // ============================================================================
17
// Operand lookup tables, generated from `RegTraits` and indexed by register
// type (each `ASMJIT_LOOKUP_TABLE_32` expands to 32 entries, one per type).
const OpData opData = {
  {
    // RegInfo[] - Register signature of each register type.
    #define VALUE(X) { RegTraits<X>::kSignature }
    { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) },
    #undef VALUE

    // RegCount[] - Number of registers of each register type.
    #define VALUE(X) RegTraits<X>::kCount
    { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) },
    #undef VALUE

    // RegTypeToTypeId[] - Mapping from register type to its TypeId.
    #define VALUE(X) RegTraits<X>::kTypeId
    { ASMJIT_LOOKUP_TABLE_32(VALUE, 0) }
    #undef VALUE
  }
};
36
37 // ============================================================================
38 // [asmjit::x86::Operand - Unit]
39 // ============================================================================
40
41 #if defined(ASMJIT_TEST)
42 UNIT(x86_operand) {
43 Label L(1000); // Label with some ID.
44
45 INFO("Checking basic properties of built-in X86 registers");
46 EXPECT(gpb(Gp::kIdAx) == al);
47 EXPECT(gpb(Gp::kIdBx) == bl);
48 EXPECT(gpb(Gp::kIdCx) == cl);
49 EXPECT(gpb(Gp::kIdDx) == dl);
50
51 EXPECT(gpb_lo(Gp::kIdAx) == al);
52 EXPECT(gpb_lo(Gp::kIdBx) == bl);
53 EXPECT(gpb_lo(Gp::kIdCx) == cl);
54 EXPECT(gpb_lo(Gp::kIdDx) == dl);
55
56 EXPECT(gpb_hi(Gp::kIdAx) == ah);
57 EXPECT(gpb_hi(Gp::kIdBx) == bh);
58 EXPECT(gpb_hi(Gp::kIdCx) == ch);
59 EXPECT(gpb_hi(Gp::kIdDx) == dh);
60
61 EXPECT(gpw(Gp::kIdAx) == ax);
62 EXPECT(gpw(Gp::kIdBx) == bx);
63 EXPECT(gpw(Gp::kIdCx) == cx);
64 EXPECT(gpw(Gp::kIdDx) == dx);
65
66 EXPECT(gpd(Gp::kIdAx) == eax);
67 EXPECT(gpd(Gp::kIdBx) == ebx);
68 EXPECT(gpd(Gp::kIdCx) == ecx);
69 EXPECT(gpd(Gp::kIdDx) == edx);
70
71 EXPECT(gpq(Gp::kIdAx) == rax);
72 EXPECT(gpq(Gp::kIdBx) == rbx);
73 EXPECT(gpq(Gp::kIdCx) == rcx);
74 EXPECT(gpq(Gp::kIdDx) == rdx);
75
76 EXPECT(gpb(Gp::kIdAx) != dl);
77 EXPECT(gpw(Gp::kIdBx) != cx);
78 EXPECT(gpd(Gp::kIdCx) != ebx);
79 EXPECT(gpq(Gp::kIdDx) != rax);
80
81 INFO("Checking if x86::reg(...) matches built-in IDs");
82 EXPECT(gpb(5) == bpl);
83 EXPECT(gpw(5) == bp);
84 EXPECT(gpd(5) == ebp);
85 EXPECT(gpq(5) == rbp);
86 EXPECT(st(5) == st5);
87 EXPECT(mm(5) == mm5);
88 EXPECT(k(5) == k5);
89 EXPECT(cr(5) == cr5);
90 EXPECT(dr(5) == dr5);
91 EXPECT(xmm(5) == xmm5);
92 EXPECT(ymm(5) == ymm5);
93 EXPECT(zmm(5) == zmm5);
94
95 INFO("Checking x86::Gp register properties");
96 EXPECT(Gp().isReg() == true);
97 EXPECT(eax.isReg() == true);
98 EXPECT(eax.id() == 0);
99 EXPECT(eax.size() == 4);
100 EXPECT(eax.type() == Reg::kTypeGpd);
101 EXPECT(eax.group() == Reg::kGroupGp);
102
103 INFO("Checking x86::Xmm register properties");
104 EXPECT(Xmm().isReg() == true);
105 EXPECT(xmm4.isReg() == true);
106 EXPECT(xmm4.id() == 4);
107 EXPECT(xmm4.size() == 16);
108 EXPECT(xmm4.type() == Reg::kTypeXmm);
109 EXPECT(xmm4.group() == Reg::kGroupVec);
110 EXPECT(xmm4.isVec());
111
112 INFO("Checking x86::Ymm register properties");
113 EXPECT(Ymm().isReg() == true);
114 EXPECT(ymm5.isReg() == true);
115 EXPECT(ymm5.id() == 5);
116 EXPECT(ymm5.size() == 32);
117 EXPECT(ymm5.type() == Reg::kTypeYmm);
118 EXPECT(ymm5.group() == Reg::kGroupVec);
119 EXPECT(ymm5.isVec());
120
121 INFO("Checking x86::Zmm register properties");
122 EXPECT(Zmm().isReg() == true);
123 EXPECT(zmm6.isReg() == true);
124 EXPECT(zmm6.id() == 6);
125 EXPECT(zmm6.size() == 64);
126 EXPECT(zmm6.type() == Reg::kTypeZmm);
127 EXPECT(zmm6.group() == Reg::kGroupVec);
128 EXPECT(zmm6.isVec());
129
130 INFO("Checking x86::Vec register properties");
131 EXPECT(Vec().isReg() == true);
132 // Converts a VEC register to a type of the passed register, but keeps the ID.
133 EXPECT(xmm4.cloneAs(ymm10) == ymm4);
134 EXPECT(xmm4.cloneAs(zmm11) == zmm4);
135 EXPECT(ymm5.cloneAs(xmm12) == xmm5);
136 EXPECT(ymm5.cloneAs(zmm13) == zmm5);
137 EXPECT(zmm6.cloneAs(xmm14) == xmm6);
138 EXPECT(zmm6.cloneAs(ymm15) == ymm6);
139
140 EXPECT(xmm7.xmm() == xmm7);
141 EXPECT(xmm7.ymm() == ymm7);
142 EXPECT(xmm7.zmm() == zmm7);
143
144 EXPECT(ymm7.xmm() == xmm7);
145 EXPECT(ymm7.ymm() == ymm7);
146 EXPECT(ymm7.zmm() == zmm7);
147
148 EXPECT(zmm7.xmm() == xmm7);
149 EXPECT(zmm7.ymm() == ymm7);
150 EXPECT(zmm7.zmm() == zmm7);
151
152 INFO("Checking x86::FpMm register properties");
153 EXPECT(Mm().isReg() == true);
154 EXPECT(mm2.isReg() == true);
155 EXPECT(mm2.id() == 2);
156 EXPECT(mm2.size() == 8);
157 EXPECT(mm2.type() == Reg::kTypeMm);
158 EXPECT(mm2.group() == Reg::kGroupMm);
159
160 INFO("Checking x86::KReg register properties");
161 EXPECT(KReg().isReg() == true);
162 EXPECT(k3.isReg() == true);
163 EXPECT(k3.id() == 3);
164 EXPECT(k3.size() == 0);
165 EXPECT(k3.type() == Reg::kTypeKReg);
166 EXPECT(k3.group() == Reg::kGroupKReg);
167
168 INFO("Checking x86::St register properties");
169 EXPECT(St().isReg() == true);
170 EXPECT(st1.isReg() == true);
171 EXPECT(st1.id() == 1);
172 EXPECT(st1.size() == 10);
173 EXPECT(st1.type() == Reg::kTypeSt);
174 EXPECT(st1.group() == Reg::kGroupSt);
175
176 INFO("Checking if default constructed regs behave as expected");
177 EXPECT(Reg().isValid() == false);
178 EXPECT(Gp().isValid() == false);
179 EXPECT(Xmm().isValid() == false);
180 EXPECT(Ymm().isValid() == false);
181 EXPECT(Zmm().isValid() == false);
182 EXPECT(Mm().isValid() == false);
183 EXPECT(KReg().isValid() == false);
184 EXPECT(SReg().isValid() == false);
185 EXPECT(CReg().isValid() == false);
186 EXPECT(DReg().isValid() == false);
187 EXPECT(St().isValid() == false);
188 EXPECT(Bnd().isValid() == false);
189
190 INFO("Checking x86::Mem operand");
191 Mem m;
192 EXPECT(m == Mem(), "Two default constructed x86::Mem operands must be equal");
193
194 m = ptr(L);
195 EXPECT(m.hasBase() == true);
196 EXPECT(m.hasBaseReg() == false);
197 EXPECT(m.hasBaseLabel() == true);
198 EXPECT(m.hasOffset() == false);
199 EXPECT(m.isOffset64Bit() == false);
200 EXPECT(m.offset() == 0);
201 EXPECT(m.offsetLo32() == 0);
202
203 m = ptr(0x0123456789ABCDEFu);
204 EXPECT(m.hasBase() == false);
205 EXPECT(m.hasBaseReg() == false);
206 EXPECT(m.hasIndex() == false);
207 EXPECT(m.hasIndexReg() == false);
208 EXPECT(m.hasOffset() == true);
209 EXPECT(m.isOffset64Bit() == true);
210 EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
211 EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
212 m.addOffset(1);
213 EXPECT(m.offset() == int64_t(0x0123456789ABCDF0u));
214
215 m = ptr(0x0123456789ABCDEFu, rdi, 4);
216 EXPECT(m.hasBase() == false);
217 EXPECT(m.hasBaseReg() == false);
218 EXPECT(m.hasIndex() == true);
219 EXPECT(m.hasIndexReg() == true);
220 EXPECT(m.indexType() == rdi.type());
221 EXPECT(m.indexId() == rdi.id());
222 EXPECT(m.hasOffset() == true);
223 EXPECT(m.isOffset64Bit() == true);
224 EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
225 EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
226 m.resetIndex();
227 EXPECT(m.hasIndex() == false);
228 EXPECT(m.hasIndexReg() == false);
229
230 m = ptr(rax);
231 EXPECT(m.hasBase() == true);
232 EXPECT(m.hasBaseReg() == true);
233 EXPECT(m.baseType() == rax.type());
234 EXPECT(m.baseId() == rax.id());
235 EXPECT(m.hasIndex() == false);
236 EXPECT(m.hasIndexReg() == false);
237 EXPECT(m.indexType() == 0);
238 EXPECT(m.indexId() == 0);
239 EXPECT(m.hasOffset() == false);
240 EXPECT(m.isOffset64Bit() == false);
241 EXPECT(m.offset() == 0);
242 EXPECT(m.offsetLo32() == 0);
243 m.setIndex(rsi);
244 EXPECT(m.hasIndex() == true);
245 EXPECT(m.hasIndexReg() == true);
246 EXPECT(m.indexType() == rsi.type());
247 EXPECT(m.indexId() == rsi.id());
248 }
249 #endif
250
251 ASMJIT_END_SUB_NAMESPACE
252
253 #endif // ASMJIT_BUILD_X86
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_OPERAND_H
7 #define _ASMJIT_X86_OPERAND_H
8
9 #include "../core/arch.h"
10 #include "../core/operand.h"
11 #include "../core/type.h"
12 #include "../x86/x86globals.h"
13
14 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
15
16 // ============================================================================
17 // [Forward Declarations]
18 // ============================================================================
19
20 class Reg;
21 class Mem;
22
23 class Gp;
24 class Gpb;
25 class GpbLo;
26 class GpbHi;
27 class Gpw;
28 class Gpd;
29 class Gpq;
30 class Vec;
31 class Xmm;
32 class Ymm;
33 class Zmm;
34 class Mm;
35 class KReg;
36 class SReg;
37 class CReg;
38 class DReg;
39 class St;
40 class Bnd;
41 class Rip;
42
43 //! \addtogroup asmjit_x86
44 //! \{
45
46 // ============================================================================
47 // [asmjit::x86::RegTraits]
48 // ============================================================================
49
//! Register traits (X86).
//!
//! Register traits contains information about a particular register type. It's
//! used by asmjit to setup register information on-the-fly and to populate
//! tables that contain register information (this way it's possible to change
//! register types and groups without having to reorder these tables).
//!
//! Each specialization below provides the register's type, group, size in
//! bytes (Sz), register count (Cnt), and its default `Type` id.
template<uint32_t REG_TYPE>
struct RegTraits : public BaseRegTraits {};

//! \cond
// <--------------------+-----+-------------------------+------------------------+---+---+----------------+
// | Reg | Reg-Type | Reg-Group |Sz |Cnt| TypeId |
// <--------------------+-----+-------------------------+------------------------+---+---+----------------+
ASMJIT_DEFINE_REG_TRAITS(GpbLo, BaseReg::kTypeGp8Lo , BaseReg::kGroupGp , 1 , 16, Type::kIdI8 );
ASMJIT_DEFINE_REG_TRAITS(GpbHi, BaseReg::kTypeGp8Hi , BaseReg::kGroupGp , 1 , 4 , Type::kIdI8 );
ASMJIT_DEFINE_REG_TRAITS(Gpw , BaseReg::kTypeGp16 , BaseReg::kGroupGp , 2 , 16, Type::kIdI16 );
ASMJIT_DEFINE_REG_TRAITS(Gpd , BaseReg::kTypeGp32 , BaseReg::kGroupGp , 4 , 16, Type::kIdI32 );
ASMJIT_DEFINE_REG_TRAITS(Gpq , BaseReg::kTypeGp64 , BaseReg::kGroupGp , 8 , 16, Type::kIdI64 );
ASMJIT_DEFINE_REG_TRAITS(Xmm , BaseReg::kTypeVec128 , BaseReg::kGroupVec , 16, 32, Type::kIdI32x4 );
ASMJIT_DEFINE_REG_TRAITS(Ymm , BaseReg::kTypeVec256 , BaseReg::kGroupVec , 32, 32, Type::kIdI32x8 );
ASMJIT_DEFINE_REG_TRAITS(Zmm , BaseReg::kTypeVec512 , BaseReg::kGroupVec , 64, 32, Type::kIdI32x16);
ASMJIT_DEFINE_REG_TRAITS(Mm , BaseReg::kTypeOther0 , BaseReg::kGroupOther0 , 8 , 8 , Type::kIdMmx64 );
ASMJIT_DEFINE_REG_TRAITS(KReg , BaseReg::kTypeOther1 , BaseReg::kGroupOther1 , 0 , 8 , Type::kIdVoid );
ASMJIT_DEFINE_REG_TRAITS(SReg , BaseReg::kTypeCustom + 0, BaseReg::kGroupVirt + 0, 2 , 7 , Type::kIdVoid );
ASMJIT_DEFINE_REG_TRAITS(CReg , BaseReg::kTypeCustom + 1, BaseReg::kGroupVirt + 1, 0 , 16, Type::kIdVoid );
ASMJIT_DEFINE_REG_TRAITS(DReg , BaseReg::kTypeCustom + 2, BaseReg::kGroupVirt + 2, 0 , 16, Type::kIdVoid );
ASMJIT_DEFINE_REG_TRAITS(St , BaseReg::kTypeCustom + 3, BaseReg::kGroupVirt + 3, 10, 8 , Type::kIdF80 );
ASMJIT_DEFINE_REG_TRAITS(Bnd , BaseReg::kTypeCustom + 4, BaseReg::kGroupVirt + 4, 16, 4 , Type::kIdVoid );
ASMJIT_DEFINE_REG_TRAITS(Rip , BaseReg::kTypeIP , BaseReg::kGroupVirt + 5, 0 , 1 , Type::kIdVoid );
//! \endcond
80
81 // ============================================================================
82 // [asmjit::x86::Reg]
83 // ============================================================================
84
//! Register (X86).
class Reg : public BaseReg {
public:
  ASMJIT_DEFINE_ABSTRACT_REG(Reg, BaseReg)

  //! Register type.
  enum RegType : uint32_t {
    kTypeNone = BaseReg::kTypeNone, //!< No register type or invalid register.
    kTypeGpbLo = BaseReg::kTypeGp8Lo, //!< Low GPB register (AL, BL, CL, DL, ...).
    kTypeGpbHi = BaseReg::kTypeGp8Hi, //!< High GPB register (AH, BH, CH, DH only).
    kTypeGpw = BaseReg::kTypeGp16, //!< GPW register.
    kTypeGpd = BaseReg::kTypeGp32, //!< GPD register.
    kTypeGpq = BaseReg::kTypeGp64, //!< GPQ register (64-bit).
    kTypeXmm = BaseReg::kTypeVec128, //!< XMM register (SSE+).
    kTypeYmm = BaseReg::kTypeVec256, //!< YMM register (AVX+).
    kTypeZmm = BaseReg::kTypeVec512, //!< ZMM register (AVX512+).
    kTypeMm = BaseReg::kTypeOther0, //!< MMX register.
    kTypeKReg = BaseReg::kTypeOther1, //!< K register (AVX512+).
    kTypeSReg = BaseReg::kTypeCustom+0, //!< Segment register (None, ES, CS, SS, DS, FS, GS).
    kTypeCReg = BaseReg::kTypeCustom+1, //!< Control register (CR).
    kTypeDReg = BaseReg::kTypeCustom+2, //!< Debug register (DR).
    kTypeSt = BaseReg::kTypeCustom+3, //!< FPU (x87) register.
    kTypeBnd = BaseReg::kTypeCustom+4, //!< Bound register (BND).
    kTypeRip = BaseReg::kTypeIP, //!< Instruction pointer (EIP, RIP).
    kTypeCount = BaseReg::kTypeCustom+5 //!< Count of register types.
  };

  //! Register group.
  enum RegGroup : uint32_t {
    kGroupGp = BaseReg::kGroupGp, //!< GP register group or none (universal).
    kGroupVec = BaseReg::kGroupVec, //!< XMM|YMM|ZMM register group (universal).
    kGroupMm = BaseReg::kGroupOther0, //!< MMX register group (legacy).
    kGroupKReg = BaseReg::kGroupOther1, //!< K register group.

    // These are not managed by BaseCompiler nor used by Func-API:
    kGroupSReg = BaseReg::kGroupVirt+0, //!< Segment register group.
    kGroupCReg = BaseReg::kGroupVirt+1, //!< Control register group.
    kGroupDReg = BaseReg::kGroupVirt+2, //!< Debug register group.
    kGroupSt = BaseReg::kGroupVirt+3, //!< FPU register group.
    kGroupBnd = BaseReg::kGroupVirt+4, //!< Bound register group.
    kGroupRip = BaseReg::kGroupVirt+5, //!< Instruction pointer (IP).
    kGroupCount //!< Count of all register groups.
  };

  //! Tests whether the register is a GPB register (8-bit).
  constexpr bool isGpb() const noexcept { return size() == 1; }
  //! Tests whether the register is a low GPB register (8-bit).
  constexpr bool isGpbLo() const noexcept { return hasSignature(RegTraits<kTypeGpbLo>::kSignature); }
  //! Tests whether the register is a high GPB register (8-bit).
  constexpr bool isGpbHi() const noexcept { return hasSignature(RegTraits<kTypeGpbHi>::kSignature); }
  //! Tests whether the register is a GPW register (16-bit).
  constexpr bool isGpw() const noexcept { return hasSignature(RegTraits<kTypeGpw>::kSignature); }
  //! Tests whether the register is a GPD register (32-bit).
  constexpr bool isGpd() const noexcept { return hasSignature(RegTraits<kTypeGpd>::kSignature); }
  //! Tests whether the register is a GPQ register (64-bit).
  constexpr bool isGpq() const noexcept { return hasSignature(RegTraits<kTypeGpq>::kSignature); }
  //! Tests whether the register is an XMM register (128-bit).
  constexpr bool isXmm() const noexcept { return hasSignature(RegTraits<kTypeXmm>::kSignature); }
  //! Tests whether the register is a YMM register (256-bit).
  constexpr bool isYmm() const noexcept { return hasSignature(RegTraits<kTypeYmm>::kSignature); }
  //! Tests whether the register is a ZMM register (512-bit).
  constexpr bool isZmm() const noexcept { return hasSignature(RegTraits<kTypeZmm>::kSignature); }
  //! Tests whether the register is an MMX register (64-bit).
  constexpr bool isMm() const noexcept { return hasSignature(RegTraits<kTypeMm>::kSignature); }
  //! Tests whether the register is a K register (64-bit).
  constexpr bool isKReg() const noexcept { return hasSignature(RegTraits<kTypeKReg>::kSignature); }
  //! Tests whether the register is a segment register.
  constexpr bool isSReg() const noexcept { return hasSignature(RegTraits<kTypeSReg>::kSignature); }
  //! Tests whether the register is a control register.
  constexpr bool isCReg() const noexcept { return hasSignature(RegTraits<kTypeCReg>::kSignature); }
  //! Tests whether the register is a debug register.
  constexpr bool isDReg() const noexcept { return hasSignature(RegTraits<kTypeDReg>::kSignature); }
  //! Tests whether the register is an FPU register (80-bit).
  constexpr bool isSt() const noexcept { return hasSignature(RegTraits<kTypeSt>::kSignature); }
  //! Tests whether the register is a bound register.
  constexpr bool isBnd() const noexcept { return hasSignature(RegTraits<kTypeBnd>::kSignature); }
  //! Tests whether the register is RIP.
  constexpr bool isRip() const noexcept { return hasSignature(RegTraits<kTypeRip>::kSignature); }

  //! Sets this register's signature to that of `REG_TYPE` and its id to `rId`.
  template<uint32_t REG_TYPE>
  inline void setRegT(uint32_t rId) noexcept {
    setSignature(RegTraits<REG_TYPE>::kSignature);
    setId(rId);
  }

  //! Sets this register's type to `rType` (must be a valid `RegType`) and its id to `rId`.
  inline void setTypeAndId(uint32_t rType, uint32_t rId) noexcept {
    ASMJIT_ASSERT(rType < kTypeCount);
    setSignature(signatureOf(rType));
    setId(rId);
  }

  //! Returns the register group of the given register type `rType` (defined below, requires `opData`).
  static inline uint32_t groupOf(uint32_t rType) noexcept;
  //! Compile-time variant of `groupOf()`.
  template<uint32_t REG_TYPE>
  static inline uint32_t groupOfT() noexcept { return RegTraits<REG_TYPE>::kGroup; }

  //! Returns the default type-id of the given register type `rType` (defined below, requires `opData`).
  static inline uint32_t typeIdOf(uint32_t rType) noexcept;
  //! Compile-time variant of `typeIdOf()`.
  template<uint32_t REG_TYPE>
  static inline uint32_t typeIdOfT() noexcept { return RegTraits<REG_TYPE>::kTypeId; }

  //! Returns the operand signature of the given register type `rType` (defined below, requires `opData`).
  static inline uint32_t signatureOf(uint32_t rType) noexcept;
  //! Compile-time variant of `signatureOf()`.
  template<uint32_t REG_TYPE>
  static inline uint32_t signatureOfT() noexcept { return RegTraits<REG_TYPE>::kSignature; }

  //! Returns an XMM, YMM, or ZMM signature that fits a vector of the given `typeId`.
  static inline uint32_t signatureOfVecByType(uint32_t typeId) noexcept {
    return typeId <= Type::_kIdVec128End ? RegTraits<kTypeXmm>::kSignature :
           typeId <= Type::_kIdVec256End ? RegTraits<kTypeYmm>::kSignature : RegTraits<kTypeZmm>::kSignature;
  }

  //! Returns an XMM (<= 16 bytes), YMM (<= 32 bytes), or ZMM signature that fits `size` bytes.
  static inline uint32_t signatureOfVecBySize(uint32_t size) noexcept {
    return size <= 16 ? RegTraits<kTypeXmm>::kSignature :
           size <= 32 ? RegTraits<kTypeYmm>::kSignature : RegTraits<kTypeZmm>::kSignature;
  }

  //! Tests whether the `op` operand is either a low or high 8-bit GPB register.
  static inline bool isGpb(const Operand_& op) noexcept {
    // Check operand type, register group, and size. Not interested in register type.
    const uint32_t kSgn = (Operand::kOpReg << kSignatureOpShift ) |
                          (1 << kSignatureSizeShift) ;
    return (op.signature() & (kSignatureOpMask | kSignatureSizeMask)) == kSgn;
  }

  // Each of these tests whether `op` is a register of the respective type.
  static inline bool isGpbLo(const Operand_& op) noexcept { return op.as<Reg>().isGpbLo(); }
  static inline bool isGpbHi(const Operand_& op) noexcept { return op.as<Reg>().isGpbHi(); }
  static inline bool isGpw(const Operand_& op) noexcept { return op.as<Reg>().isGpw(); }
  static inline bool isGpd(const Operand_& op) noexcept { return op.as<Reg>().isGpd(); }
  static inline bool isGpq(const Operand_& op) noexcept { return op.as<Reg>().isGpq(); }
  static inline bool isXmm(const Operand_& op) noexcept { return op.as<Reg>().isXmm(); }
  static inline bool isYmm(const Operand_& op) noexcept { return op.as<Reg>().isYmm(); }
  static inline bool isZmm(const Operand_& op) noexcept { return op.as<Reg>().isZmm(); }
  static inline bool isMm(const Operand_& op) noexcept { return op.as<Reg>().isMm(); }
  static inline bool isKReg(const Operand_& op) noexcept { return op.as<Reg>().isKReg(); }
  static inline bool isSReg(const Operand_& op) noexcept { return op.as<Reg>().isSReg(); }
  static inline bool isCReg(const Operand_& op) noexcept { return op.as<Reg>().isCReg(); }
  static inline bool isDReg(const Operand_& op) noexcept { return op.as<Reg>().isDReg(); }
  static inline bool isSt(const Operand_& op) noexcept { return op.as<Reg>().isSt(); }
  static inline bool isBnd(const Operand_& op) noexcept { return op.as<Reg>().isBnd(); }
  static inline bool isRip(const Operand_& op) noexcept { return op.as<Reg>().isRip(); }

  // Each of these tests whether `op` is a register of the respective type AND
  // has the id `rId`.
  // NOTE(review): non-short-circuiting `&` (instead of `&&`) appears to be used
  // on purpose - both operands are cheap, side-effect free bools, which keeps
  // the combined check branch-free. Confirm before "fixing" to `&&`.
  static inline bool isGpb(const Operand_& op, uint32_t rId) noexcept { return isGpb(op) & (op.id() == rId); }
  static inline bool isGpbLo(const Operand_& op, uint32_t rId) noexcept { return isGpbLo(op) & (op.id() == rId); }
  static inline bool isGpbHi(const Operand_& op, uint32_t rId) noexcept { return isGpbHi(op) & (op.id() == rId); }
  static inline bool isGpw(const Operand_& op, uint32_t rId) noexcept { return isGpw(op) & (op.id() == rId); }
  static inline bool isGpd(const Operand_& op, uint32_t rId) noexcept { return isGpd(op) & (op.id() == rId); }
  static inline bool isGpq(const Operand_& op, uint32_t rId) noexcept { return isGpq(op) & (op.id() == rId); }
  static inline bool isXmm(const Operand_& op, uint32_t rId) noexcept { return isXmm(op) & (op.id() == rId); }
  static inline bool isYmm(const Operand_& op, uint32_t rId) noexcept { return isYmm(op) & (op.id() == rId); }
  static inline bool isZmm(const Operand_& op, uint32_t rId) noexcept { return isZmm(op) & (op.id() == rId); }
  static inline bool isMm(const Operand_& op, uint32_t rId) noexcept { return isMm(op) & (op.id() == rId); }
  static inline bool isKReg(const Operand_& op, uint32_t rId) noexcept { return isKReg(op) & (op.id() == rId); }
  static inline bool isSReg(const Operand_& op, uint32_t rId) noexcept { return isSReg(op) & (op.id() == rId); }
  static inline bool isCReg(const Operand_& op, uint32_t rId) noexcept { return isCReg(op) & (op.id() == rId); }
  static inline bool isDReg(const Operand_& op, uint32_t rId) noexcept { return isDReg(op) & (op.id() == rId); }
  static inline bool isSt(const Operand_& op, uint32_t rId) noexcept { return isSt(op) & (op.id() == rId); }
  static inline bool isBnd(const Operand_& op, uint32_t rId) noexcept { return isBnd(op) & (op.id() == rId); }
  static inline bool isRip(const Operand_& op, uint32_t rId) noexcept { return isRip(op) & (op.id() == rId); }
};
241
//! General purpose register (X86).
class Gp : public Reg {
public:
  ASMJIT_DEFINE_ABSTRACT_REG(Gp, Reg)

  //! Physical id (X86).
  //!
  //! \note Register indexes have been reduced to only support general purpose
  //! registers. There is no need to have enumerations with number suffix that
  //! expands to the exactly same value as the suffix value itself.
  enum Id : uint32_t {
    kIdAx = 0, //!< Physical id of AL|AH|AX|EAX|RAX registers.
    kIdCx = 1, //!< Physical id of CL|CH|CX|ECX|RCX registers.
    kIdDx = 2, //!< Physical id of DL|DH|DX|EDX|RDX registers.
    kIdBx = 3, //!< Physical id of BL|BH|BX|EBX|RBX registers.
    kIdSp = 4, //!< Physical id of SPL|SP|ESP|RSP registers.
    kIdBp = 5, //!< Physical id of BPL|BP|EBP|RBP registers.
    kIdSi = 6, //!< Physical id of SIL|SI|ESI|RSI registers.
    kIdDi = 7, //!< Physical id of DIL|DI|EDI|RDI registers.
    kIdR8 = 8, //!< Physical id of R8B|R8W|R8D|R8 registers (64-bit only).
    kIdR9 = 9, //!< Physical id of R9B|R9W|R9D|R9 registers (64-bit only).
    kIdR10 = 10, //!< Physical id of R10B|R10W|R10D|R10 registers (64-bit only).
    kIdR11 = 11, //!< Physical id of R11B|R11W|R11D|R11 registers (64-bit only).
    kIdR12 = 12, //!< Physical id of R12B|R12W|R12D|R12 registers (64-bit only).
    kIdR13 = 13, //!< Physical id of R13B|R13W|R13D|R13 registers (64-bit only).
    kIdR14 = 14, //!< Physical id of R14B|R14W|R14D|R14 registers (64-bit only).
    kIdR15 = 15 //!< Physical id of R15B|R15W|R15D|R15 registers (64-bit only).
  };

  // All casts below keep the register id and only change the register type;
  // they are defined out-of-line (after GpbLo..Gpq are complete).

  //! Casts this register to 8-bit (LO) part (alias of `r8Lo()`).
  inline GpbLo r8() const noexcept;
  //! Casts this register to 8-bit (LO) part.
  inline GpbLo r8Lo() const noexcept;
  //! Casts this register to 8-bit (HI) part.
  inline GpbHi r8Hi() const noexcept;
  //! Casts this register to 16-bit.
  inline Gpw r16() const noexcept;
  //! Casts this register to 32-bit.
  inline Gpd r32() const noexcept;
  //! Casts this register to 64-bit.
  inline Gpq r64() const noexcept;
};
284
285 //! Vector register (XMM|YMM|ZMM) (X86).
286 class Vec : public Reg {
287 ASMJIT_DEFINE_ABSTRACT_REG(Vec, Reg)
288
289 //! Casts this register to XMM (clone).
290 inline Xmm xmm() const noexcept;
291 //! Casts this register to YMM.
292 inline Ymm ymm() const noexcept;
293 //! Casts this register to ZMM.
294 inline Zmm zmm() const noexcept;
295
296 //! Casts this register to a register that has half the size (or XMM if it's already XMM).
297 inline Vec half() const noexcept {
298 return Vec(type() == kTypeZmm ? signatureOf(kTypeYmm) : signatureOf(kTypeXmm), id());
299 }
300 };
301
//! Segment register (X86).
class SReg : public Reg {
  ASMJIT_DEFINE_FINAL_REG(SReg, Reg, RegTraits<kTypeSReg>)

  //! X86 segment id.
  enum Id : uint32_t {
    kIdNone = 0, //!< No segment (default).
    kIdEs = 1, //!< ES segment.
    kIdCs = 2, //!< CS segment.
    kIdSs = 3, //!< SS segment.
    kIdDs = 4, //!< DS segment.
    kIdFs = 5, //!< FS segment.
    kIdGs = 6, //!< GS segment.

    //! Count of segment registers supported by AsmJit.
    //!
    //! \note X86 architecture has 6 segment registers - ES, CS, SS, DS, FS, GS.
    //! X64 architecture lowers them down to just FS and GS. AsmJit supports 7
    //! segment registers - all addressable in both X86 and X64 modes and one
    //! extra called `SReg::kIdNone`, which is AsmJit specific and means that
    //! there is no segment register specified.
    kIdCount = 7
  };
};
326
//! GPB low or high register (X86) - common 8-bit base (see `Reg::isGpb()`).
class Gpb : public Gp { ASMJIT_DEFINE_ABSTRACT_REG(Gpb, Gp) };
//! GPB low register (X86) - AL, BL, CL, DL, ... (see `Reg::kTypeGpbLo`).
class GpbLo : public Gpb { ASMJIT_DEFINE_FINAL_REG(GpbLo, Gpb, RegTraits<kTypeGpbLo>) };
//! GPB high register (X86) - AH, BH, CH, DH only (see `Reg::kTypeGpbHi`).
class GpbHi : public Gpb { ASMJIT_DEFINE_FINAL_REG(GpbHi, Gpb, RegTraits<kTypeGpbHi>) };
//! GPW register (X86) - 16-bit.
class Gpw : public Gp { ASMJIT_DEFINE_FINAL_REG(Gpw, Gp, RegTraits<kTypeGpw>) };
//! GPD register (X86) - 32-bit.
class Gpd : public Gp { ASMJIT_DEFINE_FINAL_REG(Gpd, Gp, RegTraits<kTypeGpd>) };
//! GPQ register (X86_64) - 64-bit.
class Gpq : public Gp { ASMJIT_DEFINE_FINAL_REG(Gpq, Gp, RegTraits<kTypeGpq>) };
339
//! 128-bit XMM register (SSE+).
class Xmm : public Vec {
  ASMJIT_DEFINE_FINAL_REG(Xmm, Vec, RegTraits<kTypeXmm>)
  //! Casts this register to a register that has half the size (XMM, as there
  //! is no smaller vector register - see `Vec::half()`).
  inline Xmm half() const noexcept { return Xmm(id()); }
};
346
//! 256-bit YMM register (AVX+).
class Ymm : public Vec {
  ASMJIT_DEFINE_FINAL_REG(Ymm, Vec, RegTraits<kTypeYmm>)
  //! Casts this register to a register that has half the size (XMM), keeping the id.
  inline Xmm half() const noexcept { return Xmm(id()); }
};
353
//! 512-bit ZMM register (AVX512+).
class Zmm : public Vec {
  ASMJIT_DEFINE_FINAL_REG(Zmm, Vec, RegTraits<kTypeZmm>)
  //! Casts this register to a register that has half the size (YMM), keeping the id.
  inline Ymm half() const noexcept { return Ymm(id()); }
};
360
//! 64-bit MMX register (MMX+).
class Mm : public Reg { ASMJIT_DEFINE_FINAL_REG(Mm, Reg, RegTraits<kTypeMm>) };
//! 64-bit K register (AVX512+).
//!
//! \note `size()` of a K register reports 0 (see `RegTraits<kTypeKReg>`).
class KReg : public Reg { ASMJIT_DEFINE_FINAL_REG(KReg, Reg, RegTraits<kTypeKReg>) };
//! 32-bit or 64-bit control register (X86); `size()` reports 0.
class CReg : public Reg { ASMJIT_DEFINE_FINAL_REG(CReg, Reg, RegTraits<kTypeCReg>) };
//! 32-bit or 64-bit debug register (X86); `size()` reports 0.
class DReg : public Reg { ASMJIT_DEFINE_FINAL_REG(DReg, Reg, RegTraits<kTypeDReg>) };
//! 80-bit FPU register (X86).
class St : public Reg { ASMJIT_DEFINE_FINAL_REG(St, Reg, RegTraits<kTypeSt>) };
//! 128-bit BND register (BND+).
class Bnd : public Reg { ASMJIT_DEFINE_FINAL_REG(Bnd, Reg, RegTraits<kTypeBnd>) };
//! RIP register (X86).
class Rip : public Reg { ASMJIT_DEFINE_FINAL_REG(Rip, Reg, RegTraits<kTypeRip>) };
375
//! \cond
// Out-of-line definitions of the cast helpers declared in `Gp` and `Vec`.
// They are defined here because they construct `GpbLo`, `Gpw`, `Xmm`, etc.,
// which are only complete at this point. Each one keeps the register id.
inline GpbLo Gp::r8() const noexcept { return GpbLo(id()); }
inline GpbLo Gp::r8Lo() const noexcept { return GpbLo(id()); }
inline GpbHi Gp::r8Hi() const noexcept { return GpbHi(id()); }
inline Gpw Gp::r16() const noexcept { return Gpw(id()); }
inline Gpd Gp::r32() const noexcept { return Gpd(id()); }
inline Gpq Gp::r64() const noexcept { return Gpq(id()); }
inline Xmm Vec::xmm() const noexcept { return Xmm(id()); }
inline Ymm Vec::ymm() const noexcept { return Ymm(id()); }
inline Zmm Vec::zmm() const noexcept { return Zmm(id()); }
//! \endcond
387
388 // ============================================================================
389 // [asmjit::x86::Mem]
390 // ============================================================================
391
392 //! Memory operand.
393 class Mem : public BaseMem {
394 public:
395 //! Additional bits of operand's signature used by `Mem`.
396 enum AdditionalBits : uint32_t {
397 kSignatureMemSegmentShift = 16,
398 kSignatureMemSegmentMask = 0x07u << kSignatureMemSegmentShift,
399
400 kSignatureMemShiftShift = 19,
401 kSignatureMemShiftMask = 0x03u << kSignatureMemShiftShift,
402
403 kSignatureMemBroadcastShift = 21,
404 kSignatureMemBroadcastMask = 0x7u << kSignatureMemBroadcastShift
405 };
406
407 enum Broadcast : uint32_t {
408 kBroadcast1To1 = 0,
409 kBroadcast1To2 = 1,
410 kBroadcast1To4 = 2,
411 kBroadcast1To8 = 3,
412 kBroadcast1To16 = 4,
413 kBroadcast1To32 = 5,
414 kBroadcast1To64 = 6
415 };
416
417 // --------------------------------------------------------------------------
418 // [Construction / Destruction]
419 // --------------------------------------------------------------------------
420
421 //! Creates a default `Mem` operand that points to [0].
422 constexpr Mem() noexcept
423 : BaseMem() {}
424
425 constexpr Mem(const Mem& other) noexcept
426 : BaseMem(other) {}
427
428 //! \cond INTERNAL
429 //!
430 //! A constructor used internally to create `Mem` operand from `Decomposed` data.
431 constexpr explicit Mem(const Decomposed& d) noexcept
432 : BaseMem(d) {}
433 //! \endcond
434
435 constexpr Mem(const Label& base, int32_t off, uint32_t size = 0, uint32_t flags = 0) noexcept
436 : BaseMem(Decomposed { Label::kLabelTag, base.id(), 0, 0, off, size, flags }) {}
437
438 constexpr Mem(const Label& base, const BaseReg& index, uint32_t shift, int32_t off, uint32_t size = 0, uint32_t flags = 0) noexcept
439 : BaseMem(Decomposed { Label::kLabelTag, base.id(), index.type(), index.id(), off, size, flags | (shift << kSignatureMemShiftShift) }) {}
440
441 constexpr Mem(const BaseReg& base, int32_t off, uint32_t size = 0, uint32_t flags = 0) noexcept
442 : BaseMem(Decomposed { base.type(), base.id(), 0, 0, off, size, flags }) {}
443
444 constexpr Mem(const BaseReg& base, const BaseReg& index, uint32_t shift, int32_t off, uint32_t size = 0, uint32_t flags = 0) noexcept
445 : BaseMem(Decomposed { base.type(), base.id(), index.type(), index.id(), off, size, flags | (shift << kSignatureMemShiftShift) }) {}
446
447 constexpr explicit Mem(uint64_t base, uint32_t size = 0, uint32_t flags = 0) noexcept
448 : BaseMem(Decomposed { 0, uint32_t(base >> 32), 0, 0, int32_t(uint32_t(base & 0xFFFFFFFFu)), size, flags }) {}
449
450 constexpr Mem(uint64_t base, const BaseReg& index, uint32_t shift = 0, uint32_t size = 0, uint32_t flags = 0) noexcept
451 : BaseMem(Decomposed { 0, uint32_t(base >> 32), index.type(), index.id(), int32_t(uint32_t(base & 0xFFFFFFFFu)), size, flags | (shift << kSignatureMemShiftShift) }) {}
452
453 constexpr Mem(Globals::Init_, uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3) noexcept
454 : BaseMem(Globals::Init, u0, u1, u2, u3) {}
455
456 inline explicit Mem(Globals::NoInit_) noexcept
457 : BaseMem(Globals::NoInit) {}
458
459 //! Clones the memory operand.
460 constexpr Mem clone() const noexcept { return Mem(*this); }
461
462 //! Creates a new copy of this memory operand adjusted by `off`.
463 inline Mem cloneAdjusted(int64_t off) const noexcept {
464 Mem result(*this);
465 result.addOffset(off);
466 return result;
467 }
468
469 //! Converts memory `baseType` and `baseId` to `x86::Reg` instance.
470 //!
471 //! The memory must have a valid base register otherwise the result will be wrong.
472 inline Reg baseReg() const noexcept { return Reg::fromTypeAndId(baseType(), baseId()); }
473
474 //! Converts memory `indexType` and `indexId` to `x86::Reg` instance.
475 //!
476 //! The memory must have a valid index register otherwise the result will be wrong.
477 inline Reg indexReg() const noexcept { return Reg::fromTypeAndId(indexType(), indexId()); }
478
479 constexpr Mem _1to1() const noexcept { return Mem(Globals::Init, (_signature & ~kSignatureMemBroadcastMask) | (kBroadcast1To1 << kSignatureMemBroadcastShift), _baseId, _data[0], _data[1]); }
480 constexpr Mem _1to2() const noexcept { return Mem(Globals::Init, (_signature & ~kSignatureMemBroadcastMask) | (kBroadcast1To2 << kSignatureMemBroadcastShift), _baseId, _data[0], _data[1]); }
481 constexpr Mem _1to4() const noexcept { return Mem(Globals::Init, (_signature & ~kSignatureMemBroadcastMask) | (kBroadcast1To4 << kSignatureMemBroadcastShift), _baseId, _data[0], _data[1]); }
482 constexpr Mem _1to8() const noexcept { return Mem(Globals::Init, (_signature & ~kSignatureMemBroadcastMask) | (kBroadcast1To8 << kSignatureMemBroadcastShift), _baseId, _data[0], _data[1]); }
483 constexpr Mem _1to16() const noexcept { return Mem(Globals::Init, (_signature & ~kSignatureMemBroadcastMask) | (kBroadcast1To16 << kSignatureMemBroadcastShift), _baseId, _data[0], _data[1]); }
484 constexpr Mem _1to32() const noexcept { return Mem(Globals::Init, (_signature & ~kSignatureMemBroadcastMask) | (kBroadcast1To32 << kSignatureMemBroadcastShift), _baseId, _data[0], _data[1]); }
485 constexpr Mem _1to64() const noexcept { return Mem(Globals::Init, (_signature & ~kSignatureMemBroadcastMask) | (kBroadcast1To64 << kSignatureMemBroadcastShift), _baseId, _data[0], _data[1]); }
486
487 // --------------------------------------------------------------------------
488 // [Mem]
489 // --------------------------------------------------------------------------
490
491 using BaseMem::setIndex;
492
493 inline void setIndex(const BaseReg& index, uint32_t shift) noexcept {
494 setIndex(index);
495 setShift(shift);
496 }
497
498 //! Tests whether the memory operand has a segment override.
499 constexpr bool hasSegment() const noexcept { return _hasSignaturePart<kSignatureMemSegmentMask>(); }
500 //! Returns the associated segment override as `SReg` operand.
501 constexpr SReg segment() const noexcept { return SReg(segmentId()); }
502 //! Returns segment override register id, see `SReg::Id`.
503 constexpr uint32_t segmentId() const noexcept { return _getSignaturePart<kSignatureMemSegmentMask>(); }
504
505 //! Sets the segment override to `seg`.
506 inline void setSegment(const SReg& seg) noexcept { setSegment(seg.id()); }
507 //! Sets the segment override to `id`.
508 inline void setSegment(uint32_t rId) noexcept { _setSignaturePart<kSignatureMemSegmentMask>(rId); }
509 //! Resets the segment override.
510 inline void resetSegment() noexcept { _setSignaturePart<kSignatureMemSegmentMask>(0); }
511
512 //! Tests whether the memory operand has shift (aka scale) value.
513 constexpr bool hasShift() const noexcept { return _hasSignaturePart<kSignatureMemShiftMask>(); }
514 //! Returns the memory operand's shift (aka scale) value.
515 constexpr uint32_t shift() const noexcept { return _getSignaturePart<kSignatureMemShiftMask>(); }
516 //! Sets the memory operand's shift (aka scale) value.
517 inline void setShift(uint32_t shift) noexcept { _setSignaturePart<kSignatureMemShiftMask>(shift); }
518 //! Resets the memory operand's shift (aka scale) value to zero.
519 inline void resetShift() noexcept { _setSignaturePart<kSignatureMemShiftMask>(0); }
520
  //! Tests whether the memory operand has broadcast {1tox}.
  constexpr bool hasBroadcast() const noexcept { return _hasSignaturePart<kSignatureMemBroadcastMask>(); }
  //! Returns the memory operand's broadcast.
  //! NOTE(review): getter uses `get` prefix unlike sibling accessors (`shift()`, `segmentId()`); kept for API compatibility.
  constexpr uint32_t getBroadcast() const noexcept { return _getSignaturePart<kSignatureMemBroadcastMask>(); }
  //! Sets the memory operand's broadcast to `bcst`.
  inline void setBroadcast(uint32_t bcst) noexcept { _setSignaturePart<kSignatureMemBroadcastMask>(bcst); }
  //! Resets the memory operand's broadcast to none (zero).
  inline void resetBroadcast() noexcept { _setSignaturePart<kSignatureMemBroadcastMask>(0); }
529
  // --------------------------------------------------------------------------
  // [Operator Overload]
  // --------------------------------------------------------------------------

  //! Default member-wise copy assignment.
  inline Mem& operator=(const Mem& other) noexcept = default;
535 };
536
// ============================================================================
// [asmjit::x86::OpData]
// ============================================================================

//! Operand-related data accessible through the exported `opData` constant.
struct OpData {
  //! Information about all architecture registers.
  ArchRegs archRegs;
};
//! Exported `OpData` instance used by inline `Reg` helpers below.
ASMJIT_VARAPI const OpData opData;
546
//! \cond
// Out-of-line `Reg` helpers - implemented here because they require `opData`.

//! Returns the register group of the given register type `rType`.
inline uint32_t Reg::groupOf(uint32_t rType) noexcept {
  ASMJIT_ASSERT(rType <= BaseReg::kTypeMax);
  return opData.archRegs.regInfo[rType].group();
}

//! Returns the default type-id associated with the given register type `rType`.
inline uint32_t Reg::typeIdOf(uint32_t rType) noexcept {
  ASMJIT_ASSERT(rType <= BaseReg::kTypeMax);
  return opData.archRegs.regTypeToTypeId[rType];
}

//! Returns the operand signature of the given register type `rType`.
inline uint32_t Reg::signatureOf(uint32_t rType) noexcept {
  ASMJIT_ASSERT(rType <= BaseReg::kTypeMax);
  return opData.archRegs.regInfo[rType].signature();
}
//! \endcond
564
// ============================================================================
// [asmjit::x86::regs]
// ============================================================================

//! Register constructors and predefined instances of all physical registers.
namespace regs {

//! Creates an 8-bit low GPB register operand.
static constexpr GpbLo gpb(uint32_t rId) noexcept { return GpbLo(rId); }
//! Creates an 8-bit low GPB register operand.
static constexpr GpbLo gpb_lo(uint32_t rId) noexcept { return GpbLo(rId); }
//! Creates an 8-bit high GPB register operand.
static constexpr GpbHi gpb_hi(uint32_t rId) noexcept { return GpbHi(rId); }
//! Creates a 16-bit GPW register operand.
static constexpr Gpw gpw(uint32_t rId) noexcept { return Gpw(rId); }
//! Creates a 32-bit GPD register operand.
static constexpr Gpd gpd(uint32_t rId) noexcept { return Gpd(rId); }
//! Creates a 64-bit GPQ register operand (64-bit).
static constexpr Gpq gpq(uint32_t rId) noexcept { return Gpq(rId); }
//! Creates a 128-bit XMM register operand.
static constexpr Xmm xmm(uint32_t rId) noexcept { return Xmm(rId); }
//! Creates a 256-bit YMM register operand.
static constexpr Ymm ymm(uint32_t rId) noexcept { return Ymm(rId); }
//! Creates a 512-bit ZMM register operand.
static constexpr Zmm zmm(uint32_t rId) noexcept { return Zmm(rId); }
//! Creates a 64-bit Mm register operand.
static constexpr Mm mm(uint32_t rId) noexcept { return Mm(rId); }
//! Creates a 64-bit K register operand.
static constexpr KReg k(uint32_t rId) noexcept { return KReg(rId); }
//! Creates a 32-bit or 64-bit control register operand.
static constexpr CReg cr(uint32_t rId) noexcept { return CReg(rId); }
//! Creates a 32-bit or 64-bit debug register operand.
static constexpr DReg dr(uint32_t rId) noexcept { return DReg(rId); }
//! Creates an 80-bit st register operand.
static constexpr St st(uint32_t rId) noexcept { return St(rId); }
//! Creates a 128-bit bound register operand.
static constexpr Bnd bnd(uint32_t rId) noexcept { return Bnd(rId); }

// Predefined 8-bit low general purpose registers.
static constexpr Gp al(GpbLo::kSignature, Gp::kIdAx);
static constexpr Gp bl(GpbLo::kSignature, Gp::kIdBx);
static constexpr Gp cl(GpbLo::kSignature, Gp::kIdCx);
static constexpr Gp dl(GpbLo::kSignature, Gp::kIdDx);
static constexpr Gp spl(GpbLo::kSignature, Gp::kIdSp);
static constexpr Gp bpl(GpbLo::kSignature, Gp::kIdBp);
static constexpr Gp sil(GpbLo::kSignature, Gp::kIdSi);
static constexpr Gp dil(GpbLo::kSignature, Gp::kIdDi);
static constexpr Gp r8b(GpbLo::kSignature, Gp::kIdR8);
static constexpr Gp r9b(GpbLo::kSignature, Gp::kIdR9);
static constexpr Gp r10b(GpbLo::kSignature, Gp::kIdR10);
static constexpr Gp r11b(GpbLo::kSignature, Gp::kIdR11);
static constexpr Gp r12b(GpbLo::kSignature, Gp::kIdR12);
static constexpr Gp r13b(GpbLo::kSignature, Gp::kIdR13);
static constexpr Gp r14b(GpbLo::kSignature, Gp::kIdR14);
static constexpr Gp r15b(GpbLo::kSignature, Gp::kIdR15);

// Predefined 8-bit high general purpose registers.
static constexpr Gp ah(GpbHi::kSignature, Gp::kIdAx);
static constexpr Gp bh(GpbHi::kSignature, Gp::kIdBx);
static constexpr Gp ch(GpbHi::kSignature, Gp::kIdCx);
static constexpr Gp dh(GpbHi::kSignature, Gp::kIdDx);

// Predefined 16-bit general purpose registers.
static constexpr Gp ax(Gpw::kSignature, Gp::kIdAx);
static constexpr Gp bx(Gpw::kSignature, Gp::kIdBx);
static constexpr Gp cx(Gpw::kSignature, Gp::kIdCx);
static constexpr Gp dx(Gpw::kSignature, Gp::kIdDx);
static constexpr Gp sp(Gpw::kSignature, Gp::kIdSp);
static constexpr Gp bp(Gpw::kSignature, Gp::kIdBp);
static constexpr Gp si(Gpw::kSignature, Gp::kIdSi);
static constexpr Gp di(Gpw::kSignature, Gp::kIdDi);
static constexpr Gp r8w(Gpw::kSignature, Gp::kIdR8);
static constexpr Gp r9w(Gpw::kSignature, Gp::kIdR9);
static constexpr Gp r10w(Gpw::kSignature, Gp::kIdR10);
static constexpr Gp r11w(Gpw::kSignature, Gp::kIdR11);
static constexpr Gp r12w(Gpw::kSignature, Gp::kIdR12);
static constexpr Gp r13w(Gpw::kSignature, Gp::kIdR13);
static constexpr Gp r14w(Gpw::kSignature, Gp::kIdR14);
static constexpr Gp r15w(Gpw::kSignature, Gp::kIdR15);

// Predefined 32-bit general purpose registers.
static constexpr Gp eax(Gpd::kSignature, Gp::kIdAx);
static constexpr Gp ebx(Gpd::kSignature, Gp::kIdBx);
static constexpr Gp ecx(Gpd::kSignature, Gp::kIdCx);
static constexpr Gp edx(Gpd::kSignature, Gp::kIdDx);
static constexpr Gp esp(Gpd::kSignature, Gp::kIdSp);
static constexpr Gp ebp(Gpd::kSignature, Gp::kIdBp);
static constexpr Gp esi(Gpd::kSignature, Gp::kIdSi);
static constexpr Gp edi(Gpd::kSignature, Gp::kIdDi);
static constexpr Gp r8d(Gpd::kSignature, Gp::kIdR8);
static constexpr Gp r9d(Gpd::kSignature, Gp::kIdR9);
static constexpr Gp r10d(Gpd::kSignature, Gp::kIdR10);
static constexpr Gp r11d(Gpd::kSignature, Gp::kIdR11);
static constexpr Gp r12d(Gpd::kSignature, Gp::kIdR12);
static constexpr Gp r13d(Gpd::kSignature, Gp::kIdR13);
static constexpr Gp r14d(Gpd::kSignature, Gp::kIdR14);
static constexpr Gp r15d(Gpd::kSignature, Gp::kIdR15);

// Predefined 64-bit general purpose registers.
static constexpr Gp rax(Gpq::kSignature, Gp::kIdAx);
static constexpr Gp rbx(Gpq::kSignature, Gp::kIdBx);
static constexpr Gp rcx(Gpq::kSignature, Gp::kIdCx);
static constexpr Gp rdx(Gpq::kSignature, Gp::kIdDx);
static constexpr Gp rsp(Gpq::kSignature, Gp::kIdSp);
static constexpr Gp rbp(Gpq::kSignature, Gp::kIdBp);
static constexpr Gp rsi(Gpq::kSignature, Gp::kIdSi);
static constexpr Gp rdi(Gpq::kSignature, Gp::kIdDi);
static constexpr Gp r8(Gpq::kSignature, Gp::kIdR8);
static constexpr Gp r9(Gpq::kSignature, Gp::kIdR9);
static constexpr Gp r10(Gpq::kSignature, Gp::kIdR10);
static constexpr Gp r11(Gpq::kSignature, Gp::kIdR11);
static constexpr Gp r12(Gpq::kSignature, Gp::kIdR12);
static constexpr Gp r13(Gpq::kSignature, Gp::kIdR13);
static constexpr Gp r14(Gpq::kSignature, Gp::kIdR14);
static constexpr Gp r15(Gpq::kSignature, Gp::kIdR15);

// Predefined 128-bit XMM registers.
static constexpr Xmm xmm0(0);
static constexpr Xmm xmm1(1);
static constexpr Xmm xmm2(2);
static constexpr Xmm xmm3(3);
static constexpr Xmm xmm4(4);
static constexpr Xmm xmm5(5);
static constexpr Xmm xmm6(6);
static constexpr Xmm xmm7(7);
static constexpr Xmm xmm8(8);
static constexpr Xmm xmm9(9);
static constexpr Xmm xmm10(10);
static constexpr Xmm xmm11(11);
static constexpr Xmm xmm12(12);
static constexpr Xmm xmm13(13);
static constexpr Xmm xmm14(14);
static constexpr Xmm xmm15(15);
static constexpr Xmm xmm16(16);
static constexpr Xmm xmm17(17);
static constexpr Xmm xmm18(18);
static constexpr Xmm xmm19(19);
static constexpr Xmm xmm20(20);
static constexpr Xmm xmm21(21);
static constexpr Xmm xmm22(22);
static constexpr Xmm xmm23(23);
static constexpr Xmm xmm24(24);
static constexpr Xmm xmm25(25);
static constexpr Xmm xmm26(26);
static constexpr Xmm xmm27(27);
static constexpr Xmm xmm28(28);
static constexpr Xmm xmm29(29);
static constexpr Xmm xmm30(30);
static constexpr Xmm xmm31(31);

// Predefined 256-bit YMM registers.
static constexpr Ymm ymm0(0);
static constexpr Ymm ymm1(1);
static constexpr Ymm ymm2(2);
static constexpr Ymm ymm3(3);
static constexpr Ymm ymm4(4);
static constexpr Ymm ymm5(5);
static constexpr Ymm ymm6(6);
static constexpr Ymm ymm7(7);
static constexpr Ymm ymm8(8);
static constexpr Ymm ymm9(9);
static constexpr Ymm ymm10(10);
static constexpr Ymm ymm11(11);
static constexpr Ymm ymm12(12);
static constexpr Ymm ymm13(13);
static constexpr Ymm ymm14(14);
static constexpr Ymm ymm15(15);
static constexpr Ymm ymm16(16);
static constexpr Ymm ymm17(17);
static constexpr Ymm ymm18(18);
static constexpr Ymm ymm19(19);
static constexpr Ymm ymm20(20);
static constexpr Ymm ymm21(21);
static constexpr Ymm ymm22(22);
static constexpr Ymm ymm23(23);
static constexpr Ymm ymm24(24);
static constexpr Ymm ymm25(25);
static constexpr Ymm ymm26(26);
static constexpr Ymm ymm27(27);
static constexpr Ymm ymm28(28);
static constexpr Ymm ymm29(29);
static constexpr Ymm ymm30(30);
static constexpr Ymm ymm31(31);

// Predefined 512-bit ZMM registers.
static constexpr Zmm zmm0(0);
static constexpr Zmm zmm1(1);
static constexpr Zmm zmm2(2);
static constexpr Zmm zmm3(3);
static constexpr Zmm zmm4(4);
static constexpr Zmm zmm5(5);
static constexpr Zmm zmm6(6);
static constexpr Zmm zmm7(7);
static constexpr Zmm zmm8(8);
static constexpr Zmm zmm9(9);
static constexpr Zmm zmm10(10);
static constexpr Zmm zmm11(11);
static constexpr Zmm zmm12(12);
static constexpr Zmm zmm13(13);
static constexpr Zmm zmm14(14);
static constexpr Zmm zmm15(15);
static constexpr Zmm zmm16(16);
static constexpr Zmm zmm17(17);
static constexpr Zmm zmm18(18);
static constexpr Zmm zmm19(19);
static constexpr Zmm zmm20(20);
static constexpr Zmm zmm21(21);
static constexpr Zmm zmm22(22);
static constexpr Zmm zmm23(23);
static constexpr Zmm zmm24(24);
static constexpr Zmm zmm25(25);
static constexpr Zmm zmm26(26);
static constexpr Zmm zmm27(27);
static constexpr Zmm zmm28(28);
static constexpr Zmm zmm29(29);
static constexpr Zmm zmm30(30);
static constexpr Zmm zmm31(31);

// Predefined 64-bit MMX registers.
static constexpr Mm mm0(0);
static constexpr Mm mm1(1);
static constexpr Mm mm2(2);
static constexpr Mm mm3(3);
static constexpr Mm mm4(4);
static constexpr Mm mm5(5);
static constexpr Mm mm6(6);
static constexpr Mm mm7(7);

// Predefined K (mask) registers.
static constexpr KReg k0(0);
static constexpr KReg k1(1);
static constexpr KReg k2(2);
static constexpr KReg k3(3);
static constexpr KReg k4(4);
static constexpr KReg k5(5);
static constexpr KReg k6(6);
static constexpr KReg k7(7);

// Predefined segment registers (`no_seg` means no segment override).
static constexpr SReg no_seg(SReg::kIdNone);
static constexpr SReg es(SReg::kIdEs);
static constexpr SReg cs(SReg::kIdCs);
static constexpr SReg ss(SReg::kIdSs);
static constexpr SReg ds(SReg::kIdDs);
static constexpr SReg fs(SReg::kIdFs);
static constexpr SReg gs(SReg::kIdGs);

// Predefined control registers.
static constexpr CReg cr0(0);
static constexpr CReg cr1(1);
static constexpr CReg cr2(2);
static constexpr CReg cr3(3);
static constexpr CReg cr4(4);
static constexpr CReg cr5(5);
static constexpr CReg cr6(6);
static constexpr CReg cr7(7);
static constexpr CReg cr8(8);
static constexpr CReg cr9(9);
static constexpr CReg cr10(10);
static constexpr CReg cr11(11);
static constexpr CReg cr12(12);
static constexpr CReg cr13(13);
static constexpr CReg cr14(14);
static constexpr CReg cr15(15);

// Predefined debug registers.
static constexpr DReg dr0(0);
static constexpr DReg dr1(1);
static constexpr DReg dr2(2);
static constexpr DReg dr3(3);
static constexpr DReg dr4(4);
static constexpr DReg dr5(5);
static constexpr DReg dr6(6);
static constexpr DReg dr7(7);
static constexpr DReg dr8(8);
static constexpr DReg dr9(9);
static constexpr DReg dr10(10);
static constexpr DReg dr11(11);
static constexpr DReg dr12(12);
static constexpr DReg dr13(13);
static constexpr DReg dr14(14);
static constexpr DReg dr15(15);

// Predefined FPU stack registers.
static constexpr St st0(0);
static constexpr St st1(1);
static constexpr St st2(2);
static constexpr St st3(3);
static constexpr St st4(4);
static constexpr St st5(5);
static constexpr St st6(6);
static constexpr St st7(7);

// Predefined bound registers (MPX).
static constexpr Bnd bnd0(0);
static constexpr Bnd bnd1(1);
static constexpr Bnd bnd2(2);
static constexpr Bnd bnd3(3);

// Predefined instruction-pointer register.
static constexpr Rip rip(0);

} // {regs}
851
// Make `x86::regs` accessible through `x86` namespace as well, so users can
// spell e.g. `x86::rax` instead of `x86::regs::rax`.
using namespace regs;
854
// ============================================================================
// [asmjit::x86::ptr]
// ============================================================================

// NOTE: For all `ptr` constructors `size` is the operand size in bytes and
// zero leaves it unspecified; the sized variants below (ptr_8, byte_ptr, ...)
// pass it explicitly.

//! Creates `[base.reg + offset]` memory operand.
static constexpr Mem ptr(const Gp& base, int32_t offset = 0, uint32_t size = 0) noexcept {
  return Mem(base, offset, size);
}
//! Creates `[base.reg + (index << shift) + offset]` memory operand (scalar index).
static constexpr Mem ptr(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, offset, size);
}
//! Creates `[base.reg + (index << shift) + offset]` memory operand (vector index).
static constexpr Mem ptr(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, offset, size);
}
871
//! Creates `[base + offset]` memory operand, where `base` is a label.
static constexpr Mem ptr(const Label& base, int32_t offset = 0, uint32_t size = 0) noexcept {
  return Mem(base, offset, size);
}
//! Creates `[base + (index << shift) + offset]` memory operand (label base, scalar index).
static constexpr Mem ptr(const Label& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, offset, size);
}
//! Creates `[base + (index << shift) + offset]` memory operand (label base, vector index).
static constexpr Mem ptr(const Label& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, offset, size);
}
884
//! Creates `[rip + offset]` memory operand (RIP-based addressing).
static constexpr Mem ptr(const Rip& rip_, int32_t offset = 0, uint32_t size = 0) noexcept {
  return Mem(rip_, offset, size);
}
889
//! Creates `[base]` memory operand with a 64-bit base address.
static constexpr Mem ptr(uint64_t base, uint32_t size = 0) noexcept {
  return Mem(base, size);
}
//! Creates `[base + (index.reg << shift)]` memory operand with a 64-bit base address.
static constexpr Mem ptr(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, size);
}
//! Creates `[base + (index.reg << shift)]` memory operand with a 64-bit base address (vector index).
static constexpr Mem ptr(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, size);
}
902
//! Creates `[base]` memory operand with forced absolute addressing (`kSignatureMemAbs`).
static constexpr Mem ptr_abs(uint64_t base, uint32_t size = 0) noexcept {
  return Mem(base, size, BaseMem::kSignatureMemAbs);
}
//! Creates `[base + (index.reg << shift)]` memory operand with forced absolute addressing.
static constexpr Mem ptr_abs(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, size, BaseMem::kSignatureMemAbs);
}
//! Creates `[base + (index.reg << shift)]` memory operand with forced absolute addressing (vector index).
static constexpr Mem ptr_abs(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, size, BaseMem::kSignatureMemAbs);
}
915
//! Creates `[base]` memory operand with forced relative addressing (`kSignatureMemRel`).
static constexpr Mem ptr_rel(uint64_t base, uint32_t size = 0) noexcept {
  return Mem(base, size, BaseMem::kSignatureMemRel);
}
//! Creates `[base + (index.reg << shift)]` memory operand with forced relative addressing.
static constexpr Mem ptr_rel(uint64_t base, const Reg& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, size, BaseMem::kSignatureMemRel);
}
//! Creates `[base + (index.reg << shift)]` memory operand with forced relative addressing (vector index).
static constexpr Mem ptr_rel(uint64_t base, const Vec& index, uint32_t shift = 0, uint32_t size = 0) noexcept {
  return Mem(base, index, shift, size, BaseMem::kSignatureMemRel);
}
928
// Defines a family of typed memory-operand constructors: for a given FUNC name
// and operand SIZE (in bytes) it expands to overloads mirroring the generic
// `ptr` constructors above, plus `FUNC_abs` / `FUNC_rel` variants that force
// absolute / relative addressing. The macro body must stay on continuation
// lines (trailing backslashes).
#define ASMJIT_MEM_PTR(FUNC, SIZE) \
  /*! Creates `[base + offset]` memory operand. */ \
  static constexpr Mem FUNC(const Gp& base, int32_t offset = 0) noexcept { \
    return Mem(base, offset, SIZE); \
  } \
  /*! Creates `[base + (index << shift) + offset]` memory operand. */ \
  static constexpr Mem FUNC(const Gp& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \
    return Mem(base, index, shift, offset, SIZE); \
  } \
  /*! Creates `[base + (vec_index << shift) + offset]` memory operand. */ \
  static constexpr Mem FUNC(const Gp& base, const Vec& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \
    return Mem(base, index, shift, offset, SIZE); \
  } \
  /*! Creates `[base + offset]` memory operand. */ \
  static constexpr Mem FUNC(const Label& base, int32_t offset = 0) noexcept { \
    return Mem(base, offset, SIZE); \
  } \
  /*! Creates `[base + (index << shift) + offset]` memory operand. */ \
  static constexpr Mem FUNC(const Label& base, const Gp& index, uint32_t shift = 0, int32_t offset = 0) noexcept { \
    return Mem(base, index, shift, offset, SIZE); \
  } \
  /*! Creates `[rip + offset]` memory operand. */ \
  static constexpr Mem FUNC(const Rip& rip_, int32_t offset = 0) noexcept { \
    return Mem(rip_, offset, SIZE); \
  } \
  /*! Creates `[ptr]` memory operand. */ \
  static constexpr Mem FUNC(uint64_t base) noexcept { \
    return Mem(base, SIZE); \
  } \
  /*! Creates `[base + (index << shift) + offset]` memory operand. */ \
  static constexpr Mem FUNC(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \
    return Mem(base, index, shift, SIZE); \
  } \
  /*! Creates `[base + (vec_index << shift) + offset]` memory operand. */ \
  static constexpr Mem FUNC(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \
    return Mem(base, index, shift, SIZE); \
  } \
  \
  /*! Creates `[base + offset]` memory operand (absolute). */ \
  static constexpr Mem FUNC##_abs(uint64_t base) noexcept { \
    return Mem(base, SIZE, BaseMem::kSignatureMemAbs); \
  } \
  /*! Creates `[base + (index << shift) + offset]` memory operand (absolute). */ \
  static constexpr Mem FUNC##_abs(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \
    return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemAbs); \
  } \
  /*! Creates `[base + (vec_index << shift) + offset]` memory operand (absolute). */ \
  static constexpr Mem FUNC##_abs(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \
    return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemAbs); \
  } \
  \
  /*! Creates `[base + offset]` memory operand (relative). */ \
  static constexpr Mem FUNC##_rel(uint64_t base) noexcept { \
    return Mem(base, SIZE, BaseMem::kSignatureMemRel); \
  } \
  /*! Creates `[base + (index << shift) + offset]` memory operand (relative). */ \
  static constexpr Mem FUNC##_rel(uint64_t base, const Gp& index, uint32_t shift = 0) noexcept { \
    return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemRel); \
  } \
  /*! Creates `[base + (vec_index << shift) + offset]` memory operand (relative). */ \
  static constexpr Mem FUNC##_rel(uint64_t base, const Vec& index, uint32_t shift = 0) noexcept { \
    return Mem(base, index, shift, SIZE, BaseMem::kSignatureMemRel); \
  }
992
// Definition of memory operand constructors that use platform independent
// naming - the suffix is the operand size in bits (ptr_8 == 1 byte, ...).
ASMJIT_MEM_PTR(ptr_8, 1)
ASMJIT_MEM_PTR(ptr_16, 2)
ASMJIT_MEM_PTR(ptr_32, 4)
ASMJIT_MEM_PTR(ptr_48, 6)
ASMJIT_MEM_PTR(ptr_64, 8)
ASMJIT_MEM_PTR(ptr_80, 10)
ASMJIT_MEM_PTR(ptr_128, 16)
ASMJIT_MEM_PTR(ptr_256, 32)
ASMJIT_MEM_PTR(ptr_512, 64)

// Definition of memory operand constructors that use X86-specific naming
// convention (byte/word/dword/... sizes in bytes as the second argument).
ASMJIT_MEM_PTR(byte_ptr, 1)
ASMJIT_MEM_PTR(word_ptr, 2)
ASMJIT_MEM_PTR(dword_ptr, 4)
ASMJIT_MEM_PTR(qword_ptr, 8)
ASMJIT_MEM_PTR(tword_ptr, 10)
ASMJIT_MEM_PTR(oword_ptr, 16)
ASMJIT_MEM_PTR(dqword_ptr, 16)
ASMJIT_MEM_PTR(qqword_ptr, 32)
ASMJIT_MEM_PTR(xmmword_ptr, 16)
ASMJIT_MEM_PTR(ymmword_ptr, 32)
ASMJIT_MEM_PTR(zmmword_ptr, 64)

// The macro is only needed to generate the constructors above.
#undef ASMJIT_MEM_PTR
1018
1019 //! \}
1020
1021 ASMJIT_END_SUB_NAMESPACE
1022
// ============================================================================
// [asmjit::Type::IdOfT<x86::Reg>]
// ============================================================================

//! \cond INTERNAL

ASMJIT_BEGIN_NAMESPACE
// Maps X86 register classes to their default type-ids. Note that the id names
// show that vector registers default to 32-bit signed integer element vectors.
ASMJIT_DEFINE_TYPE_ID(x86::Gpb, kIdI8);
ASMJIT_DEFINE_TYPE_ID(x86::Gpw, kIdI16);
ASMJIT_DEFINE_TYPE_ID(x86::Gpd, kIdI32);
ASMJIT_DEFINE_TYPE_ID(x86::Gpq, kIdI64);
ASMJIT_DEFINE_TYPE_ID(x86::Mm , kIdMmx64);
ASMJIT_DEFINE_TYPE_ID(x86::Xmm, kIdI32x4);
ASMJIT_DEFINE_TYPE_ID(x86::Ymm, kIdI32x8);
ASMJIT_DEFINE_TYPE_ID(x86::Zmm, kIdI32x16);
ASMJIT_END_NAMESPACE

//! \endcond
1041
1042 #endif // _ASMJIT_X86_OPERAND_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "../core/api-build_p.h"
7 #if defined(ASMJIT_BUILD_X86) && !defined(ASMJIT_NO_COMPILER)
8
9 #include "../core/cpuinfo.h"
10 #include "../core/support.h"
11 #include "../core/type.h"
12 #include "../x86/x86assembler.h"
13 #include "../x86/x86compiler.h"
14 #include "../x86/x86instapi_p.h"
15 #include "../x86/x86instdb_p.h"
16 #include "../x86/x86internal_p.h"
17 #include "../x86/x86rapass_p.h"
18
19 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
20
21 // ============================================================================
22 // [asmjit::x86::X86RAPass - Helpers]
23 // ============================================================================
24
25 static ASMJIT_INLINE uint64_t raImmMaskFromSize(uint32_t size) noexcept {
26 ASMJIT_ASSERT(size > 0 && size < 256);
27 static const uint64_t masks[] = {
28 0x00000000000000FFu, // 1
29 0x000000000000FFFFu, // 2
30 0x00000000FFFFFFFFu, // 4
31 0xFFFFFFFFFFFFFFFFu, // 8
32 0x0000000000000000u, // 16
33 0x0000000000000000u, // 32
34 0x0000000000000000u, // 64
35 0x0000000000000000u, // 128
36 0x0000000000000000u // 256
37 };
38 return masks[Support::ctz(size)];
39 }
40
// Translates `OpRWInfo` read/write flags (kRead, kWrite, kRegMem) into
// `RATiedReg` USE/OUT flags. The lookup table is indexed directly by the
// numeric value of `rwFlags & (OpRWInfo::kRW | OpRWInfo::kRegMem)`, so the
// entry order must match the numeric values of those flag combinations.
static ASMJIT_INLINE uint32_t raUseOutFlagsFromRWFlags(uint32_t rwFlags) noexcept {
  static const uint32_t map[] = {
    0,                                                       // <none>
    RATiedReg::kRead | RATiedReg::kUse, // kRead
    RATiedReg::kWrite | RATiedReg::kOut, // kWrite
    RATiedReg::kRW | RATiedReg::kUse, // kRW
    0,                                                       // <invalid> (kRegMem alone)
    RATiedReg::kRead | RATiedReg::kUse | RATiedReg::kUseRM, // kRead | kRegMem
    RATiedReg::kWrite | RATiedReg::kOut | RATiedReg::kOutRM, // kWrite | kRegMem
    RATiedReg::kRW | RATiedReg::kUse | RATiedReg::kUseRM // kRW | kRegMem
  };

  return map[rwFlags & (OpRWInfo::kRW | OpRWInfo::kRegMem)];
}
55
// Translates R/W flags of a register operand (already stored at bit 0).
static ASMJIT_INLINE uint32_t raRegRwFlags(uint32_t flags) noexcept {
  return raUseOutFlagsFromRWFlags(flags);
}

// Translates R/W flags of a memory-base register - the base R/W bits are
// stored at the `OpRWInfo::kMemBaseRW` position, so shift them down first.
static ASMJIT_INLINE uint32_t raMemBaseRwFlags(uint32_t flags) noexcept {
  constexpr uint32_t shift = Support::constCtz(OpRWInfo::kMemBaseRW);
  return raUseOutFlagsFromRWFlags((flags >> shift) & OpRWInfo::kRW);
}

// Translates R/W flags of a memory-index register - the index R/W bits are
// stored at the `OpRWInfo::kMemIndexRW` position, so shift them down first.
static ASMJIT_INLINE uint32_t raMemIndexRwFlags(uint32_t flags) noexcept {
  constexpr uint32_t shift = Support::constCtz(OpRWInfo::kMemIndexRW);
  return raUseOutFlagsFromRWFlags((flags >> shift) & OpRWInfo::kRW);
}
69
// ============================================================================
// [asmjit::x86::X86RACFGBuilder]
// ============================================================================

//! X86-specific CFG builder used by the X86 register-allocation pass. It
//! visits instructions, function calls, and returns, and feeds an
//! `RAInstBuilder` with tied-register information.
class X86RACFGBuilder : public RACFGBuilder<X86RACFGBuilder> {
public:
  //! Target architecture id, taken from the compiler (`cc()->archId()`).
  uint32_t _archId;
  //! True when compiling for a 64-bit target (gp register size == 8).
  bool _is64Bit;
  //! True when AVX is enabled; selects between SSE/AVX ids in `choose()`.
  bool _avxEnabled;

  inline X86RACFGBuilder(X86RAPass* pass) noexcept
    : RACFGBuilder<X86RACFGBuilder>(pass),
      _archId(pass->cc()->archId()),
      _is64Bit(pass->gpSize() == 8),
      _avxEnabled(pass->_avxEnabled) {}

  //! Returns the compiler cast to `x86::Compiler`.
  inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }

  //! Returns `avxInst` if AVX is enabled, `sseInst` otherwise.
  inline uint32_t choose(uint32_t sseInst, uint32_t avxInst) const noexcept {
    return _avxEnabled ? avxInst : sseInst;
  }

  //! Collects RA constraints of `inst` into `ib` and reports its control type.
  Error onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& ib) noexcept;

  //! Invoked before a function-call node is translated.
  Error onBeforeCall(FuncCallNode* call) noexcept;
  //! Collects RA constraints of a function-call node into `ib`.
  Error onCall(FuncCallNode* call, RAInstBuilder& ib) noexcept;

  //! Moves an immediate call argument `imm_` into a register (`out`).
  Error moveImmToRegArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept;
  //! Moves an immediate call argument `imm_` onto the stack.
  Error moveImmToStackArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_) noexcept;
  //! Moves a register call argument `reg` onto the stack.
  Error moveRegToStackArg(FuncCallNode* call, const FuncValue& arg, const BaseReg& reg) noexcept;

  //! Invoked before a function-return node is translated.
  Error onBeforeRet(FuncRetNode* funcRet) noexcept;
  //! Collects RA constraints of a function-return node into `ib`.
  Error onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept;
};
104
105 // ============================================================================
106 // [asmjit::x86::X86RACFGBuilder - OnInst]
107 // ============================================================================
108
109 Error X86RACFGBuilder::onInst(InstNode* inst, uint32_t& controlType, RAInstBuilder& ib) noexcept {
110 InstRWInfo rwInfo;
111
112 uint32_t instId = inst->id();
113 if (Inst::isDefinedId(instId)) {
114 uint32_t opCount = inst->opCount();
115 const Operand* opArray = inst->operands();
116 ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_archId, inst->baseInst(), opArray, opCount, rwInfo));
117
118 const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
119 bool hasGpbHiConstraint = false;
120 uint32_t singleRegOps = 0;
121
122 if (opCount) {
123 for (uint32_t i = 0; i < opCount; i++) {
124 const Operand& op = opArray[i];
125 const OpRWInfo& opRwInfo = rwInfo.operand(i);
126
127 if (op.isReg()) {
128 // Register Operand
129 // ----------------
130 const Reg& reg = op.as<Reg>();
131
132 uint32_t flags = raRegRwFlags(opRwInfo.opFlags());
133 uint32_t allowedRegs = 0xFFFFFFFFu;
134
135 // X86-specific constraints related to LO|HI general purpose registers.
136 // This is only required when the register is part of the encoding. If
137 // the register is fixed we won't restrict anything as it doesn't restrict
138 // encoding of other registers.
139 if (reg.isGpb() && !(opRwInfo.opFlags() & OpRWInfo::kRegPhysId)) {
140 flags |= RATiedReg::kX86Gpb;
141 if (!_is64Bit) {
142 // Restrict to first four - AL|AH|BL|BH|CL|CH|DL|DH. In 32-bit mode
143 // it's not possible to access SIL|DIL, etc, so this is just enough.
144 allowedRegs = 0x0Fu;
145 }
146 else {
147 // If we encountered GPB-HI register the situation is much more
148 // complicated than in 32-bit mode. We need to patch all registers
149 // to not use ID higher than 7 and all GPB-LO registers to not use
150 // index higher than 3. Instead of doing the patching here we just
151 // set a flag and will do it later, to not complicate this loop.
152 if (reg.isGpbHi()) {
153 hasGpbHiConstraint = true;
154 allowedRegs = 0x0Fu;
155 }
156 }
157 }
158
159 uint32_t vIndex = Operand::virtIdToIndex(reg.id());
160 if (vIndex < Operand::kVirtIdCount) {
161 RAWorkReg* workReg;
162 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
163
164 // Use RW instead of Write in case that not the whole register is
165 // overwritten. This is important for liveness as we cannot kill a
166 // register that will be used. For example `mov al, 0xFF` is not a
167 // write-only operation if user allocated the whole `rax` register.
168 if ((flags & RATiedReg::kRW) == RATiedReg::kWrite) {
169 if (workReg->regByteMask() & ~(opRwInfo.writeByteMask() | opRwInfo.extendByteMask())) {
170 // Not write-only operation.
171 flags = (flags & ~RATiedReg::kOut) | (RATiedReg::kRead | RATiedReg::kUse);
172 }
173 }
174
175 // Do not use RegMem flag if changing Reg to Mem requires additional
176 // CPU feature that may not be enabled.
177 if (rwInfo.rmFeature() && (flags & (RATiedReg::kUseRM | RATiedReg::kOutRM))) {
178 flags &= ~(RATiedReg::kUseRM | RATiedReg::kOutRM);
179 }
180
181 uint32_t group = workReg->group();
182 uint32_t allocable = _pass->_availableRegs[group] & allowedRegs;
183
184 uint32_t useId = BaseReg::kIdBad;
185 uint32_t outId = BaseReg::kIdBad;
186
187 uint32_t useRewriteMask = 0;
188 uint32_t outRewriteMask = 0;
189
190 if (flags & RATiedReg::kUse) {
191 useRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
192 if (opRwInfo.opFlags() & OpRWInfo::kRegPhysId) {
193 useId = opRwInfo.physId();
194 flags |= RATiedReg::kUseFixed;
195 }
196 }
197 else {
198 outRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
199 if (opRwInfo.opFlags() & OpRWInfo::kRegPhysId) {
200 outId = opRwInfo.physId();
201 flags |= RATiedReg::kOutFixed;
202 }
203 }
204
205 ASMJIT_PROPAGATE(ib.add(workReg, flags, allocable, useId, useRewriteMask, outId, outRewriteMask, opRwInfo.rmSize()));
206 if (singleRegOps == i)
207 singleRegOps++;
208 }
209 }
210 else if (op.isMem()) {
211 // Memory Operand
212 // --------------
213 const Mem& mem = op.as<Mem>();
214 ib.addForbiddenFlags(RATiedReg::kUseRM | RATiedReg::kOutRM);
215
216 if (mem.isRegHome()) {
217 RAWorkReg* workReg;
218 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(mem.baseId()), &workReg));
219 _pass->getOrCreateStackSlot(workReg);
220 }
221 else if (mem.hasBaseReg()) {
222 uint32_t vIndex = Operand::virtIdToIndex(mem.baseId());
223 if (vIndex < Operand::kVirtIdCount) {
224 RAWorkReg* workReg;
225 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
226
227 uint32_t flags = raMemBaseRwFlags(opRwInfo.opFlags());
228 uint32_t group = workReg->group();
229 uint32_t allocable = _pass->_availableRegs[group];
230
231 uint32_t useId = BaseReg::kIdBad;
232 uint32_t outId = BaseReg::kIdBad;
233
234 uint32_t useRewriteMask = 0;
235 uint32_t outRewriteMask = 0;
236
237 if (flags & RATiedReg::kUse) {
238 useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
239 if (opRwInfo.opFlags() & OpRWInfo::kMemPhysId) {
240 useId = opRwInfo.physId();
241 flags |= RATiedReg::kUseFixed;
242 }
243 }
244 else {
245 outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
246 if (opRwInfo.opFlags() & OpRWInfo::kMemPhysId) {
247 outId = opRwInfo.physId();
248 flags |= RATiedReg::kOutFixed;
249 }
250 }
251
252 ASMJIT_PROPAGATE(ib.add(workReg, flags, allocable, useId, useRewriteMask, outId, outRewriteMask));
253 }
254 }
255
256 if (mem.hasIndexReg()) {
257 uint32_t vIndex = Operand::virtIdToIndex(mem.indexId());
258 if (vIndex < Operand::kVirtIdCount) {
259 RAWorkReg* workReg;
260 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
261
262 uint32_t flags = raMemIndexRwFlags(opRwInfo.opFlags());
263 uint32_t group = workReg->group();
264 uint32_t allocable = _pass->_availableRegs[group];
265
266 // Index registers have never fixed id on X86/x64.
267 const uint32_t useId = BaseReg::kIdBad;
268 const uint32_t outId = BaseReg::kIdBad;
269
270 uint32_t useRewriteMask = 0;
271 uint32_t outRewriteMask = 0;
272
273 if (flags & RATiedReg::kUse)
274 useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
275 else
276 outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
277
278 ASMJIT_PROPAGATE(ib.add(workReg, RATiedReg::kUse | RATiedReg::kRead, allocable, useId, useRewriteMask, outId, outRewriteMask));
279 }
280 }
281 }
282 }
283 }
284
285 // Handle extra operand (either REP {cx|ecx|rcx} or AVX-512 {k} selector).
286 if (inst->hasExtraReg()) {
287 uint32_t vIndex = Operand::virtIdToIndex(inst->extraReg().id());
288 if (vIndex < Operand::kVirtIdCount) {
289 RAWorkReg* workReg;
290 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
291
292 uint32_t group = workReg->group();
293 uint32_t rewriteMask = Support::bitMask(inst->getRewriteIndex(&inst->extraReg()._id));
294
295 if (group == Gp::kGroupKReg) {
296 // AVX-512 mask selector {k} register - read-only, allocable to any register except {k0}.
297 uint32_t allocableRegs= _pass->_availableRegs[group] & ~Support::bitMask(0);
298 ASMJIT_PROPAGATE(ib.add(workReg, RATiedReg::kUse | RATiedReg::kRead, allocableRegs, BaseReg::kIdBad, rewriteMask, BaseReg::kIdBad, 0));
299 singleRegOps = 0;
300 }
301 else {
302 // REP {cx|ecx|rcx} register - read & write, allocable to {cx|ecx|rcx} only.
303 ASMJIT_PROPAGATE(ib.add(workReg, RATiedReg::kUse | RATiedReg::kRW, 0, Gp::kIdCx, rewriteMask, Gp::kIdBad, 0));
304 }
305 }
306 else {
307 uint32_t group = inst->extraReg().group();
308 if (group == Gp::kGroupKReg && inst->extraReg().id() != 0)
309 singleRegOps = 0;
310 }
311 }
312
313 // Handle X86 constraints.
314 if (hasGpbHiConstraint) {
315 for (uint32_t i = 0; i < ib.tiedRegCount(); i++) {
316 RATiedReg* tiedReg = ib[i];
317 tiedReg->_allocableRegs &= tiedReg->hasFlag(RATiedReg::kX86Gpb) ? 0x0Fu : 0xFFu;
318 }
319 }
320
321 if (ib.tiedRegCount() == 1) {
322 // Handle special cases of some instructions where all operands share the same
323 // register. In such case the single operand becomes read-only or write-only.
324 uint32_t singleRegCase = InstDB::kSingleRegNone;
325 if (singleRegOps == opCount) {
326 singleRegCase = instInfo.singleRegCase();
327 }
328 else if (opCount == 2 && inst->opType(1).isImm()) {
329 // Handle some tricks used by X86 asm.
330 const BaseReg& reg = inst->opType(0).as<BaseReg>();
331 const Imm& imm = inst->opType(1).as<Imm>();
332
333 const RAWorkReg* workReg = _pass->workRegById(ib[0]->workId());
334 uint32_t workRegSize = workReg->info().size();
335
336 switch (inst->id()) {
337 case Inst::kIdOr: {
338 // Sets the value of the destination register to -1, previous content unused.
339 if (reg.size() >= 4 || reg.size() >= workRegSize) {
340 if (imm.i64() == -1 || imm.u64() == raImmMaskFromSize(reg.size()))
341 singleRegCase = InstDB::kSingleRegWO;
342 }
343 ASMJIT_FALLTHROUGH;
344 }
345
346 case Inst::kIdAdd:
347 case Inst::kIdAnd:
348 case Inst::kIdRol:
349 case Inst::kIdRor:
350 case Inst::kIdSar:
351 case Inst::kIdShl:
352 case Inst::kIdShr:
353 case Inst::kIdSub:
354 case Inst::kIdXor: {
355 // Updates [E|R]FLAGS without changing the content.
356 if (reg.size() != 4 || reg.size() >= workRegSize) {
357 if (imm.u64() == 0)
358 singleRegCase = InstDB::kSingleRegRO;
359 }
360 break;
361 }
362 }
363 }
364
365 switch (singleRegCase) {
366 case InstDB::kSingleRegNone:
367 break;
368 case InstDB::kSingleRegRO:
369 ib[0]->makeReadOnly();
370 break;
371 case InstDB::kSingleRegWO:
372 ib[0]->makeWriteOnly();
373 break;
374 }
375 }
376
377 controlType = instInfo.controlType();
378 }
379
380 return kErrorOk;
381 }
382
383 // ============================================================================
384 // [asmjit::x86::X86RACFGBuilder - OnCall]
385 // ============================================================================
386
//! Lowers a function call before register allocation runs: immediate
//! arguments are materialized into fresh virtual registers or stored to
//! their stack slots, register arguments that must be passed on the stack
//! are spilled, and x87 ST(i) return values are transferred into XMM
//! virtual registers through a temporary memory slot.
Error X86RACFGBuilder::onBeforeCall(FuncCallNode* call) noexcept {
  uint32_t argCount = call->argCount();
  uint32_t retCount = call->retCount();
  const FuncDetail& fd = call->detail();

  // Argument setup code is emitted in front of the call node.
  cc()->_setCursor(call->prev());

  for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
    // Each argument index has a LO and a HI slot (HI offset by `kFuncArgHi`).
    for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) {
      if (!fd.hasArg(argIndex + argHi))
        continue;

      const FuncValue& arg = fd.arg(argIndex + argHi);
      const Operand& op = call->arg(argIndex + argHi);

      if (op.isNone())
        continue;

      if (op.isReg()) {
        const Reg& reg = op.as<Reg>();
        RAWorkReg* workReg;
        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));

        if (arg.isReg()) {
          uint32_t regGroup = workReg->group();
          uint32_t argGroup = Reg::groupOf(arg.regType());

          if (regGroup != argGroup) {
            // TODO: Moving a value between register groups is not implemented.
            ASMJIT_ASSERT(false);
          }
        }
        else {
          // Register value that the convention passes on the stack.
          ASMJIT_PROPAGATE(moveRegToStackArg(call, arg, op.as<BaseReg>()));
        }
      }
      else if (op.isImm()) {
        if (arg.isReg()) {
          // Immediate passed in a register - materialize it into a new
          // virtual register and substitute it as the call argument.
          BaseReg reg;
          ASMJIT_PROPAGATE(moveImmToRegArg(call, arg, op.as<Imm>(), &reg));
          call->_args[argIndex + argHi] = reg;
        }
        else {
          ASMJIT_PROPAGATE(moveImmToStackArg(call, arg, op.as<Imm>()));
        }
      }
    }
  }

  // Return value handling is emitted after the call node.
  cc()->_setCursor(call);

  // NOTE(review): when the callee pops its own arguments the stack pointer
  // is re-adjusted by `sub` here - confirm this compensates the callee's
  // cleanup against the frame's call stack accounting.
  if (fd.hasFlag(CallConv::kFlagCalleePopsStack))
    ASMJIT_PROPAGATE(cc()->sub(cc()->zsp(), fd.argStackSize()));

  for (uint32_t retIndex = 0; retIndex < retCount; retIndex++) {
    const FuncValue& ret = fd.ret(retIndex);
    const Operand& op = call->ret(retIndex);

    if (op.isReg()) {
      const Reg& reg = op.as<Reg>();
      RAWorkReg* workReg;
      ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));

      if (ret.isReg()) {
        if (ret.regType() == Reg::kTypeSt) {
          // Value returned on the x87 FPU stack - it can only be received
          // by a vector (XMM) virtual register, going through temporary
          // memory (fstp [tmp]; mov{ss|sd} xmm, [tmp]).
          if (workReg->group() != Reg::kGroupVec)
            return DebugUtils::errored(kErrorInvalidAssignment);

          Reg dst = Reg(workReg->signature(), workReg->virtId());
          Mem mem;

          // The return's explicit type id (if any) overrides the work
          // register's base type.
          uint32_t typeId = Type::baseOf(workReg->typeId());
          if (ret.hasTypeId())
            typeId = ret.typeId();

          switch (typeId) {
            case Type::kIdF32:
              ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4));
              mem.setSize(4);
              ASMJIT_PROPAGATE(cc()->fstp(mem));
              ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), dst.as<Xmm>(), mem));
              break;

            case Type::kIdF64:
              ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4));
              mem.setSize(8);
              ASMJIT_PROPAGATE(cc()->fstp(mem));
              ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), dst.as<Xmm>(), mem));
              break;

            default:
              return DebugUtils::errored(kErrorInvalidAssignment);
          }
        }
        else {
          uint32_t regGroup = workReg->group();
          uint32_t retGroup = Reg::groupOf(ret.regType());

          if (regGroup != retGroup) {
            // TODO: Moving a value between register groups is not implemented.
            ASMJIT_ASSERT(false);
          }
        }
      }
    }
  }

  // This block has function call(s).
  _curBlock->addFlags(RABlock::kFlagHasFuncCalls);
  _pass->func()->frame().addAttributes(FuncFrame::kAttrHasFuncCalls);
  _pass->func()->frame().updateCallStackSize(fd.argStackSize());

  return kErrorOk;
}
500
501 Error X86RACFGBuilder::onCall(FuncCallNode* call, RAInstBuilder& ib) noexcept {
502 uint32_t argCount = call->argCount();
503 uint32_t retCount = call->retCount();
504 const FuncDetail& fd = call->detail();
505
506 for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
507 for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) {
508 if (!fd.hasArg(argIndex + argHi))
509 continue;
510
511 const FuncValue& arg = fd.arg(argIndex + argHi);
512 const Operand& op = call->arg(argIndex + argHi);
513
514 if (op.isNone())
515 continue;
516
517 if (op.isReg()) {
518 const Reg& reg = op.as<Reg>();
519 RAWorkReg* workReg;
520 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
521
522 if (arg.isReg()) {
523 uint32_t regGroup = workReg->group();
524 uint32_t argGroup = Reg::groupOf(arg.regType());
525
526 if (regGroup == argGroup) {
527 ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
528 }
529 }
530 }
531 }
532 }
533
534 for (uint32_t retIndex = 0; retIndex < retCount; retIndex++) {
535 const FuncValue& ret = fd.ret(retIndex);
536 const Operand& op = call->ret(retIndex);
537
538 // Not handled here...
539 if (ret.regType() == Reg::kTypeSt)
540 continue;
541
542 if (op.isReg()) {
543 const Reg& reg = op.as<Reg>();
544 RAWorkReg* workReg;
545 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
546
547 if (ret.isReg()) {
548 uint32_t regGroup = workReg->group();
549 uint32_t retGroup = Reg::groupOf(ret.regType());
550
551 if (regGroup == retGroup) {
552 ASMJIT_PROPAGATE(ib.addCallRet(workReg, ret.regId()));
553 }
554 }
555 else {
556 return DebugUtils::errored(kErrorInvalidAssignment);
557 }
558 }
559 }
560
561 // Setup clobbered registers.
562 ib._clobbered[0] = Support::lsbMask<uint32_t>(_pass->_physRegCount[0]) & ~fd.preservedRegs(0);
563 ib._clobbered[1] = Support::lsbMask<uint32_t>(_pass->_physRegCount[1]) & ~fd.preservedRegs(1);
564 ib._clobbered[2] = Support::lsbMask<uint32_t>(_pass->_physRegCount[2]) & ~fd.preservedRegs(2);
565 ib._clobbered[3] = Support::lsbMask<uint32_t>(_pass->_physRegCount[3]) & ~fd.preservedRegs(3);
566
567 return kErrorOk;
568 }
569
570 // ============================================================================
571 // [asmjit::x86::X86RACFGBuilder - MoveImmToRegArg]
572 // ============================================================================
573
//! Materializes an immediate call argument `imm_` into a freshly created
//! virtual GP register (returned via `out`), so the allocator can later
//! place it into the physical register required by the calling convention.
Error X86RACFGBuilder::moveImmToRegArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept {
  ASMJIT_UNUSED(call);
  ASMJIT_ASSERT(arg.isReg());

  // Work on a copy so the caller's operand stays untouched.
  Imm imm(imm_);
  uint32_t rTypeId = Type::kIdU32;

  switch (arg.typeId()) {
    // Sub-32-bit integers are extended first and then moved as U32.
    case Type::kIdI8: imm.signExtend8Bits(); goto MovU32;
    case Type::kIdU8: imm.zeroExtend8Bits(); goto MovU32;
    case Type::kIdI16: imm.signExtend16Bits(); goto MovU32;
    case Type::kIdU16: imm.zeroExtend16Bits(); goto MovU32;

    case Type::kIdI32:
    case Type::kIdU32:
MovU32:
      imm.zeroExtend32Bits();
      break;

    case Type::kIdI64:
    case Type::kIdU64:
      // Moving to GPD automatically zero extends in 64-bit mode.
      if (imm.isUInt32()) {
        imm.zeroExtend32Bits();
        break;
      }

      rTypeId = Type::kIdU64;
      break;

    default:
      return DebugUtils::errored(kErrorInvalidState);
  }

  // Create the virtual register and weight it as a call argument.
  ASMJIT_PROPAGATE(cc()->_newReg(*out, rTypeId, nullptr));
  cc()->virtRegById(out->id())->setWeight(RAPass::kCallArgWeight);

  return cc()->mov(out->as<x86::Gp>(), imm);
}
613
614 // ============================================================================
615 // [asmjit::x86::X86RACFGBuilder - MoveImmToStackArg]
616 // ============================================================================
617
//! Stores an immediate call argument `imm_` into its stack slot at
//! `arg.stackOffset()` (relative to the stack pointer), using one or two
//! 32-bit stores depending on the target and value.
Error X86RACFGBuilder::moveImmToStackArg(FuncCallNode* call, const FuncValue& arg, const Imm& imm_) noexcept {
  ASMJIT_UNUSED(call);
  ASMJIT_ASSERT(arg.isStack());

  Mem mem = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
  Imm imm[2];

  mem.setSize(4);
  imm[0] = imm_;
  uint32_t nMovs = 0;

  // One stack entry has the same size as the native register size. That means
  // that if we want to move a 32-bit integer on the stack in 64-bit mode, we
  // need to extend it to a 64-bit integer first. In 32-bit mode, pushing a
  // 64-bit on stack is done in two steps by pushing low and high parts
  // separately.
  switch (arg.typeId()) {
    // Sub-32-bit integers are extended first and stored as a single DWORD.
    case Type::kIdI8: imm[0].signExtend8Bits(); goto MovU32;
    case Type::kIdU8: imm[0].zeroExtend8Bits(); goto MovU32;
    case Type::kIdI16: imm[0].signExtend16Bits(); goto MovU32;
    case Type::kIdU16: imm[0].zeroExtend16Bits(); goto MovU32;

    case Type::kIdI32:
    case Type::kIdU32:
    case Type::kIdF32:
MovU32:
      imm[0].zeroExtend32Bits();
      nMovs = 1;
      break;

    case Type::kIdI64:
    case Type::kIdU64:
    case Type::kIdF64:
    case Type::kIdMmx32:
    case Type::kIdMmx64:
      // On a 64-bit target a sign-extendable immediate fits a single
      // 64-bit `mov [mem], imm32`.
      if (_is64Bit && imm[0].isInt32()) {
        mem.setSize(8);
        nMovs = 1;
        break;
      }

      // Otherwise split into two 32-bit stores: low part, then high part.
      imm[1].setU32(imm[0].u32Hi());
      imm[0].zeroExtend32Bits();
      nMovs = 2;
      break;

    default:
      return DebugUtils::errored(kErrorInvalidState);
  }

  // Emit the store(s), advancing the slot offset by the store size.
  for (uint32_t i = 0; i < nMovs; i++) {
    ASMJIT_PROPAGATE(cc()->mov(mem, imm[i]));
    mem.addOffsetLo32(int32_t(mem.size()));
  }

  return kErrorOk;
}
675
676 // ============================================================================
677 // [asmjit::x86::X86RACFGBuilder - MoveRegToStackArg]
678 // ============================================================================
679
//! Stores the value held by virtual register `reg` into the stack slot of
//! stack-passed call argument `arg`, converting from the source type
//! (`vr->typeId()`) to the destination type (`arg.typeId()`) via zero/sign
//! extension or the appropriate GP/MMX/XMM move instruction.
Error X86RACFGBuilder::moveRegToStackArg(FuncCallNode* call, const FuncValue& arg, const BaseReg& reg) noexcept {
  ASMJIT_UNUSED(call);
  ASMJIT_ASSERT(arg.isStack());

  // Destination stack slot, addressed relative to the stack pointer.
  Mem mem = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
  Reg r0, r1;

  VirtReg* vr = cc()->virtRegById(reg.id());
  uint32_t gpSize = cc()->gpSize();
  uint32_t instId = 0;

  uint32_t dstTypeId = arg.typeId();
  uint32_t srcTypeId = vr->typeId();

  // Select the conversion strategy; each `goto` target below emits the
  // actual instructions.
  switch (dstTypeId) {
    case Type::kIdI64:
    case Type::kIdU64:
      // Extend BYTE->QWORD (GP).
      if (Type::isGp8(srcTypeId)) {
        r1.setRegT<Reg::kTypeGpbLo>(reg.id());

        instId = (dstTypeId == Type::kIdI64 && srcTypeId == Type::kIdI8) ? Inst::kIdMovsx : Inst::kIdMovzx;
        goto ExtendMovGpXQ;
      }

      // Extend WORD->QWORD (GP).
      if (Type::isGp16(srcTypeId)) {
        r1.setRegT<Reg::kTypeGpw>(reg.id());

        instId = (dstTypeId == Type::kIdI64 && srcTypeId == Type::kIdI16) ? Inst::kIdMovsx : Inst::kIdMovzx;
        goto ExtendMovGpXQ;
      }

      // Extend DWORD->QWORD (GP).
      if (Type::isGp32(srcTypeId)) {
        r1.setRegT<Reg::kTypeGpd>(reg.id());

        instId = Inst::kIdMovsxd;
        if (dstTypeId == Type::kIdI64 && srcTypeId == Type::kIdI32)
          goto ExtendMovGpXQ;
        else
          goto ZeroExtendGpDQ;
      }

      // Move QWORD (GP).
      if (Type::isGp64(srcTypeId)) goto MovGpQ;
      if (Type::isMmx(srcTypeId)) goto MovMmQ;
      if (Type::isVec(srcTypeId)) goto MovXmmQ;
      break;

    case Type::kIdI32:
    case Type::kIdU32:
    case Type::kIdI16:
    case Type::kIdU16:
      // DWORD <- WORD (Zero|Sign Extend).
      if (Type::isGp16(srcTypeId)) {
        bool isDstSigned = dstTypeId == Type::kIdI16 || dstTypeId == Type::kIdI32;
        bool isSrcSigned = srcTypeId == Type::kIdI8 || srcTypeId == Type::kIdI16;

        r1.setRegT<Reg::kTypeGpw>(reg.id());
        instId = isDstSigned && isSrcSigned ? Inst::kIdMovsx : Inst::kIdMovzx;
        goto ExtendMovGpD;
      }

      // DWORD <- BYTE (Zero|Sign Extend).
      if (Type::isGp8(srcTypeId)) {
        bool isDstSigned = dstTypeId == Type::kIdI16 || dstTypeId == Type::kIdI32;
        bool isSrcSigned = srcTypeId == Type::kIdI8 || srcTypeId == Type::kIdI16;

        r1.setRegT<Reg::kTypeGpbLo>(reg.id());
        instId = isDstSigned && isSrcSigned ? Inst::kIdMovsx : Inst::kIdMovzx;
        goto ExtendMovGpD;
      }
      ASMJIT_FALLTHROUGH;

    case Type::kIdI8:
    case Type::kIdU8:
      if (Type::isInt(srcTypeId)) goto MovGpD;
      if (Type::isMmx(srcTypeId)) goto MovMmD;
      if (Type::isVec(srcTypeId)) goto MovXmmD;
      break;

    case Type::kIdMmx32:
    case Type::kIdMmx64:
      // Extend BYTE->QWORD (GP).
      if (Type::isGp8(srcTypeId)) {
        r1.setRegT<Reg::kTypeGpbLo>(reg.id());

        instId = Inst::kIdMovzx;
        goto ExtendMovGpXQ;
      }

      // Extend WORD->QWORD (GP).
      if (Type::isGp16(srcTypeId)) {
        r1.setRegT<Reg::kTypeGpw>(reg.id());

        instId = Inst::kIdMovzx;
        goto ExtendMovGpXQ;
      }

      if (Type::isGp32(srcTypeId)) goto ExtendMovGpDQ;
      if (Type::isGp64(srcTypeId)) goto MovGpQ;
      if (Type::isMmx(srcTypeId)) goto MovMmQ;
      if (Type::isVec(srcTypeId)) goto MovXmmQ;
      break;

    case Type::kIdF32:
    case Type::kIdF32x1:
      if (Type::isVec(srcTypeId)) goto MovXmmD;
      break;

    case Type::kIdF64:
    case Type::kIdF64x1:
      if (Type::isVec(srcTypeId)) goto MovXmmQ;
      break;

    default:
      // TODO: Vector types by stack.
      break;
  }
  // No conversion strategy matched the dst/src type pair.
  return DebugUtils::errored(kErrorInvalidState);

  // Extend+Move Gp.
ExtendMovGpD:
  mem.setSize(4);
  r0.setRegT<Reg::kTypeGpd>(reg.id());

  ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
  ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, mem, r0));
  return kErrorOk;

ExtendMovGpXQ:
  if (gpSize == 8) {
    // 64-bit target: extend directly into a 64-bit register and store it.
    mem.setSize(8);
    r0.setRegT<Reg::kTypeGpq>(reg.id());

    ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));
    ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, mem, r0));
  }
  else {
    mem.setSize(4);
    r0.setRegT<Reg::kTypeGpd>(reg.id());

    ASMJIT_PROPAGATE(cc()->emit(instId, r0, r1));

    // Store the low DWORD, then clear the high DWORD of the 8-byte slot.
ExtendMovGpDQ:
    ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, mem, r0));
    mem.addOffsetLo32(4);
    ASMJIT_PROPAGATE(cc()->emit(Inst::kIdAnd, mem, 0));
  }
  return kErrorOk;

ZeroExtendGpDQ:
  mem.setSize(4);
  r0.setRegT<Reg::kTypeGpd>(reg.id());
  goto ExtendMovGpDQ;

MovGpD:
  mem.setSize(4);
  r0.setRegT<Reg::kTypeGpd>(reg.id());
  return cc()->emit(Inst::kIdMov, mem, r0);

MovGpQ:
  mem.setSize(8);
  r0.setRegT<Reg::kTypeGpq>(reg.id());
  return cc()->emit(Inst::kIdMov, mem, r0);

MovMmD:
  mem.setSize(4);
  r0.setRegT<Reg::kTypeMm>(reg.id());
  return cc()->emit(choose(Inst::kIdMovd, Inst::kIdVmovd), mem, r0);

MovMmQ:
  mem.setSize(8);
  r0.setRegT<Reg::kTypeMm>(reg.id());
  return cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), mem, r0);

MovXmmD:
  mem.setSize(4);
  r0.setRegT<Reg::kTypeXmm>(reg.id());
  return cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), mem, r0);

MovXmmQ:
  mem.setSize(8);
  r0.setRegT<Reg::kTypeXmm>(reg.id());
  return cc()->emit(choose(Inst::kIdMovlps, Inst::kIdVmovlps), mem, r0);
}
867
868 // ============================================================================
// [asmjit::x86::X86RACFGBuilder - OnRet]
870 // ============================================================================
871
//! Lowers x87 ST(i) return values in front of the return node: the XMM
//! virtual register holding the value is stored to a temporary memory slot
//! and then loaded onto the FPU stack with `fld`, as required by calling
//! conventions that return floating point in ST(0).
Error X86RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept {
  const FuncDetail& funcDetail = _pass->func()->detail();
  const Operand* opArray = funcRet->operands();
  uint32_t opCount = funcRet->opCount();

  // The conversion code is emitted in front of the return node.
  cc()->_setCursor(funcRet->prev());

  for (uint32_t i = 0; i < opCount; i++) {
    const Operand& op = opArray[i];
    const FuncValue& ret = funcDetail.ret(i);

    if (!op.isReg())
      continue;

    if (ret.regType() == Reg::kTypeSt) {
      const Reg& reg = op.as<Reg>();
      uint32_t vIndex = Operand::virtIdToIndex(reg.id());

      if (vIndex < Operand::kVirtIdCount) {
        RAWorkReg* workReg;
        ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));

        // Only a vector (XMM) register can be routed to ST(0).
        if (workReg->group() != Reg::kGroupVec)
          return DebugUtils::errored(kErrorInvalidAssignment);

        Reg src = Reg(workReg->signature(), workReg->virtId());
        Mem mem;

        // The return's explicit type id (if any) overrides the work
        // register's base type.
        uint32_t typeId = Type::baseOf(workReg->typeId());
        if (ret.hasTypeId())
          typeId = ret.typeId();

        switch (typeId) {
          case Type::kIdF32:
            // movss [tmp], xmm; fld dword [tmp]
            ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 4, 4));
            mem.setSize(4);
            ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovss, Inst::kIdVmovss), mem, src.as<Xmm>()));
            ASMJIT_PROPAGATE(cc()->fld(mem));
            break;

          case Type::kIdF64:
            // movsd [tmp], xmm; fld qword [tmp]
            ASMJIT_PROPAGATE(_pass->useTemporaryMem(mem, 8, 4));
            mem.setSize(8);
            ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovsd, Inst::kIdVmovsd), mem, src.as<Xmm>()));
            ASMJIT_PROPAGATE(cc()->fld(mem));
            break;

          default:
            return DebugUtils::errored(kErrorInvalidAssignment);
        }
      }
    }
  }

  return kErrorOk;
}
928
929 Error X86RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept {
930 const FuncDetail& funcDetail = _pass->func()->detail();
931 const Operand* opArray = funcRet->operands();
932 uint32_t opCount = funcRet->opCount();
933
934 for (uint32_t i = 0; i < opCount; i++) {
935 const Operand& op = opArray[i];
936 if (op.isNone()) continue;
937
938 const FuncValue& ret = funcDetail.ret(i);
939 if (ASMJIT_UNLIKELY(!ret.isReg()))
940 return DebugUtils::errored(kErrorInvalidAssignment);
941
942 // Not handled here...
943 if (ret.regType() == Reg::kTypeSt)
944 continue;
945
946 if (op.isReg()) {
947 // Register return value.
948 const Reg& reg = op.as<Reg>();
949 uint32_t vIndex = Operand::virtIdToIndex(reg.id());
950
951 if (vIndex < Operand::kVirtIdCount) {
952 RAWorkReg* workReg;
953 ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
954
955 uint32_t group = workReg->group();
956 uint32_t allocable = _pass->_availableRegs[group];
957 ASMJIT_PROPAGATE(ib.add(workReg, RATiedReg::kUse | RATiedReg::kRead, allocable, ret.regId(), 0, BaseReg::kIdBad, 0));
958 }
959 }
960 else {
961 return DebugUtils::errored(kErrorInvalidAssignment);
962 }
963 }
964
965 return kErrorOk;
966 }
967
968 // ============================================================================
969 // [asmjit::x86::X86RAPass - Construction / Destruction]
970 // ============================================================================
971
// Constructs the pass with AVX disabled; `onInit()` later sets `_avxEnabled`
// from the attached function's frame.
X86RAPass::X86RAPass() noexcept
  : RAPass(),
    _avxEnabled(false) {}
X86RAPass::~X86RAPass() noexcept {}
976
977 // ============================================================================
978 // [asmjit::x86::X86RAPass - OnInit / OnDone]
979 // ============================================================================
980
//! Initializes architecture-specific pass state: physical register counts,
//! the set of allocable registers, and the stack/frame pointer registers.
void X86RAPass::onInit() noexcept {
  uint32_t archId = cc()->archId();
  // 32-bit X86 exposes 8 GP/vector registers, X64 exposes 16.
  uint32_t baseRegCount = archId == ArchInfo::kIdX86 ? 8u : 16u;

  _archRegsInfo = &opData.archRegs;
  // GP registers can be swapped in place (see `onEmitSwap()`).
  _archTraits[Reg::kGroupGp] |= RAArchTraits::kHasSwap;

  _physRegCount.set(Reg::kGroupGp , baseRegCount);
  _physRegCount.set(Reg::kGroupVec , baseRegCount);
  _physRegCount.set(Reg::kGroupMm , 8);
  _physRegCount.set(Reg::kGroupKReg, 8);
  _buildPhysIndex();

  _availableRegCount = _physRegCount;
  _availableRegs[Reg::kGroupGp ] = Support::lsbMask<uint32_t>(_physRegCount.get(Reg::kGroupGp ));
  _availableRegs[Reg::kGroupVec ] = Support::lsbMask<uint32_t>(_physRegCount.get(Reg::kGroupVec ));
  _availableRegs[Reg::kGroupMm ] = Support::lsbMask<uint32_t>(_physRegCount.get(Reg::kGroupMm ));
  _availableRegs[Reg::kGroupKReg] = Support::lsbMask<uint32_t>(_physRegCount.get(Reg::kGroupKReg));

  // The architecture specific setup makes implicitly all registers available. So
  // make unavailable all registers that are special and cannot be used in general.
  bool hasFP = _func->frame().hasPreservedFP();

  makeUnavailable(Reg::kGroupGp, Gp::kIdSp); // ESP|RSP used as a stack-pointer (SP).
  if (hasFP) makeUnavailable(Reg::kGroupGp, Gp::kIdBp); // EBP|RBP used as a frame-pointer (FP).

  _sp = cc()->zsp();
  _fp = cc()->zbp();
  _avxEnabled = _func->frame().isAvxEnabled();
}
1011
//! No X86-specific cleanup is required when the pass finishes.
void X86RAPass::onDone() noexcept {}
1013
1014 // ============================================================================
1015 // [asmjit::x86::X86RAPass - BuildCFG]
1016 // ============================================================================
1017
//! Builds the control-flow graph by running `X86RACFGBuilder` over this pass.
Error X86RAPass::buildCFG() noexcept {
  return X86RACFGBuilder(this).run();
}
1021
1022 // ============================================================================
1023 // [asmjit::x86::X86RAPass - OnEmit]
1024 // ============================================================================
1025
1026 Error X86RAPass::onEmitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
1027 RAWorkReg* wReg = workRegById(workId);
1028 BaseReg dst(wReg->info().signature(), dstPhysId);
1029 BaseReg src(wReg->info().signature(), srcPhysId);
1030
1031 const char* comment = nullptr;
1032
1033 #ifndef ASMJIT_NO_LOGGING
1034 if (_loggerFlags & FormatOptions::kFlagAnnotations) {
1035 _tmpString.assignFormat("<MOVE> %s", workRegById(workId)->name());
1036 comment = _tmpString.data();
1037 }
1038 #endif
1039
1040 return X86Internal::emitRegMove(cc()->as<Emitter>(), dst, src, wReg->typeId(), _avxEnabled, comment);
1041 }
1042
//! Emits an `xchg` that swaps two GP physical registers in place. Full
//! 64-bit registers are used when either work register's type id is at
//! least `Type::kIdI64`, otherwise 32-bit registers suffice.
Error X86RAPass::onEmitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
  RAWorkReg* waReg = workRegById(aWorkId);
  RAWorkReg* wbReg = workRegById(bWorkId);

  bool is64Bit = Support::max(waReg->typeId(), wbReg->typeId()) >= Type::kIdI64;
  uint32_t sign = is64Bit ? uint32_t(RegTraits<Reg::kTypeGpq>::kSignature)
                          : uint32_t(RegTraits<Reg::kTypeGpd>::kSignature);

#ifndef ASMJIT_NO_LOGGING
  if (_loggerFlags & FormatOptions::kFlagAnnotations) {
    _tmpString.assignFormat("<SWAP> %s, %s", waReg->name(), wbReg->name());
    cc()->setInlineComment(_tmpString.data());
  }
#endif

  return cc()->emit(Inst::kIdXchg, Reg(sign, aPhysId), Reg(sign, bPhysId));
}
1060
1061 Error X86RAPass::onEmitLoad(uint32_t workId, uint32_t dstPhysId) noexcept {
1062 RAWorkReg* wReg = workRegById(workId);
1063 BaseReg dstReg(wReg->info().signature(), dstPhysId);
1064 BaseMem srcMem(workRegAsMem(wReg));
1065
1066 const char* comment = nullptr;
1067
1068 #ifndef ASMJIT_NO_LOGGING
1069 if (_loggerFlags & FormatOptions::kFlagAnnotations) {
1070 _tmpString.assignFormat("<LOAD> %s", workRegById(workId)->name());
1071 comment = _tmpString.data();
1072 }
1073 #endif
1074
1075 return X86Internal::emitRegMove(cc()->as<Emitter>(), dstReg, srcMem, wReg->typeId(), _avxEnabled, comment);
1076 }
1077
1078 Error X86RAPass::onEmitSave(uint32_t workId, uint32_t srcPhysId) noexcept {
1079 RAWorkReg* wReg = workRegById(workId);
1080 BaseMem dstMem(workRegAsMem(wReg));
1081 BaseReg srcReg(wReg->info().signature(), srcPhysId);
1082
1083 const char* comment = nullptr;
1084
1085 #ifndef ASMJIT_NO_LOGGING
1086 if (_loggerFlags & FormatOptions::kFlagAnnotations) {
1087 _tmpString.assignFormat("<SAVE> %s", workRegById(workId)->name());
1088 comment = _tmpString.data();
1089 }
1090 #endif
1091
1092 return X86Internal::emitRegMove(cc()->as<Emitter>(), dstMem, srcReg, wReg->typeId(), _avxEnabled, comment);
1093 }
1094
//! Emits an unconditional `jmp` to `label`.
Error X86RAPass::onEmitJump(const Label& label) noexcept {
  return cc()->jmp(label);
}
1098
//! Emits the extra setup variadic calling conventions require right before
//! a call: X86 SysV64 passes the count of XMM arguments in AL, and Win64
//! mirrors each XMM argument into its paired GP register.
Error X86RAPass::onEmitPreCall(FuncCallNode* call) noexcept {
  if (call->detail().hasVarArgs()) {
    uint32_t argCount = call->argCount();
    const FuncDetail& fd = call->detail();

    switch (call->detail().callConv().id()) {
      case CallConv::kIdX86SysV64: {
        // AL register contains the number of arguments passed in XMM register(s).
        uint32_t n = 0;
        for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
          for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) {
            if (!fd.hasArg(argIndex + argHi))
              continue;

            const FuncValue& arg = fd.arg(argIndex + argHi);
            if (arg.isReg() && Reg::groupOf(arg.regType()) == Reg::kGroupVec)
              n++;
          }
        }

        // `xor eax, eax` is a shorter encoding of `mov eax, 0`.
        if (!n)
          ASMJIT_PROPAGATE(cc()->xor_(eax, eax));
        else
          ASMJIT_PROPAGATE(cc()->mov(eax, n));
        break;
      }

      case CallConv::kIdX86Win64: {
        // Each double-precision argument passed in XMM must be also passed in GP.
        for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
          for (uint32_t argHi = 0; argHi <= kFuncArgHi; argHi += kFuncArgHi) {
            if (!fd.hasArg(argIndex + argHi))
              continue;

            const FuncValue& arg = fd.arg(argIndex + argHi);
            if (arg.isReg() && Reg::groupOf(arg.regType()) == Reg::kGroupVec) {
              Gp dst = gpq(fd.callConv().passedOrder(Reg::kGroupGp)[argIndex]);
              Xmm src = xmm(arg.regId());
              ASMJIT_PROPAGATE(cc()->emit(choose(Inst::kIdMovq, Inst::kIdVmovq), dst, src));
            }
          }
        }
        break;
      }
    }
  }

  return kErrorOk;
}
1148
1149 ASMJIT_END_SUB_NAMESPACE
1150
1151 #endif // ASMJIT_BUILD_X86 && !ASMJIT_NO_COMPILER
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_X86RAPASS_P_H
7 #define _ASMJIT_X86_X86RAPASS_P_H
8
9 #include "../core/api-config.h"
10 #ifndef ASMJIT_NO_COMPILER
11
12 #include "../core/compiler.h"
13 #include "../core/rabuilders_p.h"
14 #include "../core/rapass_p.h"
15 #include "../x86/x86assembler.h"
16 #include "../x86/x86compiler.h"
17
18 ASMJIT_BEGIN_SUB_NAMESPACE(x86)
19
20 //! \cond INTERNAL
21
22 //! \defgroup asmjit_x86_ra X86 RA
23 //! \ingroup asmjit_x86
24 //!
25 //! \brief X86/X64 register allocation.
26
27 //! \addtogroup asmjit_x86_ra
28 //! \{
29
30 // ============================================================================
31 // [asmjit::X86RAPass]
32 // ============================================================================
33
34 //! X86 register allocation pass.
35 //!
36 //! Takes care of generating function prologs and epilogs, and also performs
37 //! register allocation.
class X86RAPass : public RAPass {
public:
  ASMJIT_NONCOPYABLE(X86RAPass)
  typedef RAPass Base;

  //! Whether AVX is enabled for the compiled function (set from the function
  //! frame in `onInit()`); selects the AVX instruction id in `choose()`.
  bool _avxEnabled;

  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  X86RAPass() noexcept;
  virtual ~X86RAPass() noexcept;

  // --------------------------------------------------------------------------
  // [Accessors]
  // --------------------------------------------------------------------------

  //! Returns the compiler casted to `x86::Compiler`.
  inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cb); }

  // --------------------------------------------------------------------------
  // [Utilities]
  // --------------------------------------------------------------------------

  //! Returns `avxInstId` when AVX is enabled, otherwise `sseInstId`.
  inline uint32_t choose(uint32_t sseInstId, uint32_t avxInstId) noexcept {
    return _avxEnabled ? avxInstId : sseInstId;
  }

  // --------------------------------------------------------------------------
  // [OnInit / OnDone]
  // --------------------------------------------------------------------------

  void onInit() noexcept override;
  void onDone() noexcept override;

  // --------------------------------------------------------------------------
  // [CFG]
  // --------------------------------------------------------------------------

  Error buildCFG() noexcept override;

  // --------------------------------------------------------------------------
  // [Emit]
  // --------------------------------------------------------------------------

  Error onEmitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override;
  Error onEmitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override;

  Error onEmitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override;
  Error onEmitSave(uint32_t workId, uint32_t srcPhysId) noexcept override;

  Error onEmitJump(const Label& label) noexcept override;
  Error onEmitPreCall(FuncCallNode* node) noexcept override;
};
93
94 //! \}
95 //! \endcond
96
97 ASMJIT_END_SUB_NAMESPACE
98
99 #endif // !ASMJIT_NO_COMPILER
100 #endif // _ASMJIT_X86_X86RAPASS_P_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_X86_H
7 #define _ASMJIT_X86_H
8
9 //! \defgroup asmjit_x86 X86
10 //!
11 //! \brief X86/X64 Backend.
12
13 #include "./core.h"
14
15 #include "./x86/x86assembler.h"
16 #include "./x86/x86builder.h"
17 #include "./x86/x86compiler.h"
18 #include "./x86/x86emitter.h"
19 #include "./x86/x86features.h"
20 #include "./x86/x86globals.h"
21 #include "./x86/x86instdb.h"
22 #include "./x86/x86operand.h"
23
24 #endif // _ASMJIT_X86_H
0 #include "../src/asmjit/asmjit.h"
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include <stdio.h>
7 #include <string.h>
8
9 #include "./asmjit.h"
10 #include "./asmjit_test_misc.h"
11 #include "./asmjit_test_opcode.h"
12
13 using namespace asmjit;
14
15 // ============================================================================
16 // [Configuration]
17 // ============================================================================
18
19 static constexpr uint32_t kNumRepeats = 25;
20 static constexpr uint32_t kNumIterations = 1000;
21
22 // ============================================================================
23 // [BenchUtils]
24 // ============================================================================
25
26 namespace BenchUtils {
27 class Performance {
28 public:
29 inline Performance() noexcept { reset(); }
30
31 inline void reset() noexcept {
32 tick = 0u;
33 best = 0xFFFFFFFFu;
34 }
35
36 inline uint32_t start() noexcept { return (tick = now()); }
37 inline uint32_t diff() const noexcept { return now() - tick; }
38
39 inline uint32_t end() noexcept {
40 tick = diff();
41 if (best > tick)
42 best = tick;
43 return tick;
44 }
45
46 static inline uint32_t now() noexcept {
47 return OSUtils::getTickCount();
48 }
49
50 uint32_t tick;
51 uint32_t best;
52 };
53
54 static double mbps(uint32_t time, uint64_t outputSize) noexcept {
55 if (!time) return 0.0;
56
57 double bytesTotal = double(outputSize);
58 return (bytesTotal * 1000) / (double(time) * 1024 * 1024);
59 }
60
61 template<typename EmitterT, typename FuncT>
62 static void bench(CodeHolder& code, uint32_t archId, const char* testName, const FuncT& func) noexcept {
63 EmitterT emitter;
64
65 const char* archName =
66 archId == ArchInfo::kIdX86 ? "X86" :
67 archId == ArchInfo::kIdX64 ? "X64" : "???";
68
69 const char* emitterName =
70 emitter.isAssembler() ? "Assembler" :
71 emitter.isCompiler() ? "Compiler" :
72 emitter.isBuilder() ? "Builder" : "Unknown";
73
74 Performance perf;
75 uint64_t codeSize = 0;
76
77 CodeInfo codeInfo(archId);
78 codeInfo.setCdeclCallConv(archId == ArchInfo::kIdX86 ? CallConv::kIdX86CDecl : CallConv::kIdX86SysV64);
79
80 for (uint32_t r = 0; r < kNumRepeats; r++) {
81 perf.start();
82 codeSize = 0;
83 for (uint32_t i = 0; i < kNumIterations; i++) {
84 code.init(codeInfo);
85 code.attach(&emitter);
86
87 func(emitter);
88 codeSize += code.codeSize();
89
90 code.reset();
91 }
92 perf.end();
93 }
94
95 printf("[%s] %-9s %-8s | Time:%6u [ms] | ", archName, emitterName, testName, perf.best);
96 if (codeSize)
97 printf("Speed: %7.3f [MB/s]", mbps(perf.best, codeSize));
98 else
99 printf("Speed: N/A");
100 printf("\n");
101 }
102 }
103
104 // ============================================================================
105 // [Main]
106 // ============================================================================
107
108 #ifdef ASMJIT_BUILD_X86
109 static void benchX86(uint32_t archId) noexcept {
110 CodeHolder code;
111
112 BenchUtils::bench<x86::Assembler>(code, archId, "[raw]", [](x86::Assembler& a) {
113 asmtest::generateOpcodes(a.as<x86::Emitter>());
114 });
115
116 BenchUtils::bench<x86::Builder>(code, archId, "[raw]", [](x86::Builder& cb) {
117 asmtest::generateOpcodes(cb.as<x86::Emitter>());
118 });
119
120 BenchUtils::bench<x86::Builder>(code, archId, "[final]", [](x86::Builder& cb) {
121 asmtest::generateOpcodes(cb.as<x86::Emitter>());
122 cb.finalize();
123 });
124
125 BenchUtils::bench<x86::Compiler>(code, archId, "[raw]", [](x86::Compiler& cc) {
126 asmtest::generateAlphaBlend(cc);
127 });
128
129 BenchUtils::bench<x86::Compiler>(code, archId, "[final]", [](x86::Compiler& cc) {
130 asmtest::generateAlphaBlend(cc);
131 cc.finalize();
132 });
133 }
134 #endif
135
int main(int argc, char* argv[]) {
  // Command-line arguments are intentionally unused.
  (void)argc;
  (void)argv;

#ifdef ASMJIT_BUILD_X86
  // Benchmark both 32-bit and 64-bit X86 code generation.
  benchX86(ArchInfo::kIdX86);
  benchX86(ArchInfo::kIdX64);
#endif

  return 0;
}
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_TEST_MISC_H
7 #define _ASMJIT_TEST_MISC_H
8
9 #include "./asmjit.h"
10
11 namespace asmtest {
12
// Generates a typical alpha-blend function using the SSE2 instruction set
// through the register-allocating `x86::Compiler`. Used for benchmarking and
// also in test86. The generated code should be stable and fully functional.
//
// Signature of the generated function: `void(void* dst, const void* src,
// size_t n)` where `n` counts 32-bit pixels (dst is read-modify-written).
static void generateAlphaBlend(asmjit::x86::Compiler& cc) {
  using namespace asmjit;
  using namespace asmjit::x86;

  // Virtual registers - physical registers are assigned by the Compiler.
  Gp dst = cc.newIntPtr("dst");
  Gp src = cc.newIntPtr("src");

  Gp i = cc.newIntPtr("i"); // Pixel count for the aligned (large) loop.
  Gp j = cc.newIntPtr("j"); // Pixel count for the unaligned head/tail.
  Gp t = cc.newIntPtr("t"); // Scratch - address of the constant data pool.

  Xmm vzero = cc.newXmm("vzero"); // All zeros, used to unpack bytes to words.
  Xmm v0080 = cc.newXmm("v0080"); // 8x u16 0x0080 (loaded from the data pool).
  Xmm v0101 = cc.newXmm("v0101"); // 8x u16 0x0101 (loaded from the data pool).

  Label L_SmallLoop = cc.newLabel();
  Label L_SmallEnd = cc.newLabel();
  Label L_LargeLoop = cc.newLabel();
  Label L_LargeEnd = cc.newLabel();
  Label L_DataPool = cc.newLabel();

  cc.addFunc(FuncSignatureT<void, void*, const void*, size_t>(cc.codeInfo().cdeclCallConv()));

  cc.setArg(0, dst);
  cc.setArg(1, src);
  cc.setArg(2, i);

  // How many pixels have to be processed to make the loop aligned.
  cc.lea(t, x86::ptr(L_DataPool));
  cc.xorps(vzero, vzero);
  cc.movaps(v0080, x86::ptr(t, 0));
  cc.movaps(v0101, x86::ptr(t, 16));

  // j = number of 4-byte pixels until `dst` reaches 16-byte alignment.
  cc.xor_(j, j);
  cc.sub(j, dst);
  cc.and_(j, 15);
  cc.shr(j, 2);
  cc.jz(L_SmallEnd);

  cc.cmp(j, i);
  cc.cmovg(j, i); // j = min(i, j).
  cc.sub(i, j); // i -= j.

  // Small loop - blends one pixel per iteration until `dst` is aligned.
  cc.bind(L_SmallLoop);
  {
    Xmm x0 = cc.newXmm("x0"); // Destination pixel, unpacked to words.
    Xmm y0 = cc.newXmm("y0"); // Source pixel.
    Xmm a0 = cc.newXmm("a0"); // Inverted alpha, broadcast to all lanes.

    cc.movd(y0, x86::ptr(src));
    cc.movd(x0, x86::ptr(dst));

    // a0 = ~y0 >> 8 per word - extracts the inverted alpha of each channel
    // (presumably 255 - alpha of an ARGB pixel - confirm pixel layout).
    cc.pcmpeqb(a0, a0);
    cc.pxor(a0, y0);
    cc.psrlw(a0, 8);
    cc.punpcklbw(x0, vzero);

    // Broadcast the alpha word to the four low lanes.
    cc.pshuflw(a0, a0, x86::Predicate::shuf(1, 1, 1, 1));
    cc.punpcklbw(y0, vzero);

    // x0 = (x0 * a0 + 0x0080) * 0x0101 >> 16 - the usual approximate
    // division by 255 with rounding.
    cc.pmullw(x0, a0);
    cc.paddsw(x0, v0080);
    cc.pmulhuw(x0, v0101);

    // Add the source pixel and pack words back to bytes with saturation.
    cc.paddw(x0, y0);
    cc.packuswb(x0, x0);

    cc.movd(x86::ptr(dst), x0);

    cc.add(dst, 4);
    cc.add(src, 4);

    cc.dec(j);
    cc.jnz(L_SmallLoop);
  }

  // Second section, prepare for an aligned loop.
  cc.bind(L_SmallEnd);

  cc.test(i, i);
  cc.mov(j, i);
  cc.jz(cc.func()->exitLabel());

  // j = leftover pixels (< 4) for the tail, i = number of 4-pixel batches.
  cc.and_(j, 3);
  cc.shr(i, 2);
  cc.jz(L_LargeEnd);

  // Aligned loop - blends four pixels per iteration (`dst` is 16-byte
  // aligned here; `src` may not be, hence movups vs movaps below).
  cc.bind(L_LargeLoop);
  {
    Xmm x0 = cc.newXmm("x0"); // Low two destination pixels, unpacked.
    Xmm x1 = cc.newXmm("x1"); // High two destination pixels, unpacked.
    Xmm y0 = cc.newXmm("y0"); // Four source pixels.
    Xmm a0 = cc.newXmm("a0"); // Inverted alphas for the low pixels.
    Xmm a1 = cc.newXmm("a1"); // Inverted alphas for the high pixels.

    cc.movups(y0, x86::ptr(src));
    cc.movaps(x0, x86::ptr(dst));

    // a0 = ~y0, then shift to isolate the inverted alpha of each word.
    cc.pcmpeqb(a0, a0);
    cc.xorps(a0, y0);
    cc.movaps(x1, x0);

    cc.psrlw(a0, 8);
    cc.punpcklbw(x0, vzero);

    cc.movaps(a1, a0);
    cc.punpcklwd(a0, a0);

    cc.punpckhbw(x1, vzero);
    cc.punpckhwd(a1, a1);

    // Broadcast each pixel's alpha across its four channel words.
    cc.pshufd(a0, a0, x86::Predicate::shuf(3, 3, 1, 1));
    cc.pshufd(a1, a1, x86::Predicate::shuf(3, 3, 1, 1));

    // Same (x * a + 0x0080) * 0x0101 >> 16 scaling as the small loop,
    // applied to both pixel pairs.
    cc.pmullw(x0, a0);
    cc.pmullw(x1, a1);

    cc.paddsw(x0, v0080);
    cc.paddsw(x1, v0080);

    cc.pmulhuw(x0, v0101);
    cc.pmulhuw(x1, v0101);

    cc.add(src, 16);
    cc.packuswb(x0, x1);

    cc.paddw(x0, y0);
    cc.movaps(x86::ptr(dst), x0);

    cc.add(dst, 16);

    cc.dec(i);
    cc.jnz(L_LargeLoop);
  }

  cc.bind(L_LargeEnd);
  // Handle the remaining (< 4) pixels by re-entering the small loop.
  cc.test(j, j);
  cc.jnz(L_SmallLoop);

  cc.endFunc();

  // Data - 16-byte aligned constant pool referenced via L_DataPool.
  cc.align(kAlignData, 16);
  cc.bind(L_DataPool);
  cc.dxmm(Data128::fromI16(0x0080));
  cc.dxmm(Data128::fromI16(0x0101));
}
165
166 } // {asmtest}
167
168 #endif // _ASMJIT_TEST_MISC_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 // This file is used to test opcodes generated by AsmJit. Output can be
7 // disassembled in your IDE or by your favorite disassembler. Instructions
8 // are grouped by category and then sorted alphabetically.
9
10 #include <stdio.h>
11 #include <stdlib.h>
12
13 #include "./asmjit.h"
14 #include "./asmjit_test_opcode.h"
15
16 using namespace asmjit;
17
// One opcode-dump configuration: the target architecture plus the two REX
// switches forwarded to `generateOpcodes()` (they select registers with
// index >= 8 to force REX prefixes in the encodings).
struct OpcodeDumpInfo {
  uint32_t archId;  // ArchInfo::kIdX86 or kIdX64 in this test.
  bool useRex1;     // Use REX-requiring registers for the 'A' operands.
  bool useRex2;     // Use REX-requiring registers for the 'B'/'C'/'D' operands.
};
23
24 static const char* archIdToString(uint32_t archId) {
25 switch (archId) {
26 case ArchInfo::kIdNone: return "None";
27 case ArchInfo::kIdX86 : return "X86";
28 case ArchInfo::kIdX64 : return "X64";
29 case ArchInfo::kIdA32 : return "A32";
30 case ArchInfo::kIdA64 : return "A64";
31
32 default:
33 return "<unknown>";
34 }
35 }
36
// Error handler that just prints reported errors to stdout; the test keeps
// running after an error (attached via `code.setErrorHandler()` in `main`).
struct TestErrorHandler : public ErrorHandler {
  virtual void handleError(Error err, const char* message, BaseEmitter* origin) {
    (void)origin; // The reporting emitter is not needed for this dump.
    printf("ERROR 0x%08X: %s\n", err, message);
  }
};
43
44 typedef void (*VoidFunc)(void);
45
46 int main(int argc, char* argv[]) {
47 ASMJIT_UNUSED(argc);
48 ASMJIT_UNUSED(argv);
49
50 TestErrorHandler eh;
51
52 OpcodeDumpInfo infoList[] = {
53 { ArchInfo::kIdX86, false, false },
54 { ArchInfo::kIdX64, false, false },
55 { ArchInfo::kIdX64, false, true },
56 { ArchInfo::kIdX64, true , false },
57 { ArchInfo::kIdX64, true , true }
58 };
59
60 for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(infoList); i++) {
61 const OpcodeDumpInfo& info = infoList[i];
62
63 printf("Opcodes [ARCH=%s REX1=%s REX2=%s]\n",
64 archIdToString(info.archId),
65 info.useRex1 ? "true" : "false",
66 info.useRex2 ? "true" : "false");
67
68 CodeHolder code;
69 code.init(CodeInfo(info.archId));
70 code.setErrorHandler(&eh);
71
72 #ifndef ASMJIT_NO_LOGGING
73 FileLogger logger(stdout);
74 logger.addFlags(FormatOptions::kFlagMachineCode);
75 code.setLogger(&logger);
76 #endif
77
78 x86::Assembler a(&code);
79 asmtest::generateOpcodes(a.as<x86::Emitter>(), info.useRex1, info.useRex2);
80
81 // If this is the host architecture the code generated can be executed
82 // for debugging purposes (the first instruction is ret anyway).
83 if (code.archId() == ArchInfo::kIdHost) {
84 JitRuntime runtime;
85 VoidFunc p;
86
87 Error err = runtime.add(&p, &code);
88 if (err == kErrorOk) p();
89 }
90 }
91
92 return 0;
93 }
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #ifndef _ASMJIT_TEST_OPCODE_H
7 #define _ASMJIT_TEST_OPCODE_H
8
9 #include "./asmjit.h"
10
11 namespace asmtest {
12
13 // Generate all instructions asmjit can emit.
14 static void generateOpcodes(asmjit::x86::Emitter* e, bool useRex1 = false, bool useRex2 = false) {
15 using namespace asmjit;
16 using namespace asmjit::x86;
17
18 bool isX64 = e->is64Bit();
19
20 // Prevent a crash when the generated function is called to see the disassembly.
21 e->ret();
22
23 // All instructions use the following register that can be changed to see if
24 // `x86::Assembler` can properly encode all possible combinations. If the given
25 // `useRexRegs` argument is true the `A` version will in most cases contain
26 // a register having index 8 or greater to force REX prefix.
27 Gp gLoA = useRex1 ? r8b : al;
28 Gp gLoB = useRex2 ? r9b : bl;
29
30 Gp gHiA = ah;
31 Gp gHiB = bh;
32
33 Gp gwA = useRex1 ? r8w : ax;
34 Gp gwB = useRex2 ? r9w : bx;
35
36 Gp gdA = useRex1 ? r8d : eax;
37 Gp gdB = useRex2 ? r9d : ebx;
38 Gp gdC = useRex2 ? r10d : ecx;
39
40 Gp gzA = useRex1 ? r8 : e->zax();
41 Gp gzB = useRex2 ? r9 : e->zbx();
42 Gp gzC = useRex2 ? r10 : e->zcx();
43 Gp gzD = useRex2 ? r11 : e->zdx();
44
45 KReg kA = k1;
46 KReg kB = k2;
47 KReg kC = k3;
48
49 Mem anyptr_gpA = ptr(gzA);
50 Mem anyptr_gpB = ptr(gzB);
51 Mem anyptr_gpC = ptr(gzC);
52 Mem anyptr_gpD = ptr(gzD);
53
54 Mem intptr_gpA = e->intptr_ptr(gzA);
55 Mem intptr_gpB = e->intptr_ptr(gzB);
56
57 St stA = st0;
58 St stB = st7;
59
60 Mm mmA = mm0;
61 Mm mmB = mm1;
62
63 Xmm xmmA = useRex1 ? xmm8 : xmm0;
64 Xmm xmmB = useRex2 ? xmm9 : xmm1;
65 Xmm xmmC = useRex2 ? xmm10 : xmm2;
66 Xmm xmmD = useRex2 ? xmm11 : xmm3;
67
68 Ymm ymmA = useRex1 ? ymm8 : ymm0;
69 Ymm ymmB = useRex2 ? ymm9 : ymm1;
70 Ymm ymmC = useRex2 ? ymm10 : ymm2;
71 Ymm ymmD = useRex2 ? ymm11 : ymm3;
72
73 Zmm zmmA = useRex1 ? zmm8 : zmm0;
74 Zmm zmmB = useRex2 ? zmm9 : zmm1;
75 Zmm zmmC = useRex2 ? zmm10 : zmm2;
76 Zmm zmmD = useRex2 ? zmm11 : zmm3;
77
78 Mem vx_ptr = ptr(gzB, xmmB);
79 Mem vy_ptr = ptr(gzB, ymmB);
80 Mem vz_ptr = ptr(gzB, zmmB);
81
82 Label L;
83
84 // Base.
85 e->adc(gLoA, 1);
86 e->adc(gLoB, 1);
87 e->adc(gHiA, 1);
88 e->adc(gHiB, 1);
89 e->adc(gwA, 1);
90 e->adc(gwB, 1);
91 e->adc(gdA, 1);
92 e->adc(gdB, 1);
93 e->adc(gzA, 1);
94 e->adc(gzA, gzB);
95 e->adc(gzA, intptr_gpB);
96 e->adc(intptr_gpA, 1);
97 e->adc(intptr_gpA, gzB);
98 e->add(gLoA, 1);
99 e->add(gLoB, 1);
100 e->add(gHiA, 1);
101 e->add(gHiB, 1);
102 e->add(gwA, 1);
103 e->add(gwB, 1);
104 e->add(gdA, 1);
105 e->add(gdB, 1);
106 e->add(gzA, 1);
107 e->add(gzA, gzB);
108 e->add(gzA, intptr_gpB);
109 e->add(intptr_gpA, 1);
110 e->add(intptr_gpA, gzB);
111 e->and_(gLoA, 1);
112 e->and_(gLoB, 1);
113 e->and_(gHiA, 1);
114 e->and_(gHiB, 1);
115 e->and_(gwA, 1);
116 e->and_(gwB, 1);
117 e->and_(gdA, 1);
118 e->and_(gdB, 1);
119 e->and_(gzA, 1);
120 e->and_(gzA, gzB);
121 e->and_(gzA, intptr_gpB);
122 e->and_(intptr_gpA, 1);
123 e->and_(intptr_gpA, gzB);
124 e->bswap(gzA);
125 e->bt(gdA, 1);
126 e->bt(gzA, 1);
127 e->bt(gdA, gdB);
128 e->bt(gzA, gzB);
129 e->bt(intptr_gpA, 1);
130 e->bt(anyptr_gpA, gdB);
131 e->bt(intptr_gpA, gzB);
132 e->btc(gdA, 1);
133 e->btc(gzA, 1);
134 e->btc(gdA, gdB);
135 e->btc(gzA, gzB);
136 e->btc(intptr_gpA, 1);
137 e->btc(anyptr_gpA, gdB);
138 e->btc(intptr_gpA, gzB);
139 e->btr(gdA, 1);
140 e->btr(gzA, 1);
141 e->btr(gdA, gdB);
142 e->btr(gzA, gzB);
143 e->btr(intptr_gpA, 1);
144 e->btr(anyptr_gpA, gdB);
145 e->btr(intptr_gpA, gzB);
146 e->bts(gdA, 1);
147 e->bts(gzA, 1);
148 e->bts(gdA, gdB);
149 e->bts(gzA, gzB);
150 e->bts(intptr_gpA, 1);
151 e->bts(anyptr_gpA, gdB);
152 e->bts(intptr_gpA, gzB);
153 e->call(gzA);
154 e->call(intptr_gpA);
155 e->cbw(); // Implicit AX <- Sign Extend AL.
156 e->cbw(ax); // Explicit AX <- Sign Extend AL.
157 e->cdq(); // Implicit EDX:EAX <- Sign Extend EAX.
158 e->cdq(edx, eax); // Explicit EDX:EAX <- Sign Extend EAX.
159 if (isX64) e->cdqe(); // Implicit RAX <- Sign Extend EAX.
160 if (isX64) e->cdqe(eax); // Explicit RAX <- Sign Extend EAX.
161 e->cwd(); // Implicit DX:AX <- Sign Extend AX.
162 e->cwd(dx, ax); // Explicit DX:AX <- Sign Extend AX.
163 e->cwde(); // Implicit EAX <- Sign Extend AX.
164 e->cwde(eax); // Explicit EAX <- Sign Extend AX.
165 if (isX64) e->cqo(); // Implicit RDX:RAX <- Sign Extend RAX.
166 if (isX64) e->cqo(rdx, rax); // Explicit RDX:RAX <- Sign Extend RAX.
167 e->clc();
168 e->cld();
169 e->cmc();
170 e->cmp(gLoA, 1);
171 e->cmp(gLoB, 1);
172 e->cmp(gHiA, 1);
173 e->cmp(gHiB, 1);
174 e->cmp(gwA, 1);
175 e->cmp(gwB, 1);
176 e->cmp(gdA, 1);
177 e->cmp(gdB, 1);
178 e->cmp(gzA, 1);
179 e->cmp(gLoA, gLoB);
180 e->cmp(gHiA, gHiB);
181 e->cmp(gwA, gwB);
182 e->cmp(gdA, gdB);
183 e->cmp(gzA, gzB);
184 e->cmp(gdA, anyptr_gpB);
185 e->cmp(gzA, intptr_gpB);
186 e->cmp(intptr_gpA, 1);
187 e->cmp(anyptr_gpA, gdB);
188 e->cmp(intptr_gpA, gzB);
189 e->cmpxchg(gdA, gdB); // Implicit regA, regB, <EAX>
190 e->cmpxchg(gzA, gzB); // Implicit regA, regB, <ZAX>
191 e->cmpxchg(gdA, gdB, eax); // Explicit regA, regB, <EAX>
192 e->cmpxchg(gzA, gzB, e->zax()); // Explicit regA, regB, <ZAX>
193 e->cmpxchg(anyptr_gpA, gdB); // Implicit mem , regB, <EAX>
194 e->cmpxchg(anyptr_gpA, gzB); // Implicit mem , regB, <ZAX>
195 e->cmpxchg(anyptr_gpA, gdB, eax); // Explicit mem , regB, <EAX>
196 e->cmpxchg(anyptr_gpA, gzB, e->zax()); // Explicit mem , regB, <ZAX>
197 e->cmpxchg8b(anyptr_gpA); // Implicit mem , <EDX>, <EAX>, <ECX>, <EBX>
198 e->cmpxchg8b(anyptr_gpA,
199 x86::edx, x86::eax,
200 x86::ecx, x86::ebx); // Explicit mem , <EDX>, <EAX>, <ECX>, <EBX>
201 if (isX64) e->cmpxchg16b(anyptr_gpA); // Implicit mem , <RDX>, <RAX>, <RCX>, <RBX>
202 if (isX64) e->cmpxchg16b(anyptr_gpA,
203 x86::rdx, x86::rax,
204 x86::rcx, x86::rbx); // Explicit mem , <EDX>, <EAX>, <ECX>, <EBX>
205 e->cpuid(); // Implicit <EAX>, <EBX>, <ECX>, <EDX>
206 e->cpuid(eax, ebx, ecx, edx); // Explicit <EAX>, <EBX>, <ECX>, <EDX>
207 e->crc32(gdA, byte_ptr(gzB));
208 e->crc32(gdA, word_ptr(gzB));
209 e->crc32(gdA, dword_ptr(gzB));
210 if (isX64) e->crc32(gdA, qword_ptr(gzB));
211 if (isX64) e->crc32(gzA, qword_ptr(gzB));
212 e->dec(gLoA);
213 e->dec(gHiA);
214 e->dec(gwA);
215 e->dec(gdA);
216 e->dec(gzA);
217 e->dec(intptr_gpA);
218 e->inc(gLoA);
219 e->inc(gwA);
220 e->inc(gdA);
221 e->inc(gzA);
222 e->inc(intptr_gpA);
223 e->int_(13);
224 e->int3();
225 e->into();
226 e->lea(gzA, intptr_gpB);
227 e->mov(gLoA, 1);
228 e->mov(gHiA, 1);
229 e->mov(gwA, 1);
230 e->mov(gdA, 1);
231 e->mov(gzA, 1);
232 e->mov(gLoA, gLoB);
233 e->mov(gHiA, gHiB);
234 e->mov(gwA, gwB);
235 e->mov(gdA, gdB);
236 e->mov(gzA, gzB);
237 e->mov(gLoA, anyptr_gpB);
238 e->mov(gwA, anyptr_gpB);
239 e->mov(gdA, anyptr_gpB);
240 e->mov(gzA, intptr_gpB);
241 e->mov(anyptr_gpA, gLoB);
242 e->mov(anyptr_gpA, gwB);
243 e->mov(anyptr_gpA, gdB);
244 e->mov(intptr_gpA, 1);
245 e->mov(intptr_gpA, gzB);
246 e->movsx(gzA, gLoB);
247 e->movsx(gzA, byte_ptr(gzB));
248 e->movzx(gzA, gLoB);
249 e->movzx(gzA, byte_ptr(gzB));
250 e->movbe(gzA, anyptr_gpB);
251 e->movbe(anyptr_gpA, gzB);
252 e->neg(gzA);
253 e->neg(intptr_gpA);
254 e->nop();
255 e->not_(gzA);
256 e->not_(intptr_gpA);
257 e->or_(gLoA, 1);
258 e->or_(gLoB, 1);
259 e->or_(gHiA, 1);
260 e->or_(gHiB, 1);
261 e->or_(gwA, 1);
262 e->or_(gwB, 1);
263 e->or_(gdA, 1);
264 e->or_(gdB, 1);
265 e->or_(gzA, 1);
266 e->or_(gzA, gzB);
267 e->or_(gzA, intptr_gpB);
268 e->or_(intptr_gpA, 1);
269 e->or_(intptr_gpA, gzB);
270 e->pop(gzA);
271 e->pop(intptr_gpA);
272 if (!isX64) e->popa();
273 if (!isX64) e->popad();
274 e->popf();
275 if (!isX64) e->popfd();
276 if ( isX64) e->popfq();
277 e->push(gzA);
278 e->push(intptr_gpA);
279 e->push(0);
280 if (!isX64) e->pusha();
281 if (!isX64) e->pushad();
282 e->pushf();
283 if (!isX64) e->pushfd();
284 if ( isX64) e->pushfq();
285 e->rcl(gdA, 0);
286 e->rcl(gzA, 0);
287 e->rcl(gdA, 1);
288 e->rcl(gzA, 1);
289 e->rcl(gdA, cl);
290 e->rcl(gzA, cl);
291 e->rcl(intptr_gpA, 0);
292 e->rcl(intptr_gpA, 1);
293 e->rcl(intptr_gpA, cl);
294 e->rcr(gdA, 0);
295 e->rcr(gzA, 0);
296 e->rcr(gdA, 1);
297 e->rcr(gzA, 1);
298 e->rcr(gdA, cl);
299 e->rcr(gzA, cl);
300 e->rcr(intptr_gpA, 0);
301 e->rcr(intptr_gpA, 1);
302 e->rcr(intptr_gpA, cl);
303 e->rdtsc(); // Implicit <EDX:EAX>
304 e->rdtsc(edx, eax); // Explicit <EDX:EAX>
305 e->rdtscp(); // Implicit <EDX:EAX>, <ECX>
306 e->rdtscp(edx, eax, ecx); // Implicit <EDX:EAX>, <ECX>
307 e->ret();
308 e->ret(0);
309 e->rol(gdA, 0);
310 e->rol(gzA, 0);
311 e->rol(gdA, 1);
312 e->rol(gzA, 1);
313 e->rol(gdA, cl);
314 e->rol(gzA, cl);
315 e->rol(intptr_gpA, 0);
316 e->rol(intptr_gpA, 1);
317 e->rol(intptr_gpA, cl);
318 e->ror(gdA, 0);
319 e->ror(gzA, 0);
320 e->ror(gdA, 1);
321 e->ror(gzA, 1);
322 e->ror(gdA, cl);
323 e->ror(gzA, cl);
324 e->ror(intptr_gpA, 0);
325 e->ror(intptr_gpA, 1);
326 e->ror(intptr_gpA, cl);
327 e->sbb(gLoA, 1);
328 e->sbb(gLoB, 1);
329 e->sbb(gHiA, 1);
330 e->sbb(gHiB, 1);
331 e->sbb(gwA, 1);
332 e->sbb(gwB, 1);
333 e->sbb(gdA, 1);
334 e->sbb(gdB, 1);
335 e->sbb(gzA, 1);
336 e->sbb(gzA, gzB);
337 e->sbb(gzA, intptr_gpB);
338 e->sbb(intptr_gpA, 1);
339 e->sbb(intptr_gpA, gzB);
340 e->sal(gdA, 0);
341 e->sal(gzA, 0);
342 e->sal(gdA, 1);
343 e->sal(gzA, 1);
344 e->sal(gdA, cl);
345 e->sal(gzA, cl);
346 e->sal(intptr_gpA, 0);
347 e->sal(intptr_gpA, 1);
348 e->sal(intptr_gpA, cl);
349 e->sar(gdA, 0);
350 e->sar(gzA, 0);
351 e->sar(gdA, 1);
352 e->sar(gzA, 1);
353 e->sar(gdA, cl);
354 e->sar(gzA, cl);
355 e->sar(intptr_gpA, 0);
356 e->sar(intptr_gpA, 1);
357 e->sar(intptr_gpA, cl);
358 e->shl(gdA, 0);
359 e->shl(gzA, 0);
360 e->shl(gdA, 1);
361 e->shl(gzA, 1);
362 e->shl(gdA, cl);
363 e->shl(gzA, cl);
364 e->shl(intptr_gpA, 0);
365 e->shl(intptr_gpA, 1);
366 e->shl(intptr_gpA, cl);
367 e->shr(gdA, 0);
368 e->shr(gzA, 0);
369 e->shr(gdA, 1);
370 e->shr(gzA, 1);
371 e->shr(gdA, cl);
372 e->shr(gzA, cl);
373 e->shr(intptr_gpA, 0);
374 e->shr(intptr_gpA, 1);
375 e->shr(intptr_gpA, cl);
376 e->shld(gdA, gdB, 0);
377 e->shld(gzA, gzB, 0);
378 e->shld(gdA, gdB, cl);
379 e->shld(gzA, gzB, cl);
380 e->shld(anyptr_gpA, gdB, 0);
381 e->shld(intptr_gpA, gzB, 0);
382 e->shld(anyptr_gpA, gdB, cl);
383 e->shld(intptr_gpA, gzB, cl);
384 e->shrd(gdA, gdB, 0);
385 e->shrd(gzA, gzB, 0);
386 e->shrd(gdA, gdB, cl);
387 e->shrd(gzA, gzB, cl);
388 e->shrd(anyptr_gpA, gdB, 0);
389 e->shrd(intptr_gpA, gzB, 0);
390 e->shrd(anyptr_gpA, gdB, cl);
391 e->shrd(intptr_gpA, gzB, cl);
392 e->stc();
393 e->std();
394 e->sti();
395 e->sub(gLoA, 1);
396 e->sub(gLoB, 1);
397 e->sub(gHiA, 1);
398 e->sub(gHiB, 1);
399 e->sub(gwA, 1);
400 e->sub(gwB, 1);
401 e->sub(gdA, 1);
402 e->sub(gdB, 1);
403 e->sub(gzA, 1);
404 e->sub(gzA, gzB);
405 e->sub(gzA, intptr_gpB);
406 e->sub(intptr_gpA, 1);
407 e->sub(intptr_gpA, gzB);
408 e->swapgs();
409 e->test(gzA, 1);
410 e->test(gzA, gzB);
411 e->test(intptr_gpA, 1);
412 e->test(intptr_gpA, gzB);
413 e->ud2();
414 e->xadd(gzA, gzB);
415 e->xadd(intptr_gpA, gzB);
416 e->xchg(gzA, gzB);
417 e->xchg(intptr_gpA, gzB);
418 e->xchg(gzA, intptr_gpB);
419 e->xor_(gLoA, 1);
420 e->xor_(gLoB, 1);
421 e->xor_(gHiA, 1);
422 e->xor_(gHiB, 1);
423 e->xor_(gwA, 1);
424 e->xor_(gwB, 1);
425 e->xor_(gdA, 1);
426 e->xor_(gdB, 1);
427 e->xor_(gzA, 1);
428 e->xor_(gzA, gzB);
429 e->xor_(gzA, intptr_gpB);
430 e->xor_(intptr_gpA, 1);
431 e->xor_(intptr_gpA, gzB);
432
433 // Special case - div|mul.
434 e->div(cl); // Implicit AH:AL <- AX * r8
435 e->div(byte_ptr(gzA)); // Implicit AH:AL <- AX * m8
436 e->div(ax, cl); // Explicit AH:AL <- AX * r8
437 e->div(ax, anyptr_gpA); // Explicit AH:AL <- AX * m8
438
439 e->div(cx); // Implicit DX:AX <- DX:AX * r16
440 e->div(word_ptr(gzA)); // Implicit DX:AX <- DX:AX * m16
441 e->div(dx, ax, cx); // Explicit DX:AX <- DX:AX * r16
442 e->div(dx, ax, anyptr_gpA); // Explicit DX:AX <- DX:AX * m16
443
444 e->div(ecx); // Implicit EDX:EAX <- EDX:EAX * r32
445 e->div(dword_ptr(gzA)); // Implicit EDX:EAX <- EDX:EAX * m32
446 e->div(edx, eax, ecx); // Explicit EDX:EAX <- EDX:EAX * r32
447 e->div(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EDX:EAX * m32
448
449 if (isX64) e->div(rcx); // Implicit RDX|RAX <- RDX:RAX * r64
450 if (isX64) e->div(qword_ptr(gzA)); // Implicit RDX|RAX <- RDX:RAX * m64
451 if (isX64) e->div(rdx, rax, rcx); // Explicit RDX|RAX <- RDX:RAX * r64
452 if (isX64) e->div(rdx, rax, anyptr_gpA); // Explicit RDX|RAX <- RDX:RAX * m64
453
454 e->idiv(cl); // Implicit AH:AL <- AX * r8
455 e->idiv(byte_ptr(gzA)); // Implicit AH:AL <- AX * m8
456 e->idiv(ax, cl); // Explicit AH:AL <- AX * r8
457 e->idiv(ax, anyptr_gpA); // Explicit AH:AL <- AX * m8
458
459 e->idiv(cx); // Implicit DX:AX <- DX:AX * r16
460 e->idiv(word_ptr(gzA)); // Implicit DX:AX <- DX:AX * m16
461 e->idiv(dx, ax, cx); // Explicit DX:AX <- DX:AX * r16
462 e->idiv(dx, ax, anyptr_gpA); // Explicit DX:AX <- DX:AX * m16
463
464 e->idiv(ecx); // Implicit EDX:EAX <- EDX:EAX * r32
465 e->idiv(dword_ptr(gzA)); // Implicit EDX:EAX <- EDX:EAX * m32
466 e->idiv(edx, eax, ecx); // Explicit EDX:EAX <- EDX:EAX * r32
467 e->idiv(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EDX:EAX * m32
468
469 if (isX64) e->idiv(rcx); // Implicit RDX|RAX <- RDX:RAX * r64
470 if (isX64) e->idiv(qword_ptr(gzA)); // Implicit RDX|RAX <- RDX:RAX * m64
471 if (isX64) e->idiv(rdx, rax, rcx); // Explicit RDX|RAX <- RDX:RAX * r64
472 if (isX64) e->idiv(rdx, rax, anyptr_gpA); // Explicit RDX|RAX <- RDX:RAX * m64
473
474 e->mul(cl); // Implicit AX <- AL * r8
475 e->mul(byte_ptr(gzA)); // Implicit AX <- AL * m8
476 e->mul(ax, cl); // Explicit AX <- AL * r8
477 e->mul(ax, anyptr_gpA); // Explicit AX <- AL * m8
478
479 e->mul(cx); // Implicit DX:AX <- AX * r16
480 e->mul(word_ptr(gzA)); // Implicit DX:AX <- AX * m16
481 e->mul(dx, ax, cx); // Explicit DX:AX <- AX * r16
482 e->mul(dx, ax, anyptr_gpA); // Explicit DX:AX <- AX * m16
483
484 e->mul(ecx); // Implicit EDX:EAX <- EAX * r32
485 e->mul(dword_ptr(gzA)); // Implicit EDX:EAX <- EAX * m32
486 e->mul(edx, eax, ecx); // Explicit EDX:EAX <- EAX * r32
487 e->mul(edx, eax, anyptr_gpA); // Explicit EDX:EAX <- EAX * m32
488
489 if (isX64) e->mul(rcx); // Implicit RDX|RAX <- RAX * r64
490 if (isX64) e->mul(qword_ptr(gzA)); // Implicit RDX|RAX <- RAX * m64
491 if (isX64) e->mul(rdx, rax, rcx); // Explicit RDX|RAX <- RAX * r64
492 if (isX64) e->mul(rdx, rax, anyptr_gpA); // Explicit RDX|RAX <- RAX * m64
493
494 e->imul(gdA);
495 e->imul(gzA);
496 e->imul(intptr_gpA);
497 e->imul(gdA, 1);
498 e->imul(gzA, 1);
499 e->imul(gdA, gdB);
500 e->imul(gzA, gzB);
501 e->imul(gdA, gdB, 1);
502 e->imul(gzA, gzB, 1);
503 e->imul(gdA, anyptr_gpB);
504 e->imul(gzA, intptr_gpB);
505 e->imul(gdA, anyptr_gpB, 1);
506 e->imul(gzA, intptr_gpB, 1);
507
508 // Special case - zero-extend 32-bit immediate instead of sign-extend:
509 if (isX64) e->mov(gzA, uint32_t(0xFEEDFEED));
510 if (isX64) e->and_(gzA, uint32_t(0xFEEDFEED));
511
512 // Special case - mov with absolute 32-bit address.
513 e->mov(al , ptr(0x01020304u));
514 e->mov(ax , ptr(0x01020304u));
515 e->mov(eax, ptr(0x01020304u));
516 e->mov(ptr(0x01020304u), al );
517 e->mov(ptr(0x01020304u), ax );
518 e->mov(ptr(0x01020304u), eax);
519
520 // Special case - mov with absolute 64-bit address.
521 if (isX64) e->mov(al , ptr(0x0102030405060708u));
522 if (isX64) e->mov(ax , ptr(0x0102030405060708u));
523 if (isX64) e->mov(eax, ptr(0x0102030405060708u));
524 if (isX64) e->mov(rax, ptr(0x0102030405060708u));
525 if (isX64) e->mov(ptr(0x0102030405060708u), al );
526 if (isX64) e->mov(ptr(0x0102030405060708u), ax );
527 if (isX64) e->mov(ptr(0x0102030405060708u), eax);
528 if (isX64) e->mov(ptr(0x0102030405060708u), rax);
529
530 // Control registers.
531 e->nop();
532
533 e->mov(gzA, cr0);
534 e->mov(cr0, gzA);
535 if (isX64) e->mov(gzA, cr8);
536 if (isX64) e->mov(cr8, gzA);
537
538 // Debug registers.
539 e->nop();
540
541 e->mov(gzA, dr0);
542 e->mov(dr0, gzA);
543
544 // Segment registers.
545 e->nop();
546
547 if (!isX64) e->mov(es, ax);
548 if (!isX64) e->mov(es, bx);
549 if (!isX64) e->mov(ax, es);
550 if (!isX64) e->mov(bx, es);
551
552 if (!isX64) e->mov(cs, ax);
553 if (!isX64) e->mov(cs, bx);
554 if (!isX64) e->mov(ax, cs);
555 if (!isX64) e->mov(bx, cs);
556
557 if (!isX64) e->mov(ss, ax);
558 if (!isX64) e->mov(ss, bx);
559 if (!isX64) e->mov(ax, ss);
560 if (!isX64) e->mov(bx, ss);
561
562 if (!isX64) e->mov(ds, ax);
563 if (!isX64) e->mov(ds, bx);
564 if (!isX64) e->mov(ax, ds);
565 if (!isX64) e->mov(bx, ds);
566
567 e->mov(fs, ax);
568 e->mov(fs, bx);
569 e->mov(ax, fs);
570 e->mov(bx, fs);
571
572 e->mov(gs, ax);
573 e->mov(gs, bx);
574 e->mov(ax, gs);
575 e->mov(bx, gs);
576
577 // Instructions using REP prefix.
578 e->nop();
579
580 e->in(al, 0);
581 e->in(al, dx);
582 e->in(ax, 0);
583 e->in(ax, dx);
584 e->in(eax, 0);
585 e->in(eax, dx);
586 e->rep().ins(byte_ptr(e->zdi()), dx);
587 e->rep().ins(word_ptr(e->zdi()), dx);
588 e->rep().ins(dword_ptr(e->zdi()), dx);
589
590 e->out(imm(0), al);
591 e->out(dx, al);
592 e->out(imm(0), ax);
593 e->out(dx, ax);
594 e->out(imm(0), eax);
595 e->out(dx, eax);
596 e->rep().outs(dx, byte_ptr(e->zsi()));
597 e->rep().outs(dx, word_ptr(e->zsi()));
598 e->rep().outs(dx, dword_ptr(e->zsi()));
599
600 e->lodsb();
601 e->lodsd();
602 e->lodsw();
603 e->rep().lodsb();
604 e->rep().lodsd();
605 e->rep().lodsw();
606 if (isX64) e->rep().lodsq();
607
608 e->movsb();
609 e->movsd();
610 e->movsw();
611 e->rep().movsb();
612 e->rep().movsd();
613 e->rep().movsw();
614 if (isX64) e->rep().movsq();
615
616 e->stosb();
617 e->stosd();
618 e->stosw();
619 e->rep().stosb();
620 e->rep().stosd();
621 e->rep().stosw();
622 if (isX64) e->rep().stosq();
623
624 e->cmpsb();
625 e->cmpsd();
626 e->cmpsw();
627 e->repz().cmpsb();
628 e->repz().cmpsd();
629 e->repz().cmpsw();
630 if (isX64) e->repz().cmpsq();
631 e->repnz().cmpsb();
632 e->repnz().cmpsd();
633 e->repnz().cmpsw();
634 if (isX64) e->repnz().cmpsq();
635
636 e->scasb();
637 e->scasd();
638 e->scasw();
639 e->repz().scasb();
640 e->repz().scasd();
641 e->repz().scasw();
642 if (isX64) e->repz().scasq();
643 e->repnz().scasb();
644 e->repnz().scasd();
645 e->repnz().scasw();
646 if (isX64) e->repnz().scasq();
647
648 // Label...Jcc/Jecxz/Jmp.
649 e->nop();
650
651 L = e->newLabel();
652 e->bind(L);
653 e->ja(L);
654 e->jae(L);
655 e->jb(L);
656 e->jbe(L);
657 e->jc(L);
658 e->je(L);
659 e->jg(L);
660 e->jge(L);
661 e->jl(L);
662 e->jle(L);
663 e->jna(L);
664 e->jnae(L);
665 e->jnb(L);
666 e->jnbe(L);
667 e->jnc(L);
668 e->jne(L);
669 e->jng(L);
670 e->jnge(L);
671 e->jnl(L);
672 e->jnle(L);
673 e->jno(L);
674 e->jnp(L);
675 e->jns(L);
676 e->jnz(L);
677 e->jo(L);
678 e->jp(L);
679 e->jpe(L);
680 e->jpo(L);
681 e->js(L);
682 e->jz(L);
683 e->jecxz(ecx, L);
684 e->jmp(L);
685
686 // Jcc/Jecxz/Jmp...Label.
687 e->nop();
688
689 L = e->newLabel();
690 e->ja(L);
691 e->jae(L);
692 e->jb(L);
693 e->jbe(L);
694 e->jc(L);
695 e->je(L);
696 e->jg(L);
697 e->jge(L);
698 e->jl(L);
699 e->jle(L);
700 e->jna(L);
701 e->jnae(L);
702 e->jnb(L);
703 e->jnbe(L);
704 e->jnc(L);
705 e->jne(L);
706 e->jng(L);
707 e->jnge(L);
708 e->jnl(L);
709 e->jnle(L);
710 e->jno(L);
711 e->jnp(L);
712 e->jns(L);
713 e->jnz(L);
714 e->jo(L);
715 e->jp(L);
716 e->jpe(L);
717 e->jpo(L);
718 e->js(L);
719 e->jz(L);
720 e->jecxz(ecx, L);
721 e->jmp(L);
722 e->bind(L);
723
724 // FPU.
725 e->nop();
726
727 e->f2xm1();
728 e->fabs();
729 e->fadd(stA, stB);
730 e->fadd(stB, stA);
731 e->fadd(dword_ptr(gzA));
732 e->fadd(qword_ptr(gzA));
733 e->faddp(stB);
734 e->faddp();
735 e->fbld(dword_ptr(gzA));
736 e->fbstp(dword_ptr(gzA));
737 e->fchs();
738 e->fclex();
739 e->fcom(stB);
740 e->fcom();
741 e->fcom(dword_ptr(gzA));
742 e->fcom(qword_ptr(gzA));
743 e->fcomp(stB);
744 e->fcomp();
745 e->fcomp(dword_ptr(gzA));
746 e->fcomp(qword_ptr(gzA));
747 e->fcompp();
748 e->fcos();
749 e->fdecstp();
750 e->fdiv(stA, stB);
751 e->fdiv(stB, stA);
752 e->fdiv(dword_ptr(gzA));
753 e->fdiv(qword_ptr(gzA));
754 e->fdivp(stB);
755 e->fdivp();
756 e->fdivr(stA, stB);
757 e->fdivr(stB, stA);
758 e->fdivr(dword_ptr(gzA));
759 e->fdivr(qword_ptr(gzA));
760 e->fdivrp(stB);
761 e->fdivrp();
762 e->fiadd(dword_ptr(gzA));
763 e->ficom(word_ptr(gzA));
764 e->ficom(dword_ptr(gzA));
765 e->ficomp(word_ptr(gzA));
766 e->ficomp(dword_ptr(gzA));
767 e->fidiv(word_ptr(gzA));
768 e->fidiv(dword_ptr(gzA));
769 e->fidivr(word_ptr(gzA));
770 e->fidivr(dword_ptr(gzA));
771 e->fild(word_ptr(gzA));
772 e->fild(dword_ptr(gzA));
773 e->fild(qword_ptr(gzA));
774 e->fimul(word_ptr(gzA));
775 e->fimul(dword_ptr(gzA));
776 e->fincstp();
777 e->finit();
778 e->fninit();
779 e->fisub(word_ptr(gzA));
780 e->fisub(dword_ptr(gzA));
781 e->fisubr(word_ptr(gzA));
782 e->fisubr(dword_ptr(gzA));
783 e->fist(word_ptr(gzA));
784 e->fist(dword_ptr(gzA));
785 e->fistp(word_ptr(gzA));
786 e->fistp(dword_ptr(gzA));
787 e->fistp(qword_ptr(gzA));
788 e->fld(dword_ptr(gzA));
789 e->fld(qword_ptr(gzA));
790 e->fld(tword_ptr(gzA));
791 e->fld1();
792 e->fldl2t();
793 e->fldl2e();
794 e->fldpi();
795 e->fldlg2();
796 e->fldln2();
797 e->fldz();
798 e->fldcw(anyptr_gpA);
799 e->fldenv(anyptr_gpA);
800 e->fmul(stA, stB);
801 e->fmul(stB, stA);
802 e->fmul(dword_ptr(gzA));
803 e->fmul(qword_ptr(gzA));
804 e->fmulp(stB);
805 e->fmulp();
806 e->fnclex();
807 e->fnop();
808 e->fnsave(anyptr_gpA);
809 e->fnstenv(anyptr_gpA);
810 e->fnstcw(anyptr_gpA);
811 e->fpatan();
812 e->fprem();
813 e->fprem1();
814 e->fptan();
815 e->frndint();
816 e->frstor(anyptr_gpA);
817 e->fsave(anyptr_gpA);
818 e->fscale();
819 e->fsin();
820 e->fsincos();
821 e->fsqrt();
822 e->fst(dword_ptr(gzA));
823 e->fst(qword_ptr(gzA));
824 e->fstp(dword_ptr(gzA));
825 e->fstp(qword_ptr(gzA));
826 e->fstp(tword_ptr(gzA));
827 e->fstcw(anyptr_gpA);
828 e->fstenv(anyptr_gpA);
829 e->fsub(stA, stB);
830 e->fsub(stB, stA);
831 e->fsub(dword_ptr(gzA));
832 e->fsub(qword_ptr(gzA));
833 e->fsubp(stB);
834 e->fsubp();
835 e->fsubr(stA, stB);
836 e->fsubr(stB, stA);
837 e->fsubr(dword_ptr(gzA));
838 e->fsubr(qword_ptr(gzA));
839 e->fsubrp(stB);
840 e->fsubrp();
841 e->ftst();
842 e->fucom(stB);
843 e->fucom();
844 e->fucom(stB);
845 e->fucomi(stB);
846 e->fucomip(stB);
847 e->fucomp(stB);
848 e->fucompp();
849 e->fxam();
850 e->fxtract();
851 e->fyl2x();
852 e->fyl2xp1();
853
854 // LAHF/SAHF
855 e->lahf(); // Implicit <AH>
856 e->lahf(ah); // Explicit <AH>
857 e->sahf(); // Implicit <AH>
858 e->sahf(ah); // Explicit <AH>
859
860 // FXSR.
861 e->fxrstor(anyptr_gpA);
862 e->fxsave(anyptr_gpA);
863
864 // XSAVE.
865 e->nop();
866
867 e->xgetbv(); // Implicit <EDX:EAX>, <ECX>
868 e->xgetbv(edx, eax, ecx); // Explicit <EDX:EAX>, <ECX>
869
870 e->xsetbv(); // Implicit <EDX:EAX>, <ECX>
871 e->xsetbv(edx, eax, ecx); // Explicit <EDX:EAX>, <ECX>
872
873 e->xrstor(anyptr_gpA); // Implicit <EDX:EAX>
874 e->xrstors(anyptr_gpA); // Implicit <EDX:EAX>
875 e->xsave(anyptr_gpA); // Implicit <EDX:EAX>
876 e->xsavec(anyptr_gpA); // Implicit <EDX:EAX>
877 e->xsaveopt(anyptr_gpA); // Implicit <EDX:EAX>
878 e->xsaves(anyptr_gpA); // Implicit <EDX:EAX>
879
880 if (isX64) e->xrstor64(anyptr_gpA); // Implicit <EDX:EAX>
881 if (isX64) e->xrstors64(anyptr_gpA); // Implicit <EDX:EAX>
882 if (isX64) e->xsave64(anyptr_gpA); // Implicit <EDX:EAX>
883 if (isX64) e->xsavec64(anyptr_gpA); // Implicit <EDX:EAX>
884 if (isX64) e->xsaveopt64(anyptr_gpA); // Implicit <EDX:EAX>
885 if (isX64) e->xsaves64(anyptr_gpA); // Implicit <EDX:EAX>
886
887 // POPCNT.
888 e->nop();
889
890 e->popcnt(gdA, gdB);
891 e->popcnt(gzA, gzB);
892 e->popcnt(gdA, anyptr_gpB);
893 e->popcnt(gzA, anyptr_gpB);
894
895 // LZCNT.
896 e->nop();
897
898 e->lzcnt(gdA, gdB);
899 e->lzcnt(gzA, gzB);
900 e->lzcnt(gdA, anyptr_gpB);
901 e->lzcnt(gzA, anyptr_gpB);
902
903 // BMI.
904 e->nop();
905
906 e->andn(gdA, gdB, gdC);
907 e->andn(gzA, gzB, gzC);
908 e->andn(gdA, gdB, anyptr_gpC);
909 e->andn(gzA, gzB, anyptr_gpC);
910 e->bextr(gdA, gdB, gdC);
911 e->bextr(gzA, gzB, gzC);
912 e->bextr(gdA, anyptr_gpB, gdC);
913 e->bextr(gzA, anyptr_gpB, gzC);
914 e->blsi(gdA, gdB);
915 e->blsi(gzA, gzB);
916 e->blsi(gdA, anyptr_gpB);
917 e->blsi(gzA, anyptr_gpB);
918 e->blsmsk(gdA, gdB);
919 e->blsmsk(gzA, gzB);
920 e->blsmsk(gdA, anyptr_gpB);
921 e->blsmsk(gzA, anyptr_gpB);
922 e->blsr(gdA, gdB);
923 e->blsr(gzA, gzB);
924 e->blsr(gdA, anyptr_gpB);
925 e->blsr(gzA, anyptr_gpB);
926 e->tzcnt(gdA, gdB);
927 e->tzcnt(gzA, gzB);
928 e->tzcnt(gdA, anyptr_gpB);
929 e->tzcnt(gzA, anyptr_gpB);
930
931 // BMI2.
932 e->nop();
933
934 e->bzhi(gdA, gdB, gdC);
935 e->bzhi(gzA, gzB, gzC);
936 e->bzhi(gdA, anyptr_gpB, gdC);
937 e->bzhi(gzA, anyptr_gpB, gzC);
938 e->mulx(gdA, gdB, gdC); // Implicit gpA, gpB, gpC, <EDX>
939 e->mulx(gdA, gdB, gdC, edx); // Explicit gpA, gpB, gpC, <EDX>
940 e->mulx(gzA, gzB, gzC); // Implicit gpA, gpB, gpC, <EDX|RDX>
941 e->mulx(gzA, gzB, gzC, e->zdx()); // Explicit gpA, gpB, gpC, <EDX|RDX>
942 e->mulx(gdA, gdB, anyptr_gpC); // Implicit gpA, gpB, mem, <EDX>
943 e->mulx(gdA, gdB, anyptr_gpC, edx); // Explicit gpA, gpB, mem, <EDX>
944 e->mulx(gzA, gzB, anyptr_gpC); // Implicit gpA, gpB, mem, <EDX|RDX>
945 e->mulx(gzA, gzB, anyptr_gpC, e->zdx()); // Explicit gpA, gpB, mem, <EDX|RDX>
946 e->pdep(gdA, gdB, gdC);
947 e->pdep(gzA, gzB, gzC);
948 e->pdep(gdA, gdB, anyptr_gpC);
949 e->pdep(gzA, gzB, anyptr_gpC);
950 e->pext(gdA, gdB, gdC);
951 e->pext(gzA, gzB, gzC);
952 e->pext(gdA, gdB, anyptr_gpC);
953 e->pext(gzA, gzB, anyptr_gpC);
954 e->rorx(gdA, gdB, 0);
955 e->rorx(gzA, gzB, 0);
956 e->rorx(gdA, anyptr_gpB, 0);
957 e->rorx(gzA, anyptr_gpB, 0);
958 e->sarx(gdA, gdB, gdC);
959 e->sarx(gzA, gzB, gzC);
960 e->sarx(gdA, anyptr_gpB, gdC);
961 e->sarx(gzA, anyptr_gpB, gzC);
962 e->shlx(gdA, gdB, gdC);
963 e->shlx(gzA, gzB, gzC);
964 e->shlx(gdA, anyptr_gpB, gdC);
965 e->shlx(gzA, anyptr_gpB, gzC);
966 e->shrx(gdA, gdB, gdC);
967 e->shrx(gzA, gzB, gzC);
968 e->shrx(gdA, anyptr_gpB, gdC);
969 e->shrx(gzA, anyptr_gpB, gzC);
970
971 // ADX.
972 e->nop();
973
974 e->adcx(gdA, gdB);
975 e->adcx(gzA, gzB);
976 e->adcx(gdA, anyptr_gpB);
977 e->adcx(gzA, anyptr_gpB);
978 e->adox(gdA, gdB);
979 e->adox(gzA, gzB);
980 e->adox(gdA, anyptr_gpB);
981 e->adox(gzA, anyptr_gpB);
982
983 // TBM.
984 e->nop();
985
986 e->blcfill(gdA, gdB);
987 e->blcfill(gzA, gzB);
988 e->blcfill(gdA, anyptr_gpB);
989 e->blcfill(gzA, anyptr_gpB);
990
991 e->blci(gdA, gdB);
992 e->blci(gzA, gzB);
993 e->blci(gdA, anyptr_gpB);
994 e->blci(gzA, anyptr_gpB);
995
996 e->blcic(gdA, gdB);
997 e->blcic(gzA, gzB);
998 e->blcic(gdA, anyptr_gpB);
999 e->blcic(gzA, anyptr_gpB);
1000
1001 e->blcmsk(gdA, gdB);
1002 e->blcmsk(gzA, gzB);
1003 e->blcmsk(gdA, anyptr_gpB);
1004 e->blcmsk(gzA, anyptr_gpB);
1005
1006 e->blcs(gdA, gdB);
1007 e->blcs(gzA, gzB);
1008 e->blcs(gdA, anyptr_gpB);
1009 e->blcs(gzA, anyptr_gpB);
1010
1011 e->blsfill(gdA, gdB);
1012 e->blsfill(gzA, gzB);
1013 e->blsfill(gdA, anyptr_gpB);
1014 e->blsfill(gzA, anyptr_gpB);
1015
1016 e->blsic(gdA, gdB);
1017 e->blsic(gzA, gzB);
1018 e->blsic(gdA, anyptr_gpB);
1019 e->blsic(gzA, anyptr_gpB);
1020
1021 e->t1mskc(gdA, gdB);
1022 e->t1mskc(gzA, gzB);
1023 e->t1mskc(gdA, anyptr_gpB);
1024 e->t1mskc(gzA, anyptr_gpB);
1025
1026 e->tzmsk(gdA, gdB);
1027 e->tzmsk(gzA, gzB);
1028 e->tzmsk(gdA, anyptr_gpB);
1029 e->tzmsk(gzA, anyptr_gpB);
1030
1031 // CLFLUSH / CLFLUSH_OPT.
1032 e->nop();
1033 e->clflush(anyptr_gpA);
1034 e->clflushopt(anyptr_gpA);
1035
1036 // CLWB.
1037 e->nop();
1038 e->clwb(anyptr_gpA);
1039
1040 // CLZERO.
1041 e->nop();
1042 e->clzero(); // Implicit <ds:[EAX|RAX]>
1043 e->clzero(ptr(e->zax())); // Explicit <ds:[EAX|RAX]>
1044
1045 // MONITOR[X] / MWAIT[X].
1046 e->nop();
1047 e->monitor(); // Implicit <ds:[EAX|RAX]>, <ECX>, <EDX>
1048 e->monitorx(); // Implicit <ds:[EAX|RAX]>, <ECX>, <EDX>
1049 e->mwait(); // Implicit <EAX>, <ECX>
1050 e->mwaitx(); // Implicit <EAX>, <ECX>, <EBX>
1051
1052 // PCOMMIT.
1053 e->nop();
1054 e->pcommit();
1055
1056 // PREFETCH / PREFETCHW / PREFETCHWT1.
1057 e->nop();
1058 e->prefetch(anyptr_gpA); // 3DNOW.
1059 e->prefetchnta(anyptr_gpA); // MMX+SSE.
1060 e->prefetcht0(anyptr_gpA); // MMX+SSE.
1061 e->prefetcht1(anyptr_gpA); // MMX+SSE.
1062 e->prefetcht2(anyptr_gpA); // MMX+SSE.
1063 e->prefetchw(anyptr_gpA); // PREFETCHW.
1064 e->prefetchwt1(anyptr_gpA); // PREFETCHWT1.
1065
1066 // RDRAND / RDSEED.
1067 e->nop();
1068
1069 e->rdrand(gdA);
1070 e->rdrand(gzA);
1071 e->rdseed(gdA);
1072 e->rdseed(gzA);
1073
1074 // MMX/MMX2.
1075 e->nop();
1076
1077 e->movd(anyptr_gpA, mmB);
1078 e->movd(gdA, mmB);
1079 e->movd(mmA, anyptr_gpB);
1080 e->movd(mmA, gdB);
1081 e->movq(mmA, mmB);
1082 e->movq(anyptr_gpA, mmB);
1083 e->movq(mmA, anyptr_gpB);
1084 e->packuswb(mmA, mmB);
1085 e->packuswb(mmA, anyptr_gpB);
1086 e->paddb(mmA, mmB);
1087 e->paddb(mmA, anyptr_gpB);
1088 e->paddw(mmA, mmB);
1089 e->paddw(mmA, anyptr_gpB);
1090 e->paddd(mmA, mmB);
1091 e->paddd(mmA, anyptr_gpB);
1092 e->paddsb(mmA, mmB);
1093 e->paddsb(mmA, anyptr_gpB);
1094 e->paddsw(mmA, mmB);
1095 e->paddsw(mmA, anyptr_gpB);
1096 e->paddusb(mmA, mmB);
1097 e->paddusb(mmA, anyptr_gpB);
1098 e->paddusw(mmA, mmB);
1099 e->paddusw(mmA, anyptr_gpB);
1100 e->pand(mmA, mmB);
1101 e->pand(mmA, anyptr_gpB);
1102 e->pandn(mmA, mmB);
1103 e->pandn(mmA, anyptr_gpB);
1104 e->pcmpeqb(mmA, mmB);
1105 e->pcmpeqb(mmA, anyptr_gpB);
1106 e->pcmpeqw(mmA, mmB);
1107 e->pcmpeqw(mmA, anyptr_gpB);
1108 e->pcmpeqd(mmA, mmB);
1109 e->pcmpeqd(mmA, anyptr_gpB);
1110 e->pcmpgtb(mmA, mmB);
1111 e->pcmpgtb(mmA, anyptr_gpB);
1112 e->pcmpgtw(mmA, mmB);
1113 e->pcmpgtw(mmA, anyptr_gpB);
1114 e->pcmpgtd(mmA, mmB);
1115 e->pcmpgtd(mmA, anyptr_gpB);
1116 e->pmulhw(mmA, mmB);
1117 e->pmulhw(mmA, anyptr_gpB);
1118 e->pmullw(mmA, mmB);
1119 e->pmullw(mmA, anyptr_gpB);
1120 e->por(mmA, mmB);
1121 e->por(mmA, anyptr_gpB);
1122 e->pmaddwd(mmA, mmB);
1123 e->pmaddwd(mmA, anyptr_gpB);
1124 e->pslld(mmA, mmB);
1125 e->pslld(mmA, anyptr_gpB);
1126 e->pslld(mmA, 0);
1127 e->psllq(mmA, mmB);
1128 e->psllq(mmA, anyptr_gpB);
1129 e->psllq(mmA, 0);
1130 e->psllw(mmA, mmB);
1131 e->psllw(mmA, anyptr_gpB);
1132 e->psllw(mmA, 0);
1133 e->psrad(mmA, mmB);
1134 e->psrad(mmA, anyptr_gpB);
1135 e->psrad(mmA, 0);
1136 e->psraw(mmA, mmB);
1137 e->psraw(mmA, anyptr_gpB);
1138 e->psraw(mmA, 0);
1139 e->psrld(mmA, mmB);
1140 e->psrld(mmA, anyptr_gpB);
1141 e->psrld(mmA, 0);
1142 e->psrlq(mmA, mmB);
1143 e->psrlq(mmA, anyptr_gpB);
1144 e->psrlq(mmA, 0);
1145 e->psrlw(mmA, mmB);
1146 e->psrlw(mmA, anyptr_gpB);
1147 e->psrlw(mmA, 0);
1148 e->psubb(mmA, mmB);
1149 e->psubb(mmA, anyptr_gpB);
1150 e->psubw(mmA, mmB);
1151 e->psubw(mmA, anyptr_gpB);
1152 e->psubd(mmA, mmB);
1153 e->psubd(mmA, anyptr_gpB);
1154 e->psubsb(mmA, mmB);
1155 e->psubsb(mmA, anyptr_gpB);
1156 e->psubsw(mmA, mmB);
1157 e->psubsw(mmA, anyptr_gpB);
1158 e->psubusb(mmA, mmB);
1159 e->psubusb(mmA, anyptr_gpB);
1160 e->psubusw(mmA, mmB);
1161 e->psubusw(mmA, anyptr_gpB);
1162 e->punpckhbw(mmA, mmB);
1163 e->punpckhbw(mmA, anyptr_gpB);
1164 e->punpckhwd(mmA, mmB);
1165 e->punpckhwd(mmA, anyptr_gpB);
1166 e->punpckhdq(mmA, mmB);
1167 e->punpckhdq(mmA, anyptr_gpB);
1168 e->punpcklbw(mmA, mmB);
1169 e->punpcklbw(mmA, anyptr_gpB);
1170 e->punpcklwd(mmA, mmB);
1171 e->punpcklwd(mmA, anyptr_gpB);
1172 e->punpckldq(mmA, mmB);
1173 e->punpckldq(mmA, anyptr_gpB);
1174 e->pxor(mmA, mmB);
1175 e->pxor(mmA, anyptr_gpB);
1176 e->emms();
1177
1178 // 3DNOW.
1179 e->nop();
1180
1181 e->pavgusb(mmA, mmB);
1182 e->pavgusb(mmA, anyptr_gpB);
1183 e->pf2id(mmA, mmB);
1184 e->pf2id(mmA, anyptr_gpB);
1185 e->pf2iw(mmA, mmB);
1186 e->pf2iw(mmA, anyptr_gpB);
1187 e->pfacc(mmA, mmB);
1188 e->pfacc(mmA, anyptr_gpB);
1189 e->pfadd(mmA, mmB);
1190 e->pfadd(mmA, anyptr_gpB);
1191 e->pfcmpeq(mmA, mmB);
1192 e->pfcmpeq(mmA, anyptr_gpB);
1193 e->pfcmpge(mmA, mmB);
1194 e->pfcmpge(mmA, anyptr_gpB);
1195 e->pfcmpgt(mmA, mmB);
1196 e->pfcmpgt(mmA, anyptr_gpB);
1197 e->pfmax(mmA, mmB);
1198 e->pfmax(mmA, anyptr_gpB);
1199 e->pfmin(mmA, mmB);
1200 e->pfmin(mmA, anyptr_gpB);
1201 e->pfmul(mmA, mmB);
1202 e->pfmul(mmA, anyptr_gpB);
1203 e->pfnacc(mmA, mmB);
1204 e->pfnacc(mmA, anyptr_gpB);
1205 e->pfpnacc(mmA, mmB);
1206 e->pfpnacc(mmA, anyptr_gpB);
1207 e->pfrcp(mmA, mmB);
1208 e->pfrcp(mmA, anyptr_gpB);
1209 e->pfrcpit1(mmA, mmB);
1210 e->pfrcpit1(mmA, anyptr_gpB);
1211 e->pfrcpit2(mmA, mmB);
1212 e->pfrcpit2(mmA, anyptr_gpB);
1213 e->pfrcpv(mmA, mmB);
1214 e->pfrcpv(mmA, anyptr_gpB);
1215 e->pfrsqit1(mmA, mmB);
1216 e->pfrsqit1(mmA, anyptr_gpB);
1217 e->pfrsqrt(mmA, mmB);
1218 e->pfrsqrt(mmA, anyptr_gpB);
1219 e->pfrsqrtv(mmA, mmB);
1220 e->pfrsqrtv(mmA, anyptr_gpB);
1221 e->pfsub(mmA, mmB);
1222 e->pfsub(mmA, anyptr_gpB);
1223 e->pfsubr(mmA, mmB);
1224 e->pfsubr(mmA, anyptr_gpB);
1225 e->pi2fd(mmA, mmB);
1226 e->pi2fd(mmA, anyptr_gpB);
1227 e->pi2fw(mmA, mmB);
1228 e->pi2fw(mmA, anyptr_gpB);
1229 e->pmulhrw(mmA, mmB);
1230 e->pmulhrw(mmA, anyptr_gpB);
1231 e->pswapd(mmA, mmB);
1232 e->pswapd(mmA, anyptr_gpB);
1233 e->femms();
1234
1235 // SSE.
1236 e->nop();
1237
1238 e->addps(xmmA, xmmB);
1239 e->addps(xmmA, anyptr_gpB);
1240 e->addss(xmmA, xmmB);
1241 e->addss(xmmA, anyptr_gpB);
1242 e->andnps(xmmA, xmmB);
1243 e->andnps(xmmA, anyptr_gpB);
1244 e->andps(xmmA, xmmB);
1245 e->andps(xmmA, anyptr_gpB);
1246 e->cmpps(xmmA, xmmB, 0);
1247 e->cmpps(xmmA, anyptr_gpB, 0);
1248 e->cmpss(xmmA, xmmB, 0);
1249 e->cmpss(xmmA, anyptr_gpB, 0);
1250 e->comiss(xmmA, xmmB);
1251 e->comiss(xmmA, anyptr_gpB);
1252 e->cvtpi2ps(xmmA, mmB);
1253 e->cvtpi2ps(xmmA, anyptr_gpB);
1254 e->cvtps2pi(mmA, xmmB);
1255 e->cvtps2pi(mmA, anyptr_gpB);
1256 e->cvtsi2ss(xmmA, gdB);
1257 e->cvtsi2ss(xmmA, gzB);
1258 e->cvtsi2ss(xmmA, anyptr_gpB);
1259 e->cvtss2si(gdA, xmmB);
1260 e->cvtss2si(gzA, xmmB);
1261 e->cvtss2si(gdA, anyptr_gpB);
1262 e->cvtss2si(gzA, anyptr_gpB);
1263 e->cvttps2pi(mmA, xmmB);
1264 e->cvttps2pi(mmA, anyptr_gpB);
1265 e->cvttss2si(gdA, xmmB);
1266 e->cvttss2si(gzA, xmmB);
1267 e->cvttss2si(gdA, anyptr_gpB);
1268 e->cvttss2si(gzA, anyptr_gpB);
1269 e->divps(xmmA, xmmB);
1270 e->divps(xmmA, anyptr_gpB);
1271 e->divss(xmmA, xmmB);
1272 e->divss(xmmA, anyptr_gpB);
1273 e->ldmxcsr(anyptr_gpA);
1274 e->maskmovq(mmA, mmB); // Implicit mmA, mmB, <ds:[EDI|RDI]>
1275 e->maskmovq(mmA, mmB, ptr(e->zdi())); // Explicit mmA, mmB, <ds:[EDI|RDI]>
1276 e->maxps(xmmA, xmmB);
1277 e->maxps(xmmA, anyptr_gpB);
1278 e->maxss(xmmA, xmmB);
1279 e->maxss(xmmA, anyptr_gpB);
1280 e->minps(xmmA, xmmB);
1281 e->minps(xmmA, anyptr_gpB);
1282 e->minss(xmmA, xmmB);
1283 e->minss(xmmA, anyptr_gpB);
1284 e->movaps(xmmA, xmmB);
1285 e->movaps(xmmA, anyptr_gpB);
1286 e->movaps(anyptr_gpA, xmmB);
1287 e->movd(anyptr_gpA, xmmB);
1288 e->movd(gdA, xmmB);
1289 e->movd(gzA, xmmB);
1290 e->movd(xmmA, anyptr_gpB);
1291 e->movd(xmmA, gdB);
1292 e->movd(xmmA, gzB);
1293 e->movq(mmA, mmB);
1294 e->movq(xmmA, xmmB);
1295 e->movq(anyptr_gpA, xmmB);
1296 e->movq(xmmA, anyptr_gpB);
1297 e->movntq(anyptr_gpA, mmB);
1298 e->movhlps(xmmA, xmmB);
1299 e->movhps(xmmA, anyptr_gpB);
1300 e->movhps(anyptr_gpA, xmmB);
1301 e->movlhps(xmmA, xmmB);
1302 e->movlps(xmmA, anyptr_gpB);
1303 e->movlps(anyptr_gpA, xmmB);
1304 e->movntps(anyptr_gpA, xmmB);
1305 e->movss(xmmA, anyptr_gpB);
1306 e->movss(anyptr_gpA, xmmB);
1307 e->movups(xmmA, xmmB);
1308 e->movups(xmmA, anyptr_gpB);
1309 e->movups(anyptr_gpA, xmmB);
1310 e->mulps(xmmA, xmmB);
1311 e->mulps(xmmA, anyptr_gpB);
1312 e->mulss(xmmA, xmmB);
1313 e->mulss(xmmA, anyptr_gpB);
1314 e->orps(xmmA, xmmB);
1315 e->orps(xmmA, anyptr_gpB);
1316 e->pavgb(mmA, mmB);
1317 e->pavgb(mmA, anyptr_gpB);
1318 e->pavgw(mmA, mmB);
1319 e->pavgw(mmA, anyptr_gpB);
1320 e->pextrw(gdA, mmB, 0);
1321 e->pextrw(gzA, mmB, 0);
1322 e->pinsrw(mmA, gdB, 0);
1323 e->pinsrw(mmA, gzB, 0);
1324 e->pinsrw(mmA, anyptr_gpB, 0);
1325 e->pmaxsw(mmA, mmB);
1326 e->pmaxsw(mmA, anyptr_gpB);
1327 e->pmaxub(mmA, mmB);
1328 e->pmaxub(mmA, anyptr_gpB);
1329 e->pminsw(mmA, mmB);
1330 e->pminsw(mmA, anyptr_gpB);
1331 e->pminub(mmA, mmB);
1332 e->pminub(mmA, anyptr_gpB);
1333 e->pmovmskb(gdA, mmB);
1334 e->pmovmskb(gzA, mmB);
1335 e->pmulhuw(mmA, mmB);
1336 e->pmulhuw(mmA, anyptr_gpB);
1337 e->psadbw(mmA, mmB);
1338 e->psadbw(mmA, anyptr_gpB);
1339 e->pshufw(mmA, mmB, 0);
1340 e->pshufw(mmA, anyptr_gpB, 0);
1341 e->rcpps(xmmA, xmmB);
1342 e->rcpps(xmmA, anyptr_gpB);
1343 e->rcpss(xmmA, xmmB);
1344 e->rcpss(xmmA, anyptr_gpB);
1345 e->psadbw(xmmA, xmmB);
1346 e->psadbw(xmmA, anyptr_gpB);
1347 e->rsqrtps(xmmA, xmmB);
1348 e->rsqrtps(xmmA, anyptr_gpB);
1349 e->rsqrtss(xmmA, xmmB);
1350 e->rsqrtss(xmmA, anyptr_gpB);
1351 e->sfence();
1352 e->shufps(xmmA, xmmB, 0);
1353 e->shufps(xmmA, anyptr_gpB, 0);
1354 e->sqrtps(xmmA, xmmB);
1355 e->sqrtps(xmmA, anyptr_gpB);
1356 e->sqrtss(xmmA, xmmB);
1357 e->sqrtss(xmmA, anyptr_gpB);
1358 e->stmxcsr(anyptr_gpA);
1359 e->subps(xmmA, xmmB);
1360 e->subps(xmmA, anyptr_gpB);
1361 e->subss(xmmA, xmmB);
1362 e->subss(xmmA, anyptr_gpB);
1363 e->ucomiss(xmmA, xmmB);
1364 e->ucomiss(xmmA, anyptr_gpB);
1365 e->unpckhps(xmmA, xmmB);
1366 e->unpckhps(xmmA, anyptr_gpB);
1367 e->unpcklps(xmmA, xmmB);
1368 e->unpcklps(xmmA, anyptr_gpB);
1369 e->xorps(xmmA, xmmB);
1370 e->xorps(xmmA, anyptr_gpB);
1371
1372 // SSE2.
1373 e->nop();
1374
1375 e->addpd(xmmA, xmmB);
1376 e->addpd(xmmA, anyptr_gpB);
1377 e->addsd(xmmA, xmmB);
1378 e->addsd(xmmA, anyptr_gpB);
1379 e->andnpd(xmmA, xmmB);
1380 e->andnpd(xmmA, anyptr_gpB);
1381 e->andpd(xmmA, xmmB);
1382 e->andpd(xmmA, anyptr_gpB);
1383 e->cmppd(xmmA, xmmB, 0);
1384 e->cmppd(xmmA, anyptr_gpB, 0);
1385 e->cmpsd(xmmA, xmmB, 0);
1386 e->cmpsd(xmmA, anyptr_gpB, 0);
1387 e->comisd(xmmA, xmmB);
1388 e->comisd(xmmA, anyptr_gpB);
1389 e->cvtdq2pd(xmmA, xmmB);
1390 e->cvtdq2pd(xmmA, anyptr_gpB);
1391 e->cvtdq2ps(xmmA, xmmB);
1392 e->cvtdq2ps(xmmA, anyptr_gpB);
1393 e->cvtpd2dq(xmmA, xmmB);
1394 e->cvtpd2dq(xmmA, anyptr_gpB);
1395 e->cvtpd2pi(mmA, xmmB);
1396 e->cvtpd2pi(mmA, anyptr_gpB);
1397 e->cvtpd2ps(xmmA, xmmB);
1398 e->cvtpd2ps(xmmA, anyptr_gpB);
1399 e->cvtpi2pd(xmmA, mmB);
1400 e->cvtpi2pd(xmmA, anyptr_gpB);
1401 e->cvtps2dq(xmmA, xmmB);
1402 e->cvtps2dq(xmmA, anyptr_gpB);
1403 e->cvtps2pd(xmmA, xmmB);
1404 e->cvtps2pd(xmmA, anyptr_gpB);
1405 e->cvtsd2si(gdA, xmmB);
1406 e->cvtsd2si(gzA, xmmB);
1407 e->cvtsd2si(gdA, anyptr_gpB);
1408 e->cvtsd2si(gzA, anyptr_gpB);
1409 e->cvtsd2ss(xmmA, xmmB);
1410 e->cvtsd2ss(xmmA, anyptr_gpB);
1411 e->cvtsi2sd(xmmA, gdB);
1412 e->cvtsi2sd(xmmA, gzB);
1413 e->cvtsi2sd(xmmA, anyptr_gpB);
1414 e->cvtss2sd(xmmA, xmmB);
1415 e->cvtss2sd(xmmA, anyptr_gpB);
1416 e->cvtss2si(gdA, xmmB);
1417 e->cvtss2si(gzA, xmmB);
1418 e->cvtss2si(gdA, anyptr_gpB);
1419 e->cvtss2si(gzA, anyptr_gpB);
1420 e->cvttpd2pi(mmA, xmmB);
1421 e->cvttpd2pi(mmA, anyptr_gpB);
1422 e->cvttpd2dq(xmmA, xmmB);
1423 e->cvttpd2dq(xmmA, anyptr_gpB);
1424 e->cvttps2dq(xmmA, xmmB);
1425 e->cvttps2dq(xmmA, anyptr_gpB);
1426 e->cvttsd2si(gdA, xmmB);
1427 e->cvttsd2si(gzA, xmmB);
1428 e->cvttsd2si(gdA, anyptr_gpB);
1429 e->cvttsd2si(gzA, anyptr_gpB);
1430 e->divpd(xmmA, xmmB);
1431 e->divpd(xmmA, anyptr_gpB);
1432 e->divsd(xmmA, xmmB);
1433 e->divsd(xmmA, anyptr_gpB);
1434 e->lfence();
1435 e->maskmovdqu(xmmA, xmmB); // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
1436 e->maskmovdqu(xmmA, xmmB, ptr(e->zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
1437 e->maxpd(xmmA, xmmB);
1438 e->maxpd(xmmA, anyptr_gpB);
1439 e->maxsd(xmmA, xmmB);
1440 e->maxsd(xmmA, anyptr_gpB);
1441 e->mfence();
1442 e->minpd(xmmA, xmmB);
1443 e->minpd(xmmA, anyptr_gpB);
1444 e->minsd(xmmA, xmmB);
1445 e->minsd(xmmA, anyptr_gpB);
1446 e->movdqa(xmmA, xmmB);
1447 e->movdqa(xmmA, anyptr_gpB);
1448 e->movdqa(anyptr_gpA, xmmB);
1449 e->movdqu(xmmA, xmmB);
1450 e->movdqu(xmmA, anyptr_gpB);
1451 e->movdqu(anyptr_gpA, xmmB);
1452 e->movmskps(gdA, xmmB);
1453 e->movmskps(gzA, xmmB);
1454 e->movmskpd(gdA, xmmB);
1455 e->movmskpd(gzA, xmmB);
1456 e->movsd(xmmA, xmmB);
1457 e->movsd(xmmA, anyptr_gpB);
1458 e->movsd(anyptr_gpA, xmmB);
1459 e->movapd(xmmA, anyptr_gpB);
1460 e->movapd(anyptr_gpA, xmmB);
1461 e->movdq2q(mmA, xmmB);
1462 e->movq2dq(xmmA, mmB);
1463 e->movhpd(xmmA, anyptr_gpB);
1464 e->movhpd(anyptr_gpA, xmmB);
1465 e->movlpd(xmmA, anyptr_gpB);
1466 e->movlpd(anyptr_gpA, xmmB);
1467 e->movntdq(anyptr_gpA, xmmB);
1468 e->movnti(anyptr_gpA, gdB);
1469 e->movnti(anyptr_gpA, gzB);
1470 e->movntpd(anyptr_gpA, xmmB);
1471 e->movupd(xmmA, anyptr_gpB);
1472 e->movupd(anyptr_gpA, xmmB);
1473 e->mulpd(xmmA, xmmB);
1474 e->mulpd(xmmA, anyptr_gpB);
1475 e->mulsd(xmmA, xmmB);
1476 e->mulsd(xmmA, anyptr_gpB);
1477 e->orpd(xmmA, xmmB);
1478 e->orpd(xmmA, anyptr_gpB);
1479 e->packsswb(xmmA, xmmB);
1480 e->packsswb(xmmA, anyptr_gpB);
1481 e->packssdw(xmmA, xmmB);
1482 e->packssdw(xmmA, anyptr_gpB);
1483 e->packuswb(xmmA, xmmB);
1484 e->packuswb(xmmA, anyptr_gpB);
1485 e->paddb(xmmA, xmmB);
1486 e->paddb(xmmA, anyptr_gpB);
1487 e->paddw(xmmA, xmmB);
1488 e->paddw(xmmA, anyptr_gpB);
1489 e->paddd(xmmA, xmmB);
1490 e->paddd(xmmA, anyptr_gpB);
1491 e->paddq(mmA, mmB);
1492 e->paddq(mmA, anyptr_gpB);
1493 e->paddq(xmmA, xmmB);
1494 e->paddq(xmmA, anyptr_gpB);
1495 e->paddsb(xmmA, xmmB);
1496 e->paddsb(xmmA, anyptr_gpB);
1497 e->paddsw(xmmA, xmmB);
1498 e->paddsw(xmmA, anyptr_gpB);
1499 e->paddusb(xmmA, xmmB);
1500 e->paddusb(xmmA, anyptr_gpB);
1501 e->paddusw(xmmA, xmmB);
1502 e->paddusw(xmmA, anyptr_gpB);
1503 e->pand(xmmA, xmmB);
1504 e->pand(xmmA, anyptr_gpB);
1505 e->pandn(xmmA, xmmB);
1506 e->pandn(xmmA, anyptr_gpB);
1507 e->pause();
1508 e->pavgb(xmmA, xmmB);
1509 e->pavgb(xmmA, anyptr_gpB);
1510 e->pavgw(xmmA, xmmB);
1511 e->pavgw(xmmA, anyptr_gpB);
1512 e->pcmpeqb(xmmA, xmmB);
1513 e->pcmpeqb(xmmA, anyptr_gpB);
1514 e->pcmpeqw(xmmA, xmmB);
1515 e->pcmpeqw(xmmA, anyptr_gpB);
1516 e->pcmpeqd(xmmA, xmmB);
1517 e->pcmpeqd(xmmA, anyptr_gpB);
1518 e->pcmpgtb(xmmA, xmmB);
1519 e->pcmpgtb(xmmA, anyptr_gpB);
1520 e->pcmpgtw(xmmA, xmmB);
1521 e->pcmpgtw(xmmA, anyptr_gpB);
1522 e->pcmpgtd(xmmA, xmmB);
1523 e->pcmpgtd(xmmA, anyptr_gpB);
1524 e->pmaxsw(xmmA, xmmB);
1525 e->pmaxsw(xmmA, anyptr_gpB);
1526 e->pmaxub(xmmA, xmmB);
1527 e->pmaxub(xmmA, anyptr_gpB);
1528 e->pminsw(xmmA, xmmB);
1529 e->pminsw(xmmA, anyptr_gpB);
1530 e->pminub(xmmA, xmmB);
1531 e->pminub(xmmA, anyptr_gpB);
1532 e->pmovmskb(gdA, xmmB);
1533 e->pmovmskb(gzA, xmmB);
1534 e->pmulhw(xmmA, xmmB);
1535 e->pmulhw(xmmA, anyptr_gpB);
1536 e->pmulhuw(xmmA, xmmB);
1537 e->pmulhuw(xmmA, anyptr_gpB);
1538 e->pmullw(xmmA, xmmB);
1539 e->pmullw(xmmA, anyptr_gpB);
1540 e->pmuludq(mmA, mmB);
1541 e->pmuludq(mmA, anyptr_gpB);
1542 e->pmuludq(xmmA, xmmB);
1543 e->pmuludq(xmmA, anyptr_gpB);
1544 e->por(xmmA, xmmB);
1545 e->por(xmmA, anyptr_gpB);
1546 e->pslld(xmmA, xmmB);
1547 e->pslld(xmmA, anyptr_gpB);
1548 e->pslld(xmmA, 0);
1549 e->psllq(xmmA, xmmB);
1550 e->psllq(xmmA, anyptr_gpB);
1551 e->psllq(xmmA, 0);
1552 e->psllw(xmmA, xmmB);
1553 e->psllw(xmmA, anyptr_gpB);
1554 e->psllw(xmmA, 0);
1555 e->pslldq(xmmA, 0);
1556 e->psrad(xmmA, xmmB);
1557 e->psrad(xmmA, anyptr_gpB);
1558 e->psrad(xmmA, 0);
1559 e->psraw(xmmA, xmmB);
1560 e->psraw(xmmA, anyptr_gpB);
1561 e->psraw(xmmA, 0);
1562 e->psubb(xmmA, xmmB);
1563 e->psubb(xmmA, anyptr_gpB);
1564 e->psubw(xmmA, xmmB);
1565 e->psubw(xmmA, anyptr_gpB);
1566 e->psubd(xmmA, xmmB);
1567 e->psubd(xmmA, anyptr_gpB);
1568 e->psubq(mmA, mmB);
1569 e->psubq(mmA, anyptr_gpB);
1570 e->psubq(xmmA, xmmB);
1571 e->psubq(xmmA, anyptr_gpB);
1572 e->pmaddwd(xmmA, xmmB);
1573 e->pmaddwd(xmmA, anyptr_gpB);
1574 e->pshufd(xmmA, xmmB, 0);
1575 e->pshufd(xmmA, anyptr_gpB, 0);
1576 e->pshufhw(xmmA, xmmB, 0);
1577 e->pshufhw(xmmA, anyptr_gpB, 0);
1578 e->pshuflw(xmmA, xmmB, 0);
1579 e->pshuflw(xmmA, anyptr_gpB, 0);
1580 e->psrld(xmmA, xmmB);
1581 e->psrld(xmmA, anyptr_gpB);
1582 e->psrld(xmmA, 0);
1583 e->psrlq(xmmA, xmmB);
1584 e->psrlq(xmmA, anyptr_gpB);
1585 e->psrlq(xmmA, 0);
1586 e->psrldq(xmmA, 0);
1587 e->psrlw(xmmA, xmmB);
1588 e->psrlw(xmmA, anyptr_gpB);
1589 e->psrlw(xmmA, 0);
1590 e->psubsb(xmmA, xmmB);
1591 e->psubsb(xmmA, anyptr_gpB);
1592 e->psubsw(xmmA, xmmB);
1593 e->psubsw(xmmA, anyptr_gpB);
1594 e->psubusb(xmmA, xmmB);
1595 e->psubusb(xmmA, anyptr_gpB);
1596 e->psubusw(xmmA, xmmB);
1597 e->psubusw(xmmA, anyptr_gpB);
1598 e->punpckhbw(xmmA, xmmB);
1599 e->punpckhbw(xmmA, anyptr_gpB);
1600 e->punpckhwd(xmmA, xmmB);
1601 e->punpckhwd(xmmA, anyptr_gpB);
1602 e->punpckhdq(xmmA, xmmB);
1603 e->punpckhdq(xmmA, anyptr_gpB);
1604 e->punpckhqdq(xmmA, xmmB);
1605 e->punpckhqdq(xmmA, anyptr_gpB);
1606 e->punpcklbw(xmmA, xmmB);
1607 e->punpcklbw(xmmA, anyptr_gpB);
1608 e->punpcklwd(xmmA, xmmB);
1609 e->punpcklwd(xmmA, anyptr_gpB);
1610 e->punpckldq(xmmA, xmmB);
1611 e->punpckldq(xmmA, anyptr_gpB);
1612 e->punpcklqdq(xmmA, xmmB);
1613 e->punpcklqdq(xmmA, anyptr_gpB);
1614 e->pxor(xmmA, xmmB);
1615 e->pxor(xmmA, anyptr_gpB);
1616 e->sqrtpd(xmmA, xmmB);
1617 e->sqrtpd(xmmA, anyptr_gpB);
1618 e->sqrtsd(xmmA, xmmB);
1619 e->sqrtsd(xmmA, anyptr_gpB);
1620 e->subpd(xmmA, xmmB);
1621 e->subpd(xmmA, anyptr_gpB);
1622 e->subsd(xmmA, xmmB);
1623 e->subsd(xmmA, anyptr_gpB);
1624 e->ucomisd(xmmA, xmmB);
1625 e->ucomisd(xmmA, anyptr_gpB);
1626 e->unpckhpd(xmmA, xmmB);
1627 e->unpckhpd(xmmA, anyptr_gpB);
1628 e->unpcklpd(xmmA, xmmB);
1629 e->unpcklpd(xmmA, anyptr_gpB);
1630 e->xorpd(xmmA, xmmB);
1631 e->xorpd(xmmA, anyptr_gpB);
1632
1633 // SSE3.
1634 e->nop();
1635
1636 e->addsubpd(xmmA, xmmB);
1637 e->addsubpd(xmmA, anyptr_gpB);
1638 e->addsubps(xmmA, xmmB);
1639 e->addsubps(xmmA, anyptr_gpB);
1640 e->fisttp(dword_ptr(gzA));
1641 e->haddpd(xmmA, xmmB);
1642 e->haddpd(xmmA, anyptr_gpB);
1643 e->haddps(xmmA, xmmB);
1644 e->haddps(xmmA, anyptr_gpB);
1645 e->hsubpd(xmmA, xmmB);
1646 e->hsubpd(xmmA, anyptr_gpB);
1647 e->hsubps(xmmA, xmmB);
1648 e->hsubps(xmmA, anyptr_gpB);
1649 e->lddqu(xmmA, anyptr_gpB);
1650 e->movddup(xmmA, xmmB);
1651 e->movddup(xmmA, anyptr_gpB);
1652 e->movshdup(xmmA, xmmB);
1653 e->movshdup(xmmA, anyptr_gpB);
1654 e->movsldup(xmmA, xmmB);
1655 e->movsldup(xmmA, anyptr_gpB);
1656
1657 // SSSE3.
1658 e->nop();
1659
1660 e->psignb(mmA, mmB);
1661 e->psignb(mmA, anyptr_gpB);
1662 e->psignb(xmmA, xmmB);
1663 e->psignb(xmmA, anyptr_gpB);
1664 e->psignw(mmA, mmB);
1665 e->psignw(mmA, anyptr_gpB);
1666 e->psignw(xmmA, xmmB);
1667 e->psignw(xmmA, anyptr_gpB);
1668 e->psignd(mmA, mmB);
1669 e->psignd(mmA, anyptr_gpB);
1670 e->psignd(xmmA, xmmB);
1671 e->psignd(xmmA, anyptr_gpB);
1672 e->phaddw(mmA, mmB);
1673 e->phaddw(mmA, anyptr_gpB);
1674 e->phaddw(xmmA, xmmB);
1675 e->phaddw(xmmA, anyptr_gpB);
1676 e->phaddd(mmA, mmB);
1677 e->phaddd(mmA, anyptr_gpB);
1678 e->phaddd(xmmA, xmmB);
1679 e->phaddd(xmmA, anyptr_gpB);
1680 e->phaddsw(mmA, mmB);
1681 e->phaddsw(mmA, anyptr_gpB);
1682 e->phaddsw(xmmA, xmmB);
1683 e->phaddsw(xmmA, anyptr_gpB);
1684 e->phsubw(mmA, mmB);
1685 e->phsubw(mmA, anyptr_gpB);
1686 e->phsubw(xmmA, xmmB);
1687 e->phsubw(xmmA, anyptr_gpB);
1688 e->phsubd(mmA, mmB);
1689 e->phsubd(mmA, anyptr_gpB);
1690 e->phsubd(xmmA, xmmB);
1691 e->phsubd(xmmA, anyptr_gpB);
1692 e->phsubsw(mmA, mmB);
1693 e->phsubsw(mmA, anyptr_gpB);
1694 e->phsubsw(xmmA, xmmB);
1695 e->phsubsw(xmmA, anyptr_gpB);
1696 e->pmaddubsw(mmA, mmB);
1697 e->pmaddubsw(mmA, anyptr_gpB);
1698 e->pmaddubsw(xmmA, xmmB);
1699 e->pmaddubsw(xmmA, anyptr_gpB);
1700 e->pabsb(mmA, mmB);
1701 e->pabsb(mmA, anyptr_gpB);
1702 e->pabsb(xmmA, xmmB);
1703 e->pabsb(xmmA, anyptr_gpB);
1704 e->pabsw(mmA, mmB);
1705 e->pabsw(mmA, anyptr_gpB);
1706 e->pabsw(xmmA, xmmB);
1707 e->pabsw(xmmA, anyptr_gpB);
1708 e->pabsd(mmA, mmB);
1709 e->pabsd(mmA, anyptr_gpB);
1710 e->pabsd(xmmA, xmmB);
1711 e->pabsd(xmmA, anyptr_gpB);
1712 e->pmulhrsw(mmA, mmB);
1713 e->pmulhrsw(mmA, anyptr_gpB);
1714 e->pmulhrsw(xmmA, xmmB);
1715 e->pmulhrsw(xmmA, anyptr_gpB);
1716 e->pshufb(mmA, mmB);
1717 e->pshufb(mmA, anyptr_gpB);
1718 e->pshufb(xmmA, xmmB);
1719 e->pshufb(xmmA, anyptr_gpB);
1720 e->palignr(mmA, mmB, 0);
1721 e->palignr(mmA, anyptr_gpB, 0);
1722 e->palignr(xmmA, xmmB, 0);
1723 e->palignr(xmmA, anyptr_gpB, 0);
1724
1725 // SSE4.1.
1726 e->nop();
1727
1728 e->blendpd(xmmA, xmmB, 0);
1729 e->blendpd(xmmA, anyptr_gpB, 0);
1730 e->blendps(xmmA, xmmB, 0);
1731 e->blendps(xmmA, anyptr_gpB, 0);
1732 e->blendvpd(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
1733 e->blendvpd(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
1734 e->blendvpd(xmmA, anyptr_gpB); // Implicit xmmA, mem , <XMM0>
1735 e->blendvpd(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem , <XMM0>
1736 e->blendvps(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
1737 e->blendvps(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
1738 e->blendvps(xmmA, anyptr_gpB); // Implicit xmmA, mem , <XMM0>
1739 e->blendvps(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem , <XMM0>
1740
1741 e->dppd(xmmA, xmmB, 0);
1742 e->dppd(xmmA, anyptr_gpB, 0);
1743 e->dpps(xmmA, xmmB, 0);
1744 e->dpps(xmmA, anyptr_gpB, 0);
1745 e->extractps(gdA, xmmB, 0);
1746 e->extractps(gzA, xmmB, 0);
1747 e->extractps(anyptr_gpA, xmmB, 0);
1748 e->insertps(xmmA, xmmB, 0);
1749 e->insertps(xmmA, anyptr_gpB, 0);
1750 e->movntdqa(xmmA, anyptr_gpB);
1751 e->mpsadbw(xmmA, xmmB, 0);
1752 e->mpsadbw(xmmA, anyptr_gpB, 0);
1753 e->packusdw(xmmA, xmmB);
1754 e->packusdw(xmmA, anyptr_gpB);
1755 e->pblendvb(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
1756 e->pblendvb(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
1757 e->pblendvb(xmmA, anyptr_gpB); // Implicit xmmA, mem, <XMM0>
1758 e->pblendvb(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem, <XMM0>
1759 e->pblendw(xmmA, xmmB, 0);
1760 e->pblendw(xmmA, anyptr_gpB, 0);
1761 e->pcmpeqq(xmmA, xmmB);
1762 e->pcmpeqq(xmmA, anyptr_gpB);
1763 e->pextrb(gdA, xmmB, 0);
1764 e->pextrb(gzA, xmmB, 0);
1765 e->pextrb(anyptr_gpA, xmmB, 0);
1766 e->pextrd(gdA, xmmB, 0);
1767 e->pextrd(gzA, xmmB, 0);
1768 e->pextrd(anyptr_gpA, xmmB, 0);
1769 if (isX64) e->pextrq(gzA, xmmB, 0);
1770 if (isX64) e->pextrq(anyptr_gpA, xmmB, 0);
1771 e->pextrw(gdA, xmmB, 0);
1772 e->pextrw(gzA, xmmB, 0);
1773 e->pextrw(anyptr_gpA, xmmB, 0);
1774 e->phminposuw(xmmA, xmmB);
1775 e->phminposuw(xmmA, anyptr_gpB);
1776 e->pinsrb(xmmA, gdB, 0);
1777 e->pinsrb(xmmA, gzB, 0);
1778 e->pinsrb(xmmA, anyptr_gpB, 0);
1779 e->pinsrd(xmmA, gdB, 0);
1780 e->pinsrd(xmmA, gzB, 0);
1781 e->pinsrd(xmmA, anyptr_gpB, 0);
1782 e->pinsrw(xmmA, gdB, 0);
1783 e->pinsrw(xmmA, gzB, 0);
1784 e->pinsrw(xmmA, anyptr_gpB, 0);
1785 e->pmaxuw(xmmA, xmmB);
1786 e->pmaxuw(xmmA, anyptr_gpB);
1787 e->pmaxsb(xmmA, xmmB);
1788 e->pmaxsb(xmmA, anyptr_gpB);
1789 e->pmaxsd(xmmA, xmmB);
1790 e->pmaxsd(xmmA, anyptr_gpB);
1791 e->pmaxud(xmmA, xmmB);
1792 e->pmaxud(xmmA, anyptr_gpB);
1793 e->pminsb(xmmA, xmmB);
1794 e->pminsb(xmmA, anyptr_gpB);
1795 e->pminuw(xmmA, xmmB);
1796 e->pminuw(xmmA, anyptr_gpB);
1797 e->pminud(xmmA, xmmB);
1798 e->pminud(xmmA, anyptr_gpB);
1799 e->pminsd(xmmA, xmmB);
1800 e->pminsd(xmmA, anyptr_gpB);
1801 e->pmovsxbw(xmmA, xmmB);
1802 e->pmovsxbw(xmmA, anyptr_gpB);
1803 e->pmovsxbd(xmmA, xmmB);
1804 e->pmovsxbd(xmmA, anyptr_gpB);
1805 e->pmovsxbq(xmmA, xmmB);
1806 e->pmovsxbq(xmmA, anyptr_gpB);
1807 e->pmovsxwd(xmmA, xmmB);
1808 e->pmovsxwd(xmmA, anyptr_gpB);
1809 e->pmovsxwq(xmmA, xmmB);
1810 e->pmovsxwq(xmmA, anyptr_gpB);
1811 e->pmovsxdq(xmmA, xmmB);
1812 e->pmovsxdq(xmmA, anyptr_gpB);
1813 e->pmovzxbw(xmmA, xmmB);
1814 e->pmovzxbw(xmmA, anyptr_gpB);
1815 e->pmovzxbd(xmmA, xmmB);
1816 e->pmovzxbd(xmmA, anyptr_gpB);
1817 e->pmovzxbq(xmmA, xmmB);
1818 e->pmovzxbq(xmmA, anyptr_gpB);
1819 e->pmovzxwd(xmmA, xmmB);
1820 e->pmovzxwd(xmmA, anyptr_gpB);
1821 e->pmovzxwq(xmmA, xmmB);
1822 e->pmovzxwq(xmmA, anyptr_gpB);
1823 e->pmovzxdq(xmmA, xmmB);
1824 e->pmovzxdq(xmmA, anyptr_gpB);
1825 e->pmuldq(xmmA, xmmB);
1826 e->pmuldq(xmmA, anyptr_gpB);
1827 e->pmulld(xmmA, xmmB);
1828 e->pmulld(xmmA, anyptr_gpB);
1829 e->ptest(xmmA, xmmB);
1830 e->ptest(xmmA, anyptr_gpB);
1831 e->roundps(xmmA, xmmB, 0);
1832 e->roundps(xmmA, anyptr_gpB, 0);
1833 e->roundss(xmmA, xmmB, 0);
1834 e->roundss(xmmA, anyptr_gpB, 0);
1835 e->roundpd(xmmA, xmmB, 0);
1836 e->roundpd(xmmA, anyptr_gpB, 0);
1837 e->roundsd(xmmA, xmmB, 0);
1838 e->roundsd(xmmA, anyptr_gpB, 0);
1839
1840 // SSE4.2.
1841 e->nop();
1842
1843 e->pcmpestri(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
1844 e->pcmpestri(xmmA, xmmB , imm(0), ecx, eax, edx); // Explicit xmmA, xmmB, imm, <ECX>, <EAX>, <EDX>
1845 e->pcmpestri(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
1846 e->pcmpestri(xmmA, anyptr_gpB, imm(0), ecx, eax, edx); // Explicit xmmA, mem , imm, <ECX>, <EAX>, <EDX>
1847 e->pcmpestrm(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
1848 e->pcmpestrm(xmmA, xmmB , imm(0), xmm0, eax, edx); // Explicit xmmA, xmmB, imm, <XMM0>, <EAX>, <EDX>
1849 e->pcmpestrm(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
1850 e->pcmpestrm(xmmA, anyptr_gpB, imm(0), xmm0, eax, edx); // Explicit xmmA, mem , imm, <XMM0>, <EAX>, <EDX>
1851 e->pcmpistri(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <ECX>
1852 e->pcmpistri(xmmA, xmmB , imm(0), ecx); // Explicit xmmA, xmmB, imm, <ECX>
1853 e->pcmpistri(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <ECX>
1854 e->pcmpistri(xmmA, anyptr_gpB, imm(0), ecx); // Explicit xmmA, mem , imm, <ECX>
1855 e->pcmpistrm(xmmA, xmmB , imm(0)); // Implicit xmmA, xmmB, imm, <XMM0>
1856 e->pcmpistrm(xmmA, xmmB , imm(0), xmm0); // Explicit xmmA, xmmB, imm, <XMM0>
1857 e->pcmpistrm(xmmA, anyptr_gpB, imm(0)); // Implicit xmmA, mem , imm, <XMM0>
1858 e->pcmpistrm(xmmA, anyptr_gpB, imm(0), xmm0); // Explicit xmmA, mem , imm, <XMM0>
1859
1860 e->pcmpgtq(xmmA, xmmB);
1861 e->pcmpgtq(xmmA, anyptr_gpB);
1862
1863 // SSE4A.
1864 e->nop();
1865
1866 e->extrq(xmmA, xmmB);
1867 e->extrq(xmmA, 0x1, 0x2);
1868 e->extrq(xmmB, 0x1, 0x2);
1869 e->insertq(xmmA, xmmB);
1870 e->insertq(xmmA, xmmB, 0x1, 0x2);
1871 e->movntsd(anyptr_gpA, xmmB);
1872 e->movntss(anyptr_gpA, xmmB);
1873
1874 // AESNI.
1875 e->nop();
1876
1877 e->aesdec(xmmA, xmmB);
1878 e->aesdec(xmmA, anyptr_gpB);
1879 e->aesdeclast(xmmA, xmmB);
1880 e->aesdeclast(xmmA, anyptr_gpB);
1881 e->aesenc(xmmA, xmmB);
1882 e->aesenc(xmmA, anyptr_gpB);
1883 e->aesenclast(xmmA, xmmB);
1884 e->aesenclast(xmmA, anyptr_gpB);
1885 e->aesimc(xmmA, xmmB);
1886 e->aesimc(xmmA, anyptr_gpB);
1887 e->aeskeygenassist(xmmA, xmmB, 0);
1888 e->aeskeygenassist(xmmA, anyptr_gpB, 0);
1889
1890 // SHA.
1891 e->nop();
1892
1893 e->sha1msg1(xmmA, xmmB);
1894 e->sha1msg1(xmmA, anyptr_gpB);
1895 e->sha1msg2(xmmA, xmmB);
1896 e->sha1msg2(xmmA, anyptr_gpB);
1897 e->sha1nexte(xmmA, xmmB);
1898 e->sha1nexte(xmmA, anyptr_gpB);
1899 e->sha1rnds4(xmmA, xmmB, 0);
1900 e->sha1rnds4(xmmA, anyptr_gpB, 0);
1901 e->sha256msg1(xmmA, xmmB);
1902 e->sha256msg1(xmmA, anyptr_gpB);
1903 e->sha256msg2(xmmA, xmmB);
1904 e->sha256msg2(xmmA, anyptr_gpB);
1905 e->sha256rnds2(xmmA, xmmB); // Implicit xmmA, xmmB, <XMM0>
1906 e->sha256rnds2(xmmA, xmmB, xmm0); // Explicit xmmA, xmmB, <XMM0>
1907 e->sha256rnds2(xmmA, anyptr_gpB); // Implicit xmmA, mem, <XMM0>
1908 e->sha256rnds2(xmmA, anyptr_gpB, xmm0); // Explicit xmmA, mem, <XMM0>
1909
1910 // PCLMULQDQ.
1911 e->nop();
1912
1913 e->pclmulqdq(xmmA, xmmB, 0);
1914 e->pclmulqdq(xmmA, anyptr_gpB, 0);
1915
1916 // AVX.
1917 e->nop();
1918
1919 e->vaddpd(xmmA, xmmB, xmmC);
1920 e->vaddpd(xmmA, xmmB, anyptr_gpC);
1921 e->vaddpd(ymmA, ymmB, ymmC);
1922 e->vaddpd(ymmA, ymmB, anyptr_gpC);
1923 e->vaddps(xmmA, xmmB, xmmC);
1924 e->vaddps(xmmA, xmmB, anyptr_gpC);
1925 e->vaddps(ymmA, ymmB, ymmC);
1926 e->vaddps(ymmA, ymmB, anyptr_gpC);
1927 e->vaddsd(xmmA, xmmB, xmmC);
1928 e->vaddsd(xmmA, xmmB, anyptr_gpC);
1929 e->vaddss(xmmA, xmmB, xmmC);
1930 e->vaddss(xmmA, xmmB, anyptr_gpC);
1931 e->vaddsubpd(xmmA, xmmB, xmmC);
1932 e->vaddsubpd(xmmA, xmmB, anyptr_gpC);
1933 e->vaddsubpd(ymmA, ymmB, ymmC);
1934 e->vaddsubpd(ymmA, ymmB, anyptr_gpC);
1935 e->vaddsubps(xmmA, xmmB, xmmC);
1936 e->vaddsubps(xmmA, xmmB, anyptr_gpC);
1937 e->vaddsubps(ymmA, ymmB, ymmC);
1938 e->vaddsubps(ymmA, ymmB, anyptr_gpC);
1939 e->vandpd(xmmA, xmmB, xmmC);
1940 e->vandpd(xmmA, xmmB, anyptr_gpC);
1941 e->vandpd(ymmA, ymmB, ymmC);
1942 e->vandpd(ymmA, ymmB, anyptr_gpC);
1943 e->vandps(xmmA, xmmB, xmmC);
1944 e->vandps(xmmA, xmmB, anyptr_gpC);
1945 e->vandps(ymmA, ymmB, ymmC);
1946 e->vandps(ymmA, ymmB, anyptr_gpC);
1947 e->vandnpd(xmmA, xmmB, xmmC);
1948 e->vandnpd(xmmA, xmmB, anyptr_gpC);
1949 e->vandnpd(ymmA, ymmB, ymmC);
1950 e->vandnpd(ymmA, ymmB, anyptr_gpC);
1951 e->vandnps(xmmA, xmmB, xmmC);
1952 e->vandnps(xmmA, xmmB, anyptr_gpC);
1953 e->vandnps(ymmA, ymmB, ymmC);
1954 e->vandnps(ymmA, ymmB, anyptr_gpC);
1955 e->vblendpd(xmmA, xmmB, xmmC, 0);
1956 e->vblendpd(xmmA, xmmB, anyptr_gpC, 0);
1957 e->vblendpd(ymmA, ymmB, ymmC, 0);
1958 e->vblendpd(ymmA, ymmB, anyptr_gpC, 0);
1959 e->vblendps(xmmA, xmmB, xmmC, 0);
1960 e->vblendps(xmmA, xmmB, anyptr_gpC, 0);
1961 e->vblendps(ymmA, ymmB, ymmC, 0);
1962 e->vblendps(ymmA, ymmB, anyptr_gpC, 0);
1963 e->vblendvpd(xmmA, xmmB, xmmC, xmmD);
1964 e->vblendvpd(xmmA, xmmB, anyptr_gpC, xmmD);
1965 e->vblendvpd(ymmA, ymmB, ymmC, ymmD);
1966 e->vblendvpd(ymmA, ymmB, anyptr_gpC, ymmD);
1967 e->vbroadcastf128(ymmA, anyptr_gpB);
1968 e->vbroadcastsd(ymmA, anyptr_gpB);
1969 e->vbroadcastss(xmmA, anyptr_gpB);
1970 e->vbroadcastss(ymmA, anyptr_gpB);
1971 e->vcmppd(xmmA, xmmB, xmmC, 0);
1972 e->vcmppd(xmmA, xmmB, anyptr_gpC, 0);
1973 e->vcmppd(ymmA, ymmB, ymmC, 0);
1974 e->vcmppd(ymmA, ymmB, anyptr_gpC, 0);
1975 e->vcmpps(xmmA, xmmB, xmmC, 0);
1976 e->vcmpps(xmmA, xmmB, anyptr_gpC, 0);
1977 e->vcmpps(ymmA, ymmB, ymmC, 0);
1978 e->vcmpps(ymmA, ymmB, anyptr_gpC, 0);
1979 e->vcmpsd(xmmA, xmmB, xmmC, 0);
1980 e->vcmpsd(xmmA, xmmB, anyptr_gpC, 0);
1981 e->vcmpss(xmmA, xmmB, xmmC, 0);
1982 e->vcmpss(xmmA, xmmB, anyptr_gpC, 0);
1983 e->vcomisd(xmmA, xmmB);
1984 e->vcomisd(xmmA, anyptr_gpB);
1985 e->vcomiss(xmmA, xmmB);
1986 e->vcomiss(xmmA, anyptr_gpB);
1987 e->vcvtdq2pd(xmmA, xmmB);
1988 e->vcvtdq2pd(xmmA, anyptr_gpB);
1989 e->vcvtdq2pd(ymmA, xmmB);
1990 e->vcvtdq2pd(ymmA, anyptr_gpB);
1991 e->vcvtdq2ps(xmmA, xmmB);
1992 e->vcvtdq2ps(xmmA, anyptr_gpB);
1993 e->vcvtdq2ps(ymmA, ymmB);
1994 e->vcvtdq2ps(ymmA, anyptr_gpB);
1995 e->vcvtpd2dq(xmmA, xmmB);
1996 e->vcvtpd2dq(xmmA, ymmB);
1997 e->vcvtpd2dq(xmmA, anyptr_gpB);
1998 e->vcvtpd2ps(xmmA, xmmB);
1999 e->vcvtpd2ps(xmmA, ymmB);
2000 e->vcvtpd2ps(xmmA, anyptr_gpB);
2001 e->vcvtps2dq(xmmA, xmmB);
2002 e->vcvtps2dq(xmmA, anyptr_gpB);
2003 e->vcvtps2dq(ymmA, ymmB);
2004 e->vcvtps2dq(ymmA, anyptr_gpB);
2005 e->vcvtps2pd(xmmA, xmmB);
2006 e->vcvtps2pd(xmmA, anyptr_gpB);
2007 e->vcvtps2pd(ymmA, xmmB);
2008 e->vcvtps2pd(ymmA, anyptr_gpB);
2009 e->vcvtsd2si(gzA, xmmB);
2010 e->vcvtsd2si(gzA, anyptr_gpB);
2011 e->vcvtsd2ss(xmmA, xmmB, xmmC);
2012 e->vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
2013 e->vcvtsi2sd(xmmA, xmmB, gzC);
2014 e->vcvtsi2sd(xmmA, xmmB, anyptr_gpC);
2015 e->vcvtsi2ss(xmmA, xmmB, gzC);
2016 e->vcvtsi2ss(xmmA, xmmB, anyptr_gpC);
2017 e->vcvtss2sd(xmmA, xmmB, xmmC);
2018 e->vcvtss2sd(xmmA, xmmB, anyptr_gpC);
2019 e->vcvtss2si(gzA, xmmB);
2020 e->vcvtss2si(gzA, anyptr_gpB);
2021 e->vcvttpd2dq(xmmA, xmmB);
2022 e->vcvttpd2dq(xmmA, ymmB);
2023 e->vcvttpd2dq(xmmA, anyptr_gpB);
2024 e->vcvttps2dq(xmmA, xmmB);
2025 e->vcvttps2dq(xmmA, anyptr_gpB);
2026 e->vcvttps2dq(ymmA, ymmB);
2027 e->vcvttps2dq(ymmA, anyptr_gpB);
2028 e->vcvttsd2si(gzA, xmmB);
2029 e->vcvttsd2si(gzA, anyptr_gpB);
2030 e->vcvttss2si(gzA, xmmB);
2031 e->vcvttss2si(gzA, anyptr_gpB);
2032 e->vdivpd(xmmA, xmmB, xmmC);
2033 e->vdivpd(xmmA, xmmB, anyptr_gpC);
2034 e->vdivpd(ymmA, ymmB, ymmC);
2035 e->vdivpd(ymmA, ymmB, anyptr_gpC);
2036 e->vdivps(xmmA, xmmB, xmmC);
2037 e->vdivps(xmmA, xmmB, anyptr_gpC);
2038 e->vdivps(ymmA, ymmB, ymmC);
2039 e->vdivps(ymmA, ymmB, anyptr_gpC);
2040 e->vdivsd(xmmA, xmmB, xmmC);
2041 e->vdivsd(xmmA, xmmB, anyptr_gpC);
2042 e->vdivss(xmmA, xmmB, xmmC);
2043 e->vdivss(xmmA, xmmB, anyptr_gpC);
2044 e->vdppd(xmmA, xmmB, xmmC, 0);
2045 e->vdppd(xmmA, xmmB, anyptr_gpC, 0);
2046 e->vdpps(xmmA, xmmB, xmmC, 0);
2047 e->vdpps(xmmA, xmmB, anyptr_gpC, 0);
2048 e->vdpps(ymmA, ymmB, ymmC, 0);
2049 e->vdpps(ymmA, ymmB, anyptr_gpC, 0);
2050 e->vextractf128(xmmA, ymmB, 0);
2051 e->vextractf128(anyptr_gpA, ymmB, 0);
2052 e->vextractps(gzA, xmmB, 0);
2053 e->vextractps(anyptr_gpA, xmmB, 0);
2054 e->vhaddpd(xmmA, xmmB, xmmC);
2055 e->vhaddpd(xmmA, xmmB, anyptr_gpC);
2056 e->vhaddpd(ymmA, ymmB, ymmC);
2057 e->vhaddpd(ymmA, ymmB, anyptr_gpC);
2058 e->vhaddps(xmmA, xmmB, xmmC);
2059 e->vhaddps(xmmA, xmmB, anyptr_gpC);
2060 e->vhaddps(ymmA, ymmB, ymmC);
2061 e->vhaddps(ymmA, ymmB, anyptr_gpC);
2062 e->vhsubpd(xmmA, xmmB, xmmC);
2063 e->vhsubpd(xmmA, xmmB, anyptr_gpC);
2064 e->vhsubpd(ymmA, ymmB, ymmC);
2065 e->vhsubpd(ymmA, ymmB, anyptr_gpC);
2066 e->vhsubps(xmmA, xmmB, xmmC);
2067 e->vhsubps(xmmA, xmmB, anyptr_gpC);
2068 e->vhsubps(ymmA, ymmB, ymmC);
2069 e->vhsubps(ymmA, ymmB, anyptr_gpC);
2070 e->vinsertf128(ymmA, ymmB, xmmC, 0);
2071 e->vinsertf128(ymmA, ymmB, anyptr_gpC, 0);
2072 e->vinsertps(xmmA, xmmB, xmmC, 0);
2073 e->vinsertps(xmmA, xmmB, anyptr_gpC, 0);
2074 e->vlddqu(xmmA, anyptr_gpB);
2075 e->vlddqu(ymmA, anyptr_gpB);
2076 e->vldmxcsr(anyptr_gpA);
2077 e->vmaskmovdqu(xmmA, xmmB); // Implicit xmmA, xmmB, <ds:[EDI|RDI]>
2078 e->vmaskmovdqu(xmmA, xmmB, ptr(e->zdi())); // Explicit xmmA, xmmB, <ds:[EDI|RDI]>
2079 e->vmaskmovps(xmmA, xmmB, anyptr_gpC);
2080 e->vmaskmovps(ymmA, ymmB, anyptr_gpC);
2081 e->vmaskmovps(anyptr_gpA, xmmB, xmmC);
2082 e->vmaskmovps(anyptr_gpA, ymmB, ymmC);
2083 e->vmaskmovpd(xmmA, xmmB, anyptr_gpC);
2084 e->vmaskmovpd(ymmA, ymmB, anyptr_gpC);
2085 e->vmaskmovpd(anyptr_gpA, xmmB, xmmC);
2086 e->vmaskmovpd(anyptr_gpA, ymmB, ymmC);
2087 e->vmaxpd(xmmA, xmmB, xmmC);
2088 e->vmaxpd(xmmA, xmmB, anyptr_gpC);
2089 e->vmaxpd(ymmA, ymmB, ymmC);
2090 e->vmaxpd(ymmA, ymmB, anyptr_gpC);
2091 e->vmaxps(xmmA, xmmB, xmmC);
2092 e->vmaxps(xmmA, xmmB, anyptr_gpC);
2093 e->vmaxps(ymmA, ymmB, ymmC);
2094 e->vmaxps(ymmA, ymmB, anyptr_gpC);
2095 e->vmaxsd(xmmA, xmmB, xmmC);
2096 e->vmaxsd(xmmA, xmmB, anyptr_gpC);
2097 e->vmaxss(xmmA, xmmB, xmmC);
2098 e->vmaxss(xmmA, xmmB, anyptr_gpC);
2099 e->vminpd(xmmA, xmmB, xmmC);
2100 e->vminpd(xmmA, xmmB, anyptr_gpC);
2101 e->vminpd(ymmA, ymmB, ymmC);
2102 e->vminpd(ymmA, ymmB, anyptr_gpC);
2103 e->vminps(xmmA, xmmB, xmmC);
2104 e->vminps(xmmA, xmmB, anyptr_gpC);
2105 e->vminps(ymmA, ymmB, ymmC);
2106 e->vminps(ymmA, ymmB, anyptr_gpC);
2107 e->vminsd(xmmA, xmmB, xmmC);
2108 e->vminsd(xmmA, xmmB, anyptr_gpC);
2109 e->vminss(xmmA, xmmB, xmmC);
2110 e->vminss(xmmA, xmmB, anyptr_gpC);
2111 e->vmovapd(xmmA, xmmB);
2112 e->vmovapd(xmmA, anyptr_gpB);
2113 e->vmovapd(anyptr_gpA, xmmB);
2114 e->vmovapd(ymmA, ymmB);
2115 e->vmovapd(ymmA, anyptr_gpB);
2116 e->vmovapd(anyptr_gpA, ymmB);
2117 e->vmovaps(xmmA, xmmB);
2118 e->vmovaps(xmmA, anyptr_gpB);
2119 e->vmovaps(anyptr_gpA, xmmB);
2120 e->vmovaps(ymmA, ymmB);
2121 e->vmovaps(ymmA, anyptr_gpB);
2122 e->vmovaps(anyptr_gpA, ymmB);
2123 e->vmovd(xmmA, gzB);
2124 e->vmovd(xmmA, anyptr_gpB);
2125 e->vmovd(gzA, xmmB);
2126 e->vmovd(anyptr_gpA, xmmB);
2127 e->vmovddup(xmmA, xmmB);
2128 e->vmovddup(xmmA, anyptr_gpB);
2129 e->vmovddup(ymmA, ymmB);
2130 e->vmovddup(ymmA, anyptr_gpB);
2131 e->vmovdqa(xmmA, xmmB);
2132 e->vmovdqa(xmmA, anyptr_gpB);
2133 e->vmovdqa(anyptr_gpA, xmmB);
2134 e->vmovdqa(ymmA, ymmB);
2135 e->vmovdqa(ymmA, anyptr_gpB);
2136 e->vmovdqa(anyptr_gpA, ymmB);
2137 e->vmovdqu(xmmA, xmmB);
2138 e->vmovdqu(xmmA, anyptr_gpB);
2139 e->vmovdqu(anyptr_gpA, xmmB);
2140 e->vmovdqu(ymmA, ymmB);
2141 e->vmovdqu(ymmA, anyptr_gpB);
2142 e->vmovdqu(anyptr_gpA, ymmB);
2143 e->vmovhlps(xmmA, xmmB, xmmC);
2144 e->vmovhpd(xmmA, xmmB, anyptr_gpC);
2145 e->vmovhpd(anyptr_gpA, xmmB);
2146 e->vmovhps(xmmA, xmmB, anyptr_gpC);
2147 e->vmovhps(anyptr_gpA, xmmB);
2148 e->vmovlhps(xmmA, xmmB, xmmC);
2149 e->vmovlpd(xmmA, xmmB, anyptr_gpC);
2150 e->vmovlpd(anyptr_gpA, xmmB);
2151 e->vmovlps(xmmA, xmmB, anyptr_gpC);
2152 e->vmovlps(anyptr_gpA, xmmB);
2153 e->vmovmskpd(gzA, xmmB);
2154 e->vmovmskpd(gzA, ymmB);
2155 e->vmovmskps(gzA, xmmB);
2156 e->vmovmskps(gzA, ymmB);
2157 e->vmovntdq(anyptr_gpA, xmmB);
2158 e->vmovntdq(anyptr_gpA, ymmB);
2159 e->vmovntdqa(xmmA, anyptr_gpB);
2160 e->vmovntpd(anyptr_gpA, xmmB);
2161 e->vmovntpd(anyptr_gpA, ymmB);
2162 e->vmovntps(anyptr_gpA, xmmB);
2163 e->vmovntps(anyptr_gpA, ymmB);
2164 e->vmovsd(xmmA, xmmB, xmmC);
2165 e->vmovsd(xmmA, anyptr_gpB);
2166 e->vmovsd(anyptr_gpA, xmmB);
2167 e->vmovshdup(xmmA, xmmB);
2168 e->vmovshdup(xmmA, anyptr_gpB);
2169 e->vmovshdup(ymmA, ymmB);
2170 e->vmovshdup(ymmA, anyptr_gpB);
2171 e->vmovsldup(xmmA, xmmB);
2172 e->vmovsldup(xmmA, anyptr_gpB);
2173 e->vmovsldup(ymmA, ymmB);
2174 e->vmovsldup(ymmA, anyptr_gpB);
2175 e->vmovss(xmmA, xmmB, xmmC);
2176 e->vmovss(xmmA, anyptr_gpB);
2177 e->vmovss(anyptr_gpA, xmmB);
2178 e->vmovupd(xmmA, xmmB);
2179 e->vmovupd(xmmA, anyptr_gpB);
2180 e->vmovupd(anyptr_gpA, xmmB);
2181 e->vmovupd(ymmA, ymmB);
2182 e->vmovupd(ymmA, anyptr_gpB);
2183 e->vmovupd(anyptr_gpA, ymmB);
2184 e->vmovups(xmmA, xmmB);
2185 e->vmovups(xmmA, anyptr_gpB);
2186 e->vmovups(anyptr_gpA, xmmB);
2187 e->vmovups(ymmA, ymmB);
2188 e->vmovups(ymmA, anyptr_gpB);
2189 e->vmovups(anyptr_gpA, ymmB);
2190 e->vmpsadbw(xmmA, xmmB, xmmC, 0);
2191 e->vmpsadbw(xmmA, xmmB, anyptr_gpC, 0);
2192 e->vmulpd(xmmA, xmmB, xmmC);
2193 e->vmulpd(xmmA, xmmB, anyptr_gpC);
2194 e->vmulpd(ymmA, ymmB, ymmC);
2195 e->vmulpd(ymmA, ymmB, anyptr_gpC);
2196 e->vmulps(xmmA, xmmB, xmmC);
2197 e->vmulps(xmmA, xmmB, anyptr_gpC);
2198 e->vmulps(ymmA, ymmB, ymmC);
2199 e->vmulps(ymmA, ymmB, anyptr_gpC);
2200 e->vmulsd(xmmA, xmmB, xmmC);
2201 e->vmulsd(xmmA, xmmB, anyptr_gpC);
2202 e->vmulss(xmmA, xmmB, xmmC);
2203 e->vmulss(xmmA, xmmB, anyptr_gpC);
2204 e->vorpd(xmmA, xmmB, xmmC);
2205 e->vorpd(xmmA, xmmB, anyptr_gpC);
2206 e->vorpd(ymmA, ymmB, ymmC);
2207 e->vorpd(ymmA, ymmB, anyptr_gpC);
2208 e->vorps(xmmA, xmmB, xmmC);
2209 e->vorps(xmmA, xmmB, anyptr_gpC);
2210 e->vorps(ymmA, ymmB, ymmC);
2211 e->vorps(ymmA, ymmB, anyptr_gpC);
2212 e->vpabsb(xmmA, xmmB);
2213 e->vpabsb(xmmA, anyptr_gpB);
2214 e->vpabsd(xmmA, xmmB);
2215 e->vpabsd(xmmA, anyptr_gpB);
2216 e->vpabsw(xmmA, xmmB);
2217 e->vpabsw(xmmA, anyptr_gpB);
2218 e->vpackssdw(xmmA, xmmB, xmmC);
2219 e->vpackssdw(xmmA, xmmB, anyptr_gpC);
2220 e->vpacksswb(xmmA, xmmB, xmmC);
2221 e->vpacksswb(xmmA, xmmB, anyptr_gpC);
2222 e->vpackusdw(xmmA, xmmB, xmmC);
2223 e->vpackusdw(xmmA, xmmB, anyptr_gpC);
2224 e->vpackuswb(xmmA, xmmB, xmmC);
2225 e->vpackuswb(xmmA, xmmB, anyptr_gpC);
2226 e->vpaddb(xmmA, xmmB, xmmC);
2227 e->vpaddb(xmmA, xmmB, anyptr_gpC);
2228 e->vpaddd(xmmA, xmmB, xmmC);
2229 e->vpaddd(xmmA, xmmB, anyptr_gpC);
2230 e->vpaddq(xmmA, xmmB, xmmC);
2231 e->vpaddq(xmmA, xmmB, anyptr_gpC);
2232 e->vpaddw(xmmA, xmmB, xmmC);
2233 e->vpaddw(xmmA, xmmB, anyptr_gpC);
2234 e->vpaddsb(xmmA, xmmB, xmmC);
2235 e->vpaddsb(xmmA, xmmB, anyptr_gpC);
2236 e->vpaddsw(xmmA, xmmB, xmmC);
2237 e->vpaddsw(xmmA, xmmB, anyptr_gpC);
2238 e->vpaddusb(xmmA, xmmB, xmmC);
2239 e->vpaddusb(xmmA, xmmB, anyptr_gpC);
2240 e->vpaddusw(xmmA, xmmB, xmmC);
2241 e->vpaddusw(xmmA, xmmB, anyptr_gpC);
2242 e->vpalignr(xmmA, xmmB, xmmC, 0);
2243 e->vpalignr(xmmA, xmmB, anyptr_gpC, 0);
2244 e->vpand(xmmA, xmmB, xmmC);
2245 e->vpand(xmmA, xmmB, anyptr_gpC);
2246 e->vpandn(xmmA, xmmB, xmmC);
2247 e->vpandn(xmmA, xmmB, anyptr_gpC);
2248 e->vpavgb(xmmA, xmmB, xmmC);
2249 e->vpavgb(xmmA, xmmB, anyptr_gpC);
2250 e->vpavgw(xmmA, xmmB, xmmC);
2251 e->vpavgw(xmmA, xmmB, anyptr_gpC);
2252 e->vpblendvb(xmmA, xmmB, xmmC, xmmD);
2253 e->vpblendvb(xmmA, xmmB, anyptr_gpC, xmmD);
2254 e->vpblendw(xmmA, xmmB, xmmC, 0);
2255 e->vpblendw(xmmA, xmmB, anyptr_gpC, 0);
2256 e->vpcmpeqb(xmmA, xmmB, xmmC);
2257 e->vpcmpeqb(xmmA, xmmB, anyptr_gpC);
2258 e->vpcmpeqd(xmmA, xmmB, xmmC);
2259 e->vpcmpeqd(xmmA, xmmB, anyptr_gpC);
2260 e->vpcmpeqq(xmmA, xmmB, xmmC);
2261 e->vpcmpeqq(xmmA, xmmB, anyptr_gpC);
2262 e->vpcmpeqw(xmmA, xmmB, xmmC);
2263 e->vpcmpeqw(xmmA, xmmB, anyptr_gpC);
2264 e->vpcmpgtb(xmmA, xmmB, xmmC);
2265 e->vpcmpgtb(xmmA, xmmB, anyptr_gpC);
2266 e->vpcmpgtd(xmmA, xmmB, xmmC);
2267 e->vpcmpgtd(xmmA, xmmB, anyptr_gpC);
2268 e->vpcmpgtq(xmmA, xmmB, xmmC);
2269 e->vpcmpgtq(xmmA, xmmB, anyptr_gpC);
2270 e->vpcmpgtw(xmmA, xmmB, xmmC);
2271 e->vpcmpgtw(xmmA, xmmB, anyptr_gpC);
2272 e->vpcmpestri(xmmA, xmmB, 0);
2273 e->vpcmpestri(xmmA, anyptr_gpB, 0);
2274 e->vpcmpestrm(xmmA, xmmB, 0);
2275 e->vpcmpestrm(xmmA, anyptr_gpB, 0);
2276 e->vpcmpistri(xmmA, xmmB, 0);
2277 e->vpcmpistri(xmmA, anyptr_gpB, 0);
2278 e->vpcmpistrm(xmmA, xmmB, 0);
2279 e->vpcmpistrm(xmmA, anyptr_gpB, 0);
2280 e->vpermilpd(xmmA, xmmB, xmmC);
2281 e->vpermilpd(xmmA, xmmB, anyptr_gpC);
2282 e->vpermilpd(ymmA, ymmB, ymmC);
2283 e->vpermilpd(ymmA, ymmB, anyptr_gpC);
2284 e->vpermilpd(xmmA, xmmB, 0);
2285 e->vpermilpd(xmmA, anyptr_gpB, 0);
2286 e->vpermilpd(ymmA, ymmB, 0);
2287 e->vpermilpd(ymmA, anyptr_gpB, 0);
2288 e->vpermilps(xmmA, xmmB, xmmC);
2289 e->vpermilps(xmmA, xmmB, anyptr_gpC);
2290 e->vpermilps(ymmA, ymmB, ymmC);
2291 e->vpermilps(ymmA, ymmB, anyptr_gpC);
2292 e->vpermilps(xmmA, xmmB, 0);
2293 e->vpermilps(xmmA, anyptr_gpB, 0);
2294 e->vpermilps(ymmA, ymmB, 0);
2295 e->vpermilps(ymmA, anyptr_gpB, 0);
2296 e->vperm2f128(ymmA, ymmB, ymmC, 0);
2297 e->vperm2f128(ymmA, ymmB, anyptr_gpC, 0);
2298 e->vpextrb(gzA, xmmB, 0);
2299 e->vpextrb(anyptr_gpA, xmmB, 0);
2300 e->vpextrd(gzA, xmmB, 0);
2301 e->vpextrd(anyptr_gpA, xmmB, 0);
2302 if (isX64) e->vpextrq(gzA, xmmB, 0);
2303 if (isX64) e->vpextrq(anyptr_gpA, xmmB, 0);
2304 e->vpextrw(gzA, xmmB, 0);
2305 e->vpextrw(anyptr_gpA, xmmB, 0);
2306 e->vphaddd(xmmA, xmmB, xmmC);
2307 e->vphaddd(xmmA, xmmB, anyptr_gpC);
2308 e->vphaddsw(xmmA, xmmB, xmmC);
2309 e->vphaddsw(xmmA, xmmB, anyptr_gpC);
2310 e->vphaddw(xmmA, xmmB, xmmC);
2311 e->vphaddw(xmmA, xmmB, anyptr_gpC);
2312 e->vphminposuw(xmmA, xmmB);
2313 e->vphminposuw(xmmA, anyptr_gpB);
2314 e->vphsubd(xmmA, xmmB, xmmC);
2315 e->vphsubd(xmmA, xmmB, anyptr_gpC);
2316 e->vphsubsw(xmmA, xmmB, xmmC);
2317 e->vphsubsw(xmmA, xmmB, anyptr_gpC);
2318 e->vphsubw(xmmA, xmmB, xmmC);
2319 e->vphsubw(xmmA, xmmB, anyptr_gpC);
2320 e->vpinsrb(xmmA, xmmB, gzC, 0);
2321 e->vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
2322 e->vpinsrd(xmmA, xmmB, gzC, 0);
2323 e->vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
2324 e->vpinsrw(xmmA, xmmB, gzC, 0);
2325 e->vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
2326 e->vpmaddubsw(xmmA, xmmB, xmmC);
2327 e->vpmaddubsw(xmmA, xmmB, anyptr_gpC);
2328 e->vpmaddwd(xmmA, xmmB, xmmC);
2329 e->vpmaddwd(xmmA, xmmB, anyptr_gpC);
2330 e->vpmaxsb(xmmA, xmmB, xmmC);
2331 e->vpmaxsb(xmmA, xmmB, anyptr_gpC);
2332 e->vpmaxsd(xmmA, xmmB, xmmC);
2333 e->vpmaxsd(xmmA, xmmB, anyptr_gpC);
2334 e->vpmaxsw(xmmA, xmmB, xmmC);
2335 e->vpmaxsw(xmmA, xmmB, anyptr_gpC);
2336 e->vpmaxub(xmmA, xmmB, xmmC);
2337 e->vpmaxub(xmmA, xmmB, anyptr_gpC);
2338 e->vpmaxud(xmmA, xmmB, xmmC);
2339 e->vpmaxud(xmmA, xmmB, anyptr_gpC);
2340 e->vpmaxuw(xmmA, xmmB, xmmC);
2341 e->vpmaxuw(xmmA, xmmB, anyptr_gpC);
2342 e->vpminsb(xmmA, xmmB, xmmC);
2343 e->vpminsb(xmmA, xmmB, anyptr_gpC);
2344 e->vpminsd(xmmA, xmmB, xmmC);
2345 e->vpminsd(xmmA, xmmB, anyptr_gpC);
2346 e->vpminsw(xmmA, xmmB, xmmC);
2347 e->vpminsw(xmmA, xmmB, anyptr_gpC);
2348 e->vpminub(xmmA, xmmB, xmmC);
2349 e->vpminub(xmmA, xmmB, anyptr_gpC);
2350 e->vpminud(xmmA, xmmB, xmmC);
2351 e->vpminud(xmmA, xmmB, anyptr_gpC);
2352 e->vpminuw(xmmA, xmmB, xmmC);
2353 e->vpminuw(xmmA, xmmB, anyptr_gpC);
2354 e->vpmovmskb(gzA, xmmB);
2355 e->vpmovsxbd(xmmA, xmmB);
2356 e->vpmovsxbd(xmmA, anyptr_gpB);
2357 e->vpmovsxbq(xmmA, xmmB);
2358 e->vpmovsxbq(xmmA, anyptr_gpB);
2359 e->vpmovsxbw(xmmA, xmmB);
2360 e->vpmovsxbw(xmmA, anyptr_gpB);
2361 e->vpmovsxdq(xmmA, xmmB);
2362 e->vpmovsxdq(xmmA, anyptr_gpB);
2363 e->vpmovsxwd(xmmA, xmmB);
2364 e->vpmovsxwd(xmmA, anyptr_gpB);
2365 e->vpmovsxwq(xmmA, xmmB);
2366 e->vpmovsxwq(xmmA, anyptr_gpB);
2367 e->vpmovzxbd(xmmA, xmmB);
2368 e->vpmovzxbd(xmmA, anyptr_gpB);
2369 e->vpmovzxbq(xmmA, xmmB);
2370 e->vpmovzxbq(xmmA, anyptr_gpB);
2371 e->vpmovzxbw(xmmA, xmmB);
2372 e->vpmovzxbw(xmmA, anyptr_gpB);
2373 e->vpmovzxdq(xmmA, xmmB);
2374 e->vpmovzxdq(xmmA, anyptr_gpB);
2375 e->vpmovzxwd(xmmA, xmmB);
2376 e->vpmovzxwd(xmmA, anyptr_gpB);
2377 e->vpmovzxwq(xmmA, xmmB);
2378 e->vpmovzxwq(xmmA, anyptr_gpB);
2379 e->vpmuldq(xmmA, xmmB, xmmC);
2380 e->vpmuldq(xmmA, xmmB, anyptr_gpC);
2381 e->vpmulhrsw(xmmA, xmmB, xmmC);
2382 e->vpmulhrsw(xmmA, xmmB, anyptr_gpC);
2383 e->vpmulhuw(xmmA, xmmB, xmmC);
2384 e->vpmulhuw(xmmA, xmmB, anyptr_gpC);
2385 e->vpmulhw(xmmA, xmmB, xmmC);
2386 e->vpmulhw(xmmA, xmmB, anyptr_gpC);
2387 e->vpmulld(xmmA, xmmB, xmmC);
2388 e->vpmulld(xmmA, xmmB, anyptr_gpC);
2389 e->vpmullw(xmmA, xmmB, xmmC);
2390 e->vpmullw(xmmA, xmmB, anyptr_gpC);
2391 e->vpmuludq(xmmA, xmmB, xmmC);
2392 e->vpmuludq(xmmA, xmmB, anyptr_gpC);
2393 e->vpor(xmmA, xmmB, xmmC);
2394 e->vpor(xmmA, xmmB, anyptr_gpC);
2395 e->vpsadbw(xmmA, xmmB, xmmC);
2396 e->vpsadbw(xmmA, xmmB, anyptr_gpC);
2397 e->vpshufb(xmmA, xmmB, xmmC);
2398 e->vpshufb(xmmA, xmmB, anyptr_gpC);
2399 e->vpshufd(xmmA, xmmB, 0);
2400 e->vpshufd(xmmA, anyptr_gpB, 0);
2401 e->vpshufhw(xmmA, xmmB, 0);
2402 e->vpshufhw(xmmA, anyptr_gpB, 0);
2403 e->vpshuflw(xmmA, xmmB, 0);
2404 e->vpshuflw(xmmA, anyptr_gpB, 0);
2405 e->vpsignb(xmmA, xmmB, xmmC);
2406 e->vpsignb(xmmA, xmmB, anyptr_gpC);
2407 e->vpsignd(xmmA, xmmB, xmmC);
2408 e->vpsignd(xmmA, xmmB, anyptr_gpC);
2409 e->vpsignw(xmmA, xmmB, xmmC);
2410 e->vpsignw(xmmA, xmmB, anyptr_gpC);
2411 e->vpslld(xmmA, xmmB, xmmC);
2412 e->vpslld(xmmA, xmmB, anyptr_gpC);
2413 e->vpslld(xmmA, xmmB, 0);
2414 e->vpslldq(xmmA, xmmB, 0);
2415 e->vpsllq(xmmA, xmmB, xmmC);
2416 e->vpsllq(xmmA, xmmB, anyptr_gpC);
2417 e->vpsllq(xmmA, xmmB, 0);
2418 e->vpsllw(xmmA, xmmB, xmmC);
2419 e->vpsllw(xmmA, xmmB, anyptr_gpC);
2420 e->vpsllw(xmmA, xmmB, 0);
2421 e->vpsrad(xmmA, xmmB, xmmC);
2422 e->vpsrad(xmmA, xmmB, anyptr_gpC);
2423 e->vpsrad(xmmA, xmmB, 0);
2424 e->vpsraw(xmmA, xmmB, xmmC);
2425 e->vpsraw(xmmA, xmmB, anyptr_gpC);
2426 e->vpsraw(xmmA, xmmB, 0);
2427 e->vpsrld(xmmA, xmmB, xmmC);
2428 e->vpsrld(xmmA, xmmB, anyptr_gpC);
2429 e->vpsrld(xmmA, xmmB, 0);
2430 e->vpsrldq(xmmA, xmmB, 0);
2431 e->vpsrlq(xmmA, xmmB, xmmC);
2432 e->vpsrlq(xmmA, xmmB, anyptr_gpC);
2433 e->vpsrlq(xmmA, xmmB, 0);
2434 e->vpsrlw(xmmA, xmmB, xmmC);
2435 e->vpsrlw(xmmA, xmmB, anyptr_gpC);
2436 e->vpsrlw(xmmA, xmmB, 0);
2437 e->vpsubb(xmmA, xmmB, xmmC);
2438 e->vpsubb(xmmA, xmmB, anyptr_gpC);
2439 e->vpsubd(xmmA, xmmB, xmmC);
2440 e->vpsubd(xmmA, xmmB, anyptr_gpC);
2441 e->vpsubq(xmmA, xmmB, xmmC);
2442 e->vpsubq(xmmA, xmmB, anyptr_gpC);
2443 e->vpsubw(xmmA, xmmB, xmmC);
2444 e->vpsubw(xmmA, xmmB, anyptr_gpC);
2445 e->vpsubsb(xmmA, xmmB, xmmC);
2446 e->vpsubsb(xmmA, xmmB, anyptr_gpC);
2447 e->vpsubsw(xmmA, xmmB, xmmC);
2448 e->vpsubsw(xmmA, xmmB, anyptr_gpC);
2449 e->vpsubusb(xmmA, xmmB, xmmC);
2450 e->vpsubusb(xmmA, xmmB, anyptr_gpC);
2451 e->vpsubusw(xmmA, xmmB, xmmC);
2452 e->vpsubusw(xmmA, xmmB, anyptr_gpC);
2453 e->vptest(xmmA, xmmB);
2454 e->vptest(xmmA, anyptr_gpB);
2455 e->vptest(ymmA, ymmB);
2456 e->vptest(ymmA, anyptr_gpB);
2457 e->vpunpckhbw(xmmA, xmmB, xmmC);
2458 e->vpunpckhbw(xmmA, xmmB, anyptr_gpC);
2459 e->vpunpckhdq(xmmA, xmmB, xmmC);
2460 e->vpunpckhdq(xmmA, xmmB, anyptr_gpC);
2461 e->vpunpckhqdq(xmmA, xmmB, xmmC);
2462 e->vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
2463 e->vpunpckhwd(xmmA, xmmB, xmmC);
2464 e->vpunpckhwd(xmmA, xmmB, anyptr_gpC);
2465 e->vpunpcklbw(xmmA, xmmB, xmmC);
2466 e->vpunpcklbw(xmmA, xmmB, anyptr_gpC);
2467 e->vpunpckldq(xmmA, xmmB, xmmC);
2468 e->vpunpckldq(xmmA, xmmB, anyptr_gpC);
2469 e->vpunpcklqdq(xmmA, xmmB, xmmC);
2470 e->vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
2471 e->vpunpcklwd(xmmA, xmmB, xmmC);
2472 e->vpunpcklwd(xmmA, xmmB, anyptr_gpC);
2473 e->vpxor(xmmA, xmmB, xmmC);
2474 e->vpxor(xmmA, xmmB, anyptr_gpC);
2475 e->vrcpps(xmmA, xmmB);
2476 e->vrcpps(xmmA, anyptr_gpB);
2477 e->vrcpps(ymmA, ymmB);
2478 e->vrcpps(ymmA, anyptr_gpB);
2479 e->vrcpss(xmmA, xmmB, xmmC);
2480 e->vrcpss(xmmA, xmmB, anyptr_gpC);
2481 e->vrsqrtps(xmmA, xmmB);
2482 e->vrsqrtps(xmmA, anyptr_gpB);
2483 e->vrsqrtps(ymmA, ymmB);
2484 e->vrsqrtps(ymmA, anyptr_gpB);
2485 e->vrsqrtss(xmmA, xmmB, xmmC);
2486 e->vrsqrtss(xmmA, xmmB, anyptr_gpC);
2487 e->vroundpd(xmmA, xmmB, 0);
2488 e->vroundpd(xmmA, anyptr_gpB, 0);
2489 e->vroundpd(ymmA, ymmB, 0);
2490 e->vroundpd(ymmA, anyptr_gpB, 0);
2491 e->vroundps(xmmA, xmmB, 0);
2492 e->vroundps(xmmA, anyptr_gpB, 0);
2493 e->vroundps(ymmA, ymmB, 0);
2494 e->vroundps(ymmA, anyptr_gpB, 0);
2495 e->vroundsd(xmmA, xmmB, xmmC, 0);
2496 e->vroundsd(xmmA, xmmB, anyptr_gpC, 0);
2497 e->vroundss(xmmA, xmmB, xmmC, 0);
2498 e->vroundss(xmmA, xmmB, anyptr_gpC, 0);
2499 e->vshufpd(xmmA, xmmB, xmmC, 0);
2500 e->vshufpd(xmmA, xmmB, anyptr_gpC, 0);
2501 e->vshufpd(ymmA, ymmB, ymmC, 0);
2502 e->vshufpd(ymmA, ymmB, anyptr_gpC, 0);
2503 e->vshufps(xmmA, xmmB, xmmC, 0);
2504 e->vshufps(xmmA, xmmB, anyptr_gpC, 0);
2505 e->vshufps(ymmA, ymmB, ymmC, 0);
2506 e->vshufps(ymmA, ymmB, anyptr_gpC, 0);
2507 e->vsqrtpd(xmmA, xmmB);
2508 e->vsqrtpd(xmmA, anyptr_gpB);
2509 e->vsqrtpd(ymmA, ymmB);
2510 e->vsqrtpd(ymmA, anyptr_gpB);
2511 e->vsqrtps(xmmA, xmmB);
2512 e->vsqrtps(xmmA, anyptr_gpB);
2513 e->vsqrtps(ymmA, ymmB);
2514 e->vsqrtps(ymmA, anyptr_gpB);
2515 e->vsqrtsd(xmmA, xmmB, xmmC);
2516 e->vsqrtsd(xmmA, xmmB, anyptr_gpC);
2517 e->vsqrtss(xmmA, xmmB, xmmC);
2518 e->vsqrtss(xmmA, xmmB, anyptr_gpC);
2519 e->vstmxcsr(anyptr_gpA);
2520 e->vsubpd(xmmA, xmmB, xmmC);
2521 e->vsubpd(xmmA, xmmB, anyptr_gpC);
2522 e->vsubpd(ymmA, ymmB, ymmC);
2523 e->vsubpd(ymmA, ymmB, anyptr_gpC);
2524 e->vsubps(xmmA, xmmB, xmmC);
2525 e->vsubps(xmmA, xmmB, anyptr_gpC);
2526 e->vsubps(ymmA, ymmB, ymmC);
2527 e->vsubps(ymmA, ymmB, anyptr_gpC);
2528 e->vsubsd(xmmA, xmmB, xmmC);
2529 e->vsubsd(xmmA, xmmB, anyptr_gpC);
2530 e->vsubss(xmmA, xmmB, xmmC);
2531 e->vsubss(xmmA, xmmB, anyptr_gpC);
2532 e->vtestps(xmmA, xmmB);
2533 e->vtestps(xmmA, anyptr_gpB);
2534 e->vtestps(ymmA, ymmB);
2535 e->vtestps(ymmA, anyptr_gpB);
2536 e->vtestpd(xmmA, xmmB);
2537 e->vtestpd(xmmA, anyptr_gpB);
2538 e->vtestpd(ymmA, ymmB);
2539 e->vtestpd(ymmA, anyptr_gpB);
2540 e->vucomisd(xmmA, xmmB);
2541 e->vucomisd(xmmA, anyptr_gpB);
2542 e->vucomiss(xmmA, xmmB);
2543 e->vucomiss(xmmA, anyptr_gpB);
2544 e->vunpckhpd(xmmA, xmmB, xmmC);
2545 e->vunpckhpd(xmmA, xmmB, anyptr_gpC);
2546 e->vunpckhpd(ymmA, ymmB, ymmC);
2547 e->vunpckhpd(ymmA, ymmB, anyptr_gpC);
2548 e->vunpckhps(xmmA, xmmB, xmmC);
2549 e->vunpckhps(xmmA, xmmB, anyptr_gpC);
2550 e->vunpckhps(ymmA, ymmB, ymmC);
2551 e->vunpckhps(ymmA, ymmB, anyptr_gpC);
2552 e->vunpcklpd(xmmA, xmmB, xmmC);
2553 e->vunpcklpd(xmmA, xmmB, anyptr_gpC);
2554 e->vunpcklpd(ymmA, ymmB, ymmC);
2555 e->vunpcklpd(ymmA, ymmB, anyptr_gpC);
2556 e->vunpcklps(xmmA, xmmB, xmmC);
2557 e->vunpcklps(xmmA, xmmB, anyptr_gpC);
2558 e->vunpcklps(ymmA, ymmB, ymmC);
2559 e->vunpcklps(ymmA, ymmB, anyptr_gpC);
2560 e->vxorpd(xmmA, xmmB, xmmC);
2561 e->vxorpd(xmmA, xmmB, anyptr_gpC);
2562 e->vxorpd(ymmA, ymmB, ymmC);
2563 e->vxorpd(ymmA, ymmB, anyptr_gpC);
2564 e->vxorps(xmmA, xmmB, xmmC);
2565 e->vxorps(xmmA, xmmB, anyptr_gpC);
2566 e->vxorps(ymmA, ymmB, ymmC);
2567 e->vxorps(ymmA, ymmB, anyptr_gpC);
2568 e->vzeroall();
2569 e->vex3().vzeroall();
2570 e->vzeroupper();
2571 e->vex3().vzeroupper();
2572
2573 // AVX+AESNI.
2574 e->nop();
2575
2576 e->vaesdec(xmmA, xmmB, xmmC);
2577 e->vaesdec(xmmA, xmmB, anyptr_gpC);
2578 e->vaesdeclast(xmmA, xmmB, xmmC);
2579 e->vaesdeclast(xmmA, xmmB, anyptr_gpC);
2580 e->vaesenc(xmmA, xmmB, xmmC);
2581 e->vaesenc(xmmA, xmmB, anyptr_gpC);
2582 e->vaesenclast(xmmA, xmmB, xmmC);
2583 e->vaesenclast(xmmA, xmmB, anyptr_gpC);
2584 e->vaesimc(xmmA, xmmB);
2585 e->vaesimc(xmmA, anyptr_gpB);
2586 e->vaeskeygenassist(xmmA, xmmB, 0);
2587 e->vaeskeygenassist(xmmA, anyptr_gpB, 0);
2588
2589 // AVX+PCLMULQDQ.
2590 e->nop();
2591
2592 e->vpclmulqdq(xmmA, xmmB, xmmC, 0);
2593 e->vpclmulqdq(xmmA, xmmB, anyptr_gpC, 0);
2594
2595 // AVX2.
2596 e->nop();
2597
2598 e->vbroadcasti128(ymmA, anyptr_gpB);
2599 e->vbroadcastsd(ymmA, xmmB);
2600 e->vbroadcastss(xmmA, xmmB);
2601 e->vbroadcastss(ymmA, xmmB);
2602 e->vextracti128(xmmA, ymmB, 0);
2603 e->vextracti128(anyptr_gpA, ymmB, 0);
2604 e->vgatherdpd(xmmA, vx_ptr, xmmC);
2605 e->vgatherdpd(ymmA, vx_ptr, ymmC);
2606 e->vgatherdps(xmmA, vx_ptr, xmmC);
2607 e->vgatherdps(ymmA, vy_ptr, ymmC);
2608 e->vgatherqpd(xmmA, vx_ptr, xmmC);
2609 e->vgatherqpd(ymmA, vy_ptr, ymmC);
2610 e->vgatherqps(xmmA, vx_ptr, xmmC);
2611 e->vgatherqps(xmmA, vy_ptr, xmmC);
2612 e->vinserti128(ymmA, ymmB, xmmC, 0);
2613 e->vinserti128(ymmA, ymmB, anyptr_gpC, 0);
2614 e->vmovntdqa(ymmA, anyptr_gpB);
2615 e->vmpsadbw(ymmA, ymmB, ymmC, 0);
2616 e->vmpsadbw(ymmA, ymmB, anyptr_gpC, 0);
2617 e->vpabsb(ymmA, ymmB);
2618 e->vpabsb(ymmA, anyptr_gpB);
2619 e->vpabsd(ymmA, ymmB);
2620 e->vpabsd(ymmA, anyptr_gpB);
2621 e->vpabsw(ymmA, ymmB);
2622 e->vpabsw(ymmA, anyptr_gpB);
2623 e->vpackssdw(ymmA, ymmB, ymmC);
2624 e->vpackssdw(ymmA, ymmB, anyptr_gpC);
2625 e->vpacksswb(ymmA, ymmB, ymmC);
2626 e->vpacksswb(ymmA, ymmB, anyptr_gpC);
2627 e->vpackusdw(ymmA, ymmB, ymmC);
2628 e->vpackusdw(ymmA, ymmB, anyptr_gpC);
2629 e->vpackuswb(ymmA, ymmB, ymmC);
2630 e->vpackuswb(ymmA, ymmB, anyptr_gpC);
2631 e->vpaddb(ymmA, ymmB, ymmC);
2632 e->vpaddb(ymmA, ymmB, anyptr_gpC);
2633 e->vpaddd(ymmA, ymmB, ymmC);
2634 e->vpaddd(ymmA, ymmB, anyptr_gpC);
2635 e->vpaddq(ymmA, ymmB, ymmC);
2636 e->vpaddq(ymmA, ymmB, anyptr_gpC);
2637 e->vpaddw(ymmA, ymmB, ymmC);
2638 e->vpaddw(ymmA, ymmB, anyptr_gpC);
2639 e->vpaddsb(ymmA, ymmB, ymmC);
2640 e->vpaddsb(ymmA, ymmB, anyptr_gpC);
2641 e->vpaddsw(ymmA, ymmB, ymmC);
2642 e->vpaddsw(ymmA, ymmB, anyptr_gpC);
2643 e->vpaddusb(ymmA, ymmB, ymmC);
2644 e->vpaddusb(ymmA, ymmB, anyptr_gpC);
2645 e->vpaddusw(ymmA, ymmB, ymmC);
2646 e->vpaddusw(ymmA, ymmB, anyptr_gpC);
2647 e->vpalignr(ymmA, ymmB, ymmC, 0);
2648 e->vpalignr(ymmA, ymmB, anyptr_gpC, 0);
2649 e->vpand(ymmA, ymmB, ymmC);
2650 e->vpand(ymmA, ymmB, anyptr_gpC);
2651 e->vpandn(ymmA, ymmB, ymmC);
2652 e->vpandn(ymmA, ymmB, anyptr_gpC);
2653 e->vpavgb(ymmA, ymmB, ymmC);
2654 e->vpavgb(ymmA, ymmB, anyptr_gpC);
2655 e->vpavgw(ymmA, ymmB, ymmC);
2656 e->vpavgw(ymmA, ymmB, anyptr_gpC);
2657 e->vpblendd(xmmA, xmmB, xmmC, 0);
2658 e->vpblendd(xmmA, xmmB, anyptr_gpC, 0);
2659 e->vpblendd(ymmA, ymmB, ymmC, 0);
2660 e->vpblendd(ymmA, ymmB, anyptr_gpC, 0);
2661 e->vpblendvb(ymmA, ymmB, ymmC, ymmD);
2662 e->vpblendvb(ymmA, ymmB, anyptr_gpC, ymmD);
2663 e->vpblendw(ymmA, ymmB, ymmC, 0);
2664 e->vpblendw(ymmA, ymmB, anyptr_gpC, 0);
2665 e->vpbroadcastb(xmmA, xmmB);
2666 e->vpbroadcastb(xmmA, anyptr_gpB);
2667 e->vpbroadcastb(ymmA, xmmB);
2668 e->vpbroadcastb(ymmA, anyptr_gpB);
2669 e->vpbroadcastd(xmmA, xmmB);
2670 e->vpbroadcastd(xmmA, anyptr_gpB);
2671 e->vpbroadcastd(ymmA, xmmB);
2672 e->vpbroadcastd(ymmA, anyptr_gpB);
2673 e->vpbroadcastq(xmmA, xmmB);
2674 e->vpbroadcastq(xmmA, anyptr_gpB);
2675 e->vpbroadcastq(ymmA, xmmB);
2676 e->vpbroadcastq(ymmA, anyptr_gpB);
2677 e->vpbroadcastw(xmmA, xmmB);
2678 e->vpbroadcastw(xmmA, anyptr_gpB);
2679 e->vpbroadcastw(ymmA, xmmB);
2680 e->vpbroadcastw(ymmA, anyptr_gpB);
2681 e->vpcmpeqb(ymmA, ymmB, ymmC);
2682 e->vpcmpeqb(ymmA, ymmB, anyptr_gpC);
2683 e->vpcmpeqd(ymmA, ymmB, ymmC);
2684 e->vpcmpeqd(ymmA, ymmB, anyptr_gpC);
2685 e->vpcmpeqq(ymmA, ymmB, ymmC);
2686 e->vpcmpeqq(ymmA, ymmB, anyptr_gpC);
2687 e->vpcmpeqw(ymmA, ymmB, ymmC);
2688 e->vpcmpeqw(ymmA, ymmB, anyptr_gpC);
2689 e->vpcmpgtb(ymmA, ymmB, ymmC);
2690 e->vpcmpgtb(ymmA, ymmB, anyptr_gpC);
2691 e->vpcmpgtd(ymmA, ymmB, ymmC);
2692 e->vpcmpgtd(ymmA, ymmB, anyptr_gpC);
2693 e->vpcmpgtq(ymmA, ymmB, ymmC);
2694 e->vpcmpgtq(ymmA, ymmB, anyptr_gpC);
2695 e->vpcmpgtw(ymmA, ymmB, ymmC);
2696 e->vpcmpgtw(ymmA, ymmB, anyptr_gpC);
2697 e->vperm2i128(ymmA, ymmB, ymmC, 0);
2698 e->vperm2i128(ymmA, ymmB, anyptr_gpC, 0);
2699 e->vpermd(ymmA, ymmB, ymmC);
2700 e->vpermd(ymmA, ymmB, anyptr_gpC);
2701 e->vpermps(ymmA, ymmB, ymmC);
2702 e->vpermps(ymmA, ymmB, anyptr_gpC);
2703 e->vpermpd(ymmA, ymmB, 0);
2704 e->vpermpd(ymmA, anyptr_gpB, 0);
2705 e->vpermq(ymmA, ymmB, 0);
2706 e->vpermq(ymmA, anyptr_gpB, 0);
2707 e->vpgatherdd(xmmA, vx_ptr, xmmC);
2708 e->vpgatherdd(ymmA, vy_ptr, ymmC);
2709 e->vpgatherdq(xmmA, vx_ptr, xmmC);
2710 e->vpgatherdq(ymmA, vx_ptr, ymmC);
2711 e->vpgatherqd(xmmA, vx_ptr, xmmC);
2712 e->vpgatherqd(xmmA, vy_ptr, xmmC);
2713 e->vpgatherqq(xmmA, vx_ptr, xmmC);
2714 e->vpgatherqq(ymmA, vy_ptr, ymmC);
2715 e->vpmovmskb(gzA, ymmB);
2716 e->vpmovsxbd(ymmA, anyptr_gpB);
2717 e->vpmovsxbd(ymmA, xmmB);
2718 e->vpmovsxbq(ymmA, anyptr_gpB);
2719 e->vpmovsxbq(ymmA, xmmB);
2720 e->vpmovsxbw(ymmA, anyptr_gpB);
2721 e->vpmovsxbw(ymmA, xmmB);
2722 e->vpmovsxdq(ymmA, anyptr_gpB);
2723 e->vpmovsxdq(ymmA, xmmB);
2724 e->vpmovsxwd(ymmA, anyptr_gpB);
2725 e->vpmovsxwd(ymmA, xmmB);
2726 e->vpmovsxwq(ymmA, anyptr_gpB);
2727 e->vpmovsxwq(ymmA, xmmB);
2728 e->vpmovzxbd(ymmA, anyptr_gpB);
2729 e->vpmovzxbd(ymmA, xmmB);
2730 e->vpmovzxbq(ymmA, anyptr_gpB);
2731 e->vpmovzxbq(ymmA, xmmB);
2732 e->vpmovzxbw(ymmA, anyptr_gpB);
2733 e->vpmovzxbw(ymmA, xmmB);
2734 e->vpmovzxdq(ymmA, anyptr_gpB);
2735 e->vpmovzxdq(ymmA, xmmB);
2736 e->vpmovzxwd(ymmA, anyptr_gpB);
2737 e->vpmovzxwd(ymmA, xmmB);
2738 e->vpmovzxwq(ymmA, anyptr_gpB);
2739 e->vpmovzxwq(ymmA, xmmB);
2740 e->vpshufd(ymmA, anyptr_gpB, 0);
2741 e->vpshufd(ymmA, ymmB, 0);
2742 e->vpshufhw(ymmA, anyptr_gpB, 0);
2743 e->vpshufhw(ymmA, ymmB, 0);
2744 e->vpshuflw(ymmA, anyptr_gpB, 0);
2745 e->vpshuflw(ymmA, ymmB, 0);
2746 e->vpslld(ymmA, ymmB, 0);
2747 e->vpslldq(ymmA, ymmB, 0);
2748 e->vpsllq(ymmA, ymmB, 0);
2749 e->vpsllw(ymmA, ymmB, 0);
2750 e->vpsrad(ymmA, ymmB, 0);
2751 e->vpsraw(ymmA, ymmB, 0);
2752 e->vpsrld(ymmA, ymmB, 0);
2753 e->vpsrldq(ymmA, ymmB, 0);
2754 e->vpsrlq(ymmA, ymmB, 0);
2755 e->vpsrlw(ymmA, ymmB, 0);
2756 e->vphaddd(ymmA, ymmB, anyptr_gpC);
2757 e->vphaddd(ymmA, ymmB, ymmC);
2758 e->vphaddsw(ymmA, ymmB, anyptr_gpC);
2759 e->vphaddsw(ymmA, ymmB, ymmC);
2760 e->vphaddw(ymmA, ymmB, anyptr_gpC);
2761 e->vphaddw(ymmA, ymmB, ymmC);
2762 e->vphsubd(ymmA, ymmB, anyptr_gpC);
2763 e->vphsubd(ymmA, ymmB, ymmC);
2764 e->vphsubsw(ymmA, ymmB, anyptr_gpC);
2765 e->vphsubsw(ymmA, ymmB, ymmC);
2766 e->vphsubw(ymmA, ymmB, anyptr_gpC);
2767 e->vphsubw(ymmA, ymmB, ymmC);
2768 e->vpmaddubsw(ymmA, ymmB, anyptr_gpC);
2769 e->vpmaddubsw(ymmA, ymmB, ymmC);
2770 e->vpmaddwd(ymmA, ymmB, anyptr_gpC);
2771 e->vpmaddwd(ymmA, ymmB, ymmC);
2772 e->vpmaskmovd(anyptr_gpA, xmmB, xmmC);
2773 e->vpmaskmovd(anyptr_gpA, ymmB, ymmC);
2774 e->vpmaskmovd(xmmA, xmmB, anyptr_gpC);
2775 e->vpmaskmovd(ymmA, ymmB, anyptr_gpC);
2776 e->vpmaskmovq(anyptr_gpA, xmmB, xmmC);
2777 e->vpmaskmovq(anyptr_gpA, ymmB, ymmC);
2778 e->vpmaskmovq(xmmA, xmmB, anyptr_gpC);
2779 e->vpmaskmovq(ymmA, ymmB, anyptr_gpC);
2780 e->vpmaxsb(ymmA, ymmB, anyptr_gpC);
2781 e->vpmaxsb(ymmA, ymmB, ymmC);
2782 e->vpmaxsd(ymmA, ymmB, anyptr_gpC);
2783 e->vpmaxsd(ymmA, ymmB, ymmC);
2784 e->vpmaxsw(ymmA, ymmB, anyptr_gpC);
2785 e->vpmaxsw(ymmA, ymmB, ymmC);
2786 e->vpmaxub(ymmA, ymmB, anyptr_gpC);
2787 e->vpmaxub(ymmA, ymmB, ymmC);
2788 e->vpmaxud(ymmA, ymmB, anyptr_gpC);
2789 e->vpmaxud(ymmA, ymmB, ymmC);
2790 e->vpmaxuw(ymmA, ymmB, anyptr_gpC);
2791 e->vpmaxuw(ymmA, ymmB, ymmC);
2792 e->vpminsb(ymmA, ymmB, anyptr_gpC);
2793 e->vpminsb(ymmA, ymmB, ymmC);
2794 e->vpminsd(ymmA, ymmB, anyptr_gpC);
2795 e->vpminsd(ymmA, ymmB, ymmC);
2796 e->vpminsw(ymmA, ymmB, anyptr_gpC);
2797 e->vpminsw(ymmA, ymmB, ymmC);
2798 e->vpminub(ymmA, ymmB, anyptr_gpC);
2799 e->vpminub(ymmA, ymmB, ymmC);
2800 e->vpminud(ymmA, ymmB, anyptr_gpC);
2801 e->vpminud(ymmA, ymmB, ymmC);
2802 e->vpminuw(ymmA, ymmB, anyptr_gpC);
2803 e->vpminuw(ymmA, ymmB, ymmC);
2804 e->vpmuldq(ymmA, ymmB, anyptr_gpC);
2805 e->vpmuldq(ymmA, ymmB, ymmC);
2806 e->vpmulhrsw(ymmA, ymmB, anyptr_gpC);
2807 e->vpmulhrsw(ymmA, ymmB, ymmC);
2808 e->vpmulhuw(ymmA, ymmB, anyptr_gpC);
2809 e->vpmulhuw(ymmA, ymmB, ymmC);
2810 e->vpmulhw(ymmA, ymmB, anyptr_gpC);
2811 e->vpmulhw(ymmA, ymmB, ymmC);
2812 e->vpmulld(ymmA, ymmB, anyptr_gpC);
2813 e->vpmulld(ymmA, ymmB, ymmC);
2814 e->vpmullw(ymmA, ymmB, anyptr_gpC);
2815 e->vpmullw(ymmA, ymmB, ymmC);
2816 e->vpmuludq(ymmA, ymmB, anyptr_gpC);
2817 e->vpmuludq(ymmA, ymmB, ymmC);
2818 e->vpor(ymmA, ymmB, anyptr_gpC);
2819 e->vpor(ymmA, ymmB, ymmC);
2820 e->vpsadbw(ymmA, ymmB, anyptr_gpC);
2821 e->vpsadbw(ymmA, ymmB, ymmC);
2822 e->vpshufb(ymmA, ymmB, anyptr_gpC);
2823 e->vpshufb(ymmA, ymmB, ymmC);
2824 e->vpsignb(ymmA, ymmB, anyptr_gpC);
2825 e->vpsignb(ymmA, ymmB, ymmC);
2826 e->vpsignd(ymmA, ymmB, anyptr_gpC);
2827 e->vpsignd(ymmA, ymmB, ymmC);
2828 e->vpsignw(ymmA, ymmB, anyptr_gpC);
2829 e->vpsignw(ymmA, ymmB, ymmC);
2830 e->vpslld(ymmA, ymmB, anyptr_gpC);
2831 e->vpslld(ymmA, ymmB, xmmC);
2832 e->vpsllq(ymmA, ymmB, anyptr_gpC);
2833 e->vpsllq(ymmA, ymmB, xmmC);
2834 e->vpsllvd(xmmA, xmmB, anyptr_gpC);
2835 e->vpsllvd(xmmA, xmmB, xmmC);
2836 e->vpsllvd(ymmA, ymmB, anyptr_gpC);
2837 e->vpsllvd(ymmA, ymmB, ymmC);
2838 e->vpsllvq(xmmA, xmmB, anyptr_gpC);
2839 e->vpsllvq(xmmA, xmmB, xmmC);
2840 e->vpsllvq(ymmA, ymmB, anyptr_gpC);
2841 e->vpsllvq(ymmA, ymmB, ymmC);
2842 e->vpsllw(ymmA, ymmB, anyptr_gpC);
2843 e->vpsllw(ymmA, ymmB, xmmC);
2844 e->vpsrad(ymmA, ymmB, anyptr_gpC);
2845 e->vpsrad(ymmA, ymmB, xmmC);
2846 e->vpsravd(xmmA, xmmB, anyptr_gpC);
2847 e->vpsravd(xmmA, xmmB, xmmC);
2848 e->vpsravd(ymmA, ymmB, anyptr_gpC);
2849 e->vpsravd(ymmA, ymmB, ymmC);
2850 e->vpsraw(ymmA, ymmB, anyptr_gpC);
2851 e->vpsraw(ymmA, ymmB, xmmC);
2852 e->vpsrld(ymmA, ymmB, anyptr_gpC);
2853 e->vpsrld(ymmA, ymmB, xmmC);
2854 e->vpsrlq(ymmA, ymmB, anyptr_gpC);
2855 e->vpsrlq(ymmA, ymmB, xmmC);
2856 e->vpsrlvd(xmmA, xmmB, anyptr_gpC);
2857 e->vpsrlvd(xmmA, xmmB, xmmC);
2858 e->vpsrlvd(ymmA, ymmB, anyptr_gpC);
2859 e->vpsrlvd(ymmA, ymmB, ymmC);
2860 e->vpsrlvq(xmmA, xmmB, anyptr_gpC);
2861 e->vpsrlvq(xmmA, xmmB, xmmC);
2862 e->vpsrlvq(ymmA, ymmB, anyptr_gpC);
2863 e->vpsrlvq(ymmA, ymmB, ymmC);
2864 e->vpsrlw(ymmA, ymmB, anyptr_gpC);
2865 e->vpsrlw(ymmA, ymmB, xmmC);
2866 e->vpsubb(ymmA, ymmB, anyptr_gpC);
2867 e->vpsubb(ymmA, ymmB, ymmC);
2868 e->vpsubd(ymmA, ymmB, anyptr_gpC);
2869 e->vpsubd(ymmA, ymmB, ymmC);
2870 e->vpsubq(ymmA, ymmB, anyptr_gpC);
2871 e->vpsubq(ymmA, ymmB, ymmC);
2872 e->vpsubsb(ymmA, ymmB, anyptr_gpC);
2873 e->vpsubsb(ymmA, ymmB, ymmC);
2874 e->vpsubsw(ymmA, ymmB, anyptr_gpC);
2875 e->vpsubsw(ymmA, ymmB, ymmC);
2876 e->vpsubusb(ymmA, ymmB, anyptr_gpC);
2877 e->vpsubusb(ymmA, ymmB, ymmC);
2878 e->vpsubusw(ymmA, ymmB, anyptr_gpC);
2879 e->vpsubusw(ymmA, ymmB, ymmC);
2880 e->vpsubw(ymmA, ymmB, anyptr_gpC);
2881 e->vpsubw(ymmA, ymmB, ymmC);
2882 e->vpunpckhbw(ymmA, ymmB, anyptr_gpC);
2883 e->vpunpckhbw(ymmA, ymmB, ymmC);
2884 e->vpunpckhdq(ymmA, ymmB, anyptr_gpC);
2885 e->vpunpckhdq(ymmA, ymmB, ymmC);
2886 e->vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
2887 e->vpunpckhqdq(ymmA, ymmB, ymmC);
2888 e->vpunpckhwd(ymmA, ymmB, anyptr_gpC);
2889 e->vpunpckhwd(ymmA, ymmB, ymmC);
2890 e->vpunpcklbw(ymmA, ymmB, anyptr_gpC);
2891 e->vpunpcklbw(ymmA, ymmB, ymmC);
2892 e->vpunpckldq(ymmA, ymmB, anyptr_gpC);
2893 e->vpunpckldq(ymmA, ymmB, ymmC);
2894 e->vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
2895 e->vpunpcklqdq(ymmA, ymmB, ymmC);
2896 e->vpunpcklwd(ymmA, ymmB, anyptr_gpC);
2897 e->vpunpcklwd(ymmA, ymmB, ymmC);
2898 e->vpxor(ymmA, ymmB, anyptr_gpC);
2899 e->vpxor(ymmA, ymmB, ymmC);
2900
2901 // FMA.
2902 e->nop();
2903
2904 e->vfmadd132pd(xmmA, xmmB, anyptr_gpC);
2905 e->vfmadd132pd(xmmA, xmmB, xmmC);
2906 e->vfmadd132pd(ymmA, ymmB, anyptr_gpC);
2907 e->vfmadd132pd(ymmA, ymmB, ymmC);
2908 e->vfmadd132ps(xmmA, xmmB, anyptr_gpC);
2909 e->vfmadd132ps(xmmA, xmmB, xmmC);
2910 e->vfmadd132ps(ymmA, ymmB, anyptr_gpC);
2911 e->vfmadd132ps(ymmA, ymmB, ymmC);
2912 e->vfmadd132sd(xmmA, xmmB, anyptr_gpC);
2913 e->vfmadd132sd(xmmA, xmmB, xmmC);
2914 e->vfmadd132ss(xmmA, xmmB, anyptr_gpC);
2915 e->vfmadd132ss(xmmA, xmmB, xmmC);
2916 e->vfmadd213pd(xmmA, xmmB, anyptr_gpC);
2917 e->vfmadd213pd(xmmA, xmmB, xmmC);
2918 e->vfmadd213pd(ymmA, ymmB, anyptr_gpC);
2919 e->vfmadd213pd(ymmA, ymmB, ymmC);
2920 e->vfmadd213ps(xmmA, xmmB, anyptr_gpC);
2921 e->vfmadd213ps(xmmA, xmmB, xmmC);
2922 e->vfmadd213ps(ymmA, ymmB, anyptr_gpC);
2923 e->vfmadd213ps(ymmA, ymmB, ymmC);
2924 e->vfmadd213sd(xmmA, xmmB, anyptr_gpC);
2925 e->vfmadd213sd(xmmA, xmmB, xmmC);
2926 e->vfmadd213ss(xmmA, xmmB, anyptr_gpC);
2927 e->vfmadd213ss(xmmA, xmmB, xmmC);
2928 e->vfmadd231pd(xmmA, xmmB, anyptr_gpC);
2929 e->vfmadd231pd(xmmA, xmmB, xmmC);
2930 e->vfmadd231pd(ymmA, ymmB, anyptr_gpC);
2931 e->vfmadd231pd(ymmA, ymmB, ymmC);
2932 e->vfmadd231ps(xmmA, xmmB, anyptr_gpC);
2933 e->vfmadd231ps(xmmA, xmmB, xmmC);
2934 e->vfmadd231ps(ymmA, ymmB, anyptr_gpC);
2935 e->vfmadd231ps(ymmA, ymmB, ymmC);
2936 e->vfmadd231sd(xmmA, xmmB, anyptr_gpC);
2937 e->vfmadd231sd(xmmA, xmmB, xmmC);
2938 e->vfmadd231ss(xmmA, xmmB, anyptr_gpC);
2939 e->vfmadd231ss(xmmA, xmmB, xmmC);
2940 e->vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
2941 e->vfmaddsub132pd(xmmA, xmmB, xmmC);
2942 e->vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
2943 e->vfmaddsub132pd(ymmA, ymmB, ymmC);
2944 e->vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
2945 e->vfmaddsub132ps(xmmA, xmmB, xmmC);
2946 e->vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
2947 e->vfmaddsub132ps(ymmA, ymmB, ymmC);
2948 e->vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
2949 e->vfmaddsub213pd(xmmA, xmmB, xmmC);
2950 e->vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
2951 e->vfmaddsub213pd(ymmA, ymmB, ymmC);
2952 e->vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
2953 e->vfmaddsub213ps(xmmA, xmmB, xmmC);
2954 e->vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
2955 e->vfmaddsub213ps(ymmA, ymmB, ymmC);
2956 e->vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
2957 e->vfmaddsub231pd(xmmA, xmmB, xmmC);
2958 e->vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
2959 e->vfmaddsub231pd(ymmA, ymmB, ymmC);
2960 e->vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
2961 e->vfmaddsub231ps(xmmA, xmmB, xmmC);
2962 e->vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
2963 e->vfmaddsub231ps(ymmA, ymmB, ymmC);
2964 e->vfmsub132pd(xmmA, xmmB, anyptr_gpC);
2965 e->vfmsub132pd(xmmA, xmmB, xmmC);
2966 e->vfmsub132pd(ymmA, ymmB, anyptr_gpC);
2967 e->vfmsub132pd(ymmA, ymmB, ymmC);
2968 e->vfmsub132ps(xmmA, xmmB, anyptr_gpC);
2969 e->vfmsub132ps(xmmA, xmmB, xmmC);
2970 e->vfmsub132ps(ymmA, ymmB, anyptr_gpC);
2971 e->vfmsub132ps(ymmA, ymmB, ymmC);
2972 e->vfmsub132sd(xmmA, xmmB, anyptr_gpC);
2973 e->vfmsub132sd(xmmA, xmmB, xmmC);
2974 e->vfmsub132ss(xmmA, xmmB, anyptr_gpC);
2975 e->vfmsub132ss(xmmA, xmmB, xmmC);
2976 e->vfmsub213pd(xmmA, xmmB, anyptr_gpC);
2977 e->vfmsub213pd(xmmA, xmmB, xmmC);
2978 e->vfmsub213pd(ymmA, ymmB, anyptr_gpC);
2979 e->vfmsub213pd(ymmA, ymmB, ymmC);
2980 e->vfmsub213ps(xmmA, xmmB, anyptr_gpC);
2981 e->vfmsub213ps(xmmA, xmmB, xmmC);
2982 e->vfmsub213ps(ymmA, ymmB, anyptr_gpC);
2983 e->vfmsub213ps(ymmA, ymmB, ymmC);
2984 e->vfmsub213sd(xmmA, xmmB, anyptr_gpC);
2985 e->vfmsub213sd(xmmA, xmmB, xmmC);
2986 e->vfmsub213ss(xmmA, xmmB, anyptr_gpC);
2987 e->vfmsub213ss(xmmA, xmmB, xmmC);
2988 e->vfmsub231pd(xmmA, xmmB, anyptr_gpC);
2989 e->vfmsub231pd(xmmA, xmmB, xmmC);
2990 e->vfmsub231pd(ymmA, ymmB, anyptr_gpC);
2991 e->vfmsub231pd(ymmA, ymmB, ymmC);
2992 e->vfmsub231ps(xmmA, xmmB, anyptr_gpC);
2993 e->vfmsub231ps(xmmA, xmmB, xmmC);
2994 e->vfmsub231ps(ymmA, ymmB, anyptr_gpC);
2995 e->vfmsub231ps(ymmA, ymmB, ymmC);
2996 e->vfmsub231sd(xmmA, xmmB, anyptr_gpC);
2997 e->vfmsub231sd(xmmA, xmmB, xmmC);
2998 e->vfmsub231ss(xmmA, xmmB, anyptr_gpC);
2999 e->vfmsub231ss(xmmA, xmmB, xmmC);
3000 e->vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
3001 e->vfmsubadd132pd(xmmA, xmmB, xmmC);
3002 e->vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
3003 e->vfmsubadd132pd(ymmA, ymmB, ymmC);
3004 e->vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
3005 e->vfmsubadd132ps(xmmA, xmmB, xmmC);
3006 e->vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
3007 e->vfmsubadd132ps(ymmA, ymmB, ymmC);
3008 e->vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
3009 e->vfmsubadd213pd(xmmA, xmmB, xmmC);
3010 e->vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
3011 e->vfmsubadd213pd(ymmA, ymmB, ymmC);
3012 e->vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
3013 e->vfmsubadd213ps(xmmA, xmmB, xmmC);
3014 e->vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
3015 e->vfmsubadd213ps(ymmA, ymmB, ymmC);
3016 e->vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
3017 e->vfmsubadd231pd(xmmA, xmmB, xmmC);
3018 e->vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
3019 e->vfmsubadd231pd(ymmA, ymmB, ymmC);
3020 e->vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
3021 e->vfmsubadd231ps(xmmA, xmmB, xmmC);
3022 e->vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
3023 e->vfmsubadd231ps(ymmA, ymmB, ymmC);
3024 e->vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
3025 e->vfnmadd132pd(xmmA, xmmB, xmmC);
3026 e->vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
3027 e->vfnmadd132pd(ymmA, ymmB, ymmC);
3028 e->vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
3029 e->vfnmadd132ps(xmmA, xmmB, xmmC);
3030 e->vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
3031 e->vfnmadd132ps(ymmA, ymmB, ymmC);
3032 e->vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
3033 e->vfnmadd132sd(xmmA, xmmB, xmmC);
3034 e->vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
3035 e->vfnmadd132ss(xmmA, xmmB, xmmC);
3036 e->vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
3037 e->vfnmadd213pd(xmmA, xmmB, xmmC);
3038 e->vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
3039 e->vfnmadd213pd(ymmA, ymmB, ymmC);
3040 e->vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
3041 e->vfnmadd213ps(xmmA, xmmB, xmmC);
3042 e->vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
3043 e->vfnmadd213ps(ymmA, ymmB, ymmC);
3044 e->vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
3045 e->vfnmadd213sd(xmmA, xmmB, xmmC);
3046 e->vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
3047 e->vfnmadd213ss(xmmA, xmmB, xmmC);
3048 e->vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
3049 e->vfnmadd231pd(xmmA, xmmB, xmmC);
3050 e->vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
3051 e->vfnmadd231pd(ymmA, ymmB, ymmC);
3052 e->vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
3053 e->vfnmadd231ps(xmmA, xmmB, xmmC);
3054 e->vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
3055 e->vfnmadd231ps(ymmA, ymmB, ymmC);
3056 e->vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
3057 e->vfnmadd231sd(xmmA, xmmB, xmmC);
3058 e->vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
3059 e->vfnmadd231ss(xmmA, xmmB, xmmC);
3060 e->vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
3061 e->vfnmsub132pd(xmmA, xmmB, xmmC);
3062 e->vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
3063 e->vfnmsub132pd(ymmA, ymmB, ymmC);
3064 e->vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
3065 e->vfnmsub132ps(xmmA, xmmB, xmmC);
3066 e->vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
3067 e->vfnmsub132ps(ymmA, ymmB, ymmC);
3068 e->vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
3069 e->vfnmsub132sd(xmmA, xmmB, xmmC);
3070 e->vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
3071 e->vfnmsub132ss(xmmA, xmmB, xmmC);
3072 e->vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
3073 e->vfnmsub213pd(xmmA, xmmB, xmmC);
3074 e->vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
3075 e->vfnmsub213pd(ymmA, ymmB, ymmC);
3076 e->vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
3077 e->vfnmsub213ps(xmmA, xmmB, xmmC);
3078 e->vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
3079 e->vfnmsub213ps(ymmA, ymmB, ymmC);
3080 e->vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
3081 e->vfnmsub213sd(xmmA, xmmB, xmmC);
3082 e->vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
3083 e->vfnmsub213ss(xmmA, xmmB, xmmC);
3084 e->vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
3085 e->vfnmsub231pd(xmmA, xmmB, xmmC);
3086 e->vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
3087 e->vfnmsub231pd(ymmA, ymmB, ymmC);
3088 e->vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
3089 e->vfnmsub231ps(xmmA, xmmB, xmmC);
3090 e->vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
3091 e->vfnmsub231ps(ymmA, ymmB, ymmC);
3092 e->vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
3093 e->vfnmsub231sd(xmmA, xmmB, xmmC);
3094 e->vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
3095 e->vfnmsub231ss(xmmA, xmmB, xmmC);
3096
3097 // FMA4.
3098 e->nop();
3099
3100 e->vfmaddpd(xmmA, xmmB, xmmC, xmmD);
3101 e->vfmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
3102 e->vfmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
3103 e->vfmaddpd(ymmA, ymmB, ymmC, ymmD);
3104 e->vfmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
3105 e->vfmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
3106 e->vfmaddps(xmmA, xmmB, xmmC, xmmD);
3107 e->vfmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
3108 e->vfmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
3109 e->vfmaddps(ymmA, ymmB, ymmC, ymmD);
3110 e->vfmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
3111 e->vfmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
3112 e->vfmaddsd(xmmA, xmmB, xmmC, xmmD);
3113 e->vfmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
3114 e->vfmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
3115 e->vfmaddss(xmmA, xmmB, xmmC, xmmD);
3116 e->vfmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
3117 e->vfmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
3118 e->vfmaddsubpd(xmmA, xmmB, xmmC, xmmD);
3119 e->vfmaddsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
3120 e->vfmaddsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
3121 e->vfmaddsubpd(ymmA, ymmB, ymmC, ymmD);
3122 e->vfmaddsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
3123 e->vfmaddsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
3124 e->vfmaddsubps(xmmA, xmmB, xmmC, xmmD);
3125 e->vfmaddsubps(xmmA, xmmB, anyptr_gpC, xmmD);
3126 e->vfmaddsubps(xmmA, xmmB, xmmC, anyptr_gpD);
3127 e->vfmaddsubps(ymmA, ymmB, ymmC, ymmD);
3128 e->vfmaddsubps(ymmA, ymmB, anyptr_gpC, ymmD);
3129 e->vfmaddsubps(ymmA, ymmB, ymmC, anyptr_gpD);
3130 e->vfmsubaddpd(xmmA, xmmB, xmmC, xmmD);
3131 e->vfmsubaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
3132 e->vfmsubaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
3133 e->vfmsubaddpd(ymmA, ymmB, ymmC, ymmD);
3134 e->vfmsubaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
3135 e->vfmsubaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
3136 e->vfmsubaddps(xmmA, xmmB, xmmC, xmmD);
3137 e->vfmsubaddps(xmmA, xmmB, anyptr_gpC, xmmD);
3138 e->vfmsubaddps(xmmA, xmmB, xmmC, anyptr_gpD);
3139 e->vfmsubaddps(ymmA, ymmB, ymmC, ymmD);
3140 e->vfmsubaddps(ymmA, ymmB, anyptr_gpC, ymmD);
3141 e->vfmsubaddps(ymmA, ymmB, ymmC, anyptr_gpD);
3142 e->vfmsubpd(xmmA, xmmB, xmmC, xmmD);
3143 e->vfmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
3144 e->vfmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
3145 e->vfmsubpd(ymmA, ymmB, ymmC, ymmD);
3146 e->vfmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
3147 e->vfmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
3148 e->vfmsubps(xmmA, xmmB, xmmC, xmmD);
3149 e->vfmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
3150 e->vfmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
3151 e->vfmsubps(ymmA, ymmB, ymmC, ymmD);
3152 e->vfmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
3153 e->vfmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
3154 e->vfmsubsd(xmmA, xmmB, xmmC, xmmD);
3155 e->vfmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
3156 e->vfmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
3157 e->vfmsubss(xmmA, xmmB, xmmC, xmmD);
3158 e->vfmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
3159 e->vfmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
3160 e->vfnmaddpd(xmmA, xmmB, xmmC, xmmD);
3161 e->vfnmaddpd(xmmA, xmmB, anyptr_gpC, xmmD);
3162 e->vfnmaddpd(xmmA, xmmB, xmmC, anyptr_gpD);
3163 e->vfnmaddpd(ymmA, ymmB, ymmC, ymmD);
3164 e->vfnmaddpd(ymmA, ymmB, anyptr_gpC, ymmD);
3165 e->vfnmaddpd(ymmA, ymmB, ymmC, anyptr_gpD);
3166 e->vfnmaddps(xmmA, xmmB, xmmC, xmmD);
3167 e->vfnmaddps(xmmA, xmmB, anyptr_gpC, xmmD);
3168 e->vfnmaddps(xmmA, xmmB, xmmC, anyptr_gpD);
3169 e->vfnmaddps(ymmA, ymmB, ymmC, ymmD);
3170 e->vfnmaddps(ymmA, ymmB, anyptr_gpC, ymmD);
3171 e->vfnmaddps(ymmA, ymmB, ymmC, anyptr_gpD);
3172 e->vfnmaddsd(xmmA, xmmB, xmmC, xmmD);
3173 e->vfnmaddsd(xmmA, xmmB, anyptr_gpC, xmmD);
3174 e->vfnmaddsd(xmmA, xmmB, xmmC, anyptr_gpD);
3175 e->vfnmaddss(xmmA, xmmB, xmmC, xmmD);
3176 e->vfnmaddss(xmmA, xmmB, anyptr_gpC, xmmD);
3177 e->vfnmaddss(xmmA, xmmB, xmmC, anyptr_gpD);
3178 e->vfnmsubpd(xmmA, xmmB, xmmC, xmmD);
3179 e->vfnmsubpd(xmmA, xmmB, anyptr_gpC, xmmD);
3180 e->vfnmsubpd(xmmA, xmmB, xmmC, anyptr_gpD);
3181 e->vfnmsubpd(ymmA, ymmB, ymmC, ymmD);
3182 e->vfnmsubpd(ymmA, ymmB, anyptr_gpC, ymmD);
3183 e->vfnmsubpd(ymmA, ymmB, ymmC, anyptr_gpD);
3184 e->vfnmsubps(xmmA, xmmB, xmmC, xmmD);
3185 e->vfnmsubps(xmmA, xmmB, anyptr_gpC, xmmD);
3186 e->vfnmsubps(xmmA, xmmB, xmmC, anyptr_gpD);
3187 e->vfnmsubps(ymmA, ymmB, ymmC, ymmD);
3188 e->vfnmsubps(ymmA, ymmB, anyptr_gpC, ymmD);
3189 e->vfnmsubps(ymmA, ymmB, ymmC, anyptr_gpD);
3190 e->vfnmsubsd(xmmA, xmmB, xmmC, xmmD);
3191 e->vfnmsubsd(xmmA, xmmB, anyptr_gpC, xmmD);
3192 e->vfnmsubsd(xmmA, xmmB, xmmC, anyptr_gpD);
3193 e->vfnmsubss(xmmA, xmmB, xmmC, xmmD);
3194 e->vfnmsubss(xmmA, xmmB, anyptr_gpC, xmmD);
3195 e->vfnmsubss(xmmA, xmmB, xmmC, anyptr_gpD);
3196
3197 // XOP.
3198 e->nop();
3199
3200 e->vfrczpd(xmmA, xmmB);
3201 e->vfrczpd(xmmA, anyptr_gpB);
3202 e->vfrczpd(ymmA, ymmB);
3203 e->vfrczpd(ymmA, anyptr_gpB);
3204 e->vfrczps(xmmA, xmmB);
3205 e->vfrczps(xmmA, anyptr_gpB);
3206 e->vfrczps(ymmA, ymmB);
3207 e->vfrczps(ymmA, anyptr_gpB);
3208 e->vfrczsd(xmmA, xmmB);
3209 e->vfrczsd(xmmA, anyptr_gpB);
3210 e->vfrczss(xmmA, xmmB);
3211 e->vfrczss(xmmA, anyptr_gpB);
3212 e->vpcmov(xmmA, xmmB, xmmC, xmmD);
3213 e->vpcmov(xmmA, xmmB, anyptr_gpC, xmmD);
3214 e->vpcmov(xmmA, xmmB, xmmC, anyptr_gpD);
3215 e->vpcmov(ymmA, ymmB, ymmC, ymmD);
3216 e->vpcmov(ymmA, ymmB, anyptr_gpC, ymmD);
3217 e->vpcmov(ymmA, ymmB, ymmC, anyptr_gpD);
3218 e->vpcomb(xmmA, xmmB, xmmC, 0);
3219 e->vpcomb(xmmA, xmmB, anyptr_gpC, 0);
3220 e->vpcomd(xmmA, xmmB, xmmC, 0);
3221 e->vpcomd(xmmA, xmmB, anyptr_gpC, 0);
3222 e->vpcomq(xmmA, xmmB, xmmC, 0);
3223 e->vpcomq(xmmA, xmmB, anyptr_gpC, 0);
3224 e->vpcomw(xmmA, xmmB, xmmC, 0);
3225 e->vpcomw(xmmA, xmmB, anyptr_gpC, 0);
3226 e->vpcomub(xmmA, xmmB, xmmC, 0);
3227 e->vpcomub(xmmA, xmmB, anyptr_gpC, 0);
3228 e->vpcomud(xmmA, xmmB, xmmC, 0);
3229 e->vpcomud(xmmA, xmmB, anyptr_gpC, 0);
3230 e->vpcomuq(xmmA, xmmB, xmmC, 0);
3231 e->vpcomuq(xmmA, xmmB, anyptr_gpC, 0);
3232 e->vpcomuw(xmmA, xmmB, xmmC, 0);
3233 e->vpcomuw(xmmA, xmmB, anyptr_gpC, 0);
3234 e->vpermil2pd(xmmA, xmmB, xmmC, xmmD, 0);
3235 e->vpermil2pd(xmmA, xmmB, anyptr_gpC, xmmD, 0);
3236 e->vpermil2pd(xmmA, xmmB, xmmC, anyptr_gpD, 0);
3237 e->vpermil2pd(ymmA, ymmB, ymmC, ymmD, 0);
3238 e->vpermil2pd(ymmA, ymmB, anyptr_gpC, ymmD, 0);
3239 e->vpermil2pd(ymmA, ymmB, ymmC, anyptr_gpD, 0);
3240 e->vpermil2ps(xmmA, xmmB, xmmC, xmmD, 0);
3241 e->vpermil2ps(xmmA, xmmB, anyptr_gpC, xmmD, 0);
3242 e->vpermil2ps(xmmA, xmmB, xmmC, anyptr_gpD, 0);
3243 e->vpermil2ps(ymmA, ymmB, ymmC, ymmD, 0);
3244 e->vpermil2ps(ymmA, ymmB, anyptr_gpC, ymmD, 0);
3245 e->vpermil2ps(ymmA, ymmB, ymmC, anyptr_gpD, 0);
3246 e->vphaddbd(xmmA, xmmB);
3247 e->vphaddbd(xmmA, anyptr_gpB);
3248 e->vphaddbq(xmmA, xmmB);
3249 e->vphaddbq(xmmA, anyptr_gpB);
3250 e->vphaddbw(xmmA, xmmB);
3251 e->vphaddbw(xmmA, anyptr_gpB);
3252 e->vphadddq(xmmA, xmmB);
3253 e->vphadddq(xmmA, anyptr_gpB);
3254 e->vphaddwd(xmmA, xmmB);
3255 e->vphaddwd(xmmA, anyptr_gpB);
3256 e->vphaddwq(xmmA, xmmB);
3257 e->vphaddwq(xmmA, anyptr_gpB);
3258 e->vphaddubd(xmmA, xmmB);
3259 e->vphaddubd(xmmA, anyptr_gpB);
3260 e->vphaddubq(xmmA, xmmB);
3261 e->vphaddubq(xmmA, anyptr_gpB);
3262 e->vphaddubw(xmmA, xmmB);
3263 e->vphaddubw(xmmA, anyptr_gpB);
3264 e->vphaddudq(xmmA, xmmB);
3265 e->vphaddudq(xmmA, anyptr_gpB);
3266 e->vphadduwd(xmmA, xmmB);
3267 e->vphadduwd(xmmA, anyptr_gpB);
3268 e->vphadduwq(xmmA, xmmB);
3269 e->vphadduwq(xmmA, anyptr_gpB);
3270 e->vphsubbw(xmmA, xmmB);
3271 e->vphsubbw(xmmA, anyptr_gpB);
3272 e->vphsubdq(xmmA, xmmB);
3273 e->vphsubdq(xmmA, anyptr_gpB);
3274 e->vphsubwd(xmmA, xmmB);
3275 e->vphsubwd(xmmA, anyptr_gpB);
3276 e->vpmacsdd(xmmA, xmmB, xmmC, xmmD);
3277 e->vpmacsdd(xmmA, xmmB, anyptr_gpC, xmmD);
3278 e->vpmacsdqh(xmmA, xmmB, xmmC, xmmD);
3279 e->vpmacsdqh(xmmA, xmmB, anyptr_gpC, xmmD);
3280 e->vpmacsdql(xmmA, xmmB, xmmC, xmmD);
3281 e->vpmacsdql(xmmA, xmmB, anyptr_gpC, xmmD);
3282 e->vpmacswd(xmmA, xmmB, xmmC, xmmD);
3283 e->vpmacswd(xmmA, xmmB, anyptr_gpC, xmmD);
3284 e->vpmacsww(xmmA, xmmB, xmmC, xmmD);
3285 e->vpmacsww(xmmA, xmmB, anyptr_gpC, xmmD);
3286 e->vpmacssdd(xmmA, xmmB, xmmC, xmmD);
3287 e->vpmacssdd(xmmA, xmmB, anyptr_gpC, xmmD);
3288 e->vpmacssdqh(xmmA, xmmB, xmmC, xmmD);
3289 e->vpmacssdqh(xmmA, xmmB, anyptr_gpC, xmmD);
3290 e->vpmacssdql(xmmA, xmmB, xmmC, xmmD);
3291 e->vpmacssdql(xmmA, xmmB, anyptr_gpC, xmmD);
3292 e->vpmacsswd(xmmA, xmmB, xmmC, xmmD);
3293 e->vpmacsswd(xmmA, xmmB, anyptr_gpC, xmmD);
3294 e->vpmacssww(xmmA, xmmB, xmmC, xmmD);
3295 e->vpmacssww(xmmA, xmmB, anyptr_gpC, xmmD);
3296 e->vpmadcsswd(xmmA, xmmB, xmmC, xmmD);
3297 e->vpmadcsswd(xmmA, xmmB, anyptr_gpC, xmmD);
3298 e->vpmadcswd(xmmA, xmmB, xmmC, xmmD);
3299 e->vpmadcswd(xmmA, xmmB, anyptr_gpC, xmmD);
3300 e->vpperm(xmmA, xmmB, xmmC, xmmD);
3301 e->vpperm(xmmA, xmmB, anyptr_gpC, xmmD);
3302 e->vpperm(xmmA, xmmB, xmmC, anyptr_gpD);
3303 e->vprotb(xmmA, xmmB, xmmC);
3304 e->vprotb(xmmA, anyptr_gpB, xmmC);
3305 e->vprotb(xmmA, xmmB, anyptr_gpC);
3306 e->vprotb(xmmA, xmmB, 0);
3307 e->vprotb(xmmA, anyptr_gpB, 0);
3308 e->vprotd(xmmA, xmmB, xmmC);
3309 e->vprotd(xmmA, anyptr_gpB, xmmC);
3310 e->vprotd(xmmA, xmmB, anyptr_gpC);
3311 e->vprotd(xmmA, xmmB, 0);
3312 e->vprotd(xmmA, anyptr_gpB, 0);
3313 e->vprotq(xmmA, xmmB, xmmC);
3314 e->vprotq(xmmA, anyptr_gpB, xmmC);
3315 e->vprotq(xmmA, xmmB, anyptr_gpC);
3316 e->vprotq(xmmA, xmmB, 0);
3317 e->vprotq(xmmA, anyptr_gpB, 0);
3318 e->vprotw(xmmA, xmmB, xmmC);
3319 e->vprotw(xmmA, anyptr_gpB, xmmC);
3320 e->vprotw(xmmA, xmmB, anyptr_gpC);
3321 e->vprotw(xmmA, xmmB, 0);
3322 e->vprotw(xmmA, anyptr_gpB, 0);
3323 e->vpshab(xmmA, xmmB, xmmC);
3324 e->vpshab(xmmA, anyptr_gpB, xmmC);
3325 e->vpshab(xmmA, xmmB, anyptr_gpC);
3326 e->vpshad(xmmA, xmmB, xmmC);
3327 e->vpshad(xmmA, anyptr_gpB, xmmC);
3328 e->vpshad(xmmA, xmmB, anyptr_gpC);
3329 e->vpshaq(xmmA, xmmB, xmmC);
3330 e->vpshaq(xmmA, anyptr_gpB, xmmC);
3331 e->vpshaq(xmmA, xmmB, anyptr_gpC);
3332 e->vpshaw(xmmA, xmmB, xmmC);
3333 e->vpshaw(xmmA, anyptr_gpB, xmmC);
3334 e->vpshaw(xmmA, xmmB, anyptr_gpC);
3335 e->vpshlb(xmmA, xmmB, xmmC);
3336 e->vpshlb(xmmA, anyptr_gpB, xmmC);
3337 e->vpshlb(xmmA, xmmB, anyptr_gpC);
3338 e->vpshld(xmmA, xmmB, xmmC);
3339 e->vpshld(xmmA, anyptr_gpB, xmmC);
3340 e->vpshld(xmmA, xmmB, anyptr_gpC);
3341 e->vpshlq(xmmA, xmmB, xmmC);
3342 e->vpshlq(xmmA, anyptr_gpB, xmmC);
3343 e->vpshlq(xmmA, xmmB, anyptr_gpC);
3344 e->vpshlw(xmmA, xmmB, xmmC);
3345 e->vpshlw(xmmA, anyptr_gpB, xmmC);
3346 e->vpshlw(xmmA, xmmB, anyptr_gpC);
3347
3348 // F16C.
3349 e->nop();
3350
3351 e->vcvtph2ps(xmmA, xmmB);
3352 e->vcvtph2ps(xmmA, anyptr_gpB);
3353 e->vcvtph2ps(ymmA, xmmB);
3354 e->vcvtph2ps(ymmA, anyptr_gpB);
3355 e->vcvtps2ph(xmmA, xmmB, 0);
3356 e->vcvtps2ph(anyptr_gpA, xmmB, 0);
3357 e->vcvtps2ph(xmmA, ymmB, 0);
3358 e->vcvtps2ph(anyptr_gpA, ymmB, 0);
3359
3360 // AVX512.
3361 e->nop();
3362
3363 e->kaddb(kA, kB, kC);
3364 e->kaddd(kA, kB, kC);
3365 e->kaddq(kA, kB, kC);
3366 e->kaddw(kA, kB, kC);
3367 e->kandb(kA, kB, kC);
3368 e->kandd(kA, kB, kC);
3369 e->kandnb(kA, kB, kC);
3370 e->kandnd(kA, kB, kC);
3371 e->kandnq(kA, kB, kC);
3372 e->kandnw(kA, kB, kC);
3373 e->kandq(kA, kB, kC);
3374 e->kandw(kA, kB, kC);
3375 e->kmovb(kA, kB);
3376 e->kmovb(kA, anyptr_gpB);
3377 e->kmovb(kA, gdB);
3378 if (isX64) e->kmovb(kA, gzB);
3379 e->kmovb(anyptr_gpA, kB);
3380 e->kmovb(gdA, kB);
3381 if (isX64) e->kmovb(gzA, kB);
3382 e->kmovd(kA, kB);
3383 e->kmovd(kA, anyptr_gpB);
3384 e->kmovd(kA, gdB);
3385 if (isX64) e->kmovd(kA, gzB);
3386 e->kmovd(anyptr_gpA, kB);
3387 e->kmovd(gdA, kB);
3388 if (isX64) e->kmovd(gzA, kB);
3389 e->kmovq(kA, kB);
3390 e->kmovq(kA, anyptr_gpB);
3391 if (isX64) e->kmovq(kA, gzB);
3392 e->kmovq(anyptr_gpA, kB);
3393 if (isX64) e->kmovq(gzA, kB);
3394 e->kmovw(kA, kB);
3395 e->kmovw(kA, anyptr_gpB);
3396 e->kmovw(kA, gdB);
3397 if (isX64) e->kmovw(kA, gzB);
3398 e->kmovw(anyptr_gpA, kB);
3399 e->kmovw(gdA, kB);
3400 if (isX64) e->kmovw(gzA, kB);
3401 e->knotb(kA, kB);
3402 e->knotd(kA, kB);
3403 e->knotq(kA, kB);
3404 e->knotw(kA, kB);
3405 e->korb(kA, kB, kC);
3406 e->kord(kA, kB, kC);
3407 e->korq(kA, kB, kC);
3408 e->kortestb(kA, kB);
3409 e->kortestd(kA, kB);
3410 e->kortestq(kA, kB);
3411 e->kortestw(kA, kB);
3412 e->korw(kA, kB, kC);
3413 e->kshiftlb(kA, kB, 0);
3414 e->kshiftld(kA, kB, 0);
3415 e->kshiftlq(kA, kB, 0);
3416 e->kshiftlw(kA, kB, 0);
3417 e->kshiftrb(kA, kB, 0);
3418 e->kshiftrd(kA, kB, 0);
3419 e->kshiftrq(kA, kB, 0);
3420 e->kshiftrw(kA, kB, 0);
3421 e->ktestb(kA, kB);
3422 e->ktestd(kA, kB);
3423 e->ktestq(kA, kB);
3424 e->ktestw(kA, kB);
3425 e->kunpckbw(kA, kB, kC);
3426 e->kunpckdq(kA, kB, kC);
3427 e->kunpckwd(kA, kB, kC);
3428 e->kxnorb(kA, kB, kC);
3429 e->kxnord(kA, kB, kC);
3430 e->kxnorq(kA, kB, kC);
3431 e->kxnorw(kA, kB, kC);
3432 e->kxorb(kA, kB, kC);
3433 e->kxord(kA, kB, kC);
3434 e->kxorq(kA, kB, kC);
3435 e->kxorw(kA, kB, kC);
3436 e->nop();
3437
3438 e->vaddpd(xmmA, xmmB, xmmC);
3439 e->vaddpd(xmmA, xmmB, anyptr_gpC);
3440 e->vaddpd(ymmA, ymmB, ymmC);
3441 e->vaddpd(ymmA, ymmB, anyptr_gpC);
3442 e->vaddpd(zmmA, zmmB, zmmC);
3443 e->vaddpd(zmmA, zmmB, anyptr_gpC);
3444 e->vaddps(xmmA, xmmB, xmmC);
3445 e->vaddps(xmmA, xmmB, anyptr_gpC);
3446 e->vaddps(ymmA, ymmB, ymmC);
3447 e->vaddps(ymmA, ymmB, anyptr_gpC);
3448 e->vaddps(zmmA, zmmB, zmmC);
3449 e->vaddps(zmmA, zmmB, anyptr_gpC);
3450 e->vaddsd(xmmA, xmmB, xmmC);
3451 e->vaddsd(xmmA, xmmB, anyptr_gpC);
3452 e->vaddss(xmmA, xmmB, xmmC);
3453 e->vaddss(xmmA, xmmB, anyptr_gpC);
3454 e->valignd(xmmA, xmmB, xmmC, 0);
3455 e->valignd(xmmA, xmmB, anyptr_gpC, 0);
3456 e->valignd(ymmA, ymmB, ymmC, 0);
3457 e->valignd(ymmA, ymmB, anyptr_gpC, 0);
3458 e->valignd(zmmA, zmmB, zmmC, 0);
3459 e->valignd(zmmA, zmmB, anyptr_gpC, 0);
3460 e->valignq(xmmA, xmmB, xmmC, 0);
3461 e->valignq(xmmA, xmmB, anyptr_gpC, 0);
3462 e->valignq(ymmA, ymmB, ymmC, 0);
3463 e->valignq(ymmA, ymmB, anyptr_gpC, 0);
3464 e->valignq(zmmA, zmmB, zmmC, 0);
3465 e->valignq(zmmA, zmmB, anyptr_gpC, 0);
3466 e->vandnpd(xmmA, xmmB, xmmC);
3467 e->vandnpd(xmmA, xmmB, anyptr_gpC);
3468 e->vandnpd(ymmA, ymmB, ymmC);
3469 e->vandnpd(ymmA, ymmB, anyptr_gpC);
3470 e->vandnpd(zmmA, zmmB, zmmC);
3471 e->vandnpd(zmmA, zmmB, anyptr_gpC);
3472 e->vandnps(xmmA, xmmB, xmmC);
3473 e->vandnps(xmmA, xmmB, anyptr_gpC);
3474 e->vandnps(ymmA, ymmB, ymmC);
3475 e->vandnps(ymmA, ymmB, anyptr_gpC);
3476 e->vandnps(zmmA, zmmB, zmmC);
3477 e->vandnps(zmmA, zmmB, anyptr_gpC);
3478 e->vandpd(xmmA, xmmB, xmmC);
3479 e->vandpd(xmmA, xmmB, anyptr_gpC);
3480 e->vandpd(ymmA, ymmB, ymmC);
3481 e->vandpd(ymmA, ymmB, anyptr_gpC);
3482 e->vandpd(zmmA, zmmB, zmmC);
3483 e->vandpd(zmmA, zmmB, anyptr_gpC);
3484 e->vandps(xmmA, xmmB, xmmC);
3485 e->vandps(xmmA, xmmB, anyptr_gpC);
3486 e->vandps(ymmA, ymmB, ymmC);
3487 e->vandps(ymmA, ymmB, anyptr_gpC);
3488 e->vandps(zmmA, zmmB, zmmC);
3489 e->vandps(zmmA, zmmB, anyptr_gpC);
3490 e->vblendmb(xmmA, xmmB, xmmC);
3491 e->vblendmb(xmmA, xmmB, anyptr_gpC);
3492 e->vblendmb(ymmA, ymmB, ymmC);
3493 e->vblendmb(ymmA, ymmB, anyptr_gpC);
3494 e->vblendmb(zmmA, zmmB, zmmC);
3495 e->vblendmb(zmmA, zmmB, anyptr_gpC);
3496 e->vblendmd(xmmA, xmmB, xmmC);
3497 e->vblendmd(xmmA, xmmB, anyptr_gpC);
3498 e->vblendmd(ymmA, ymmB, ymmC);
3499 e->vblendmd(ymmA, ymmB, anyptr_gpC);
3500 e->vblendmd(zmmA, zmmB, zmmC);
3501 e->vblendmd(zmmA, zmmB, anyptr_gpC);
3502 e->vblendmpd(xmmA, xmmB, xmmC);
3503 e->vblendmpd(xmmA, xmmB, anyptr_gpC);
3504 e->vblendmpd(ymmA, ymmB, ymmC);
3505 e->vblendmpd(ymmA, ymmB, anyptr_gpC);
3506 e->vblendmpd(zmmA, zmmB, zmmC);
3507 e->vblendmpd(zmmA, zmmB, anyptr_gpC);
3508 e->vblendmps(xmmA, xmmB, xmmC);
3509 e->vblendmps(xmmA, xmmB, anyptr_gpC);
3510 e->vblendmps(ymmA, ymmB, ymmC);
3511 e->vblendmps(ymmA, ymmB, anyptr_gpC);
3512 e->vblendmps(zmmA, zmmB, zmmC);
3513 e->vblendmps(zmmA, zmmB, anyptr_gpC);
3514 e->vblendmq(xmmA, xmmB, xmmC);
3515 e->vblendmq(xmmA, xmmB, anyptr_gpC);
3516 e->vblendmq(ymmA, ymmB, ymmC);
3517 e->vblendmq(ymmA, ymmB, anyptr_gpC);
3518 e->vblendmq(zmmA, zmmB, zmmC);
3519 e->vblendmq(zmmA, zmmB, anyptr_gpC);
3520 e->vblendmw(xmmA, xmmB, xmmC);
3521 e->vblendmw(xmmA, xmmB, anyptr_gpC);
3522 e->vblendmw(ymmA, ymmB, ymmC);
3523 e->vblendmw(ymmA, ymmB, anyptr_gpC);
3524 e->vblendmw(zmmA, zmmB, zmmC);
3525 e->vblendmw(zmmA, zmmB, anyptr_gpC);
3526 e->vbroadcastf32x2(ymmA, xmmB);
3527 e->vbroadcastf32x2(ymmA, anyptr_gpB);
3528 e->vbroadcastf32x2(zmmA, xmmB);
3529 e->vbroadcastf32x2(zmmA, anyptr_gpB);
3530 e->vbroadcastf32x4(ymmA, anyptr_gpB);
3531 e->vbroadcastf32x4(zmmA, anyptr_gpB);
3532 e->vbroadcastf32x8(zmmA, anyptr_gpB);
3533 e->vbroadcastf64x2(ymmA, anyptr_gpB);
3534 e->vbroadcastf64x2(zmmA, anyptr_gpB);
3535 e->vbroadcastf64x4(zmmA, anyptr_gpB);
3536 e->vbroadcasti32x2(xmmA, xmmB);
3537 e->vbroadcasti32x2(xmmA, anyptr_gpB);
3538 e->vbroadcasti32x2(ymmA, xmmB);
3539 e->vbroadcasti32x2(ymmA, anyptr_gpB);
3540 e->vbroadcasti32x2(zmmA, xmmB);
3541 e->vbroadcasti32x2(zmmA, anyptr_gpB);
3542 e->vbroadcasti32x4(ymmA, anyptr_gpB);
3543 e->vbroadcasti32x4(zmmA, anyptr_gpB);
3544 e->vbroadcasti32x8(zmmA, anyptr_gpB);
3545 e->vbroadcasti64x2(ymmA, anyptr_gpB);
3546 e->vbroadcasti64x2(zmmA, anyptr_gpB);
3547 e->vbroadcasti64x4(zmmA, anyptr_gpB);
3548 e->vbroadcastsd(ymmA, xmmB);
3549 e->vbroadcastsd(ymmA, anyptr_gpB);
3550 e->vbroadcastsd(zmmA, xmmB);
3551 e->vbroadcastsd(zmmA, anyptr_gpB);
3552 e->vbroadcastss(xmmA, xmmB);
3553 e->vbroadcastss(xmmA, anyptr_gpB);
3554 e->vbroadcastss(ymmA, xmmB);
3555 e->vbroadcastss(ymmA, anyptr_gpB);
3556 e->vbroadcastss(zmmA, xmmB);
3557 e->vbroadcastss(zmmA, anyptr_gpB);
3558 e->vcmppd(kA, xmmB, xmmC, 0);
3559 e->vcmppd(kA, xmmB, anyptr_gpC, 0);
3560 e->vcmppd(kA, ymmB, ymmC, 0);
3561 e->vcmppd(kA, ymmB, anyptr_gpC, 0);
3562 e->vcmppd(kA, zmmB, zmmC, 0);
3563 e->vcmppd(kA, zmmB, anyptr_gpC, 0);
3564 e->vcmpps(kA, xmmB, xmmC, 0);
3565 e->vcmpps(kA, xmmB, anyptr_gpC, 0);
3566 e->vcmpps(kA, ymmB, ymmC, 0);
3567 e->vcmpps(kA, ymmB, anyptr_gpC, 0);
3568 e->vcmpps(kA, zmmB, zmmC, 0);
3569 e->vcmpps(kA, zmmB, anyptr_gpC, 0);
3570 e->vcmpsd(kA, xmmB, xmmC, 0);
3571 e->vcmpsd(kA, xmmB, anyptr_gpC, 0);
3572 e->vcmpss(kA, xmmB, xmmC, 0);
3573 e->vcmpss(kA, xmmB, anyptr_gpC, 0);
3574 e->vcomisd(xmmA, xmmB);
3575 e->vcomisd(xmmA, anyptr_gpB);
3576 e->vcomiss(xmmA, xmmB);
3577 e->vcomiss(xmmA, anyptr_gpB);
3578 e->vcompresspd(xmmA, xmmB);
3579 e->vcompresspd(anyptr_gpA, xmmB);
3580 e->vcompresspd(ymmA, ymmB);
3581 e->vcompresspd(anyptr_gpA, ymmB);
3582 e->vcompresspd(zmmA, zmmB);
3583 e->vcompresspd(anyptr_gpA, zmmB);
3584 e->vcompressps(xmmA, xmmB);
3585 e->vcompressps(anyptr_gpA, xmmB);
3586 e->vcompressps(ymmA, ymmB);
3587 e->vcompressps(anyptr_gpA, ymmB);
3588 e->vcompressps(zmmA, zmmB);
3589 e->vcompressps(anyptr_gpA, zmmB);
3590 e->vcvtdq2pd(xmmA, xmmB);
3591 e->vcvtdq2pd(xmmA, anyptr_gpB);
3592 e->vcvtdq2pd(ymmA, xmmB);
3593 e->vcvtdq2pd(ymmA, anyptr_gpB);
3594 e->vcvtdq2pd(zmmA, ymmB);
3595 e->vcvtdq2pd(zmmA, anyptr_gpB);
3596 e->vcvtdq2ps(xmmA, xmmB);
3597 e->vcvtdq2ps(xmmA, anyptr_gpB);
3598 e->vcvtdq2ps(ymmA, ymmB);
3599 e->vcvtdq2ps(ymmA, anyptr_gpB);
3600 e->vcvtdq2ps(zmmA, zmmB);
3601 e->vcvtdq2ps(zmmA, anyptr_gpB);
3602 e->vcvtpd2dq(xmmA, xmmB);
3603 e->vcvtpd2dq(xmmA, anyptr_gpB);
3604 e->vcvtpd2dq(xmmA, ymmB);
3605 e->vcvtpd2dq(xmmA, anyptr_gpB);
3606 e->vcvtpd2dq(ymmA, zmmB);
3607 e->vcvtpd2dq(ymmA, anyptr_gpB);
3608 e->vcvtpd2qq(xmmA, xmmB);
3609 e->vcvtpd2qq(xmmA, anyptr_gpB);
3610 e->vcvtpd2qq(ymmA, ymmB);
3611 e->vcvtpd2qq(ymmA, anyptr_gpB);
3612 e->vcvtpd2qq(zmmA, zmmB);
3613 e->vcvtpd2qq(zmmA, anyptr_gpB);
3614 e->vcvtpd2udq(xmmA, xmmB);
3615 e->vcvtpd2udq(xmmA, anyptr_gpB);
3616 e->vcvtpd2udq(xmmA, ymmB);
3617 e->vcvtpd2udq(xmmA, anyptr_gpB);
3618 e->vcvtpd2udq(ymmA, zmmB);
3619 e->vcvtpd2udq(ymmA, anyptr_gpB);
3620 e->vcvtpd2uqq(xmmA, xmmB);
3621 e->vcvtpd2uqq(xmmA, anyptr_gpB);
3622 e->vcvtpd2uqq(ymmA, ymmB);
3623 e->vcvtpd2uqq(ymmA, anyptr_gpB);
3624 e->vcvtpd2uqq(zmmA, zmmB);
3625 e->vcvtpd2uqq(zmmA, anyptr_gpB);
3626 e->vcvtph2ps(xmmA, xmmB);
3627 e->vcvtph2ps(xmmA, anyptr_gpB);
3628 e->vcvtph2ps(ymmA, xmmB);
3629 e->vcvtph2ps(ymmA, anyptr_gpB);
3630 e->vcvtph2ps(zmmA, ymmB);
3631 e->vcvtph2ps(zmmA, anyptr_gpB);
3632 e->vcvtps2dq(xmmA, xmmB);
3633 e->vcvtps2dq(xmmA, anyptr_gpB);
3634 e->vcvtps2dq(ymmA, ymmB);
3635 e->vcvtps2dq(ymmA, anyptr_gpB);
3636 e->vcvtps2dq(zmmA, zmmB);
3637 e->vcvtps2dq(zmmA, anyptr_gpB);
3638 e->vcvtps2pd(xmmA, xmmB);
3639 e->vcvtps2pd(xmmA, anyptr_gpB);
3640 e->vcvtps2pd(ymmA, xmmB);
3641 e->vcvtps2pd(ymmA, anyptr_gpB);
3642 e->vcvtps2pd(zmmA, ymmB);
3643 e->vcvtps2pd(zmmA, anyptr_gpB);
3644 e->vcvtps2ph(xmmA, xmmB, 0);
3645 e->vcvtps2ph(anyptr_gpA, xmmB, 0);
3646 e->vcvtps2ph(xmmA, ymmB, 0);
3647 e->vcvtps2ph(anyptr_gpA, ymmB, 0);
3648 e->vcvtps2ph(ymmA, zmmB, 0);
3649 e->vcvtps2ph(anyptr_gpA, zmmB, 0);
3650 e->vcvtps2qq(xmmA, xmmB);
3651 e->vcvtps2qq(xmmA, anyptr_gpB);
3652 e->vcvtps2qq(ymmA, xmmB);
3653 e->vcvtps2qq(ymmA, anyptr_gpB);
3654 e->vcvtps2qq(zmmA, ymmB);
3655 e->vcvtps2qq(zmmA, anyptr_gpB);
3656 e->vcvtps2udq(xmmA, xmmB);
3657 e->vcvtps2udq(xmmA, anyptr_gpB);
3658 e->vcvtps2udq(ymmA, ymmB);
3659 e->vcvtps2udq(ymmA, anyptr_gpB);
3660 e->vcvtps2udq(zmmA, zmmB);
3661 e->vcvtps2udq(zmmA, anyptr_gpB);
3662 e->vcvtps2uqq(xmmA, xmmB);
3663 e->vcvtps2uqq(xmmA, anyptr_gpB);
3664 e->vcvtps2uqq(ymmA, xmmB);
3665 e->vcvtps2uqq(ymmA, anyptr_gpB);
3666 e->vcvtps2uqq(zmmA, ymmB);
3667 e->vcvtps2uqq(zmmA, anyptr_gpB);
3668 e->vcvtqq2pd(xmmA, xmmB);
3669 e->vcvtqq2pd(xmmA, anyptr_gpB);
3670 e->vcvtqq2pd(ymmA, ymmB);
3671 e->vcvtqq2pd(ymmA, anyptr_gpB);
3672 e->vcvtqq2pd(zmmA, zmmB);
3673 e->vcvtqq2pd(zmmA, anyptr_gpB);
3674 e->vcvtqq2ps(xmmA, xmmB);
3675 e->vcvtqq2ps(xmmA, anyptr_gpB);
3676 e->vcvtqq2ps(xmmA, ymmB);
3677 e->vcvtqq2ps(xmmA, anyptr_gpB);
3678 e->vcvtqq2ps(ymmA, zmmB);
3679 e->vcvtqq2ps(ymmA, anyptr_gpB);
3680 e->vcvtsd2si(gdA, xmmB);
3681 e->vcvtsd2si(gdA, anyptr_gpB);
3682 if (isX64) e->vcvtsd2si(gzA, xmmB);
3683 if (isX64) e->vcvtsd2si(gzA, anyptr_gpB);
3684 e->vcvtsd2ss(xmmA, xmmB, xmmC);
3685 e->vcvtsd2ss(xmmA, xmmB, anyptr_gpC);
3686 e->vcvtsd2usi(gdA, xmmB);
3687 e->vcvtsd2usi(gdA, anyptr_gpB);
3688 if (isX64) e->vcvtsd2usi(gzA, xmmB);
3689 if (isX64) e->vcvtsd2usi(gzA, anyptr_gpB);
3690 e->vcvtsi2sd(xmmA, xmmB, gdC);
3691 e->vcvtsi2sd(xmmA, xmmB, dword_ptr(gzC));
3692 if (isX64) e->vcvtsi2sd(xmmA, xmmB, gzC);
3693 if (isX64) e->vcvtsi2sd(xmmA, xmmB, qword_ptr(gzC));
3694 e->vcvtsi2ss(xmmA, xmmB, gdC);
3695 e->vcvtsi2ss(xmmA, xmmB, dword_ptr(gzC));
3696 if (isX64) e->vcvtsi2ss(xmmA, xmmB, gzC);
3697 if (isX64) e->vcvtsi2ss(xmmA, xmmB, qword_ptr(gzC));
3698 e->vcvtss2sd(xmmA, xmmB, xmmC);
3699 e->vcvtss2sd(xmmA, xmmB, anyptr_gpC);
3700 e->vcvtss2si(gdA, xmmB);
3701 e->vcvtss2si(gdA, anyptr_gpB);
3702 if (isX64) e->vcvtss2si(gzA, xmmB);
3703 if (isX64) e->vcvtss2si(gzA, anyptr_gpB);
3704 e->vcvtss2usi(gdA, xmmB);
3705 e->vcvtss2usi(gdA, anyptr_gpB);
3706 if (isX64) e->vcvtss2usi(gzA, xmmB);
3707 if (isX64) e->vcvtss2usi(gzA, anyptr_gpB);
3708 e->vcvttpd2dq(xmmA, xmmB);
3709 e->vcvttpd2dq(xmmA, anyptr_gpB);
3710 e->vcvttpd2dq(xmmA, ymmB);
3711 e->vcvttpd2dq(xmmA, anyptr_gpB);
3712 e->vcvttpd2dq(ymmA, zmmB);
3713 e->vcvttpd2dq(ymmA, anyptr_gpB);
3714 e->vcvttpd2qq(xmmA, xmmB);
3715 e->vcvttpd2qq(xmmA, anyptr_gpB);
3716 e->vcvttpd2qq(ymmA, ymmB);
3717 e->vcvttpd2qq(ymmA, anyptr_gpB);
3718 e->vcvttpd2qq(zmmA, zmmB);
3719 e->vcvttpd2qq(zmmA, anyptr_gpB);
3720 e->vcvttpd2udq(xmmA, xmmB);
3721 e->vcvttpd2udq(xmmA, anyptr_gpB);
3722 e->vcvttpd2udq(xmmA, ymmB);
3723 e->vcvttpd2udq(xmmA, anyptr_gpB);
3724 e->vcvttpd2udq(ymmA, zmmB);
3725 e->vcvttpd2udq(ymmA, anyptr_gpB);
3726 e->vcvttpd2uqq(xmmA, xmmB);
3727 e->vcvttpd2uqq(xmmA, anyptr_gpB);
3728 e->vcvttpd2uqq(ymmA, ymmB);
3729 e->vcvttpd2uqq(ymmA, anyptr_gpB);
3730 e->vcvttpd2uqq(zmmA, zmmB);
3731 e->vcvttpd2uqq(zmmA, anyptr_gpB);
3732 e->vcvttps2dq(xmmA, xmmB);
3733 e->vcvttps2dq(xmmA, anyptr_gpB);
3734 e->vcvttps2dq(ymmA, ymmB);
3735 e->vcvttps2dq(ymmA, anyptr_gpB);
3736 e->vcvttps2dq(zmmA, zmmB);
3737 e->vcvttps2dq(zmmA, anyptr_gpB);
3738 e->vcvttps2qq(xmmA, xmmB);
3739 e->vcvttps2qq(xmmA, anyptr_gpB);
3740 e->vcvttps2qq(ymmA, xmmB);
3741 e->vcvttps2qq(ymmA, anyptr_gpB);
3742 e->vcvttps2qq(zmmA, ymmB);
3743 e->vcvttps2qq(zmmA, anyptr_gpB);
3744 e->vcvttps2udq(xmmA, xmmB);
3745 e->vcvttps2udq(xmmA, anyptr_gpB);
3746 e->vcvttps2udq(ymmA, ymmB);
3747 e->vcvttps2udq(ymmA, anyptr_gpB);
3748 e->vcvttps2udq(zmmA, zmmB);
3749 e->vcvttps2udq(zmmA, anyptr_gpB);
3750 e->vcvttps2uqq(xmmA, xmmB);
3751 e->vcvttps2uqq(xmmA, anyptr_gpB);
3752 e->vcvttps2uqq(ymmA, xmmB);
3753 e->vcvttps2uqq(ymmA, anyptr_gpB);
3754 e->vcvttps2uqq(zmmA, ymmB);
3755 e->vcvttps2uqq(zmmA, anyptr_gpB);
3756 e->vcvttsd2si(gdA, xmmB);
3757 e->vcvttsd2si(gdA, anyptr_gpB);
3758 if (isX64) e->vcvttsd2si(gzA, xmmB);
3759 if (isX64) e->vcvttsd2si(gzA, anyptr_gpB);
3760 e->vcvttsd2usi(gdA, xmmB);
3761 e->vcvttsd2usi(gdA, anyptr_gpB);
3762 if (isX64) e->vcvttsd2usi(gzA, xmmB);
3763 if (isX64) e->vcvttsd2usi(gzA, anyptr_gpB);
3764 e->vcvttss2si(gdA, xmmB);
3765 e->vcvttss2si(gdA, anyptr_gpB);
3766 if (isX64) e->vcvttss2si(gzA, xmmB);
3767 if (isX64) e->vcvttss2si(gzA, anyptr_gpB);
3768 e->vcvttss2usi(gdA, xmmB);
3769 e->vcvttss2usi(gdA, anyptr_gpB);
3770 if (isX64) e->vcvttss2usi(gzA, xmmB);
3771 if (isX64) e->vcvttss2usi(gzA, anyptr_gpB);
3772 e->vcvtudq2pd(xmmA, xmmB);
3773 e->vcvtudq2pd(xmmA, anyptr_gpB);
3774 e->vcvtudq2pd(ymmA, xmmB);
3775 e->vcvtudq2pd(ymmA, anyptr_gpB);
3776 e->vcvtudq2pd(zmmA, ymmB);
3777 e->vcvtudq2pd(zmmA, anyptr_gpB);
3778 e->vcvtudq2ps(xmmA, xmmB);
3779 e->vcvtudq2ps(xmmA, anyptr_gpB);
3780 e->vcvtudq2ps(ymmA, ymmB);
3781 e->vcvtudq2ps(ymmA, anyptr_gpB);
3782 e->vcvtudq2ps(zmmA, zmmB);
3783 e->vcvtudq2ps(zmmA, anyptr_gpB);
3784 e->vcvtuqq2pd(xmmA, xmmB);
3785 e->vcvtuqq2pd(xmmA, anyptr_gpB);
3786 e->vcvtuqq2pd(ymmA, ymmB);
3787 e->vcvtuqq2pd(ymmA, anyptr_gpB);
3788 e->vcvtuqq2pd(zmmA, zmmB);
3789 e->vcvtuqq2pd(zmmA, anyptr_gpB);
3790 e->vcvtuqq2ps(xmmA, xmmB);
3791 e->vcvtuqq2ps(xmmA, anyptr_gpB);
3792 e->vcvtuqq2ps(xmmA, ymmB);
3793 e->vcvtuqq2ps(xmmA, anyptr_gpB);
3794 e->vcvtuqq2ps(ymmA, zmmB);
3795 e->vcvtuqq2ps(ymmA, anyptr_gpB);
3796 e->vcvtusi2sd(xmmA, xmmB, gdC);
3797 e->vcvtusi2sd(xmmA, xmmB, dword_ptr(gzC));
3798 if (isX64) e->vcvtusi2sd(xmmA, xmmB, gzC);
3799 if (isX64) e->vcvtusi2sd(xmmA, xmmB, qword_ptr(gzC));
3800 e->vcvtusi2ss(xmmA, xmmB, gdC);
3801 e->vcvtusi2ss(xmmA, xmmB, dword_ptr(gzC));
3802 if (isX64) e->vcvtusi2ss(xmmA, xmmB, gzC);
3803 if (isX64) e->vcvtusi2ss(xmmA, xmmB, qword_ptr(gzC));
3804 e->vdbpsadbw(xmmA, xmmB, xmmC, 0);
3805 e->vdbpsadbw(xmmA, xmmB, anyptr_gpC, 0);
3806 e->vdbpsadbw(ymmA, ymmB, ymmC, 0);
3807 e->vdbpsadbw(ymmA, ymmB, anyptr_gpC, 0);
3808 e->vdbpsadbw(zmmA, zmmB, zmmC, 0);
3809 e->vdbpsadbw(zmmA, zmmB, anyptr_gpC, 0);
3810 e->vdivpd(xmmA, xmmB, xmmC);
3811 e->vdivpd(xmmA, xmmB, anyptr_gpC);
3812 e->vdivpd(ymmA, ymmB, ymmC);
3813 e->vdivpd(ymmA, ymmB, anyptr_gpC);
3814 e->vdivpd(zmmA, zmmB, zmmC);
3815 e->vdivpd(zmmA, zmmB, anyptr_gpC);
3816 e->vdivps(xmmA, xmmB, xmmC);
3817 e->vdivps(xmmA, xmmB, anyptr_gpC);
3818 e->vdivps(ymmA, ymmB, ymmC);
3819 e->vdivps(ymmA, ymmB, anyptr_gpC);
3820 e->vdivps(zmmA, zmmB, zmmC);
3821 e->vdivps(zmmA, zmmB, anyptr_gpC);
3822 e->vdivsd(xmmA, xmmB, xmmC);
3823 e->vdivsd(xmmA, xmmB, anyptr_gpC);
3824 e->vdivss(xmmA, xmmB, xmmC);
3825 e->vdivss(xmmA, xmmB, anyptr_gpC);
3826 e->vexp2pd(zmmA, zmmB);
3827 e->vexp2pd(zmmA, anyptr_gpB);
3828 e->vexp2ps(zmmA, zmmB);
3829 e->vexp2ps(zmmA, anyptr_gpB);
3830 e->vexpandpd(xmmA, xmmB);
3831 e->vexpandpd(xmmA, anyptr_gpB);
3832 e->vexpandpd(ymmA, ymmB);
3833 e->vexpandpd(ymmA, anyptr_gpB);
3834 e->vexpandpd(zmmA, zmmB);
3835 e->vexpandpd(zmmA, anyptr_gpB);
3836 e->vexpandps(xmmA, xmmB);
3837 e->vexpandps(xmmA, anyptr_gpB);
3838 e->vexpandps(ymmA, ymmB);
3839 e->vexpandps(ymmA, anyptr_gpB);
3840 e->vexpandps(zmmA, zmmB);
3841 e->vexpandps(zmmA, anyptr_gpB);
3842 e->vextractf32x4(xmmA, ymmB, 0);
3843 e->vextractf32x4(anyptr_gpA, ymmB, 0);
3844 e->vextractf32x4(xmmA, zmmB, 0);
3845 e->vextractf32x4(anyptr_gpA, zmmB, 0);
3846 e->vextractf32x8(ymmA, zmmB, 0);
3847 e->vextractf32x8(anyptr_gpA, zmmB, 0);
3848 e->vextractf64x2(xmmA, ymmB, 0);
3849 e->vextractf64x2(anyptr_gpA, ymmB, 0);
3850 e->vextractf64x2(xmmA, zmmB, 0);
3851 e->vextractf64x2(anyptr_gpA, zmmB, 0);
3852 e->vextractf64x4(ymmA, zmmB, 0);
3853 e->vextractf64x4(anyptr_gpA, zmmB, 0);
3854 e->vextracti32x4(xmmA, ymmB, 0);
3855 e->vextracti32x4(anyptr_gpA, ymmB, 0);
3856 e->vextracti32x4(xmmA, zmmB, 0);
3857 e->vextracti32x4(anyptr_gpA, zmmB, 0);
3858 e->vextracti32x8(ymmA, zmmB, 0);
3859 e->vextracti32x8(anyptr_gpA, zmmB, 0);
3860 e->vextracti64x2(xmmA, ymmB, 0);
3861 e->vextracti64x2(anyptr_gpA, ymmB, 0);
3862 e->vextracti64x2(xmmA, zmmB, 0);
3863 e->vextracti64x2(anyptr_gpA, zmmB, 0);
3864 e->vextracti64x4(ymmA, zmmB, 0);
3865 e->vextracti64x4(anyptr_gpA, zmmB, 0);
3866 e->vextractps(gdA, xmmB, 0);
3867 e->vextractps(gzA, xmmB, 0);
3868 e->vextractps(anyptr_gpA, xmmB, 0);
3869 e->vfixupimmpd(xmmA, xmmB, xmmC, 0);
3870 e->vfixupimmpd(xmmA, xmmB, anyptr_gpC, 0);
3871 e->vfixupimmpd(ymmA, ymmB, ymmC, 0);
3872 e->vfixupimmpd(ymmA, ymmB, anyptr_gpC, 0);
3873 e->vfixupimmpd(zmmA, zmmB, zmmC, 0);
3874 e->vfixupimmpd(zmmA, zmmB, anyptr_gpC, 0);
3875 e->vfixupimmps(xmmA, xmmB, xmmC, 0);
3876 e->vfixupimmps(xmmA, xmmB, anyptr_gpC, 0);
3877 e->vfixupimmps(ymmA, ymmB, ymmC, 0);
3878 e->vfixupimmps(ymmA, ymmB, anyptr_gpC, 0);
3879 e->vfixupimmps(zmmA, zmmB, zmmC, 0);
3880 e->vfixupimmps(zmmA, zmmB, anyptr_gpC, 0);
3881 e->vfixupimmsd(xmmA, xmmB, xmmC, 0);
3882 e->vfixupimmsd(xmmA, xmmB, anyptr_gpC, 0);
3883 e->vfixupimmss(xmmA, xmmB, xmmC, 0);
3884 e->vfixupimmss(xmmA, xmmB, anyptr_gpC, 0);
3885 e->vfmadd132pd(xmmA, xmmB, xmmC);
3886 e->vfmadd132pd(xmmA, xmmB, anyptr_gpC);
3887 e->vfmadd132pd(ymmA, ymmB, ymmC);
3888 e->vfmadd132pd(ymmA, ymmB, anyptr_gpC);
3889 e->vfmadd132pd(zmmA, zmmB, zmmC);
3890 e->vfmadd132pd(zmmA, zmmB, anyptr_gpC);
3891 e->vfmadd132ps(xmmA, xmmB, xmmC);
3892 e->vfmadd132ps(xmmA, xmmB, anyptr_gpC);
3893 e->vfmadd132ps(ymmA, ymmB, ymmC);
3894 e->vfmadd132ps(ymmA, ymmB, anyptr_gpC);
3895 e->vfmadd132ps(zmmA, zmmB, zmmC);
3896 e->vfmadd132ps(zmmA, zmmB, anyptr_gpC);
3897 e->vfmadd132sd(xmmA, xmmB, xmmC);
3898 e->vfmadd132sd(xmmA, xmmB, anyptr_gpC);
3899 e->vfmadd132ss(xmmA, xmmB, xmmC);
3900 e->vfmadd132ss(xmmA, xmmB, anyptr_gpC);
3901 e->vfmadd213pd(xmmA, xmmB, xmmC);
3902 e->vfmadd213pd(xmmA, xmmB, anyptr_gpC);
3903 e->vfmadd213pd(ymmA, ymmB, ymmC);
3904 e->vfmadd213pd(ymmA, ymmB, anyptr_gpC);
3905 e->vfmadd213pd(zmmA, zmmB, zmmC);
3906 e->vfmadd213pd(zmmA, zmmB, anyptr_gpC);
3907 e->vfmadd213ps(xmmA, xmmB, xmmC);
3908 e->vfmadd213ps(xmmA, xmmB, anyptr_gpC);
3909 e->vfmadd213ps(ymmA, ymmB, ymmC);
3910 e->vfmadd213ps(ymmA, ymmB, anyptr_gpC);
3911 e->vfmadd213ps(zmmA, zmmB, zmmC);
3912 e->vfmadd213ps(zmmA, zmmB, anyptr_gpC);
3913 e->vfmadd213sd(xmmA, xmmB, xmmC);
3914 e->vfmadd213sd(xmmA, xmmB, anyptr_gpC);
3915 e->vfmadd213ss(xmmA, xmmB, xmmC);
3916 e->vfmadd213ss(xmmA, xmmB, anyptr_gpC);
3917 e->vfmadd231pd(xmmA, xmmB, xmmC);
3918 e->vfmadd231pd(xmmA, xmmB, anyptr_gpC);
3919 e->vfmadd231pd(ymmA, ymmB, ymmC);
3920 e->vfmadd231pd(ymmA, ymmB, anyptr_gpC);
3921 e->vfmadd231pd(zmmA, zmmB, zmmC);
3922 e->vfmadd231pd(zmmA, zmmB, anyptr_gpC);
3923 e->vfmadd231ps(xmmA, xmmB, xmmC);
3924 e->vfmadd231ps(xmmA, xmmB, anyptr_gpC);
3925 e->vfmadd231ps(ymmA, ymmB, ymmC);
3926 e->vfmadd231ps(ymmA, ymmB, anyptr_gpC);
3927 e->vfmadd231ps(zmmA, zmmB, zmmC);
3928 e->vfmadd231ps(zmmA, zmmB, anyptr_gpC);
3929 e->vfmadd231sd(xmmA, xmmB, xmmC);
3930 e->vfmadd231sd(xmmA, xmmB, anyptr_gpC);
3931 e->vfmadd231ss(xmmA, xmmB, xmmC);
3932 e->vfmadd231ss(xmmA, xmmB, anyptr_gpC);
3933 e->vfmaddsub132pd(xmmA, xmmB, xmmC);
3934 e->vfmaddsub132pd(xmmA, xmmB, anyptr_gpC);
3935 e->vfmaddsub132pd(ymmA, ymmB, ymmC);
3936 e->vfmaddsub132pd(ymmA, ymmB, anyptr_gpC);
3937 e->vfmaddsub132pd(zmmA, zmmB, zmmC);
3938 e->vfmaddsub132pd(zmmA, zmmB, anyptr_gpC);
3939 e->vfmaddsub132ps(xmmA, xmmB, xmmC);
3940 e->vfmaddsub132ps(xmmA, xmmB, anyptr_gpC);
3941 e->vfmaddsub132ps(ymmA, ymmB, ymmC);
3942 e->vfmaddsub132ps(ymmA, ymmB, anyptr_gpC);
3943 e->vfmaddsub132ps(zmmA, zmmB, zmmC);
3944 e->vfmaddsub132ps(zmmA, zmmB, anyptr_gpC);
3945 e->vfmaddsub213pd(xmmA, xmmB, xmmC);
3946 e->vfmaddsub213pd(xmmA, xmmB, anyptr_gpC);
3947 e->vfmaddsub213pd(ymmA, ymmB, ymmC);
3948 e->vfmaddsub213pd(ymmA, ymmB, anyptr_gpC);
3949 e->vfmaddsub213pd(zmmA, zmmB, zmmC);
3950 e->vfmaddsub213pd(zmmA, zmmB, anyptr_gpC);
3951 e->vfmaddsub213ps(xmmA, xmmB, xmmC);
3952 e->vfmaddsub213ps(xmmA, xmmB, anyptr_gpC);
3953 e->vfmaddsub213ps(ymmA, ymmB, ymmC);
3954 e->vfmaddsub213ps(ymmA, ymmB, anyptr_gpC);
3955 e->vfmaddsub213ps(zmmA, zmmB, zmmC);
3956 e->vfmaddsub213ps(zmmA, zmmB, anyptr_gpC);
3957 e->vfmaddsub231pd(xmmA, xmmB, xmmC);
3958 e->vfmaddsub231pd(xmmA, xmmB, anyptr_gpC);
3959 e->vfmaddsub231pd(ymmA, ymmB, ymmC);
3960 e->vfmaddsub231pd(ymmA, ymmB, anyptr_gpC);
3961 e->vfmaddsub231pd(zmmA, zmmB, zmmC);
3962 e->vfmaddsub231pd(zmmA, zmmB, anyptr_gpC);
3963 e->vfmaddsub231ps(xmmA, xmmB, xmmC);
3964 e->vfmaddsub231ps(xmmA, xmmB, anyptr_gpC);
3965 e->vfmaddsub231ps(ymmA, ymmB, ymmC);
3966 e->vfmaddsub231ps(ymmA, ymmB, anyptr_gpC);
3967 e->vfmaddsub231ps(zmmA, zmmB, zmmC);
3968 e->vfmaddsub231ps(zmmA, zmmB, anyptr_gpC);
3969 e->vfmsub132pd(xmmA, xmmB, xmmC);
3970 e->vfmsub132pd(xmmA, xmmB, anyptr_gpC);
3971 e->vfmsub132pd(ymmA, ymmB, ymmC);
3972 e->vfmsub132pd(ymmA, ymmB, anyptr_gpC);
3973 e->vfmsub132pd(zmmA, zmmB, zmmC);
3974 e->vfmsub132pd(zmmA, zmmB, anyptr_gpC);
3975 e->vfmsub132ps(xmmA, xmmB, xmmC);
3976 e->vfmsub132ps(xmmA, xmmB, anyptr_gpC);
3977 e->vfmsub132ps(ymmA, ymmB, ymmC);
3978 e->vfmsub132ps(ymmA, ymmB, anyptr_gpC);
3979 e->vfmsub132ps(zmmA, zmmB, zmmC);
3980 e->vfmsub132ps(zmmA, zmmB, anyptr_gpC);
3981 e->vfmsub132sd(xmmA, xmmB, xmmC);
3982 e->vfmsub132sd(xmmA, xmmB, anyptr_gpC);
3983 e->vfmsub132ss(xmmA, xmmB, xmmC);
3984 e->vfmsub132ss(xmmA, xmmB, anyptr_gpC);
3985 e->vfmsub213pd(xmmA, xmmB, xmmC);
3986 e->vfmsub213pd(xmmA, xmmB, anyptr_gpC);
3987 e->vfmsub213pd(ymmA, ymmB, ymmC);
3988 e->vfmsub213pd(ymmA, ymmB, anyptr_gpC);
3989 e->vfmsub213pd(zmmA, zmmB, zmmC);
3990 e->vfmsub213pd(zmmA, zmmB, anyptr_gpC);
3991 e->vfmsub213ps(xmmA, xmmB, xmmC);
3992 e->vfmsub213ps(xmmA, xmmB, anyptr_gpC);
3993 e->vfmsub213ps(ymmA, ymmB, ymmC);
3994 e->vfmsub213ps(ymmA, ymmB, anyptr_gpC);
3995 e->vfmsub213ps(zmmA, zmmB, zmmC);
3996 e->vfmsub213ps(zmmA, zmmB, anyptr_gpC);
3997 e->vfmsub213sd(xmmA, xmmB, xmmC);
3998 e->vfmsub213sd(xmmA, xmmB, anyptr_gpC);
3999 e->vfmsub213ss(xmmA, xmmB, xmmC);
4000 e->vfmsub213ss(xmmA, xmmB, anyptr_gpC);
4001 e->vfmsub231pd(xmmA, xmmB, xmmC);
4002 e->vfmsub231pd(xmmA, xmmB, anyptr_gpC);
4003 e->vfmsub231pd(ymmA, ymmB, ymmC);
4004 e->vfmsub231pd(ymmA, ymmB, anyptr_gpC);
4005 e->vfmsub231pd(zmmA, zmmB, zmmC);
4006 e->vfmsub231pd(zmmA, zmmB, anyptr_gpC);
4007 e->vfmsub231ps(xmmA, xmmB, xmmC);
4008 e->vfmsub231ps(xmmA, xmmB, anyptr_gpC);
4009 e->vfmsub231ps(ymmA, ymmB, ymmC);
4010 e->vfmsub231ps(ymmA, ymmB, anyptr_gpC);
4011 e->vfmsub231ps(zmmA, zmmB, zmmC);
4012 e->vfmsub231ps(zmmA, zmmB, anyptr_gpC);
4013 e->vfmsub231sd(xmmA, xmmB, xmmC);
4014 e->vfmsub231sd(xmmA, xmmB, anyptr_gpC);
4015 e->vfmsub231ss(xmmA, xmmB, xmmC);
4016 e->vfmsub231ss(xmmA, xmmB, anyptr_gpC);
4017 e->vfmsubadd132pd(xmmA, xmmB, xmmC);
4018 e->vfmsubadd132pd(xmmA, xmmB, anyptr_gpC);
4019 e->vfmsubadd132pd(ymmA, ymmB, ymmC);
4020 e->vfmsubadd132pd(ymmA, ymmB, anyptr_gpC);
4021 e->vfmsubadd132pd(zmmA, zmmB, zmmC);
4022 e->vfmsubadd132pd(zmmA, zmmB, anyptr_gpC);
4023 e->vfmsubadd132ps(xmmA, xmmB, xmmC);
4024 e->vfmsubadd132ps(xmmA, xmmB, anyptr_gpC);
4025 e->vfmsubadd132ps(ymmA, ymmB, ymmC);
4026 e->vfmsubadd132ps(ymmA, ymmB, anyptr_gpC);
4027 e->vfmsubadd132ps(zmmA, zmmB, zmmC);
4028 e->vfmsubadd132ps(zmmA, zmmB, anyptr_gpC);
4029 e->vfmsubadd213pd(xmmA, xmmB, xmmC);
4030 e->vfmsubadd213pd(xmmA, xmmB, anyptr_gpC);
4031 e->vfmsubadd213pd(ymmA, ymmB, ymmC);
4032 e->vfmsubadd213pd(ymmA, ymmB, anyptr_gpC);
4033 e->vfmsubadd213pd(zmmA, zmmB, zmmC);
4034 e->vfmsubadd213pd(zmmA, zmmB, anyptr_gpC);
4035 e->vfmsubadd213ps(xmmA, xmmB, xmmC);
4036 e->vfmsubadd213ps(xmmA, xmmB, anyptr_gpC);
4037 e->vfmsubadd213ps(ymmA, ymmB, ymmC);
4038 e->vfmsubadd213ps(ymmA, ymmB, anyptr_gpC);
4039 e->vfmsubadd213ps(zmmA, zmmB, zmmC);
4040 e->vfmsubadd213ps(zmmA, zmmB, anyptr_gpC);
4041 e->vfmsubadd231pd(xmmA, xmmB, xmmC);
4042 e->vfmsubadd231pd(xmmA, xmmB, anyptr_gpC);
4043 e->vfmsubadd231pd(ymmA, ymmB, ymmC);
4044 e->vfmsubadd231pd(ymmA, ymmB, anyptr_gpC);
4045 e->vfmsubadd231pd(zmmA, zmmB, zmmC);
4046 e->vfmsubadd231pd(zmmA, zmmB, anyptr_gpC);
4047 e->vfmsubadd231ps(xmmA, xmmB, xmmC);
4048 e->vfmsubadd231ps(xmmA, xmmB, anyptr_gpC);
4049 e->vfmsubadd231ps(ymmA, ymmB, ymmC);
4050 e->vfmsubadd231ps(ymmA, ymmB, anyptr_gpC);
4051 e->vfmsubadd231ps(zmmA, zmmB, zmmC);
4052 e->vfmsubadd231ps(zmmA, zmmB, anyptr_gpC);
4053 e->vfnmadd132pd(xmmA, xmmB, xmmC);
4054 e->vfnmadd132pd(xmmA, xmmB, anyptr_gpC);
4055 e->vfnmadd132pd(ymmA, ymmB, ymmC);
4056 e->vfnmadd132pd(ymmA, ymmB, anyptr_gpC);
4057 e->vfnmadd132pd(zmmA, zmmB, zmmC);
4058 e->vfnmadd132pd(zmmA, zmmB, anyptr_gpC);
4059 e->vfnmadd132ps(xmmA, xmmB, xmmC);
4060 e->vfnmadd132ps(xmmA, xmmB, anyptr_gpC);
4061 e->vfnmadd132ps(ymmA, ymmB, ymmC);
4062 e->vfnmadd132ps(ymmA, ymmB, anyptr_gpC);
4063 e->vfnmadd132ps(zmmA, zmmB, zmmC);
4064 e->vfnmadd132ps(zmmA, zmmB, anyptr_gpC);
4065 e->vfnmadd132sd(xmmA, xmmB, xmmC);
4066 e->vfnmadd132sd(xmmA, xmmB, anyptr_gpC);
4067 e->vfnmadd132ss(xmmA, xmmB, xmmC);
4068 e->vfnmadd132ss(xmmA, xmmB, anyptr_gpC);
4069 e->vfnmadd213pd(xmmA, xmmB, xmmC);
4070 e->vfnmadd213pd(xmmA, xmmB, anyptr_gpC);
4071 e->vfnmadd213pd(ymmA, ymmB, ymmC);
4072 e->vfnmadd213pd(ymmA, ymmB, anyptr_gpC);
4073 e->vfnmadd213pd(zmmA, zmmB, zmmC);
4074 e->vfnmadd213pd(zmmA, zmmB, anyptr_gpC);
4075 e->vfnmadd213ps(xmmA, xmmB, xmmC);
4076 e->vfnmadd213ps(xmmA, xmmB, anyptr_gpC);
4077 e->vfnmadd213ps(ymmA, ymmB, ymmC);
4078 e->vfnmadd213ps(ymmA, ymmB, anyptr_gpC);
4079 e->vfnmadd213ps(zmmA, zmmB, zmmC);
4080 e->vfnmadd213ps(zmmA, zmmB, anyptr_gpC);
4081 e->vfnmadd213sd(xmmA, xmmB, xmmC);
4082 e->vfnmadd213sd(xmmA, xmmB, anyptr_gpC);
4083 e->vfnmadd213ss(xmmA, xmmB, xmmC);
4084 e->vfnmadd213ss(xmmA, xmmB, anyptr_gpC);
4085 e->vfnmadd231pd(xmmA, xmmB, xmmC);
4086 e->vfnmadd231pd(xmmA, xmmB, anyptr_gpC);
4087 e->vfnmadd231pd(ymmA, ymmB, ymmC);
4088 e->vfnmadd231pd(ymmA, ymmB, anyptr_gpC);
4089 e->vfnmadd231pd(zmmA, zmmB, zmmC);
4090 e->vfnmadd231pd(zmmA, zmmB, anyptr_gpC);
4091 e->vfnmadd231ps(xmmA, xmmB, xmmC);
4092 e->vfnmadd231ps(xmmA, xmmB, anyptr_gpC);
4093 e->vfnmadd231ps(ymmA, ymmB, ymmC);
4094 e->vfnmadd231ps(ymmA, ymmB, anyptr_gpC);
4095 e->vfnmadd231ps(zmmA, zmmB, zmmC);
4096 e->vfnmadd231ps(zmmA, zmmB, anyptr_gpC);
4097 e->vfnmadd231sd(xmmA, xmmB, xmmC);
4098 e->vfnmadd231sd(xmmA, xmmB, anyptr_gpC);
4099 e->vfnmadd231ss(xmmA, xmmB, xmmC);
4100 e->vfnmadd231ss(xmmA, xmmB, anyptr_gpC);
4101 e->vfnmsub132pd(xmmA, xmmB, xmmC);
4102 e->vfnmsub132pd(xmmA, xmmB, anyptr_gpC);
4103 e->vfnmsub132pd(ymmA, ymmB, ymmC);
4104 e->vfnmsub132pd(ymmA, ymmB, anyptr_gpC);
4105 e->vfnmsub132pd(zmmA, zmmB, zmmC);
4106 e->vfnmsub132pd(zmmA, zmmB, anyptr_gpC);
4107 e->vfnmsub132ps(xmmA, xmmB, xmmC);
4108 e->vfnmsub132ps(xmmA, xmmB, anyptr_gpC);
4109 e->vfnmsub132ps(ymmA, ymmB, ymmC);
4110 e->vfnmsub132ps(ymmA, ymmB, anyptr_gpC);
4111 e->vfnmsub132ps(zmmA, zmmB, zmmC);
4112 e->vfnmsub132ps(zmmA, zmmB, anyptr_gpC);
4113 e->vfnmsub132sd(xmmA, xmmB, xmmC);
4114 e->vfnmsub132sd(xmmA, xmmB, anyptr_gpC);
4115 e->vfnmsub132ss(xmmA, xmmB, xmmC);
4116 e->vfnmsub132ss(xmmA, xmmB, anyptr_gpC);
4117 e->vfnmsub213pd(xmmA, xmmB, xmmC);
4118 e->vfnmsub213pd(xmmA, xmmB, anyptr_gpC);
4119 e->vfnmsub213pd(ymmA, ymmB, ymmC);
4120 e->vfnmsub213pd(ymmA, ymmB, anyptr_gpC);
4121 e->vfnmsub213pd(zmmA, zmmB, zmmC);
4122 e->vfnmsub213pd(zmmA, zmmB, anyptr_gpC);
4123 e->vfnmsub213ps(xmmA, xmmB, xmmC);
4124 e->vfnmsub213ps(xmmA, xmmB, anyptr_gpC);
4125 e->vfnmsub213ps(ymmA, ymmB, ymmC);
4126 e->vfnmsub213ps(ymmA, ymmB, anyptr_gpC);
4127 e->vfnmsub213ps(zmmA, zmmB, zmmC);
4128 e->vfnmsub213ps(zmmA, zmmB, anyptr_gpC);
4129 e->vfnmsub213sd(xmmA, xmmB, xmmC);
4130 e->vfnmsub213sd(xmmA, xmmB, anyptr_gpC);
4131 e->vfnmsub213ss(xmmA, xmmB, xmmC);
4132 e->vfnmsub213ss(xmmA, xmmB, anyptr_gpC);
4133 e->vfnmsub231pd(xmmA, xmmB, xmmC);
4134 e->vfnmsub231pd(xmmA, xmmB, anyptr_gpC);
4135 e->vfnmsub231pd(ymmA, ymmB, ymmC);
4136 e->vfnmsub231pd(ymmA, ymmB, anyptr_gpC);
4137 e->vfnmsub231pd(zmmA, zmmB, zmmC);
4138 e->vfnmsub231pd(zmmA, zmmB, anyptr_gpC);
4139 e->vfnmsub231ps(xmmA, xmmB, xmmC);
4140 e->vfnmsub231ps(xmmA, xmmB, anyptr_gpC);
4141 e->vfnmsub231ps(ymmA, ymmB, ymmC);
4142 e->vfnmsub231ps(ymmA, ymmB, anyptr_gpC);
4143 e->vfnmsub231ps(zmmA, zmmB, zmmC);
4144 e->vfnmsub231ps(zmmA, zmmB, anyptr_gpC);
4145 e->vfnmsub231sd(xmmA, xmmB, xmmC);
4146 e->vfnmsub231sd(xmmA, xmmB, anyptr_gpC);
4147 e->vfnmsub231ss(xmmA, xmmB, xmmC);
4148 e->vfnmsub231ss(xmmA, xmmB, anyptr_gpC);
4149 e->vfpclasspd(kA, xmmB, 0);
4150 e->vfpclasspd(kA, anyptr_gpB, 0);
4151 e->vfpclasspd(kA, ymmB, 0);
4152 e->vfpclasspd(kA, anyptr_gpB, 0);
4153 e->vfpclasspd(kA, zmmB, 0);
4154 e->vfpclasspd(kA, anyptr_gpB, 0);
4155 e->vfpclassps(kA, xmmB, 0);
4156 e->vfpclassps(kA, anyptr_gpB, 0);
4157 e->vfpclassps(kA, ymmB, 0);
4158 e->vfpclassps(kA, anyptr_gpB, 0);
4159 e->vfpclassps(kA, zmmB, 0);
4160 e->vfpclassps(kA, anyptr_gpB, 0);
4161 e->vfpclasssd(kA, xmmB, 0);
4162 e->vfpclasssd(kA, anyptr_gpB, 0);
4163 e->vfpclassss(kA, xmmB, 0);
4164 e->vfpclassss(kA, anyptr_gpB, 0);
4165 e->vgatherdpd(xmmA, vx_ptr);
4166 e->vgatherdpd(ymmA, vy_ptr);
4167 e->vgatherdpd(zmmA, vz_ptr);
4168 e->vgatherdps(xmmA, vx_ptr);
4169 e->vgatherdps(ymmA, vy_ptr);
4170 e->vgatherdps(zmmA, vz_ptr);
4171 e->vgatherpf0dpd(vy_ptr);
4172 e->vgatherpf0dps(vz_ptr);
4173 e->vgatherpf0qpd(vz_ptr);
4174 e->vgatherpf0qps(vz_ptr);
4175 e->vgatherpf1dpd(vy_ptr);
4176 e->vgatherpf1dps(vz_ptr);
4177 e->vgatherpf1qpd(vz_ptr);
4178 e->vgatherpf1qps(vz_ptr);
4179 e->vgatherqpd(xmmA, vx_ptr);
4180 e->vgatherqpd(ymmA, vy_ptr);
4181 e->vgatherqpd(zmmA, vz_ptr);
4182 e->vgatherqps(xmmA, vx_ptr);
4183 e->vgatherqps(ymmA, vy_ptr);
4184 e->vgatherqps(zmmA, vz_ptr);
4185 e->vgetexppd(xmmA, xmmB);
4186 e->vgetexppd(xmmA, anyptr_gpB);
4187 e->vgetexppd(ymmA, ymmB);
4188 e->vgetexppd(ymmA, anyptr_gpB);
4189 e->vgetexppd(zmmA, zmmB);
4190 e->vgetexppd(zmmA, anyptr_gpB);
4191 e->vgetexpps(xmmA, xmmB);
4192 e->vgetexpps(xmmA, anyptr_gpB);
4193 e->vgetexpps(ymmA, ymmB);
4194 e->vgetexpps(ymmA, anyptr_gpB);
4195 e->vgetexpps(zmmA, zmmB);
4196 e->vgetexpps(zmmA, anyptr_gpB);
4197 e->vgetexpsd(xmmA, xmmB, xmmC);
4198 e->vgetexpsd(xmmA, xmmB, anyptr_gpB);
4199 e->vgetexpss(xmmA, xmmB, xmmC);
4200 e->vgetexpss(xmmA, xmmB, anyptr_gpB);
4201 e->vgetmantpd(xmmA, xmmB, 0);
4202 e->vgetmantpd(xmmA, anyptr_gpB, 0);
4203 e->vgetmantpd(ymmA, ymmB, 0);
4204 e->vgetmantpd(ymmA, anyptr_gpB, 0);
4205 e->vgetmantpd(zmmA, zmmB, 0);
4206 e->vgetmantpd(zmmA, anyptr_gpB, 0);
4207 e->vgetmantps(xmmA, xmmB, 0);
4208 e->vgetmantps(xmmA, anyptr_gpB, 0);
4209 e->vgetmantps(ymmA, ymmB, 0);
4210 e->vgetmantps(ymmA, anyptr_gpB, 0);
4211 e->vgetmantps(zmmA, zmmB, 0);
4212 e->vgetmantps(zmmA, anyptr_gpB, 0);
4213 e->vgetmantsd(xmmA, xmmB, xmmC, 0);
4214 e->vgetmantsd(xmmA, xmmB, anyptr_gpB, 0);
4215 e->vgetmantss(xmmA, xmmB, xmmC, 0);
4216 e->vgetmantss(xmmA, xmmB, anyptr_gpB, 0);
4217 e->vinsertf32x4(ymmA, ymmB, xmmC, 0);
4218 e->vinsertf32x4(ymmA, ymmB, anyptr_gpC, 0);
4219 e->vinsertf32x4(zmmA, zmmB, xmmC, 0);
4220 e->vinsertf32x4(zmmA, zmmB, anyptr_gpC, 0);
4221 e->vinsertf32x8(zmmA, zmmB, ymmC, 0);
4222 e->vinsertf32x8(zmmA, zmmB, anyptr_gpC, 0);
4223 e->vinsertf64x2(ymmA, ymmB, xmmC, 0);
4224 e->vinsertf64x2(ymmA, ymmB, anyptr_gpC, 0);
4225 e->vinsertf64x2(zmmA, zmmB, xmmC, 0);
4226 e->vinsertf64x2(zmmA, zmmB, anyptr_gpC, 0);
4227 e->vinsertf64x4(zmmA, zmmB, ymmC, 0);
4228 e->vinsertf64x4(zmmA, zmmB, anyptr_gpC, 0);
4229 e->vinserti32x4(ymmA, ymmB, xmmC, 0);
4230 e->vinserti32x4(ymmA, ymmB, anyptr_gpC, 0);
4231 e->vinserti32x4(zmmA, zmmB, xmmC, 0);
4232 e->vinserti32x4(zmmA, zmmB, anyptr_gpC, 0);
4233 e->vinserti32x8(zmmA, zmmB, ymmC, 0);
4234 e->vinserti32x8(zmmA, zmmB, anyptr_gpC, 0);
4235 e->vinserti64x2(ymmA, ymmB, xmmC, 0);
4236 e->vinserti64x2(ymmA, ymmB, anyptr_gpC, 0);
4237 e->vinserti64x2(zmmA, zmmB, xmmC, 0);
4238 e->vinserti64x2(zmmA, zmmB, anyptr_gpC, 0);
4239 e->vinserti64x4(zmmA, zmmB, ymmC, 0);
4240 e->vinserti64x4(zmmA, zmmB, anyptr_gpC, 0);
4241 e->vinsertps(xmmA, xmmB, xmmC, 0);
4242 e->vinsertps(xmmA, xmmB, anyptr_gpC, 0);
4243 e->vmaxpd(xmmA, xmmB, xmmC);
4244 e->vmaxpd(xmmA, xmmB, anyptr_gpC);
4245 e->vmaxpd(ymmA, ymmB, ymmC);
4246 e->vmaxpd(ymmA, ymmB, anyptr_gpC);
4247 e->vmaxpd(zmmA, zmmB, zmmC);
4248 e->vmaxpd(zmmA, zmmB, anyptr_gpC);
4249 e->vmaxps(xmmA, xmmB, xmmC);
4250 e->vmaxps(xmmA, xmmB, anyptr_gpC);
4251 e->vmaxps(ymmA, ymmB, ymmC);
4252 e->vmaxps(ymmA, ymmB, anyptr_gpC);
4253 e->vmaxps(zmmA, zmmB, zmmC);
4254 e->vmaxps(zmmA, zmmB, anyptr_gpC);
4255 e->vmaxsd(xmmA, xmmB, xmmC);
4256 e->vmaxsd(xmmA, xmmB, anyptr_gpC);
4257 e->vmaxss(xmmA, xmmB, xmmC);
4258 e->vmaxss(xmmA, xmmB, anyptr_gpC);
4259 e->vminpd(xmmA, xmmB, xmmC);
4260 e->vminpd(xmmA, xmmB, anyptr_gpC);
4261 e->vminpd(ymmA, ymmB, ymmC);
4262 e->vminpd(ymmA, ymmB, anyptr_gpC);
4263 e->vminpd(zmmA, zmmB, zmmC);
4264 e->vminpd(zmmA, zmmB, anyptr_gpC);
4265 e->vminps(xmmA, xmmB, xmmC);
4266 e->vminps(xmmA, xmmB, anyptr_gpC);
4267 e->vminps(ymmA, ymmB, ymmC);
4268 e->vminps(ymmA, ymmB, anyptr_gpC);
4269 e->vminps(zmmA, zmmB, zmmC);
4270 e->vminps(zmmA, zmmB, anyptr_gpC);
4271 e->vminsd(xmmA, xmmB, xmmC);
4272 e->vminsd(xmmA, xmmB, anyptr_gpC);
4273 e->vminss(xmmA, xmmB, xmmC);
4274 e->vminss(xmmA, xmmB, anyptr_gpC);
4275 e->vmovapd(xmmA, xmmB);
4276 e->vmovapd(xmmA, anyptr_gpB);
4277 e->vmovapd(xmmA, xmmB);
4278 e->vmovapd(anyptr_gpA, xmmB);
4279 e->vmovapd(ymmA, ymmB);
4280 e->vmovapd(ymmA, anyptr_gpB);
4281 e->vmovapd(ymmA, ymmB);
4282 e->vmovapd(anyptr_gpA, ymmB);
4283 e->vmovapd(zmmA, zmmB);
4284 e->vmovapd(zmmA, anyptr_gpB);
4285 e->vmovapd(zmmA, zmmB);
4286 e->vmovapd(anyptr_gpA, zmmB);
4287 e->vmovaps(xmmA, xmmB);
4288 e->vmovaps(xmmA, anyptr_gpB);
4289 e->vmovaps(xmmA, xmmB);
4290 e->vmovaps(anyptr_gpA, xmmB);
4291 e->vmovaps(ymmA, ymmB);
4292 e->vmovaps(ymmA, anyptr_gpB);
4293 e->vmovaps(ymmA, ymmB);
4294 e->vmovaps(anyptr_gpA, ymmB);
4295 e->vmovaps(zmmA, zmmB);
4296 e->vmovaps(zmmA, anyptr_gpB);
4297 e->vmovaps(zmmA, zmmB);
4298 e->vmovaps(anyptr_gpA, zmmB);
4299 e->vmovd(gdA, xmmB);
4300 e->vmovd(gzA, xmmB);
4301 e->vmovd(anyptr_gpA, xmmB);
4302 e->vmovd(xmmA, gdB);
4303 e->vmovd(xmmA, gzB);
4304 e->vmovd(xmmA, anyptr_gpB);
4305 e->vmovddup(xmmA, xmmB);
4306 e->vmovddup(xmmA, anyptr_gpB);
4307 e->vmovddup(ymmA, ymmB);
4308 e->vmovddup(ymmA, anyptr_gpB);
4309 e->vmovddup(zmmA, zmmB);
4310 e->vmovddup(zmmA, anyptr_gpB);
4311 e->vmovdqa32(xmmA, xmmB);
4312 e->vmovdqa32(xmmA, anyptr_gpB);
4313 e->vmovdqa32(xmmA, xmmB);
4314 e->vmovdqa32(anyptr_gpA, xmmB);
4315 e->vmovdqa32(ymmA, ymmB);
4316 e->vmovdqa32(ymmA, anyptr_gpB);
4317 e->vmovdqa32(ymmA, ymmB);
4318 e->vmovdqa32(anyptr_gpA, ymmB);
4319 e->vmovdqa32(zmmA, zmmB);
4320 e->vmovdqa32(zmmA, anyptr_gpB);
4321 e->vmovdqa32(zmmA, zmmB);
4322 e->vmovdqa32(anyptr_gpA, zmmB);
4323 e->vmovdqa64(xmmA, xmmB);
4324 e->vmovdqa64(xmmA, anyptr_gpB);
4325 e->vmovdqa64(xmmA, xmmB);
4326 e->vmovdqa64(anyptr_gpA, xmmB);
4327 e->vmovdqa64(ymmA, ymmB);
4328 e->vmovdqa64(ymmA, anyptr_gpB);
4329 e->vmovdqa64(ymmA, ymmB);
4330 e->vmovdqa64(anyptr_gpA, ymmB);
4331 e->vmovdqa64(zmmA, zmmB);
4332 e->vmovdqa64(zmmA, anyptr_gpB);
4333 e->vmovdqa64(zmmA, zmmB);
4334 e->vmovdqa64(anyptr_gpA, zmmB);
4335 e->vmovdqu16(xmmA, xmmB);
4336 e->vmovdqu16(xmmA, anyptr_gpB);
4337 e->vmovdqu16(xmmA, xmmB);
4338 e->vmovdqu16(anyptr_gpA, xmmB);
4339 e->vmovdqu16(ymmA, ymmB);
4340 e->vmovdqu16(ymmA, anyptr_gpB);
4341 e->vmovdqu16(ymmA, ymmB);
4342 e->vmovdqu16(anyptr_gpA, ymmB);
4343 e->vmovdqu16(zmmA, zmmB);
4344 e->vmovdqu16(zmmA, anyptr_gpB);
4345 e->vmovdqu16(zmmA, zmmB);
4346 e->vmovdqu16(anyptr_gpA, zmmB);
4347 e->vmovdqu32(xmmA, xmmB);
4348 e->vmovdqu32(xmmA, anyptr_gpB);
4349 e->vmovdqu32(xmmA, xmmB);
4350 e->vmovdqu32(anyptr_gpA, xmmB);
4351 e->vmovdqu32(ymmA, ymmB);
4352 e->vmovdqu32(ymmA, anyptr_gpB);
4353 e->vmovdqu32(ymmA, ymmB);
4354 e->vmovdqu32(anyptr_gpA, ymmB);
4355 e->vmovdqu32(zmmA, zmmB);
4356 e->vmovdqu32(zmmA, anyptr_gpB);
4357 e->vmovdqu32(zmmA, zmmB);
4358 e->vmovdqu32(anyptr_gpA, zmmB);
4359 e->vmovdqu64(xmmA, xmmB);
4360 e->vmovdqu64(xmmA, anyptr_gpB);
4361 e->vmovdqu64(xmmA, xmmB);
4362 e->vmovdqu64(anyptr_gpA, xmmB);
4363 e->vmovdqu64(ymmA, ymmB);
4364 e->vmovdqu64(ymmA, anyptr_gpB);
4365 e->vmovdqu64(ymmA, ymmB);
4366 e->vmovdqu64(anyptr_gpA, ymmB);
4367 e->vmovdqu64(zmmA, zmmB);
4368 e->vmovdqu64(zmmA, anyptr_gpB);
4369 e->vmovdqu64(zmmA, zmmB);
4370 e->vmovdqu64(anyptr_gpA, zmmB);
4371 e->vmovdqu8(xmmA, xmmB);
4372 e->vmovdqu8(xmmA, anyptr_gpB);
4373 e->vmovdqu8(xmmA, xmmB);
4374 e->vmovdqu8(anyptr_gpA, xmmB);
4375 e->vmovdqu8(ymmA, ymmB);
4376 e->vmovdqu8(ymmA, anyptr_gpB);
4377 e->vmovdqu8(ymmA, ymmB);
4378 e->vmovdqu8(anyptr_gpA, ymmB);
4379 e->vmovdqu8(zmmA, zmmB);
4380 e->vmovdqu8(zmmA, anyptr_gpB);
4381 e->vmovdqu8(zmmA, zmmB);
4382 e->vmovdqu8(anyptr_gpA, zmmB);
4383 e->vmovhlps(xmmA, xmmB, xmmC);
4384 e->vmovhpd(anyptr_gpA, xmmB);
4385 e->vmovhpd(xmmA, xmmB, anyptr_gpC);
4386 e->vmovhps(anyptr_gpA, xmmB);
4387 e->vmovhps(xmmA, xmmB, anyptr_gpC);
4388 e->vmovlhps(xmmA, xmmB, xmmC);
4389 e->vmovlpd(anyptr_gpA, xmmB);
4390 e->vmovlpd(xmmA, xmmB, anyptr_gpC);
4391 e->vmovlps(anyptr_gpA, xmmB);
4392 e->vmovlps(xmmA, xmmB, anyptr_gpC);
4393 e->vmovntdq(anyptr_gpA, xmmB);
4394 e->vmovntdq(anyptr_gpA, ymmB);
4395 e->vmovntdq(anyptr_gpA, zmmB);
4396 e->vmovntdqa(xmmA, anyptr_gpB);
4397 e->vmovntdqa(ymmA, anyptr_gpB);
4398 e->vmovntdqa(zmmA, anyptr_gpB);
4399 e->vmovntpd(anyptr_gpA, xmmB);
4400 e->vmovntpd(anyptr_gpA, ymmB);
4401 e->vmovntpd(anyptr_gpA, zmmB);
4402 e->vmovntps(anyptr_gpA, xmmB);
4403 e->vmovntps(anyptr_gpA, ymmB);
4404 e->vmovntps(anyptr_gpA, zmmB);
4405 if (isX64) e->vmovq(gzA, xmmB);
4406 if (isX64) e->vmovq(xmmA, gzB);
4407 e->vmovq(anyptr_gpA, xmmB);
4408 e->vmovq(xmmA, anyptr_gpB);
4409 e->vmovq(xmmA, xmmB);
4410 e->vmovq(xmmA, anyptr_gpB);
4411 e->vmovq(xmmA, xmmB);
4412 e->vmovq(anyptr_gpA, xmmB);
4413 e->vmovsd(anyptr_gpA, xmmB);
4414 e->vmovsd(xmmA, anyptr_gpB);
4415 e->vmovsd(xmmA, xmmB, xmmC);
4416 e->vmovsd(xmmA, xmmB, xmmC);
4417 e->vmovshdup(xmmA, xmmB);
4418 e->vmovshdup(xmmA, anyptr_gpB);
4419 e->vmovshdup(ymmA, ymmB);
4420 e->vmovshdup(ymmA, anyptr_gpB);
4421 e->vmovshdup(zmmA, zmmB);
4422 e->vmovshdup(zmmA, anyptr_gpB);
4423 e->vmovsldup(xmmA, xmmB);
4424 e->vmovsldup(xmmA, anyptr_gpB);
4425 e->vmovsldup(ymmA, ymmB);
4426 e->vmovsldup(ymmA, anyptr_gpB);
4427 e->vmovsldup(zmmA, zmmB);
4428 e->vmovsldup(zmmA, anyptr_gpB);
4429 e->vmovss(anyptr_gpA, xmmB);
4430 e->vmovss(xmmA, anyptr_gpB);
4431 e->vmovss(xmmA, xmmB, xmmC);
4432 e->vmovss(xmmA, xmmB, xmmC);
4433 e->vmovupd(xmmA, xmmB);
4434 e->vmovupd(xmmA, anyptr_gpB);
4435 e->vmovupd(xmmA, xmmB);
4436 e->vmovupd(anyptr_gpA, xmmB);
4437 e->vmovupd(ymmA, ymmB);
4438 e->vmovupd(ymmA, anyptr_gpB);
4439 e->vmovupd(ymmA, ymmB);
4440 e->vmovupd(anyptr_gpA, ymmB);
4441 e->vmovupd(zmmA, zmmB);
4442 e->vmovupd(zmmA, anyptr_gpB);
4443 e->vmovupd(zmmA, zmmB);
4444 e->vmovupd(anyptr_gpA, zmmB);
4445 e->vmovups(xmmA, xmmB);
4446 e->vmovups(xmmA, anyptr_gpB);
4447 e->vmovups(xmmA, xmmB);
4448 e->vmovups(anyptr_gpA, xmmB);
4449 e->vmovups(ymmA, ymmB);
4450 e->vmovups(ymmA, anyptr_gpB);
4451 e->vmovups(ymmA, ymmB);
4452 e->vmovups(anyptr_gpA, ymmB);
4453 e->vmovups(zmmA, zmmB);
4454 e->vmovups(zmmA, anyptr_gpB);
4455 e->vmovups(zmmA, zmmB);
4456 e->vmovups(anyptr_gpA, zmmB);
4457 e->vmulpd(xmmA, xmmB, xmmC);
4458 e->vmulpd(xmmA, xmmB, anyptr_gpC);
4459 e->vmulpd(ymmA, ymmB, ymmC);
4460 e->vmulpd(ymmA, ymmB, anyptr_gpC);
4461 e->vmulpd(zmmA, zmmB, zmmC);
4462 e->vmulpd(zmmA, zmmB, anyptr_gpC);
4463 e->vmulps(xmmA, xmmB, xmmC);
4464 e->vmulps(xmmA, xmmB, anyptr_gpC);
4465 e->vmulps(ymmA, ymmB, ymmC);
4466 e->vmulps(ymmA, ymmB, anyptr_gpC);
4467 e->vmulps(zmmA, zmmB, zmmC);
4468 e->vmulps(zmmA, zmmB, anyptr_gpC);
4469 e->vmulsd(xmmA, xmmB, xmmC);
4470 e->vmulsd(xmmA, xmmB, anyptr_gpC);
4471 e->vmulss(xmmA, xmmB, xmmC);
4472 e->vmulss(xmmA, xmmB, anyptr_gpC);
4473 e->vorpd(xmmA, xmmB, xmmC);
4474 e->vorpd(xmmA, xmmB, anyptr_gpC);
4475 e->vorpd(ymmA, ymmB, ymmC);
4476 e->vorpd(ymmA, ymmB, anyptr_gpC);
4477 e->vorpd(zmmA, zmmB, zmmC);
4478 e->vorpd(zmmA, zmmB, anyptr_gpC);
4479 e->vorps(xmmA, xmmB, xmmC);
4480 e->vorps(xmmA, xmmB, anyptr_gpC);
4481 e->vorps(ymmA, ymmB, ymmC);
4482 e->vorps(ymmA, ymmB, anyptr_gpC);
4483 e->vorps(zmmA, zmmB, zmmC);
4484 e->vorps(zmmA, zmmB, anyptr_gpC);
4485 e->vpabsb(xmmA, xmmB);
4486 e->vpabsb(xmmA, anyptr_gpB);
4487 e->vpabsb(ymmA, ymmB);
4488 e->vpabsb(ymmA, anyptr_gpB);
4489 e->vpabsb(zmmA, zmmB);
4490 e->vpabsb(zmmA, anyptr_gpB);
4491 e->vpabsd(xmmA, xmmB);
4492 e->vpabsd(xmmA, anyptr_gpB);
4493 e->vpabsd(ymmA, ymmB);
4494 e->vpabsd(ymmA, anyptr_gpB);
4495 e->vpabsd(zmmA, zmmB);
4496 e->vpabsd(zmmA, anyptr_gpB);
4497 e->vpabsq(xmmA, xmmB);
4498 e->vpabsq(xmmA, anyptr_gpB);
4499 e->vpabsq(ymmA, ymmB);
4500 e->vpabsq(ymmA, anyptr_gpB);
4501 e->vpabsq(zmmA, zmmB);
4502 e->vpabsq(zmmA, anyptr_gpB);
4503 e->vpabsw(xmmA, xmmB);
4504 e->vpabsw(xmmA, anyptr_gpB);
4505 e->vpabsw(ymmA, ymmB);
4506 e->vpabsw(ymmA, anyptr_gpB);
4507 e->vpabsw(zmmA, zmmB);
4508 e->vpabsw(zmmA, anyptr_gpB);
4509 e->vpackssdw(xmmA, xmmB, xmmC);
4510 e->vpackssdw(xmmA, xmmB, anyptr_gpC);
4511 e->vpackssdw(ymmA, ymmB, ymmC);
4512 e->vpackssdw(ymmA, ymmB, anyptr_gpC);
4513 e->vpackssdw(zmmA, zmmB, zmmC);
4514 e->vpackssdw(zmmA, zmmB, anyptr_gpC);
4515 e->vpacksswb(xmmA, xmmB, xmmC);
4516 e->vpacksswb(xmmA, xmmB, anyptr_gpC);
4517 e->vpacksswb(ymmA, ymmB, ymmC);
4518 e->vpacksswb(ymmA, ymmB, anyptr_gpC);
4519 e->vpacksswb(zmmA, zmmB, zmmC);
4520 e->vpacksswb(zmmA, zmmB, anyptr_gpC);
4521 e->vpackusdw(xmmA, xmmB, xmmC);
4522 e->vpackusdw(xmmA, xmmB, anyptr_gpC);
4523 e->vpackusdw(ymmA, ymmB, ymmC);
4524 e->vpackusdw(ymmA, ymmB, anyptr_gpC);
4525 e->vpackusdw(zmmA, zmmB, zmmC);
4526 e->vpackusdw(zmmA, zmmB, anyptr_gpC);
4527 e->vpackuswb(xmmA, xmmB, xmmC);
4528 e->vpackuswb(xmmA, xmmB, anyptr_gpC);
4529 e->vpackuswb(ymmA, ymmB, ymmC);
4530 e->vpackuswb(ymmA, ymmB, anyptr_gpC);
4531 e->vpackuswb(zmmA, zmmB, zmmC);
4532 e->vpackuswb(zmmA, zmmB, anyptr_gpC);
4533 e->vpaddb(xmmA, xmmB, xmmC);
4534 e->vpaddb(xmmA, xmmB, anyptr_gpC);
4535 e->vpaddb(ymmA, ymmB, ymmC);
4536 e->vpaddb(ymmA, ymmB, anyptr_gpC);
4537 e->vpaddb(zmmA, zmmB, zmmC);
4538 e->vpaddb(zmmA, zmmB, anyptr_gpC);
4539 e->vpaddd(xmmA, xmmB, xmmC);
4540 e->vpaddd(xmmA, xmmB, anyptr_gpC);
4541 e->vpaddd(ymmA, ymmB, ymmC);
4542 e->vpaddd(ymmA, ymmB, anyptr_gpC);
4543 e->vpaddd(zmmA, zmmB, zmmC);
4544 e->vpaddd(zmmA, zmmB, anyptr_gpC);
4545 e->vpaddq(xmmA, xmmB, xmmC);
4546 e->vpaddq(xmmA, xmmB, anyptr_gpC);
4547 e->vpaddq(ymmA, ymmB, ymmC);
4548 e->vpaddq(ymmA, ymmB, anyptr_gpC);
4549 e->vpaddq(zmmA, zmmB, zmmC);
4550 e->vpaddq(zmmA, zmmB, anyptr_gpC);
4551 e->vpaddsb(xmmA, xmmB, xmmC);
4552 e->vpaddsb(xmmA, xmmB, anyptr_gpC);
4553 e->vpaddsb(ymmA, ymmB, ymmC);
4554 e->vpaddsb(ymmA, ymmB, anyptr_gpC);
4555 e->vpaddsb(zmmA, zmmB, zmmC);
4556 e->vpaddsb(zmmA, zmmB, anyptr_gpC);
4557 e->vpaddsw(xmmA, xmmB, xmmC);
4558 e->vpaddsw(xmmA, xmmB, anyptr_gpC);
4559 e->vpaddsw(ymmA, ymmB, ymmC);
4560 e->vpaddsw(ymmA, ymmB, anyptr_gpC);
4561 e->vpaddsw(zmmA, zmmB, zmmC);
4562 e->vpaddsw(zmmA, zmmB, anyptr_gpC);
4563 e->vpaddusb(xmmA, xmmB, xmmC);
4564 e->vpaddusb(xmmA, xmmB, anyptr_gpC);
4565 e->vpaddusb(ymmA, ymmB, ymmC);
4566 e->vpaddusb(ymmA, ymmB, anyptr_gpC);
4567 e->vpaddusb(zmmA, zmmB, zmmC);
4568 e->vpaddusb(zmmA, zmmB, anyptr_gpC);
4569 e->vpaddusw(xmmA, xmmB, xmmC);
4570 e->vpaddusw(xmmA, xmmB, anyptr_gpC);
4571 e->vpaddusw(ymmA, ymmB, ymmC);
4572 e->vpaddusw(ymmA, ymmB, anyptr_gpC);
4573 e->vpaddusw(zmmA, zmmB, zmmC);
4574 e->vpaddusw(zmmA, zmmB, anyptr_gpC);
4575 e->vpaddw(xmmA, xmmB, xmmC);
4576 e->vpaddw(xmmA, xmmB, anyptr_gpC);
4577 e->vpaddw(ymmA, ymmB, ymmC);
4578 e->vpaddw(ymmA, ymmB, anyptr_gpC);
4579 e->vpaddw(zmmA, zmmB, zmmC);
4580 e->vpaddw(zmmA, zmmB, anyptr_gpC);
4581 e->vpalignr(xmmA, xmmB, xmmC, 0);
4582 e->vpalignr(xmmA, xmmB, anyptr_gpC, 0);
4583 e->vpalignr(ymmA, ymmB, ymmC, 0);
4584 e->vpalignr(ymmA, ymmB, anyptr_gpC, 0);
4585 e->vpalignr(zmmA, zmmB, zmmC, 0);
4586 e->vpalignr(zmmA, zmmB, anyptr_gpC, 0);
4587 e->vpandd(xmmA, xmmB, xmmC);
4588 e->vpandd(xmmA, xmmB, anyptr_gpC);
4589 e->vpandd(ymmA, ymmB, ymmC);
4590 e->vpandd(ymmA, ymmB, anyptr_gpC);
4591 e->vpandd(zmmA, zmmB, zmmC);
4592 e->vpandd(zmmA, zmmB, anyptr_gpC);
4593 e->vpandnd(xmmA, xmmB, xmmC);
4594 e->vpandnd(xmmA, xmmB, anyptr_gpC);
4595 e->vpandnd(ymmA, ymmB, ymmC);
4596 e->vpandnd(ymmA, ymmB, anyptr_gpC);
4597 e->vpandnd(zmmA, zmmB, zmmC);
4598 e->vpandnd(zmmA, zmmB, anyptr_gpC);
4599 e->vpandnq(xmmA, xmmB, xmmC);
4600 e->vpandnq(xmmA, xmmB, anyptr_gpC);
4601 e->vpandnq(ymmA, ymmB, ymmC);
4602 e->vpandnq(ymmA, ymmB, anyptr_gpC);
4603 e->vpandnq(zmmA, zmmB, zmmC);
4604 e->vpandnq(zmmA, zmmB, anyptr_gpC);
4605 e->vpandq(xmmA, xmmB, xmmC);
4606 e->vpandq(xmmA, xmmB, anyptr_gpC);
4607 e->vpandq(ymmA, ymmB, ymmC);
4608 e->vpandq(ymmA, ymmB, anyptr_gpC);
4609 e->vpandq(zmmA, zmmB, zmmC);
4610 e->vpandq(zmmA, zmmB, anyptr_gpC);
4611 e->vpavgb(xmmA, xmmB, xmmC);
4612 e->vpavgb(xmmA, xmmB, anyptr_gpC);
4613 e->vpavgb(ymmA, ymmB, ymmC);
4614 e->vpavgb(ymmA, ymmB, anyptr_gpC);
4615 e->vpavgb(zmmA, zmmB, zmmC);
4616 e->vpavgb(zmmA, zmmB, anyptr_gpC);
4617 e->vpavgw(xmmA, xmmB, xmmC);
4618 e->vpavgw(xmmA, xmmB, anyptr_gpC);
4619 e->vpavgw(ymmA, ymmB, ymmC);
4620 e->vpavgw(ymmA, ymmB, anyptr_gpC);
4621 e->vpavgw(zmmA, zmmB, zmmC);
4622 e->vpavgw(zmmA, zmmB, anyptr_gpC);
4623 e->vpbroadcastb(xmmA, gdB);
4624 e->vpbroadcastb(xmmA, gzB);
4625 e->vpbroadcastb(xmmA, xmmB);
4626 e->vpbroadcastb(xmmA, anyptr_gpB);
4627 e->vpbroadcastb(ymmA, gdB);
4628 e->vpbroadcastb(ymmA, gzB);
4629 e->vpbroadcastb(ymmA, xmmB);
4630 e->vpbroadcastb(ymmA, anyptr_gpB);
4631 e->vpbroadcastb(zmmA, gdB);
4632 e->vpbroadcastb(zmmA, gzB);
4633 e->vpbroadcastb(zmmA, xmmB);
4634 e->vpbroadcastb(zmmA, anyptr_gpB);
4635 e->vpbroadcastd(xmmA, gdB);
4636 e->vpbroadcastd(xmmA, gzB);
4637 e->vpbroadcastd(xmmA, xmmB);
4638 e->vpbroadcastd(xmmA, anyptr_gpB);
4639 e->vpbroadcastd(ymmA, gdB);
4640 e->vpbroadcastd(ymmA, gzB);
4641 e->vpbroadcastd(ymmA, xmmB);
4642 e->vpbroadcastd(ymmA, anyptr_gpB);
4643 e->vpbroadcastd(zmmA, gdB);
4644 e->vpbroadcastd(zmmA, gzB);
4645 e->vpbroadcastd(zmmA, xmmB);
4646 e->vpbroadcastd(zmmA, anyptr_gpB);
4647 e->vpbroadcastmb2d(xmmA, kB);
4648 e->vpbroadcastmb2d(ymmA, kB);
4649 e->vpbroadcastmb2d(zmmA, kB);
4650 e->vpbroadcastmb2q(xmmA, kB);
4651 e->vpbroadcastmb2q(ymmA, kB);
4652 e->vpbroadcastmb2q(zmmA, kB);
4653 if (isX64) e->vpbroadcastq(xmmA, gzB);
4654 e->vpbroadcastq(xmmA, xmmB);
4655 e->vpbroadcastq(xmmA, anyptr_gpB);
4656 if (isX64) e->vpbroadcastq(ymmA, gzB);
4657 e->vpbroadcastq(ymmA, xmmB);
4658 e->vpbroadcastq(ymmA, anyptr_gpB);
4659 if (isX64) e->vpbroadcastq(zmmA, gzB);
4660 e->vpbroadcastq(zmmA, xmmB);
4661 e->vpbroadcastq(zmmA, anyptr_gpB);
4662 e->vpbroadcastw(xmmA, gdB);
4663 e->vpbroadcastw(xmmA, gzB);
4664 e->vpbroadcastw(xmmA, xmmB);
4665 e->vpbroadcastw(xmmA, anyptr_gpB);
4666 e->vpbroadcastw(ymmA, gdB);
4667 e->vpbroadcastw(ymmA, gzB);
4668 e->vpbroadcastw(ymmA, xmmB);
4669 e->vpbroadcastw(ymmA, anyptr_gpB);
4670 e->vpbroadcastw(zmmA, gdB);
4671 e->vpbroadcastw(zmmA, gzB);
4672 e->vpbroadcastw(zmmA, xmmB);
4673 e->vpbroadcastw(zmmA, anyptr_gpB);
4674 e->vpcmpb(kA, xmmB, xmmC, 0);
4675 e->vpcmpb(kA, xmmB, anyptr_gpC, 0);
4676 e->vpcmpb(kA, ymmB, ymmC, 0);
4677 e->vpcmpb(kA, ymmB, anyptr_gpC, 0);
4678 e->vpcmpb(kA, zmmB, zmmC, 0);
4679 e->vpcmpb(kA, zmmB, anyptr_gpC, 0);
4680 e->vpcmpd(kA, xmmB, xmmC, 0);
4681 e->vpcmpd(kA, xmmB, anyptr_gpC, 0);
4682 e->vpcmpd(kA, ymmB, ymmC, 0);
4683 e->vpcmpd(kA, ymmB, anyptr_gpC, 0);
4684 e->vpcmpd(kA, zmmB, zmmC, 0);
4685 e->vpcmpd(kA, zmmB, anyptr_gpC, 0);
4686 e->vpcmpeqb(kA, xmmB, xmmC);
4687 e->vpcmpeqb(kA, xmmB, anyptr_gpC);
4688 e->vpcmpeqb(kA, ymmB, ymmC);
4689 e->vpcmpeqb(kA, ymmB, anyptr_gpC);
4690 e->vpcmpeqb(kA, zmmB, zmmC);
4691 e->vpcmpeqb(kA, zmmB, anyptr_gpC);
4692 e->vpcmpeqd(kA, xmmB, xmmC);
4693 e->vpcmpeqd(kA, xmmB, anyptr_gpC);
4694 e->vpcmpeqd(kA, ymmB, ymmC);
4695 e->vpcmpeqd(kA, ymmB, anyptr_gpC);
4696 e->vpcmpeqd(kA, zmmB, zmmC);
4697 e->vpcmpeqd(kA, zmmB, anyptr_gpC);
4698 e->vpcmpeqq(kA, xmmB, xmmC);
4699 e->vpcmpeqq(kA, xmmB, anyptr_gpC);
4700 e->vpcmpeqq(kA, ymmB, ymmC);
4701 e->vpcmpeqq(kA, ymmB, anyptr_gpC);
4702 e->vpcmpeqq(kA, zmmB, zmmC);
4703 e->vpcmpeqq(kA, zmmB, anyptr_gpC);
4704 e->vpcmpeqw(kA, xmmB, xmmC);
4705 e->vpcmpeqw(kA, xmmB, anyptr_gpC);
4706 e->vpcmpeqw(kA, ymmB, ymmC);
4707 e->vpcmpeqw(kA, ymmB, anyptr_gpC);
4708 e->vpcmpeqw(kA, zmmB, zmmC);
4709 e->vpcmpeqw(kA, zmmB, anyptr_gpC);
4710 e->vpcmpgtb(kA, xmmB, xmmC);
4711 e->vpcmpgtb(kA, xmmB, anyptr_gpC);
4712 e->vpcmpgtb(kA, ymmB, ymmC);
4713 e->vpcmpgtb(kA, ymmB, anyptr_gpC);
4714 e->vpcmpgtb(kA, zmmB, zmmC);
4715 e->vpcmpgtb(kA, zmmB, anyptr_gpC);
4716 e->vpcmpgtd(kA, xmmB, xmmC);
4717 e->vpcmpgtd(kA, xmmB, anyptr_gpC);
4718 e->vpcmpgtd(kA, ymmB, ymmC);
4719 e->vpcmpgtd(kA, ymmB, anyptr_gpC);
4720 e->vpcmpgtd(kA, zmmB, zmmC);
4721 e->vpcmpgtd(kA, zmmB, anyptr_gpC);
4722 e->vpcmpgtq(kA, xmmB, xmmC);
4723 e->vpcmpgtq(kA, xmmB, anyptr_gpC);
4724 e->vpcmpgtq(kA, ymmB, ymmC);
4725 e->vpcmpgtq(kA, ymmB, anyptr_gpC);
4726 e->vpcmpgtq(kA, zmmB, zmmC);
4727 e->vpcmpgtq(kA, zmmB, anyptr_gpC);
4728 e->vpcmpgtw(kA, xmmB, xmmC);
4729 e->vpcmpgtw(kA, xmmB, anyptr_gpC);
4730 e->vpcmpgtw(kA, ymmB, ymmC);
4731 e->vpcmpgtw(kA, ymmB, anyptr_gpC);
4732 e->vpcmpgtw(kA, zmmB, zmmC);
4733 e->vpcmpgtw(kA, zmmB, anyptr_gpC);
4734 e->vpcmpq(kA, xmmB, xmmC, 0);
4735 e->vpcmpq(kA, xmmB, anyptr_gpC, 0);
4736 e->vpcmpq(kA, ymmB, ymmC, 0);
4737 e->vpcmpq(kA, ymmB, anyptr_gpC, 0);
4738 e->vpcmpq(kA, zmmB, zmmC, 0);
4739 e->vpcmpq(kA, zmmB, anyptr_gpC, 0);
4740 e->vpcmpub(kA, xmmB, xmmC, 0);
4741 e->vpcmpub(kA, xmmB, anyptr_gpC, 0);
4742 e->vpcmpub(kA, ymmB, ymmC, 0);
4743 e->vpcmpub(kA, ymmB, anyptr_gpC, 0);
4744 e->vpcmpub(kA, zmmB, zmmC, 0);
4745 e->vpcmpub(kA, zmmB, anyptr_gpC, 0);
4746 e->vpcmpud(kA, xmmB, xmmC, 0);
4747 e->vpcmpud(kA, xmmB, anyptr_gpC, 0);
4748 e->vpcmpud(kA, ymmB, ymmC, 0);
4749 e->vpcmpud(kA, ymmB, anyptr_gpC, 0);
4750 e->vpcmpud(kA, zmmB, zmmC, 0);
4751 e->vpcmpud(kA, zmmB, anyptr_gpC, 0);
4752 e->vpcmpuq(kA, xmmB, xmmC, 0);
4753 e->vpcmpuq(kA, xmmB, anyptr_gpC, 0);
4754 e->vpcmpuq(kA, ymmB, ymmC, 0);
4755 e->vpcmpuq(kA, ymmB, anyptr_gpC, 0);
4756 e->vpcmpuq(kA, zmmB, zmmC, 0);
4757 e->vpcmpuq(kA, zmmB, anyptr_gpC, 0);
4758 e->vpcmpuw(kA, xmmB, xmmC, 0);
4759 e->vpcmpuw(kA, xmmB, anyptr_gpC, 0);
4760 e->vpcmpuw(kA, ymmB, ymmC, 0);
4761 e->vpcmpuw(kA, ymmB, anyptr_gpC, 0);
4762 e->vpcmpuw(kA, zmmB, zmmC, 0);
4763 e->vpcmpuw(kA, zmmB, anyptr_gpC, 0);
4764 e->vpcmpw(kA, xmmB, xmmC, 0);
4765 e->vpcmpw(kA, xmmB, anyptr_gpC, 0);
4766 e->vpcmpw(kA, ymmB, ymmC, 0);
4767 e->vpcmpw(kA, ymmB, anyptr_gpC, 0);
4768 e->vpcmpw(kA, zmmB, zmmC, 0);
4769 e->vpcmpw(kA, zmmB, anyptr_gpC, 0);
4770 e->vpcompressd(xmmA, xmmB);
4771 e->vpcompressd(anyptr_gpA, xmmB);
4772 e->vpcompressd(ymmA, ymmB);
4773 e->vpcompressd(anyptr_gpA, ymmB);
4774 e->vpcompressd(zmmA, zmmB);
4775 e->vpcompressd(anyptr_gpA, zmmB);
4776 e->vpcompressq(xmmA, xmmB);
4777 e->vpcompressq(anyptr_gpA, xmmB);
4778 e->vpcompressq(ymmA, ymmB);
4779 e->vpcompressq(anyptr_gpA, ymmB);
4780 e->vpcompressq(zmmA, zmmB);
4781 e->vpcompressq(anyptr_gpA, zmmB);
4782 e->vpconflictd(xmmA, xmmB);
4783 e->vpconflictd(xmmA, anyptr_gpB);
4784 e->vpconflictd(ymmA, ymmB);
4785 e->vpconflictd(ymmA, anyptr_gpB);
4786 e->vpconflictd(zmmA, zmmB);
4787 e->vpconflictd(zmmA, anyptr_gpB);
4788 e->vpconflictq(xmmA, xmmB);
4789 e->vpconflictq(xmmA, anyptr_gpB);
4790 e->vpconflictq(ymmA, ymmB);
4791 e->vpconflictq(ymmA, anyptr_gpB);
4792 e->vpconflictq(zmmA, zmmB);
4793 e->vpconflictq(zmmA, anyptr_gpB);
4794 e->vpermb(xmmA, xmmB, xmmC);
4795 e->vpermb(xmmA, xmmB, anyptr_gpC);
4796 e->vpermb(ymmA, ymmB, ymmC);
4797 e->vpermb(ymmA, ymmB, anyptr_gpC);
4798 e->vpermb(zmmA, zmmB, zmmC);
4799 e->vpermb(zmmA, zmmB, anyptr_gpC);
4800 e->vpermd(ymmA, ymmB, ymmC);
4801 e->vpermd(ymmA, ymmB, anyptr_gpC);
4802 e->vpermd(zmmA, zmmB, zmmC);
4803 e->vpermd(zmmA, zmmB, anyptr_gpC);
4804 e->vpermi2b(xmmA, xmmB, xmmC);
4805 e->vpermi2b(xmmA, xmmB, anyptr_gpC);
4806 e->vpermi2b(ymmA, ymmB, ymmC);
4807 e->vpermi2b(ymmA, ymmB, anyptr_gpC);
4808 e->vpermi2b(zmmA, zmmB, zmmC);
4809 e->vpermi2b(zmmA, zmmB, anyptr_gpC);
4810 e->vpermi2d(xmmA, xmmB, xmmC);
4811 e->vpermi2d(xmmA, xmmB, anyptr_gpC);
4812 e->vpermi2d(ymmA, ymmB, ymmC);
4813 e->vpermi2d(ymmA, ymmB, anyptr_gpC);
4814 e->vpermi2d(zmmA, zmmB, zmmC);
4815 e->vpermi2d(zmmA, zmmB, anyptr_gpC);
4816 e->vpermi2pd(xmmA, xmmB, xmmC);
4817 e->vpermi2pd(xmmA, xmmB, anyptr_gpC);
4818 e->vpermi2pd(ymmA, ymmB, ymmC);
4819 e->vpermi2pd(ymmA, ymmB, anyptr_gpC);
4820 e->vpermi2pd(zmmA, zmmB, zmmC);
4821 e->vpermi2pd(zmmA, zmmB, anyptr_gpC);
4822 e->vpermi2ps(xmmA, xmmB, xmmC);
4823 e->vpermi2ps(xmmA, xmmB, anyptr_gpC);
4824 e->vpermi2ps(ymmA, ymmB, ymmC);
4825 e->vpermi2ps(ymmA, ymmB, anyptr_gpC);
4826 e->vpermi2ps(zmmA, zmmB, zmmC);
4827 e->vpermi2ps(zmmA, zmmB, anyptr_gpC);
4828 e->vpermi2q(xmmA, xmmB, xmmC);
4829 e->vpermi2q(xmmA, xmmB, anyptr_gpC);
4830 e->vpermi2q(ymmA, ymmB, ymmC);
4831 e->vpermi2q(ymmA, ymmB, anyptr_gpC);
4832 e->vpermi2q(zmmA, zmmB, zmmC);
4833 e->vpermi2q(zmmA, zmmB, anyptr_gpC);
4834 e->vpermi2w(xmmA, xmmB, xmmC);
4835 e->vpermi2w(xmmA, xmmB, anyptr_gpC);
4836 e->vpermi2w(ymmA, ymmB, ymmC);
4837 e->vpermi2w(ymmA, ymmB, anyptr_gpC);
4838 e->vpermi2w(zmmA, zmmB, zmmC);
4839 e->vpermi2w(zmmA, zmmB, anyptr_gpC);
4840 e->vpermilpd(xmmA, xmmB, xmmC);
4841 e->vpermilpd(xmmA, xmmB, anyptr_gpC);
4842 e->vpermilpd(ymmA, ymmB, ymmC);
4843 e->vpermilpd(ymmA, ymmB, anyptr_gpC);
4844 e->vpermilpd(zmmA, zmmB, zmmC);
4845 e->vpermilpd(zmmA, zmmB, anyptr_gpC);
4846 e->vpermilpd(xmmA, xmmB, 0);
4847 e->vpermilpd(xmmA, anyptr_gpB, 0);
4848 e->vpermilpd(ymmA, ymmB, 0);
4849 e->vpermilpd(ymmA, anyptr_gpB, 0);
4850 e->vpermilpd(zmmA, zmmB, 0);
4851 e->vpermilpd(zmmA, anyptr_gpB, 0);
4852 e->vpermilps(xmmA, xmmB, xmmC);
4853 e->vpermilps(xmmA, xmmB, anyptr_gpC);
4854 e->vpermilps(ymmA, ymmB, ymmC);
4855 e->vpermilps(ymmA, ymmB, anyptr_gpC);
4856 e->vpermilps(zmmA, zmmB, zmmC);
4857 e->vpermilps(zmmA, zmmB, anyptr_gpC);
4858 e->vpermilps(xmmA, xmmB, 0);
4859 e->vpermilps(xmmA, anyptr_gpB, 0);
4860 e->vpermilps(ymmA, ymmB, 0);
4861 e->vpermilps(ymmA, anyptr_gpB, 0);
4862 e->vpermilps(zmmA, zmmB, 0);
4863 e->vpermilps(zmmA, anyptr_gpB, 0);
4864 e->vpermq(ymmA, ymmB, ymmC);
4865 e->vpermq(ymmA, ymmB, anyptr_gpC);
4866 e->vpermq(zmmA, zmmB, zmmC);
4867 e->vpermq(zmmA, zmmB, anyptr_gpC);
4868 e->vpermq(ymmA, ymmB, 0);
4869 e->vpermq(ymmA, anyptr_gpB, 0);
4870 e->vpermq(zmmA, zmmB, 0);
4871 e->vpermq(zmmA, anyptr_gpB, 0);
4872 e->vpermt2b(xmmA, xmmB, xmmC);
4873 e->vpermt2b(xmmA, xmmB, anyptr_gpC);
4874 e->vpermt2b(ymmA, ymmB, ymmC);
4875 e->vpermt2b(ymmA, ymmB, anyptr_gpC);
4876 e->vpermt2b(zmmA, zmmB, zmmC);
4877 e->vpermt2b(zmmA, zmmB, anyptr_gpC);
4878 e->vpermt2d(xmmA, xmmB, xmmC);
4879 e->vpermt2d(xmmA, xmmB, anyptr_gpC);
4880 e->vpermt2d(ymmA, ymmB, ymmC);
4881 e->vpermt2d(ymmA, ymmB, anyptr_gpC);
4882 e->vpermt2d(zmmA, zmmB, zmmC);
4883 e->vpermt2d(zmmA, zmmB, anyptr_gpC);
4884 e->vpermt2pd(xmmA, xmmB, xmmC);
4885 e->vpermt2pd(xmmA, xmmB, anyptr_gpC);
4886 e->vpermt2pd(ymmA, ymmB, ymmC);
4887 e->vpermt2pd(ymmA, ymmB, anyptr_gpC);
4888 e->vpermt2pd(zmmA, zmmB, zmmC);
4889 e->vpermt2pd(zmmA, zmmB, anyptr_gpC);
4890 e->vpermt2ps(xmmA, xmmB, xmmC);
4891 e->vpermt2ps(xmmA, xmmB, anyptr_gpC);
4892 e->vpermt2ps(ymmA, ymmB, ymmC);
4893 e->vpermt2ps(ymmA, ymmB, anyptr_gpC);
4894 e->vpermt2ps(zmmA, zmmB, zmmC);
4895 e->vpermt2ps(zmmA, zmmB, anyptr_gpC);
4896 e->vpermt2q(xmmA, xmmB, xmmC);
4897 e->vpermt2q(xmmA, xmmB, anyptr_gpC);
4898 e->vpermt2q(ymmA, ymmB, ymmC);
4899 e->vpermt2q(ymmA, ymmB, anyptr_gpC);
4900 e->vpermt2q(zmmA, zmmB, zmmC);
4901 e->vpermt2q(zmmA, zmmB, anyptr_gpC);
4902 e->vpermt2w(xmmA, xmmB, xmmC);
4903 e->vpermt2w(xmmA, xmmB, anyptr_gpC);
4904 e->vpermt2w(ymmA, ymmB, ymmC);
4905 e->vpermt2w(ymmA, ymmB, anyptr_gpC);
4906 e->vpermt2w(zmmA, zmmB, zmmC);
4907 e->vpermt2w(zmmA, zmmB, anyptr_gpC);
4908 e->vpermw(xmmA, xmmB, xmmC);
4909 e->vpermw(xmmA, xmmB, anyptr_gpC);
4910 e->vpermw(ymmA, ymmB, ymmC);
4911 e->vpermw(ymmA, ymmB, anyptr_gpC);
4912 e->vpermw(zmmA, zmmB, zmmC);
4913 e->vpermw(zmmA, zmmB, anyptr_gpC);
4914 e->vpexpandd(xmmA, xmmB);
4915 e->vpexpandd(xmmA, anyptr_gpB);
4916 e->vpexpandd(ymmA, ymmB);
4917 e->vpexpandd(ymmA, anyptr_gpB);
4918 e->vpexpandd(zmmA, zmmB);
4919 e->vpexpandd(zmmA, anyptr_gpB);
4920 e->vpexpandq(xmmA, xmmB);
4921 e->vpexpandq(xmmA, anyptr_gpB);
4922 e->vpexpandq(ymmA, ymmB);
4923 e->vpexpandq(ymmA, anyptr_gpB);
4924 e->vpexpandq(zmmA, zmmB);
4925 e->vpexpandq(zmmA, anyptr_gpB);
4926 e->vpextrb(gdA, xmmB, 0);
4927 e->vpextrb(anyptr_gpA, xmmB, 0);
4928 e->vpextrb(gzA, xmmB, 0);
4929 e->vpextrd(gdA, xmmB, 0);
4930 e->vpextrd(anyptr_gpA, xmmB, 0);
4931 if (isX64) e->vpextrd(gzA, xmmB, 0);
4932 if (isX64) e->vpextrq(gzA, xmmB, 0);
4933 e->vpextrq(anyptr_gpA, xmmB, 0);
4934 e->vpextrw(gdA, xmmB, 0);
4935 e->vpextrw(gzA, xmmB, 0);
4936 e->vpextrw(gdA, xmmB, 0);
4937 e->vpextrw(anyptr_gpA, xmmB, 0);
4938 e->vpextrw(gzA, xmmB, 0);
4939 e->vpgatherdd(xmmA, vx_ptr);
4940 e->vpgatherdd(ymmA, vy_ptr);
4941 e->vpgatherdd(zmmA, vz_ptr);
4942 e->vpgatherdq(xmmA, vx_ptr);
4943 e->vpgatherdq(ymmA, vy_ptr);
4944 e->vpgatherdq(zmmA, vz_ptr);
4945 e->vpgatherqd(xmmA, vx_ptr);
4946 e->vpgatherqd(ymmA, vy_ptr);
4947 e->vpgatherqd(zmmA, vz_ptr);
4948 e->vpgatherqq(xmmA, vx_ptr);
4949 e->vpgatherqq(ymmA, vy_ptr);
4950 e->vpgatherqq(zmmA, vz_ptr);
4951 e->vpinsrb(xmmA, xmmB, gdC, 0);
4952 e->vpinsrb(xmmA, xmmB, anyptr_gpC, 0);
4953 e->vpinsrb(xmmA, xmmB, gzC, 0);
4954 e->vpinsrd(xmmA, xmmB, gdC, 0);
4955 e->vpinsrd(xmmA, xmmB, anyptr_gpC, 0);
4956 e->vpinsrd(xmmA, xmmB, gzC, 0);
4957 if (isX64) e->vpinsrq(xmmA, xmmB, gzC, 0);
4958 e->vpinsrq(xmmA, xmmB, anyptr_gpC, 0);
4959 e->vpinsrw(xmmA, xmmB, gdC, 0);
4960 e->vpinsrw(xmmA, xmmB, anyptr_gpC, 0);
4961 e->vpinsrw(xmmA, xmmB, gzC, 0);
4962 e->vplzcntd(xmmA, xmmB);
4963 e->vplzcntd(xmmA, anyptr_gpB);
4964 e->vplzcntd(ymmA, ymmB);
4965 e->vplzcntd(ymmA, anyptr_gpB);
4966 e->vplzcntd(zmmA, zmmB);
4967 e->vplzcntd(zmmA, anyptr_gpB);
4968 e->vplzcntq(xmmA, xmmB);
4969 e->vplzcntq(xmmA, anyptr_gpB);
4970 e->vplzcntq(ymmA, ymmB);
4971 e->vplzcntq(ymmA, anyptr_gpB);
4972 e->vplzcntq(zmmA, zmmB);
4973 e->vplzcntq(zmmA, anyptr_gpB);
4974 e->vpmadd52huq(xmmA, xmmB, xmmC);
4975 e->vpmadd52huq(xmmA, xmmB, anyptr_gpC);
4976 e->vpmadd52huq(ymmA, ymmB, ymmC);
4977 e->vpmadd52huq(ymmA, ymmB, anyptr_gpC);
4978 e->vpmadd52huq(zmmA, zmmB, zmmC);
4979 e->vpmadd52huq(zmmA, zmmB, anyptr_gpC);
4980 e->vpmadd52luq(xmmA, xmmB, xmmC);
4981 e->vpmadd52luq(xmmA, xmmB, anyptr_gpC);
4982 e->vpmadd52luq(ymmA, ymmB, ymmC);
4983 e->vpmadd52luq(ymmA, ymmB, anyptr_gpC);
4984 e->vpmadd52luq(zmmA, zmmB, zmmC);
4985 e->vpmadd52luq(zmmA, zmmB, anyptr_gpC);
4986 e->vpmaddubsw(xmmA, xmmB, xmmC);
4987 e->vpmaddubsw(xmmA, xmmB, anyptr_gpC);
4988 e->vpmaddubsw(ymmA, ymmB, ymmC);
4989 e->vpmaddubsw(ymmA, ymmB, anyptr_gpC);
4990 e->vpmaddubsw(zmmA, zmmB, zmmC);
4991 e->vpmaddubsw(zmmA, zmmB, anyptr_gpC);
4992 e->vpmaddwd(xmmA, xmmB, xmmC);
4993 e->vpmaddwd(xmmA, xmmB, anyptr_gpC);
4994 e->vpmaddwd(ymmA, ymmB, ymmC);
4995 e->vpmaddwd(ymmA, ymmB, anyptr_gpC);
4996 e->vpmaddwd(zmmA, zmmB, zmmC);
4997 e->vpmaddwd(zmmA, zmmB, anyptr_gpC);
4998 e->vpmaxsb(xmmA, xmmB, xmmC);
4999 e->vpmaxsb(xmmA, xmmB, anyptr_gpC);
5000 e->vpmaxsb(ymmA, ymmB, ymmC);
5001 e->vpmaxsb(ymmA, ymmB, anyptr_gpC);
5002 e->vpmaxsb(zmmA, zmmB, zmmC);
5003 e->vpmaxsb(zmmA, zmmB, anyptr_gpC);
5004 e->vpmaxsd(xmmA, xmmB, xmmC);
5005 e->vpmaxsd(xmmA, xmmB, anyptr_gpC);
5006 e->vpmaxsd(ymmA, ymmB, ymmC);
5007 e->vpmaxsd(ymmA, ymmB, anyptr_gpC);
5008 e->vpmaxsd(zmmA, zmmB, zmmC);
5009 e->vpmaxsd(zmmA, zmmB, anyptr_gpC);
5010 e->vpmaxsq(xmmA, xmmB, xmmC);
5011 e->vpmaxsq(xmmA, xmmB, anyptr_gpC);
5012 e->vpmaxsq(ymmA, ymmB, ymmC);
5013 e->vpmaxsq(ymmA, ymmB, anyptr_gpC);
5014 e->vpmaxsq(zmmA, zmmB, zmmC);
5015 e->vpmaxsq(zmmA, zmmB, anyptr_gpC);
5016 e->vpmaxsw(xmmA, xmmB, xmmC);
5017 e->vpmaxsw(xmmA, xmmB, anyptr_gpC);
5018 e->vpmaxsw(ymmA, ymmB, ymmC);
5019 e->vpmaxsw(ymmA, ymmB, anyptr_gpC);
5020 e->vpmaxsw(zmmA, zmmB, zmmC);
5021 e->vpmaxsw(zmmA, zmmB, anyptr_gpC);
5022 e->vpmaxub(xmmA, xmmB, xmmC);
5023 e->vpmaxub(xmmA, xmmB, anyptr_gpC);
5024 e->vpmaxub(ymmA, ymmB, ymmC);
5025 e->vpmaxub(ymmA, ymmB, anyptr_gpC);
5026 e->vpmaxub(zmmA, zmmB, zmmC);
5027 e->vpmaxub(zmmA, zmmB, anyptr_gpC);
5028 e->vpmaxud(xmmA, xmmB, xmmC);
5029 e->vpmaxud(xmmA, xmmB, anyptr_gpC);
5030 e->vpmaxud(ymmA, ymmB, ymmC);
5031 e->vpmaxud(ymmA, ymmB, anyptr_gpC);
5032 e->vpmaxud(zmmA, zmmB, zmmC);
5033 e->vpmaxud(zmmA, zmmB, anyptr_gpC);
5034 e->vpmaxuq(xmmA, xmmB, xmmC);
5035 e->vpmaxuq(xmmA, xmmB, anyptr_gpC);
5036 e->vpmaxuq(ymmA, ymmB, ymmC);
5037 e->vpmaxuq(ymmA, ymmB, anyptr_gpC);
5038 e->vpmaxuq(zmmA, zmmB, zmmC);
5039 e->vpmaxuq(zmmA, zmmB, anyptr_gpC);
5040 e->vpmaxuw(xmmA, xmmB, xmmC);
5041 e->vpmaxuw(xmmA, xmmB, anyptr_gpC);
5042 e->vpmaxuw(ymmA, ymmB, ymmC);
5043 e->vpmaxuw(ymmA, ymmB, anyptr_gpC);
5044 e->vpmaxuw(zmmA, zmmB, zmmC);
5045 e->vpmaxuw(zmmA, zmmB, anyptr_gpC);
5046 e->vpminsb(xmmA, xmmB, xmmC);
5047 e->vpminsb(xmmA, xmmB, anyptr_gpC);
5048 e->vpminsb(ymmA, ymmB, ymmC);
5049 e->vpminsb(ymmA, ymmB, anyptr_gpC);
5050 e->vpminsb(zmmA, zmmB, zmmC);
5051 e->vpminsb(zmmA, zmmB, anyptr_gpC);
5052 e->vpminsd(xmmA, xmmB, xmmC);
5053 e->vpminsd(xmmA, xmmB, anyptr_gpC);
5054 e->vpminsd(ymmA, ymmB, ymmC);
5055 e->vpminsd(ymmA, ymmB, anyptr_gpC);
5056 e->vpminsd(zmmA, zmmB, zmmC);
5057 e->vpminsd(zmmA, zmmB, anyptr_gpC);
5058 e->vpminsq(xmmA, xmmB, xmmC);
5059 e->vpminsq(xmmA, xmmB, anyptr_gpC);
5060 e->vpminsq(ymmA, ymmB, ymmC);
5061 e->vpminsq(ymmA, ymmB, anyptr_gpC);
5062 e->vpminsq(zmmA, zmmB, zmmC);
5063 e->vpminsq(zmmA, zmmB, anyptr_gpC);
5064 e->vpminsw(xmmA, xmmB, xmmC);
5065 e->vpminsw(xmmA, xmmB, anyptr_gpC);
5066 e->vpminsw(ymmA, ymmB, ymmC);
5067 e->vpminsw(ymmA, ymmB, anyptr_gpC);
5068 e->vpminsw(zmmA, zmmB, zmmC);
5069 e->vpminsw(zmmA, zmmB, anyptr_gpC);
5070 e->vpminub(xmmA, xmmB, xmmC);
5071 e->vpminub(xmmA, xmmB, anyptr_gpC);
5072 e->vpminub(ymmA, ymmB, ymmC);
5073 e->vpminub(ymmA, ymmB, anyptr_gpC);
5074 e->vpminub(zmmA, zmmB, zmmC);
5075 e->vpminub(zmmA, zmmB, anyptr_gpC);
5076 e->vpminud(xmmA, xmmB, xmmC);
5077 e->vpminud(xmmA, xmmB, anyptr_gpC);
5078 e->vpminud(ymmA, ymmB, ymmC);
5079 e->vpminud(ymmA, ymmB, anyptr_gpC);
5080 e->vpminud(zmmA, zmmB, zmmC);
5081 e->vpminud(zmmA, zmmB, anyptr_gpC);
5082 e->vpminuq(xmmA, xmmB, xmmC);
5083 e->vpminuq(xmmA, xmmB, anyptr_gpC);
5084 e->vpminuq(ymmA, ymmB, ymmC);
5085 e->vpminuq(ymmA, ymmB, anyptr_gpC);
5086 e->vpminuq(zmmA, zmmB, zmmC);
5087 e->vpminuq(zmmA, zmmB, anyptr_gpC);
5088 e->vpminuw(xmmA, xmmB, xmmC);
5089 e->vpminuw(xmmA, xmmB, anyptr_gpC);
5090 e->vpminuw(ymmA, ymmB, ymmC);
5091 e->vpminuw(ymmA, ymmB, anyptr_gpC);
5092 e->vpminuw(zmmA, zmmB, zmmC);
5093 e->vpminuw(zmmA, zmmB, anyptr_gpC);
5094 e->vpmovb2m(kA, xmmB);
5095 e->vpmovb2m(kA, ymmB);
5096 e->vpmovb2m(kA, zmmB);
5097 e->vpmovd2m(kA, xmmB);
5098 e->vpmovd2m(kA, ymmB);
5099 e->vpmovd2m(kA, zmmB);
5100 e->vpmovdb(xmmA, xmmB);
5101 e->vpmovdb(anyptr_gpA, xmmB);
5102 e->vpmovdb(xmmA, ymmB);
5103 e->vpmovdb(anyptr_gpA, ymmB);
5104 e->vpmovdb(xmmA, zmmB);
5105 e->vpmovdb(anyptr_gpA, zmmB);
5106 e->vpmovdw(xmmA, xmmB);
5107 e->vpmovdw(anyptr_gpA, xmmB);
5108 e->vpmovdw(xmmA, ymmB);
5109 e->vpmovdw(anyptr_gpA, ymmB);
5110 e->vpmovdw(ymmA, zmmB);
5111 e->vpmovdw(anyptr_gpA, zmmB);
5112 e->vpmovm2b(xmmA, kB);
5113 e->vpmovm2b(ymmA, kB);
5114 e->vpmovm2b(zmmA, kB);
5115 e->vpmovm2d(xmmA, kB);
5116 e->vpmovm2d(ymmA, kB);
5117 e->vpmovm2d(zmmA, kB);
5118 e->vpmovm2q(xmmA, kB);
5119 e->vpmovm2q(ymmA, kB);
5120 e->vpmovm2q(zmmA, kB);
5121 e->vpmovm2w(xmmA, kB);
5122 e->vpmovm2w(ymmA, kB);
5123 e->vpmovm2w(zmmA, kB);
5124 e->vpmovq2m(kA, xmmB);
5125 e->vpmovq2m(kA, ymmB);
5126 e->vpmovq2m(kA, zmmB);
5127 e->vpmovqb(xmmA, xmmB);
5128 e->vpmovqb(anyptr_gpA, xmmB);
5129 e->vpmovqb(xmmA, ymmB);
5130 e->vpmovqb(anyptr_gpA, ymmB);
5131 e->vpmovqb(xmmA, zmmB);
5132 e->vpmovqb(anyptr_gpA, zmmB);
5133 e->vpmovqd(xmmA, xmmB);
5134 e->vpmovqd(anyptr_gpA, xmmB);
5135 e->vpmovqd(xmmA, ymmB);
5136 e->vpmovqd(anyptr_gpA, ymmB);
5137 e->vpmovqd(ymmA, zmmB);
5138 e->vpmovqd(anyptr_gpA, zmmB);
5139 e->vpmovqw(xmmA, xmmB);
5140 e->vpmovqw(anyptr_gpA, xmmB);
5141 e->vpmovqw(xmmA, ymmB);
5142 e->vpmovqw(anyptr_gpA, ymmB);
5143 e->vpmovqw(xmmA, zmmB);
5144 e->vpmovqw(anyptr_gpA, zmmB);
5145 e->vpmovsdb(xmmA, xmmB);
5146 e->vpmovsdb(anyptr_gpA, xmmB);
5147 e->vpmovsdb(xmmA, ymmB);
5148 e->vpmovsdb(anyptr_gpA, ymmB);
5149 e->vpmovsdb(xmmA, zmmB);
5150 e->vpmovsdb(anyptr_gpA, zmmB);
5151 e->vpmovsdw(xmmA, xmmB);
5152 e->vpmovsdw(anyptr_gpA, xmmB);
5153 e->vpmovsdw(xmmA, ymmB);
5154 e->vpmovsdw(anyptr_gpA, ymmB);
5155 e->vpmovsdw(ymmA, zmmB);
5156 e->vpmovsdw(anyptr_gpA, zmmB);
5157 e->vpmovsqb(xmmA, xmmB);
5158 e->vpmovsqb(anyptr_gpA, xmmB);
5159 e->vpmovsqb(xmmA, ymmB);
5160 e->vpmovsqb(anyptr_gpA, ymmB);
5161 e->vpmovsqb(xmmA, zmmB);
5162 e->vpmovsqb(anyptr_gpA, zmmB);
5163 e->vpmovsqd(xmmA, xmmB);
5164 e->vpmovsqd(anyptr_gpA, xmmB);
5165 e->vpmovsqd(xmmA, ymmB);
5166 e->vpmovsqd(anyptr_gpA, ymmB);
5167 e->vpmovsqd(ymmA, zmmB);
5168 e->vpmovsqd(anyptr_gpA, zmmB);
5169 e->vpmovsqw(xmmA, xmmB);
5170 e->vpmovsqw(anyptr_gpA, xmmB);
5171 e->vpmovsqw(xmmA, ymmB);
5172 e->vpmovsqw(anyptr_gpA, ymmB);
5173 e->vpmovsqw(xmmA, zmmB);
5174 e->vpmovsqw(anyptr_gpA, zmmB);
5175 e->vpmovswb(xmmA, xmmB);
5176 e->vpmovswb(anyptr_gpA, xmmB);
5177 e->vpmovswb(xmmA, ymmB);
5178 e->vpmovswb(anyptr_gpA, ymmB);
5179 e->vpmovswb(ymmA, zmmB);
5180 e->vpmovswb(anyptr_gpA, zmmB);
5181 e->vpmovsxbd(xmmA, xmmB);
5182 e->vpmovsxbd(xmmA, anyptr_gpB);
5183 e->vpmovsxbd(ymmA, xmmB);
5184 e->vpmovsxbd(ymmA, anyptr_gpB);
5185 e->vpmovsxbd(zmmA, xmmB);
5186 e->vpmovsxbd(zmmA, anyptr_gpB);
5187 e->vpmovsxbq(xmmA, xmmB);
5188 e->vpmovsxbq(xmmA, anyptr_gpB);
5189 e->vpmovsxbq(ymmA, xmmB);
5190 e->vpmovsxbq(ymmA, anyptr_gpB);
5191 e->vpmovsxbq(zmmA, xmmB);
5192 e->vpmovsxbq(zmmA, anyptr_gpB);
5193 e->vpmovsxbw(xmmA, xmmB);
5194 e->vpmovsxbw(xmmA, anyptr_gpB);
5195 e->vpmovsxbw(ymmA, xmmB);
5196 e->vpmovsxbw(ymmA, anyptr_gpB);
5197 e->vpmovsxbw(zmmA, ymmB);
5198 e->vpmovsxbw(zmmA, anyptr_gpB);
5199 e->vpmovsxdq(xmmA, xmmB);
5200 e->vpmovsxdq(xmmA, anyptr_gpB);
5201 e->vpmovsxdq(ymmA, xmmB);
5202 e->vpmovsxdq(ymmA, anyptr_gpB);
5203 e->vpmovsxdq(zmmA, ymmB);
5204 e->vpmovsxdq(zmmA, anyptr_gpB);
5205 e->vpmovsxwd(xmmA, xmmB);
5206 e->vpmovsxwd(xmmA, anyptr_gpB);
5207 e->vpmovsxwd(ymmA, xmmB);
5208 e->vpmovsxwd(ymmA, anyptr_gpB);
5209 e->vpmovsxwd(zmmA, ymmB);
5210 e->vpmovsxwd(zmmA, anyptr_gpB);
5211 e->vpmovsxwq(xmmA, xmmB);
5212 e->vpmovsxwq(xmmA, anyptr_gpB);
5213 e->vpmovsxwq(ymmA, xmmB);
5214 e->vpmovsxwq(ymmA, anyptr_gpB);
5215 e->vpmovsxwq(zmmA, xmmB);
5216 e->vpmovsxwq(zmmA, anyptr_gpB);
5217 e->vpmovusdb(xmmA, xmmB);
5218 e->vpmovusdb(anyptr_gpA, xmmB);
5219 e->vpmovusdb(xmmA, ymmB);
5220 e->vpmovusdb(anyptr_gpA, ymmB);
5221 e->vpmovusdb(xmmA, zmmB);
5222 e->vpmovusdb(anyptr_gpA, zmmB);
5223 e->vpmovusdw(xmmA, xmmB);
5224 e->vpmovusdw(anyptr_gpA, xmmB);
5225 e->vpmovusdw(xmmA, ymmB);
5226 e->vpmovusdw(anyptr_gpA, ymmB);
5227 e->vpmovusdw(ymmA, zmmB);
5228 e->vpmovusdw(anyptr_gpA, zmmB);
5229 e->vpmovusqb(xmmA, xmmB);
5230 e->vpmovusqb(anyptr_gpA, xmmB);
5231 e->vpmovusqb(xmmA, ymmB);
5232 e->vpmovusqb(anyptr_gpA, ymmB);
5233 e->vpmovusqb(xmmA, zmmB);
5234 e->vpmovusqb(anyptr_gpA, zmmB);
5235 e->vpmovusqd(xmmA, xmmB);
5236 e->vpmovusqd(anyptr_gpA, xmmB);
5237 e->vpmovusqd(xmmA, ymmB);
5238 e->vpmovusqd(anyptr_gpA, ymmB);
5239 e->vpmovusqd(ymmA, zmmB);
5240 e->vpmovusqd(anyptr_gpA, zmmB);
5241 e->vpmovusqw(xmmA, xmmB);
5242 e->vpmovusqw(anyptr_gpA, xmmB);
5243 e->vpmovusqw(xmmA, ymmB);
5244 e->vpmovusqw(anyptr_gpA, ymmB);
5245 e->vpmovusqw(xmmA, zmmB);
5246 e->vpmovusqw(anyptr_gpA, zmmB);
5247 e->vpmovuswb(xmmA, xmmB);
5248 e->vpmovuswb(anyptr_gpA, xmmB);
5249 e->vpmovuswb(xmmA, ymmB);
5250 e->vpmovuswb(anyptr_gpA, ymmB);
5251 e->vpmovuswb(ymmA, zmmB);
5252 e->vpmovuswb(anyptr_gpA, zmmB);
5253 e->vpmovw2m(kA, xmmB);
5254 e->vpmovw2m(kA, ymmB);
5255 e->vpmovw2m(kA, zmmB);
5256 e->vpmovwb(xmmA, xmmB);
5257 e->vpmovwb(anyptr_gpA, xmmB);
5258 e->vpmovwb(xmmA, ymmB);
5259 e->vpmovwb(anyptr_gpA, ymmB);
5260 e->vpmovwb(ymmA, zmmB);
5261 e->vpmovwb(anyptr_gpA, zmmB);
5262 e->vpmovzxbd(xmmA, xmmB);
5263 e->vpmovzxbd(xmmA, anyptr_gpB);
5264 e->vpmovzxbd(ymmA, xmmB);
5265 e->vpmovzxbd(ymmA, anyptr_gpB);
5266 e->vpmovzxbd(zmmA, xmmB);
5267 e->vpmovzxbd(zmmA, anyptr_gpB);
5268 e->vpmovzxbq(xmmA, xmmB);
5269 e->vpmovzxbq(xmmA, anyptr_gpB);
5270 e->vpmovzxbq(ymmA, xmmB);
5271 e->vpmovzxbq(ymmA, anyptr_gpB);
5272 e->vpmovzxbq(zmmA, xmmB);
5273 e->vpmovzxbq(zmmA, anyptr_gpB);
5274 e->vpmovzxbw(xmmA, xmmB);
5275 e->vpmovzxbw(xmmA, anyptr_gpB);
5276 e->vpmovzxbw(ymmA, xmmB);
5277 e->vpmovzxbw(ymmA, anyptr_gpB);
5278 e->vpmovzxbw(zmmA, ymmB);
5279 e->vpmovzxbw(zmmA, anyptr_gpB);
5280 e->vpmovzxdq(xmmA, xmmB);
5281 e->vpmovzxdq(xmmA, anyptr_gpB);
5282 e->vpmovzxdq(ymmA, xmmB);
5283 e->vpmovzxdq(ymmA, anyptr_gpB);
5284 e->vpmovzxdq(zmmA, ymmB);
5285 e->vpmovzxdq(zmmA, anyptr_gpB);
5286 e->vpmovzxwd(xmmA, xmmB);
5287 e->vpmovzxwd(xmmA, anyptr_gpB);
5288 e->vpmovzxwd(ymmA, xmmB);
5289 e->vpmovzxwd(ymmA, anyptr_gpB);
5290 e->vpmovzxwd(zmmA, ymmB);
5291 e->vpmovzxwd(zmmA, anyptr_gpB);
5292 e->vpmovzxwq(xmmA, xmmB);
5293 e->vpmovzxwq(xmmA, anyptr_gpB);
5294 e->vpmovzxwq(ymmA, xmmB);
5295 e->vpmovzxwq(ymmA, anyptr_gpB);
5296 e->vpmovzxwq(zmmA, xmmB);
5297 e->vpmovzxwq(zmmA, anyptr_gpB);
5298 e->vpmuldq(xmmA, xmmB, xmmC);
5299 e->vpmuldq(xmmA, xmmB, anyptr_gpC);
5300 e->vpmuldq(ymmA, ymmB, ymmC);
5301 e->vpmuldq(ymmA, ymmB, anyptr_gpC);
5302 e->vpmuldq(zmmA, zmmB, zmmC);
5303 e->vpmuldq(zmmA, zmmB, anyptr_gpC);
5304 e->vpmulhrsw(xmmA, xmmB, xmmC);
5305 e->vpmulhrsw(xmmA, xmmB, anyptr_gpC);
5306 e->vpmulhrsw(ymmA, ymmB, ymmC);
5307 e->vpmulhrsw(ymmA, ymmB, anyptr_gpC);
5308 e->vpmulhrsw(zmmA, zmmB, zmmC);
5309 e->vpmulhrsw(zmmA, zmmB, anyptr_gpC);
5310 e->vpmulhuw(xmmA, xmmB, xmmC);
5311 e->vpmulhuw(xmmA, xmmB, anyptr_gpC);
5312 e->vpmulhuw(ymmA, ymmB, ymmC);
5313 e->vpmulhuw(ymmA, ymmB, anyptr_gpC);
5314 e->vpmulhuw(zmmA, zmmB, zmmC);
5315 e->vpmulhuw(zmmA, zmmB, anyptr_gpC);
5316 e->vpmulhw(xmmA, xmmB, xmmC);
5317 e->vpmulhw(xmmA, xmmB, anyptr_gpC);
5318 e->vpmulhw(ymmA, ymmB, ymmC);
5319 e->vpmulhw(ymmA, ymmB, anyptr_gpC);
5320 e->vpmulhw(zmmA, zmmB, zmmC);
5321 e->vpmulhw(zmmA, zmmB, anyptr_gpC);
5322 e->vpmulld(xmmA, xmmB, xmmC);
5323 e->vpmulld(xmmA, xmmB, anyptr_gpC);
5324 e->vpmulld(ymmA, ymmB, ymmC);
5325 e->vpmulld(ymmA, ymmB, anyptr_gpC);
5326 e->vpmulld(zmmA, zmmB, zmmC);
5327 e->vpmulld(zmmA, zmmB, anyptr_gpC);
5328 e->vpmullq(xmmA, xmmB, xmmC);
5329 e->vpmullq(xmmA, xmmB, anyptr_gpC);
5330 e->vpmullq(ymmA, ymmB, ymmC);
5331 e->vpmullq(ymmA, ymmB, anyptr_gpC);
5332 e->vpmullq(zmmA, zmmB, zmmC);
5333 e->vpmullq(zmmA, zmmB, anyptr_gpC);
5334 e->vpmullw(xmmA, xmmB, xmmC);
5335 e->vpmullw(xmmA, xmmB, anyptr_gpC);
5336 e->vpmullw(ymmA, ymmB, ymmC);
5337 e->vpmullw(ymmA, ymmB, anyptr_gpC);
5338 e->vpmullw(zmmA, zmmB, zmmC);
5339 e->vpmullw(zmmA, zmmB, anyptr_gpC);
5340 e->vpmultishiftqb(xmmA, xmmB, xmmC);
5341 e->vpmultishiftqb(xmmA, xmmB, anyptr_gpC);
5342 e->vpmultishiftqb(ymmA, ymmB, ymmC);
5343 e->vpmultishiftqb(ymmA, ymmB, anyptr_gpC);
5344 e->vpmultishiftqb(zmmA, zmmB, zmmC);
5345 e->vpmultishiftqb(zmmA, zmmB, anyptr_gpC);
5346 e->vpmuludq(xmmA, xmmB, xmmC);
5347 e->vpmuludq(xmmA, xmmB, anyptr_gpC);
5348 e->vpmuludq(ymmA, ymmB, ymmC);
5349 e->vpmuludq(ymmA, ymmB, anyptr_gpC);
5350 e->vpmuludq(zmmA, zmmB, zmmC);
5351 e->vpmuludq(zmmA, zmmB, anyptr_gpC);
5352 e->vpopcntd(zmmA, zmmB);
5353 e->vpopcntd(zmmA, anyptr_gpB);
5354 e->vpopcntq(zmmA, zmmB);
5355 e->vpopcntq(zmmA, anyptr_gpB);
5356 e->vpord(xmmA, xmmB, xmmC);
5357 e->vpord(xmmA, xmmB, anyptr_gpC);
5358 e->vpord(ymmA, ymmB, ymmC);
5359 e->vpord(ymmA, ymmB, anyptr_gpC);
5360 e->vpord(zmmA, zmmB, zmmC);
5361 e->vpord(zmmA, zmmB, anyptr_gpC);
5362 e->vporq(xmmA, xmmB, xmmC);
5363 e->vporq(xmmA, xmmB, anyptr_gpC);
5364 e->vporq(ymmA, ymmB, ymmC);
5365 e->vporq(ymmA, ymmB, anyptr_gpC);
5366 e->vporq(zmmA, zmmB, zmmC);
5367 e->vporq(zmmA, zmmB, anyptr_gpC);
5368 e->vprold(xmmA, xmmB, 0);
5369 e->vprold(xmmA, anyptr_gpB, 0);
5370 e->vprold(ymmA, ymmB, 0);
5371 e->vprold(ymmA, anyptr_gpB, 0);
5372 e->vprold(zmmA, zmmB, 0);
5373 e->vprold(zmmA, anyptr_gpB, 0);
5374 e->vprolq(xmmA, xmmB, 0);
5375 e->vprolq(xmmA, anyptr_gpB, 0);
5376 e->vprolq(ymmA, ymmB, 0);
5377 e->vprolq(ymmA, anyptr_gpB, 0);
5378 e->vprolq(zmmA, zmmB, 0);
5379 e->vprolq(zmmA, anyptr_gpB, 0);
5380 e->vprolvd(xmmA, xmmB, xmmC);
5381 e->vprolvd(xmmA, xmmB, anyptr_gpC);
5382 e->vprolvd(ymmA, ymmB, ymmC);
5383 e->vprolvd(ymmA, ymmB, anyptr_gpC);
5384 e->vprolvd(zmmA, zmmB, zmmC);
5385 e->vprolvd(zmmA, zmmB, anyptr_gpC);
5386 e->vprolvq(xmmA, xmmB, xmmC);
5387 e->vprolvq(xmmA, xmmB, anyptr_gpC);
5388 e->vprolvq(ymmA, ymmB, ymmC);
5389 e->vprolvq(ymmA, ymmB, anyptr_gpC);
5390 e->vprolvq(zmmA, zmmB, zmmC);
5391 e->vprolvq(zmmA, zmmB, anyptr_gpC);
5392 e->vprord(xmmA, xmmB, 0);
5393 e->vprord(xmmA, anyptr_gpB, 0);
5394 e->vprord(ymmA, ymmB, 0);
5395 e->vprord(ymmA, anyptr_gpB, 0);
5396 e->vprord(zmmA, zmmB, 0);
5397 e->vprord(zmmA, anyptr_gpB, 0);
5398 e->vprorq(xmmA, xmmB, 0);
5399 e->vprorq(xmmA, anyptr_gpB, 0);
5400 e->vprorq(ymmA, ymmB, 0);
5401 e->vprorq(ymmA, anyptr_gpB, 0);
5402 e->vprorq(zmmA, zmmB, 0);
5403 e->vprorq(zmmA, anyptr_gpB, 0);
5404 e->vprorvd(xmmA, xmmB, xmmC);
5405 e->vprorvd(xmmA, xmmB, anyptr_gpC);
5406 e->vprorvd(ymmA, ymmB, ymmC);
5407 e->vprorvd(ymmA, ymmB, anyptr_gpC);
5408 e->vprorvd(zmmA, zmmB, zmmC);
5409 e->vprorvd(zmmA, zmmB, anyptr_gpC);
5410 e->vprorvq(xmmA, xmmB, xmmC);
5411 e->vprorvq(xmmA, xmmB, anyptr_gpC);
5412 e->vprorvq(ymmA, ymmB, ymmC);
5413 e->vprorvq(ymmA, ymmB, anyptr_gpC);
5414 e->vprorvq(zmmA, zmmB, zmmC);
5415 e->vprorvq(zmmA, zmmB, anyptr_gpC);
5416 e->vpsadbw(xmmA, xmmB, xmmC);
5417 e->vpsadbw(xmmA, xmmB, anyptr_gpC);
5418 e->vpsadbw(ymmA, ymmB, ymmC);
5419 e->vpsadbw(ymmA, ymmB, anyptr_gpC);
5420 e->vpsadbw(zmmA, zmmB, zmmC);
5421 e->vpsadbw(zmmA, zmmB, anyptr_gpC);
5422 e->vpscatterdd(vx_ptr, xmmB);
5423 e->vpscatterdd(vy_ptr, ymmB);
5424 e->vpscatterdd(vz_ptr, zmmB);
5425 e->vpscatterdq(vx_ptr, xmmB);
5426 e->vpscatterdq(vy_ptr, ymmB);
5427 e->vpscatterdq(vz_ptr, zmmB);
5428 e->vpscatterqd(vx_ptr, xmmB);
5429 e->vpscatterqd(vy_ptr, xmmB);
5430 e->vpscatterqd(vz_ptr, ymmB);
5431 e->vpscatterqq(vx_ptr, xmmB);
5432 e->vpscatterqq(vy_ptr, ymmB);
5433 e->vpscatterqq(vz_ptr, zmmB);
5434 e->vpshufb(xmmA, xmmB, xmmC);
5435 e->vpshufb(xmmA, xmmB, anyptr_gpC);
5436 e->vpshufb(ymmA, ymmB, ymmC);
5437 e->vpshufb(ymmA, ymmB, anyptr_gpC);
5438 e->vpshufb(zmmA, zmmB, zmmC);
5439 e->vpshufb(zmmA, zmmB, anyptr_gpC);
5440 e->vpshufd(xmmA, xmmB, 0);
5441 e->vpshufd(xmmA, anyptr_gpB, 0);
5442 e->vpshufd(ymmA, ymmB, 0);
5443 e->vpshufd(ymmA, anyptr_gpB, 0);
5444 e->vpshufd(zmmA, zmmB, 0);
5445 e->vpshufd(zmmA, anyptr_gpB, 0);
5446 e->vpshufhw(xmmA, xmmB, 0);
5447 e->vpshufhw(xmmA, anyptr_gpB, 0);
5448 e->vpshufhw(ymmA, ymmB, 0);
5449 e->vpshufhw(ymmA, anyptr_gpB, 0);
5450 e->vpshufhw(zmmA, zmmB, 0);
5451 e->vpshufhw(zmmA, anyptr_gpB, 0);
5452 e->vpshuflw(xmmA, xmmB, 0);
5453 e->vpshuflw(xmmA, anyptr_gpB, 0);
5454 e->vpshuflw(ymmA, ymmB, 0);
5455 e->vpshuflw(ymmA, anyptr_gpB, 0);
5456 e->vpshuflw(zmmA, zmmB, 0);
5457 e->vpshuflw(zmmA, anyptr_gpB, 0);
5458 e->vpslld(xmmA, xmmB, xmmC);
5459 e->vpslld(xmmA, xmmB, anyptr_gpC);
5460 e->vpslld(xmmA, xmmB, 0);
5461 e->vpslld(xmmA, anyptr_gpB, 0);
5462 e->vpslld(ymmA, ymmB, xmmC);
5463 e->vpslld(ymmA, ymmB, anyptr_gpC);
5464 e->vpslld(ymmA, ymmB, 0);
5465 e->vpslld(ymmA, anyptr_gpB, 0);
5466 e->vpslld(zmmA, zmmB, xmmC);
5467 e->vpslld(zmmA, zmmB, anyptr_gpC);
5468 e->vpslld(zmmA, zmmB, 0);
5469 e->vpslld(zmmA, anyptr_gpB, 0);
5470 e->vpslldq(xmmA, xmmB, 0);
5471 e->vpslldq(xmmA, anyptr_gpB, 0);
5472 e->vpslldq(ymmA, ymmB, 0);
5473 e->vpslldq(ymmA, anyptr_gpB, 0);
5474 e->vpslldq(zmmA, zmmB, 0);
5475 e->vpslldq(zmmA, anyptr_gpB, 0);
5476 e->vpsllq(xmmA, xmmB, xmmC);
5477 e->vpsllq(xmmA, xmmB, anyptr_gpC);
5478 e->vpsllq(xmmA, xmmB, 0);
5479 e->vpsllq(xmmA, anyptr_gpB, 0);
5480 e->vpsllq(ymmA, ymmB, xmmC);
5481 e->vpsllq(ymmA, ymmB, anyptr_gpC);
5482 e->vpsllq(ymmA, ymmB, 0);
5483 e->vpsllq(ymmA, anyptr_gpB, 0);
5484 e->vpsllq(zmmA, zmmB, xmmC);
5485 e->vpsllq(zmmA, zmmB, anyptr_gpC);
5486 e->vpsllq(zmmA, zmmB, 0);
5487 e->vpsllq(zmmA, anyptr_gpB, 0);
5488 e->vpsllvd(xmmA, xmmB, xmmC);
5489 e->vpsllvd(xmmA, xmmB, anyptr_gpC);
5490 e->vpsllvd(ymmA, ymmB, ymmC);
5491 e->vpsllvd(ymmA, ymmB, anyptr_gpC);
5492 e->vpsllvd(zmmA, zmmB, zmmC);
5493 e->vpsllvd(zmmA, zmmB, anyptr_gpC);
5494 e->vpsllvq(xmmA, xmmB, xmmC);
5495 e->vpsllvq(xmmA, xmmB, anyptr_gpC);
5496 e->vpsllvq(ymmA, ymmB, ymmC);
5497 e->vpsllvq(ymmA, ymmB, anyptr_gpC);
5498 e->vpsllvq(zmmA, zmmB, zmmC);
5499 e->vpsllvq(zmmA, zmmB, anyptr_gpC);
5500 e->vpsllvw(xmmA, xmmB, xmmC);
5501 e->vpsllvw(xmmA, xmmB, anyptr_gpC);
5502 e->vpsllvw(ymmA, ymmB, ymmC);
5503 e->vpsllvw(ymmA, ymmB, anyptr_gpC);
5504 e->vpsllvw(zmmA, zmmB, zmmC);
5505 e->vpsllvw(zmmA, zmmB, anyptr_gpC);
5506 e->vpsllw(xmmA, xmmB, xmmC);
5507 e->vpsllw(xmmA, xmmB, anyptr_gpC);
5508 e->vpsllw(xmmA, xmmB, 0);
5509 e->vpsllw(xmmA, anyptr_gpB, 0);
5510 e->vpsllw(ymmA, ymmB, xmmC);
5511 e->vpsllw(ymmA, ymmB, anyptr_gpC);
5512 e->vpsllw(ymmA, ymmB, 0);
5513 e->vpsllw(ymmA, anyptr_gpB, 0);
5514 e->vpsllw(zmmA, zmmB, xmmC);
5515 e->vpsllw(zmmA, zmmB, anyptr_gpC);
5516 e->vpsllw(zmmA, zmmB, 0);
5517 e->vpsllw(zmmA, anyptr_gpB, 0);
5518 e->vpsrad(xmmA, xmmB, xmmC);
5519 e->vpsrad(xmmA, xmmB, anyptr_gpC);
5520 e->vpsrad(xmmA, xmmB, 0);
5521 e->vpsrad(xmmA, anyptr_gpB, 0);
5522 e->vpsrad(ymmA, ymmB, xmmC);
5523 e->vpsrad(ymmA, ymmB, anyptr_gpC);
5524 e->vpsrad(ymmA, ymmB, 0);
5525 e->vpsrad(ymmA, anyptr_gpB, 0);
5526 e->vpsrad(zmmA, zmmB, xmmC);
5527 e->vpsrad(zmmA, zmmB, anyptr_gpC);
5528 e->vpsrad(zmmA, zmmB, 0);
5529 e->vpsrad(zmmA, anyptr_gpB, 0);
5530 e->vpsraq(xmmA, xmmB, xmmC);
5531 e->vpsraq(xmmA, xmmB, anyptr_gpC);
5532 e->vpsraq(xmmA, xmmB, 0);
5533 e->vpsraq(xmmA, anyptr_gpB, 0);
5534 e->vpsraq(ymmA, ymmB, xmmC);
5535 e->vpsraq(ymmA, ymmB, anyptr_gpC);
5536 e->vpsraq(ymmA, ymmB, 0);
5537 e->vpsraq(ymmA, anyptr_gpB, 0);
5538 e->vpsraq(zmmA, zmmB, xmmC);
5539 e->vpsraq(zmmA, zmmB, anyptr_gpC);
5540 e->vpsraq(zmmA, zmmB, 0);
5541 e->vpsraq(zmmA, anyptr_gpB, 0);
5542 e->vpsravd(xmmA, xmmB, xmmC);
5543 e->vpsravd(xmmA, xmmB, anyptr_gpC);
5544 e->vpsravd(ymmA, ymmB, ymmC);
5545 e->vpsravd(ymmA, ymmB, anyptr_gpC);
5546 e->vpsravd(zmmA, zmmB, zmmC);
5547 e->vpsravd(zmmA, zmmB, anyptr_gpC);
5548 e->vpsravq(xmmA, xmmB, xmmC);
5549 e->vpsravq(xmmA, xmmB, anyptr_gpC);
5550 e->vpsravq(ymmA, ymmB, ymmC);
5551 e->vpsravq(ymmA, ymmB, anyptr_gpC);
5552 e->vpsravq(zmmA, zmmB, zmmC);
5553 e->vpsravq(zmmA, zmmB, anyptr_gpC);
5554 e->vpsravw(xmmA, xmmB, xmmC);
5555 e->vpsravw(xmmA, xmmB, anyptr_gpC);
5556 e->vpsravw(ymmA, ymmB, ymmC);
5557 e->vpsravw(ymmA, ymmB, anyptr_gpC);
5558 e->vpsravw(zmmA, zmmB, zmmC);
5559 e->vpsravw(zmmA, zmmB, anyptr_gpC);
5560 e->vpsraw(xmmA, xmmB, xmmC);
5561 e->vpsraw(xmmA, xmmB, anyptr_gpC);
5562 e->vpsraw(xmmA, xmmB, 0);
5563 e->vpsraw(xmmA, anyptr_gpB, 0);
5564 e->vpsraw(ymmA, ymmB, xmmC);
5565 e->vpsraw(ymmA, ymmB, anyptr_gpC);
5566 e->vpsraw(ymmA, ymmB, 0);
5567 e->vpsraw(ymmA, anyptr_gpB, 0);
5568 e->vpsraw(zmmA, zmmB, xmmC);
5569 e->vpsraw(zmmA, zmmB, anyptr_gpC);
5570 e->vpsraw(zmmA, zmmB, 0);
5571 e->vpsraw(zmmA, anyptr_gpB, 0);
5572 e->vpsrld(xmmA, xmmB, xmmC);
5573 e->vpsrld(xmmA, xmmB, anyptr_gpC);
5574 e->vpsrld(xmmA, xmmB, 0);
5575 e->vpsrld(xmmA, anyptr_gpB, 0);
5576 e->vpsrld(ymmA, ymmB, xmmC);
5577 e->vpsrld(ymmA, ymmB, anyptr_gpC);
5578 e->vpsrld(ymmA, ymmB, 0);
5579 e->vpsrld(ymmA, anyptr_gpB, 0);
5580 e->vpsrld(zmmA, zmmB, xmmC);
5581 e->vpsrld(zmmA, zmmB, anyptr_gpC);
5582 e->vpsrld(zmmA, zmmB, 0);
5583 e->vpsrld(zmmA, anyptr_gpB, 0);
5584 e->vpsrldq(xmmA, xmmB, 0);
5585 e->vpsrldq(xmmA, anyptr_gpB, 0);
5586 e->vpsrldq(ymmA, ymmB, 0);
5587 e->vpsrldq(ymmA, anyptr_gpB, 0);
5588 e->vpsrldq(zmmA, zmmB, 0);
5589 e->vpsrldq(zmmA, anyptr_gpB, 0);
5590 e->vpsrlq(xmmA, xmmB, xmmC);
5591 e->vpsrlq(xmmA, xmmB, anyptr_gpC);
5592 e->vpsrlq(xmmA, xmmB, 0);
5593 e->vpsrlq(xmmA, anyptr_gpB, 0);
5594 e->vpsrlq(ymmA, ymmB, xmmC);
5595 e->vpsrlq(ymmA, ymmB, anyptr_gpC);
5596 e->vpsrlq(ymmA, ymmB, 0);
5597 e->vpsrlq(ymmA, anyptr_gpB, 0);
5598 e->vpsrlq(zmmA, zmmB, xmmC);
5599 e->vpsrlq(zmmA, zmmB, anyptr_gpC);
5600 e->vpsrlq(zmmA, zmmB, 0);
5601 e->vpsrlq(zmmA, anyptr_gpB, 0);
5602 e->vpsrlvd(xmmA, xmmB, xmmC);
5603 e->vpsrlvd(xmmA, xmmB, anyptr_gpC);
5604 e->vpsrlvd(ymmA, ymmB, ymmC);
5605 e->vpsrlvd(ymmA, ymmB, anyptr_gpC);
5606 e->vpsrlvd(zmmA, zmmB, zmmC);
5607 e->vpsrlvd(zmmA, zmmB, anyptr_gpC);
5608 e->vpsrlvq(xmmA, xmmB, xmmC);
5609 e->vpsrlvq(xmmA, xmmB, anyptr_gpC);
5610 e->vpsrlvq(ymmA, ymmB, ymmC);
5611 e->vpsrlvq(ymmA, ymmB, anyptr_gpC);
5612 e->vpsrlvq(zmmA, zmmB, zmmC);
5613 e->vpsrlvq(zmmA, zmmB, anyptr_gpC);
5614 e->vpsrlvw(xmmA, xmmB, xmmC);
5615 e->vpsrlvw(xmmA, xmmB, anyptr_gpC);
5616 e->vpsrlvw(ymmA, ymmB, ymmC);
5617 e->vpsrlvw(ymmA, ymmB, anyptr_gpC);
5618 e->vpsrlvw(zmmA, zmmB, zmmC);
5619 e->vpsrlvw(zmmA, zmmB, anyptr_gpC);
5620 e->vpsrlw(xmmA, xmmB, xmmC);
5621 e->vpsrlw(xmmA, xmmB, anyptr_gpC);
5622 e->vpsrlw(xmmA, xmmB, 0);
5623 e->vpsrlw(xmmA, anyptr_gpB, 0);
5624 e->vpsrlw(ymmA, ymmB, xmmC);
5625 e->vpsrlw(ymmA, ymmB, anyptr_gpC);
5626 e->vpsrlw(ymmA, ymmB, 0);
5627 e->vpsrlw(ymmA, anyptr_gpB, 0);
5628 e->vpsrlw(zmmA, zmmB, xmmC);
5629 e->vpsrlw(zmmA, zmmB, anyptr_gpC);
5630 e->vpsrlw(zmmA, zmmB, 0);
5631 e->vpsrlw(zmmA, anyptr_gpB, 0);
5632 e->vpsubb(xmmA, xmmB, xmmC);
5633 e->vpsubb(xmmA, xmmB, anyptr_gpC);
5634 e->vpsubb(ymmA, ymmB, ymmC);
5635 e->vpsubb(ymmA, ymmB, anyptr_gpC);
5636 e->vpsubb(zmmA, zmmB, zmmC);
5637 e->vpsubb(zmmA, zmmB, anyptr_gpC);
5638 e->vpsubd(xmmA, xmmB, xmmC);
5639 e->vpsubd(xmmA, xmmB, anyptr_gpC);
5640 e->vpsubd(ymmA, ymmB, ymmC);
5641 e->vpsubd(ymmA, ymmB, anyptr_gpC);
5642 e->vpsubd(zmmA, zmmB, zmmC);
5643 e->vpsubd(zmmA, zmmB, anyptr_gpC);
5644 e->vpsubq(xmmA, xmmB, xmmC);
5645 e->vpsubq(xmmA, xmmB, anyptr_gpC);
5646 e->vpsubq(ymmA, ymmB, ymmC);
5647 e->vpsubq(ymmA, ymmB, anyptr_gpC);
5648 e->vpsubq(zmmA, zmmB, zmmC);
5649 e->vpsubq(zmmA, zmmB, anyptr_gpC);
5650 e->vpsubsb(xmmA, xmmB, xmmC);
5651 e->vpsubsb(xmmA, xmmB, anyptr_gpC);
5652 e->vpsubsb(ymmA, ymmB, ymmC);
5653 e->vpsubsb(ymmA, ymmB, anyptr_gpC);
5654 e->vpsubsb(zmmA, zmmB, zmmC);
5655 e->vpsubsb(zmmA, zmmB, anyptr_gpC);
5656 e->vpsubsw(xmmA, xmmB, xmmC);
5657 e->vpsubsw(xmmA, xmmB, anyptr_gpC);
5658 e->vpsubsw(ymmA, ymmB, ymmC);
5659 e->vpsubsw(ymmA, ymmB, anyptr_gpC);
5660 e->vpsubsw(zmmA, zmmB, zmmC);
5661 e->vpsubsw(zmmA, zmmB, anyptr_gpC);
5662 e->vpsubusb(xmmA, xmmB, xmmC);
5663 e->vpsubusb(xmmA, xmmB, anyptr_gpC);
5664 e->vpsubusb(ymmA, ymmB, ymmC);
5665 e->vpsubusb(ymmA, ymmB, anyptr_gpC);
5666 e->vpsubusb(zmmA, zmmB, zmmC);
5667 e->vpsubusb(zmmA, zmmB, anyptr_gpC);
5668 e->vpsubusw(xmmA, xmmB, xmmC);
5669 e->vpsubusw(xmmA, xmmB, anyptr_gpC);
5670 e->vpsubusw(ymmA, ymmB, ymmC);
5671 e->vpsubusw(ymmA, ymmB, anyptr_gpC);
5672 e->vpsubusw(zmmA, zmmB, zmmC);
5673 e->vpsubusw(zmmA, zmmB, anyptr_gpC);
5674 e->vpsubw(xmmA, xmmB, xmmC);
5675 e->vpsubw(xmmA, xmmB, anyptr_gpC);
5676 e->vpsubw(ymmA, ymmB, ymmC);
5677 e->vpsubw(ymmA, ymmB, anyptr_gpC);
5678 e->vpsubw(zmmA, zmmB, zmmC);
5679 e->vpsubw(zmmA, zmmB, anyptr_gpC);
5680 e->vpternlogd(xmmA, xmmB, xmmC, 0);
5681 e->vpternlogd(xmmA, xmmB, anyptr_gpC, 0);
5682 e->vpternlogd(ymmA, ymmB, ymmC, 0);
5683 e->vpternlogd(ymmA, ymmB, anyptr_gpC, 0);
5684 e->vpternlogd(zmmA, zmmB, zmmC, 0);
5685 e->vpternlogd(zmmA, zmmB, anyptr_gpC, 0);
5686 e->vpternlogq(xmmA, xmmB, xmmC, 0);
5687 e->vpternlogq(xmmA, xmmB, anyptr_gpC, 0);
5688 e->vpternlogq(ymmA, ymmB, ymmC, 0);
5689 e->vpternlogq(ymmA, ymmB, anyptr_gpC, 0);
5690 e->vpternlogq(zmmA, zmmB, zmmC, 0);
5691 e->vpternlogq(zmmA, zmmB, anyptr_gpC, 0);
5692 e->vptestmb(kA, xmmB, xmmC);
5693 e->vptestmb(kA, xmmB, anyptr_gpC);
5694 e->vptestmb(kA, ymmB, ymmC);
5695 e->vptestmb(kA, ymmB, anyptr_gpC);
5696 e->vptestmb(kA, zmmB, zmmC);
5697 e->vptestmb(kA, zmmB, anyptr_gpC);
5698 e->vptestmd(kA, xmmB, xmmC);
5699 e->vptestmd(kA, xmmB, anyptr_gpC);
5700 e->vptestmd(kA, ymmB, ymmC);
5701 e->vptestmd(kA, ymmB, anyptr_gpC);
5702 e->vptestmd(kA, zmmB, zmmC);
5703 e->vptestmd(kA, zmmB, anyptr_gpC);
5704 e->vptestmq(kA, xmmB, xmmC);
5705 e->vptestmq(kA, xmmB, anyptr_gpC);
5706 e->vptestmq(kA, ymmB, ymmC);
5707 e->vptestmq(kA, ymmB, anyptr_gpC);
5708 e->vptestmq(kA, zmmB, zmmC);
5709 e->vptestmq(kA, zmmB, anyptr_gpC);
5710 e->vptestmw(kA, xmmB, xmmC);
5711 e->vptestmw(kA, xmmB, anyptr_gpC);
5712 e->vptestmw(kA, ymmB, ymmC);
5713 e->vptestmw(kA, ymmB, anyptr_gpC);
5714 e->vptestmw(kA, zmmB, zmmC);
5715 e->vptestmw(kA, zmmB, anyptr_gpC);
5716 e->vptestnmb(kA, xmmB, xmmC);
5717 e->vptestnmb(kA, xmmB, anyptr_gpC);
5718 e->vptestnmb(kA, ymmB, ymmC);
5719 e->vptestnmb(kA, ymmB, anyptr_gpC);
5720 e->vptestnmb(kA, zmmB, zmmC);
5721 e->vptestnmb(kA, zmmB, anyptr_gpC);
5722 e->vptestnmd(kA, xmmB, xmmC);
5723 e->vptestnmd(kA, xmmB, anyptr_gpC);
5724 e->vptestnmd(kA, ymmB, ymmC);
5725 e->vptestnmd(kA, ymmB, anyptr_gpC);
5726 e->vptestnmd(kA, zmmB, zmmC);
5727 e->vptestnmd(kA, zmmB, anyptr_gpC);
5728 e->vptestnmq(kA, xmmB, xmmC);
5729 e->vptestnmq(kA, xmmB, anyptr_gpC);
5730 e->vptestnmq(kA, ymmB, ymmC);
5731 e->vptestnmq(kA, ymmB, anyptr_gpC);
5732 e->vptestnmq(kA, zmmB, zmmC);
5733 e->vptestnmq(kA, zmmB, anyptr_gpC);
5734 e->vptestnmw(kA, xmmB, xmmC);
5735 e->vptestnmw(kA, xmmB, anyptr_gpC);
5736 e->vptestnmw(kA, ymmB, ymmC);
5737 e->vptestnmw(kA, ymmB, anyptr_gpC);
5738 e->vptestnmw(kA, zmmB, zmmC);
5739 e->vptestnmw(kA, zmmB, anyptr_gpC);
5740 e->vpunpckhbw(xmmA, xmmB, xmmC);
5741 e->vpunpckhbw(xmmA, xmmB, anyptr_gpC);
5742 e->vpunpckhbw(ymmA, ymmB, ymmC);
5743 e->vpunpckhbw(ymmA, ymmB, anyptr_gpC);
5744 e->vpunpckhbw(zmmA, zmmB, zmmC);
5745 e->vpunpckhbw(zmmA, zmmB, anyptr_gpC);
5746 e->vpunpckhdq(xmmA, xmmB, xmmC);
5747 e->vpunpckhdq(xmmA, xmmB, anyptr_gpC);
5748 e->vpunpckhdq(ymmA, ymmB, ymmC);
5749 e->vpunpckhdq(ymmA, ymmB, anyptr_gpC);
5750 e->vpunpckhdq(zmmA, zmmB, zmmC);
5751 e->vpunpckhdq(zmmA, zmmB, anyptr_gpC);
5752 e->vpunpckhqdq(xmmA, xmmB, xmmC);
5753 e->vpunpckhqdq(xmmA, xmmB, anyptr_gpC);
5754 e->vpunpckhqdq(ymmA, ymmB, ymmC);
5755 e->vpunpckhqdq(ymmA, ymmB, anyptr_gpC);
5756 e->vpunpckhqdq(zmmA, zmmB, zmmC);
5757 e->vpunpckhqdq(zmmA, zmmB, anyptr_gpC);
5758 e->vpunpckhwd(xmmA, xmmB, xmmC);
5759 e->vpunpckhwd(xmmA, xmmB, anyptr_gpC);
5760 e->vpunpckhwd(ymmA, ymmB, ymmC);
5761 e->vpunpckhwd(ymmA, ymmB, anyptr_gpC);
5762 e->vpunpckhwd(zmmA, zmmB, zmmC);
5763 e->vpunpckhwd(zmmA, zmmB, anyptr_gpC);
5764 e->vpunpcklbw(xmmA, xmmB, xmmC);
5765 e->vpunpcklbw(xmmA, xmmB, anyptr_gpC);
5766 e->vpunpcklbw(ymmA, ymmB, ymmC);
5767 e->vpunpcklbw(ymmA, ymmB, anyptr_gpC);
5768 e->vpunpcklbw(zmmA, zmmB, zmmC);
5769 e->vpunpcklbw(zmmA, zmmB, anyptr_gpC);
5770 e->vpunpckldq(xmmA, xmmB, xmmC);
5771 e->vpunpckldq(xmmA, xmmB, anyptr_gpC);
5772 e->vpunpckldq(ymmA, ymmB, ymmC);
5773 e->vpunpckldq(ymmA, ymmB, anyptr_gpC);
5774 e->vpunpckldq(zmmA, zmmB, zmmC);
5775 e->vpunpckldq(zmmA, zmmB, anyptr_gpC);
5776 e->vpunpcklqdq(xmmA, xmmB, xmmC);
5777 e->vpunpcklqdq(xmmA, xmmB, anyptr_gpC);
5778 e->vpunpcklqdq(ymmA, ymmB, ymmC);
5779 e->vpunpcklqdq(ymmA, ymmB, anyptr_gpC);
5780 e->vpunpcklqdq(zmmA, zmmB, zmmC);
5781 e->vpunpcklqdq(zmmA, zmmB, anyptr_gpC);
5782 e->vpunpcklwd(xmmA, xmmB, xmmC);
5783 e->vpunpcklwd(xmmA, xmmB, anyptr_gpC);
5784 e->vpunpcklwd(ymmA, ymmB, ymmC);
5785 e->vpunpcklwd(ymmA, ymmB, anyptr_gpC);
5786 e->vpunpcklwd(zmmA, zmmB, zmmC);
5787 e->vpunpcklwd(zmmA, zmmB, anyptr_gpC);
5788 e->vpxord(xmmA, xmmB, xmmC);
5789 e->vpxord(xmmA, xmmB, anyptr_gpC);
5790 e->vpxord(ymmA, ymmB, ymmC);
5791 e->vpxord(ymmA, ymmB, anyptr_gpC);
5792 e->vpxord(zmmA, zmmB, zmmC);
5793 e->vpxord(zmmA, zmmB, anyptr_gpC);
5794 e->vpxorq(xmmA, xmmB, xmmC);
5795 e->vpxorq(xmmA, xmmB, anyptr_gpC);
5796 e->vpxorq(ymmA, ymmB, ymmC);
5797 e->vpxorq(ymmA, ymmB, anyptr_gpC);
5798 e->vpxorq(zmmA, zmmB, zmmC);
5799 e->vpxorq(zmmA, zmmB, anyptr_gpC);
5800 e->vrangepd(xmmA, xmmB, xmmC, 0);
5801 e->vrangepd(xmmA, xmmB, anyptr_gpC, 0);
5802 e->vrangepd(ymmA, ymmB, ymmC, 0);
5803 e->vrangepd(ymmA, ymmB, anyptr_gpC, 0);
5804 e->vrangepd(zmmA, zmmB, zmmC, 0);
5805 e->vrangepd(zmmA, zmmB, anyptr_gpC, 0);
5806 e->vrangeps(xmmA, xmmB, xmmC, 0);
5807 e->vrangeps(xmmA, xmmB, anyptr_gpC, 0);
5808 e->vrangeps(ymmA, ymmB, ymmC, 0);
5809 e->vrangeps(ymmA, ymmB, anyptr_gpC, 0);
5810 e->vrangeps(zmmA, zmmB, zmmC, 0);
5811 e->vrangeps(zmmA, zmmB, anyptr_gpC, 0);
5812 e->vrangesd(xmmA, xmmB, xmmC, 0);
5813 e->vrangesd(xmmA, xmmB, anyptr_gpC, 0);
5814 e->vrangess(xmmA, xmmB, xmmC, 0);
5815 e->vrangess(xmmA, xmmB, anyptr_gpC, 0);
5816 e->vrcp14pd(xmmA, xmmB);
5817 e->vrcp14pd(xmmA, anyptr_gpB);
5818 e->vrcp14pd(ymmA, ymmB);
5819 e->vrcp14pd(ymmA, anyptr_gpB);
5820 e->vrcp14pd(zmmA, zmmB);
5821 e->vrcp14pd(zmmA, anyptr_gpB);
5822 e->vrcp14ps(xmmA, xmmB);
5823 e->vrcp14ps(xmmA, anyptr_gpB);
5824 e->vrcp14ps(ymmA, ymmB);
5825 e->vrcp14ps(ymmA, anyptr_gpB);
5826 e->vrcp14ps(zmmA, zmmB);
5827 e->vrcp14ps(zmmA, anyptr_gpB);
5828 e->vrcp14sd(xmmA, xmmB, xmmC);
5829 e->vrcp14sd(xmmA, xmmB, anyptr_gpC);
5830 e->vrcp14ss(xmmA, xmmB, xmmC);
5831 e->vrcp14ss(xmmA, xmmB, anyptr_gpC);
5832 e->vrcp28pd(zmmA, zmmB);
5833 e->vrcp28pd(zmmA, anyptr_gpB);
5834 e->vrcp28ps(zmmA, zmmB);
5835 e->vrcp28ps(zmmA, anyptr_gpB);
5836 e->vrcp28sd(xmmA, xmmB, xmmC);
5837 e->vrcp28sd(xmmA, xmmB, anyptr_gpC);
5838 e->vrcp28ss(xmmA, xmmB, xmmC);
5839 e->vrcp28ss(xmmA, xmmB, anyptr_gpC);
5840 e->vreducepd(xmmA, xmmB, 0);
5841 e->vreducepd(xmmA, anyptr_gpB, 0);
5842 e->vreducepd(ymmA, ymmB, 0);
5843 e->vreducepd(ymmA, anyptr_gpB, 0);
5844 e->vreducepd(zmmA, zmmB, 0);
5845 e->vreducepd(zmmA, anyptr_gpB, 0);
5846 e->vreduceps(xmmA, xmmB, 0);
5847 e->vreduceps(xmmA, anyptr_gpB, 0);
5848 e->vreduceps(ymmA, ymmB, 0);
5849 e->vreduceps(ymmA, anyptr_gpB, 0);
5850 e->vreduceps(zmmA, zmmB, 0);
5851 e->vreduceps(zmmA, anyptr_gpB, 0);
5852 e->vreducesd(xmmA, xmmB, xmmC, 0);
5853 e->vreducesd(xmmA, xmmB, anyptr_gpC, 0);
5854 e->vreducess(xmmA, xmmB, xmmC, 0);
5855 e->vreducess(xmmA, xmmB, anyptr_gpC, 0);
5856 e->vrndscalepd(xmmA, xmmB, 0);
5857 e->vrndscalepd(xmmA, anyptr_gpB, 0);
5858 e->vrndscalepd(ymmA, ymmB, 0);
5859 e->vrndscalepd(ymmA, anyptr_gpB, 0);
5860 e->vrndscalepd(zmmA, zmmB, 0);
5861 e->vrndscalepd(zmmA, anyptr_gpB, 0);
5862 e->vrndscaleps(xmmA, xmmB, 0);
5863 e->vrndscaleps(xmmA, anyptr_gpB, 0);
5864 e->vrndscaleps(ymmA, ymmB, 0);
5865 e->vrndscaleps(ymmA, anyptr_gpB, 0);
5866 e->vrndscaleps(zmmA, zmmB, 0);
5867 e->vrndscaleps(zmmA, anyptr_gpB, 0);
5868 e->vrndscalesd(xmmA, xmmB, xmmC, 0);
5869 e->vrndscalesd(xmmA, xmmB, anyptr_gpC, 0);
5870 e->vrndscaless(xmmA, xmmB, xmmC, 0);
5871 e->vrndscaless(xmmA, xmmB, anyptr_gpC, 0);
5872 e->vrsqrt14pd(xmmA, xmmB);
5873 e->vrsqrt14pd(xmmA, anyptr_gpB);
5874 e->vrsqrt14pd(ymmA, ymmB);
5875 e->vrsqrt14pd(ymmA, anyptr_gpB);
5876 e->vrsqrt14pd(zmmA, zmmB);
5877 e->vrsqrt14pd(zmmA, anyptr_gpB);
5878 e->vrsqrt14ps(xmmA, xmmB);
5879 e->vrsqrt14ps(xmmA, anyptr_gpB);
5880 e->vrsqrt14ps(ymmA, ymmB);
5881 e->vrsqrt14ps(ymmA, anyptr_gpB);
5882 e->vrsqrt14ps(zmmA, zmmB);
5883 e->vrsqrt14ps(zmmA, anyptr_gpB);
5884 e->vrsqrt14sd(xmmA, xmmB, xmmC);
5885 e->vrsqrt14sd(xmmA, xmmB, anyptr_gpC);
5886 e->vrsqrt14ss(xmmA, xmmB, xmmC);
5887 e->vrsqrt14ss(xmmA, xmmB, anyptr_gpC);
5888 e->vrsqrt28pd(zmmA, zmmB);
5889 e->vrsqrt28pd(zmmA, anyptr_gpB);
5890 e->vrsqrt28ps(zmmA, zmmB);
5891 e->vrsqrt28ps(zmmA, anyptr_gpB);
5892 e->vrsqrt28sd(xmmA, xmmB, xmmC);
5893 e->vrsqrt28sd(xmmA, xmmB, anyptr_gpC);
5894 e->vrsqrt28ss(xmmA, xmmB, xmmC);
5895 e->vrsqrt28ss(xmmA, xmmB, anyptr_gpC);
5896 e->vscalefpd(xmmA, xmmB, xmmC);
5897 e->vscalefpd(xmmA, xmmB, anyptr_gpC);
5898 e->vscalefpd(ymmA, ymmB, ymmC);
5899 e->vscalefpd(ymmA, ymmB, anyptr_gpC);
5900 e->vscalefpd(zmmA, zmmB, zmmC);
5901 e->vscalefpd(zmmA, zmmB, anyptr_gpC);
5902 e->vscalefps(xmmA, xmmB, xmmC);
5903 e->vscalefps(xmmA, xmmB, anyptr_gpC);
5904 e->vscalefps(ymmA, ymmB, ymmC);
5905 e->vscalefps(ymmA, ymmB, anyptr_gpC);
5906 e->vscalefps(zmmA, zmmB, zmmC);
5907 e->vscalefps(zmmA, zmmB, anyptr_gpC);
5908 e->vscalefsd(xmmA, xmmB, xmmC);
5909 e->vscalefsd(xmmA, xmmB, anyptr_gpC);
5910 e->vscalefss(xmmA, xmmB, xmmC);
5911 e->vscalefss(xmmA, xmmB, anyptr_gpC);
5912 e->vscatterdpd(vx_ptr, xmmB);
5913 e->vscatterdpd(vx_ptr, ymmB);
5914 e->vscatterdpd(vy_ptr, zmmB);
5915 e->vscatterdps(vx_ptr, xmmB);
5916 e->vscatterdps(vy_ptr, ymmB);
5917 e->vscatterdps(vz_ptr, zmmB);
5918 e->vscatterpf0dpd(vy_ptr);
5919 e->vscatterpf0dps(vz_ptr);
5920 e->vscatterpf0qpd(vz_ptr);
5921 e->vscatterpf0qps(vz_ptr);
5922 e->vscatterpf1dpd(vy_ptr);
5923 e->vscatterpf1dps(vz_ptr);
5924 e->vscatterpf1qpd(vz_ptr);
5925 e->vscatterpf1qps(vz_ptr);
5926 e->vscatterqpd(vx_ptr, xmmB);
5927 e->vscatterqpd(vy_ptr, ymmB);
5928 e->vscatterqpd(vz_ptr, zmmB);
5929 e->vscatterqps(vx_ptr, xmmB);
5930 e->vscatterqps(vy_ptr, xmmB);
5931 e->vscatterqps(vz_ptr, ymmB);
5932 e->vshuff32x4(ymmA, ymmB, ymmC, 0);
5933 e->vshuff32x4(ymmA, ymmB, anyptr_gpC, 0);
5934 e->vshuff32x4(zmmA, zmmB, zmmC, 0);
5935 e->vshuff32x4(zmmA, zmmB, anyptr_gpC, 0);
5936 e->vshuff64x2(ymmA, ymmB, ymmC, 0);
5937 e->vshuff64x2(ymmA, ymmB, anyptr_gpC, 0);
5938 e->vshuff64x2(zmmA, zmmB, zmmC, 0);
5939 e->vshuff64x2(zmmA, zmmB, anyptr_gpC, 0);
5940 e->vshufi32x4(ymmA, ymmB, ymmC, 0);
5941 e->vshufi32x4(ymmA, ymmB, anyptr_gpC, 0);
5942 e->vshufi32x4(zmmA, zmmB, zmmC, 0);
5943 e->vshufi32x4(zmmA, zmmB, anyptr_gpC, 0);
5944 e->vshufi64x2(ymmA, ymmB, ymmC, 0);
5945 e->vshufi64x2(ymmA, ymmB, anyptr_gpC, 0);
5946 e->vshufi64x2(zmmA, zmmB, zmmC, 0);
5947 e->vshufi64x2(zmmA, zmmB, anyptr_gpC, 0);
5948 e->vshufpd(xmmA, xmmB, xmmC, 0);
5949 e->vshufpd(xmmA, xmmB, anyptr_gpC, 0);
5950 e->vshufpd(ymmA, ymmB, ymmC, 0);
5951 e->vshufpd(ymmA, ymmB, anyptr_gpC, 0);
5952 e->vshufpd(zmmA, zmmB, zmmC, 0);
5953 e->vshufpd(zmmA, zmmB, anyptr_gpC, 0);
5954 e->vshufps(xmmA, xmmB, xmmC, 0);
5955 e->vshufps(xmmA, xmmB, anyptr_gpC, 0);
5956 e->vshufps(ymmA, ymmB, ymmC, 0);
5957 e->vshufps(ymmA, ymmB, anyptr_gpC, 0);
5958 e->vshufps(zmmA, zmmB, zmmC, 0);
5959 e->vshufps(zmmA, zmmB, anyptr_gpC, 0);
5960 e->vsqrtpd(xmmA, xmmB);
5961 e->vsqrtpd(xmmA, anyptr_gpB);
5962 e->vsqrtpd(ymmA, ymmB);
5963 e->vsqrtpd(ymmA, anyptr_gpB);
5964 e->vsqrtpd(zmmA, zmmB);
5965 e->vsqrtpd(zmmA, anyptr_gpB);
5966 e->vsqrtps(xmmA, xmmB);
5967 e->vsqrtps(xmmA, anyptr_gpB);
5968 e->vsqrtps(ymmA, ymmB);
5969 e->vsqrtps(ymmA, anyptr_gpB);
5970 e->vsqrtps(zmmA, zmmB);
5971 e->vsqrtps(zmmA, anyptr_gpB);
5972 e->vsqrtsd(xmmA, xmmB, xmmC);
5973 e->vsqrtsd(xmmA, xmmB, anyptr_gpC);
5974 e->vsqrtss(xmmA, xmmB, xmmC);
5975 e->vsqrtss(xmmA, xmmB, anyptr_gpC);
5976 e->vsubpd(xmmA, xmmB, xmmC);
5977 e->vsubpd(xmmA, xmmB, anyptr_gpC);
5978 e->vsubpd(ymmA, ymmB, ymmC);
5979 e->vsubpd(ymmA, ymmB, anyptr_gpC);
5980 e->vsubpd(zmmA, zmmB, zmmC);
5981 e->vsubpd(zmmA, zmmB, anyptr_gpC);
5982 e->vsubps(xmmA, xmmB, xmmC);
5983 e->vsubps(xmmA, xmmB, anyptr_gpC);
5984 e->vsubps(ymmA, ymmB, ymmC);
5985 e->vsubps(ymmA, ymmB, anyptr_gpC);
5986 e->vsubps(zmmA, zmmB, zmmC);
5987 e->vsubps(zmmA, zmmB, anyptr_gpC);
5988 e->vsubsd(xmmA, xmmB, xmmC);
5989 e->vsubsd(xmmA, xmmB, anyptr_gpC);
5990 e->vsubss(xmmA, xmmB, xmmC);
5991 e->vsubss(xmmA, xmmB, anyptr_gpC);
5992 e->vucomisd(xmmA, xmmB);
5993 e->vucomisd(xmmA, anyptr_gpB);
5994 e->vucomiss(xmmA, xmmB);
5995 e->vucomiss(xmmA, anyptr_gpB);
5996 e->vunpckhpd(xmmA, xmmB, xmmC);
5997 e->vunpckhpd(xmmA, xmmB, anyptr_gpC);
5998 e->vunpckhpd(ymmA, ymmB, ymmC);
5999 e->vunpckhpd(ymmA, ymmB, anyptr_gpC);
6000 e->vunpckhpd(zmmA, zmmB, zmmC);
6001 e->vunpckhpd(zmmA, zmmB, anyptr_gpC);
6002 e->vunpckhps(xmmA, xmmB, xmmC);
6003 e->vunpckhps(xmmA, xmmB, anyptr_gpC);
6004 e->vunpckhps(ymmA, ymmB, ymmC);
6005 e->vunpckhps(ymmA, ymmB, anyptr_gpC);
6006 e->vunpckhps(zmmA, zmmB, zmmC);
6007 e->vunpckhps(zmmA, zmmB, anyptr_gpC);
6008 e->vunpcklpd(xmmA, xmmB, xmmC);
6009 e->vunpcklpd(xmmA, xmmB, anyptr_gpC);
6010 e->vunpcklpd(ymmA, ymmB, ymmC);
6011 e->vunpcklpd(ymmA, ymmB, anyptr_gpC);
6012 e->vunpcklpd(zmmA, zmmB, zmmC);
6013 e->vunpcklpd(zmmA, zmmB, anyptr_gpC);
6014 e->vunpcklps(xmmA, xmmB, xmmC);
6015 e->vunpcklps(xmmA, xmmB, anyptr_gpC);
6016 e->vunpcklps(ymmA, ymmB, ymmC);
6017 e->vunpcklps(ymmA, ymmB, anyptr_gpC);
6018 e->vunpcklps(zmmA, zmmB, zmmC);
6019 e->vunpcklps(zmmA, zmmB, anyptr_gpC);
6020 e->vxorpd(xmmA, xmmB, xmmC);
6021 e->vxorpd(xmmA, xmmB, anyptr_gpC);
6022 e->vxorpd(ymmA, ymmB, ymmC);
6023 e->vxorpd(ymmA, ymmB, anyptr_gpC);
6024 e->vxorpd(zmmA, zmmB, zmmC);
6025 e->vxorpd(zmmA, zmmB, anyptr_gpC);
6026 e->vxorps(xmmA, xmmB, xmmC);
6027 e->vxorps(xmmA, xmmB, anyptr_gpC);
6028 e->vxorps(ymmA, ymmB, ymmC);
6029 e->vxorps(ymmA, ymmB, anyptr_gpC);
6030 e->vxorps(zmmA, zmmB, zmmC);
6031 e->vxorps(zmmA, zmmB, anyptr_gpC);
6032
6033 // Mark the end.
6034 e->nop();
6035 e->nop();
6036 e->nop();
6037 e->nop();
6038 }
6039
6040 } // {asmtest}
6041
6042 #endif // _ASMJIT_TEST_OPCODE_H
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include "./asmjit.h"
7
8 using namespace asmjit;
9
10 // ============================================================================
11 // [DumpCpu]
12 // ============================================================================
13
// Maps a single CPU feature id (one of `x86::Features::k...` / `arm::Features::k...`)
// to a human-readable name, used by `dumpFeatures()` to print detected features.
struct DumpCpuFeature {
  uint32_t feature;  // Feature id passed to `CpuInfo::hasFeature()`.
  const char* name;  // Printable feature name (NUL-terminated literal).
};
18
19 static const char* hostArch() noexcept {
20 switch (ArchInfo::kIdHost) {
21 case ArchInfo::kIdX86: return "X86";
22 case ArchInfo::kIdX64: return "X64";
23 case ArchInfo::kIdA32: return "ARM32";
24 case ArchInfo::kIdA64: return "ARM64";
25 default: return "Unknown";
26 }
27 }
28
// Prints (via the test-harness `INFO` macro) the name of every feature in the
// `data[0..count)` table that `cpu` reports as present.
//
// `data` is a table of {feature-id, name} pairs (see `DumpCpuFeature`);
// entries whose feature the host CPU lacks are silently skipped.
static void dumpFeatures(const CpuInfo& cpu, const DumpCpuFeature* data, size_t count) noexcept {
  for (size_t i = 0; i < count; i++)
    if (cpu.hasFeature(data[i].feature))
      INFO("  %s", data[i].name);
}
34
35 static void dumpCpu(void) noexcept {
36 const CpuInfo& cpu = CpuInfo::host();
37
38 INFO("Host CPU:");
39 INFO(" Vendor : %s", cpu.vendor());
40 INFO(" Brand : %s", cpu.brand());
41 INFO(" Model ID : %u", cpu.modelId());
42 INFO(" Brand ID : %u", cpu.brandId());
43 INFO(" Family ID : %u", cpu.familyId());
44 INFO(" Stepping : %u", cpu.stepping());
45 INFO(" Processor Type : %u", cpu.processorType());
46 INFO(" Max logical Processors : %u", cpu.maxLogicalProcessors());
47 INFO(" Cache-Line Size : %u", cpu.cacheLineSize());
48 INFO(" HW-Thread Count : %u", cpu.hwThreadCount());
49 INFO("");
50
51 // --------------------------------------------------------------------------
52 // [X86]
53 // --------------------------------------------------------------------------
54
55 #if ASMJIT_ARCH_X86
56 static const DumpCpuFeature x86FeaturesList[] = {
57 { x86::Features::kNX , "NX" },
58 { x86::Features::kMT , "MT" },
59 { x86::Features::k3DNOW , "3DNOW" },
60 { x86::Features::k3DNOW2 , "3DNOW2" },
61 { x86::Features::kADX , "ADX" },
62 { x86::Features::kAESNI , "AESNI" },
63 { x86::Features::kALTMOVCR8 , "ALTMOVCR8" },
64 { x86::Features::kAVX , "AVX" },
65 { x86::Features::kAVX2 , "AVX2" },
66 { x86::Features::kAVX512_4FMAPS , "AVX512_4FMAPS" },
67 { x86::Features::kAVX512_4VNNIW , "AVX512_4VNNIW" },
68 { x86::Features::kAVX512_BITALG , "AVX512_BITALG" },
69 { x86::Features::kAVX512_BW , "AVX512_BW" },
70 { x86::Features::kAVX512_CDI , "AVX512_CDI" },
71 { x86::Features::kAVX512_DQ , "AVX512_DQ" },
72 { x86::Features::kAVX512_ERI , "AVX512_ERI" },
73 { x86::Features::kAVX512_F , "AVX512_F" },
74 { x86::Features::kAVX512_IFMA , "AVX512_IFMA" },
75 { x86::Features::kAVX512_PFI , "AVX512_PFI" },
76 { x86::Features::kAVX512_VBMI , "AVX512_VBMI" },
77 { x86::Features::kAVX512_VBMI2 , "AVX512_VBMI2" },
78 { x86::Features::kAVX512_VL , "AVX512_VL" },
79 { x86::Features::kAVX512_VNNI , "AVX512_VNNI" },
80 { x86::Features::kAVX512_VPOPCNTDQ, "AVX512_VPOPCNTDQ" },
81 { x86::Features::kBMI , "BMI" },
82 { x86::Features::kBMI2 , "BMI2" },
83 { x86::Features::kCLFLUSH , "CLFLUSH" },
84 { x86::Features::kCLFLUSHOPT , "CLFLUSHOPT" },
85 { x86::Features::kCLWB , "CLWB" },
86 { x86::Features::kCLZERO , "CLZERO" },
87 { x86::Features::kCMOV , "CMOV" },
88 { x86::Features::kCMPXCHG16B , "CMPXCHG16B" },
89 { x86::Features::kCMPXCHG8B , "CMPXCHG8B" },
90 { x86::Features::kERMS , "ERMS" },
91 { x86::Features::kF16C , "F16C" },
92 { x86::Features::kFMA , "FMA" },
93 { x86::Features::kFMA4 , "FMA4" },
94 { x86::Features::kFPU , "FPU" },
95 { x86::Features::kFSGSBASE , "FSGSBASE" },
96 { x86::Features::kFXSR , "FXSR" },
97 { x86::Features::kFXSROPT , "FXSROPT" },
98 { x86::Features::kGEODE , "GEODE" },
99 { x86::Features::kGFNI , "GFNI" },
100 { x86::Features::kHLE , "HLE" },
101 { x86::Features::kI486 , "I486" },
102 { x86::Features::kLAHFSAHF , "LAHFSAHF" },
103 { x86::Features::kLWP , "LWP" },
104 { x86::Features::kLZCNT , "LZCNT" },
105 { x86::Features::kMMX , "MMX" },
106 { x86::Features::kMMX2 , "MMX2" },
107 { x86::Features::kMONITOR , "MONITOR" },
108 { x86::Features::kMONITORX , "MONITORX" },
109 { x86::Features::kMOVBE , "MOVBE" },
110 { x86::Features::kMPX , "MPX" },
111 { x86::Features::kMSR , "MSR" },
112 { x86::Features::kMSSE , "MSSE" },
113 { x86::Features::kOSXSAVE , "OSXSAVE" },
114 { x86::Features::kPCLMULQDQ , "PCLMULQDQ" },
115 { x86::Features::kPCOMMIT , "PCOMMIT" },
116 { x86::Features::kPOPCNT , "POPCNT" },
117 { x86::Features::kPREFETCHW , "PREFETCHW" },
118 { x86::Features::kPREFETCHWT1 , "PREFETCHWT1" },
119 { x86::Features::kRDRAND , "RDRAND" },
120 { x86::Features::kRDSEED , "RDSEED" },
121 { x86::Features::kRDTSC , "RDTSC" },
122 { x86::Features::kRDTSCP , "RDTSCP" },
123 { x86::Features::kRTM , "RTM" },
124 { x86::Features::kSHA , "SHA" },
125 { x86::Features::kSKINIT , "SKINIT" },
126 { x86::Features::kSMAP , "SMAP" },
127 { x86::Features::kSMEP , "SMEP" },
128 { x86::Features::kSMX , "SMX" },
129 { x86::Features::kSSE , "SSE" },
130 { x86::Features::kSSE2 , "SSE2" },
131 { x86::Features::kSSE3 , "SSE3" },
132 { x86::Features::kSSE4_1 , "SSE4.1" },
133 { x86::Features::kSSE4_2 , "SSE4.2" },
134 { x86::Features::kSSE4A , "SSE4A" },
135 { x86::Features::kSSSE3 , "SSSE3" },
136 { x86::Features::kSVM , "SVM" },
137 { x86::Features::kTBM , "TBM" },
138 { x86::Features::kTSX , "TSX" },
139 { x86::Features::kVAES , "VAES" },
140 { x86::Features::kVMX , "VMX" },
141 { x86::Features::kVPCLMULQDQ , "VPCLMULQDQ" },
142 { x86::Features::kXOP , "XOP" },
143 { x86::Features::kXSAVE , "XSAVE" },
144 { x86::Features::kXSAVEC , "XSAVEC" },
145 { x86::Features::kXSAVEOPT , "XSAVEOPT" },
146 { x86::Features::kXSAVES , "XSAVES" }
147 };
148
149 INFO("X86 Features:");
150 dumpFeatures(cpu, x86FeaturesList, ASMJIT_ARRAY_SIZE(x86FeaturesList));
151 INFO("");
152 #endif
153
154 // --------------------------------------------------------------------------
155 // [ARM]
156 // --------------------------------------------------------------------------
157
158 #if ASMJIT_ARCH_ARM
159 static const DumpCpuFeature armFeaturesList[] = {
160 { arm::Features::kARMv6 , "ARMv6" },
161 { arm::Features::kARMv7 , "ARMv7" },
162 { arm::Features::kARMv8 , "ARMv8" },
163 { arm::Features::kTHUMB , "THUMB" },
164 { arm::Features::kTHUMBv2 , "THUMBv2" },
165 { arm::Features::kVFP2 , "VFPv2" },
166 { arm::Features::kVFP3 , "VFPv3" },
167 { arm::Features::kVFP4 , "VFPv4" },
168 { arm::Features::kVFP_D32 , "VFP D32" },
169 { arm::Features::kNEON , "NEON" },
170 { arm::Features::kDSP , "DSP" },
171 { arm::Features::kIDIV , "IDIV" },
172 { arm::Features::kAES , "AES" },
173 { arm::Features::kCRC32 , "CRC32" },
174 { arm::Features::kSHA1 , "SHA1" },
175 { arm::Features::kSHA256 , "SHA256" },
176 { arm::Features::kATOMIC64 , "ATOMIC64" }
177 };
178
179 INFO("ARM Features:");
180 dumpFeatures(cpu, armFeaturesList, ASMJIT_ARRAY_SIZE(armFeaturesList));
181 INFO("");
182 #endif
183 }
184
185 // ============================================================================
186 // [DumpSizeOf]
187 // ============================================================================
188
// Prints the in-memory size (via sizeof) of primitive C++ types and of the
// public AsmJit classes. Serves as a diagnostic to spot unintended growth of
// core data structures across builds, compilers, and platforms.
static void dumpSizeOf(void) noexcept {
#define DUMP_TYPE(...) \
  INFO("  %-26s: %u", #__VA_ARGS__, uint32_t(sizeof(__VA_ARGS__)))

  INFO("Size of C++ types:");
  DUMP_TYPE(int8_t);
  DUMP_TYPE(int16_t);
  DUMP_TYPE(int32_t);
  DUMP_TYPE(int64_t);
  DUMP_TYPE(int);
  DUMP_TYPE(long);
  DUMP_TYPE(size_t);
  DUMP_TYPE(intptr_t);
  DUMP_TYPE(float);
  DUMP_TYPE(double);
  DUMP_TYPE(void*);
  INFO("");

  INFO("Size of base classes:");
  DUMP_TYPE(BaseAssembler);
  DUMP_TYPE(BaseEmitter);
  DUMP_TYPE(CodeBuffer);
  DUMP_TYPE(CodeHolder);
  DUMP_TYPE(ConstPool);
  DUMP_TYPE(LabelEntry);
  DUMP_TYPE(RelocEntry);
  DUMP_TYPE(Section);
  DUMP_TYPE(String);
  DUMP_TYPE(Target);
  DUMP_TYPE(Zone);
  DUMP_TYPE(ZoneAllocator);
  DUMP_TYPE(ZoneBitVector);
  DUMP_TYPE(ZoneHashNode);
  DUMP_TYPE(ZoneHash<ZoneHashNode>);
  DUMP_TYPE(ZoneList<int>);
  DUMP_TYPE(ZoneVector<int>);
  INFO("");

  INFO("Size of operand classes:");
  DUMP_TYPE(Operand);
  DUMP_TYPE(BaseReg);
  DUMP_TYPE(BaseMem);
  DUMP_TYPE(Imm);
  DUMP_TYPE(Label);
  INFO("");

  INFO("Size of function classes:");
  DUMP_TYPE(CallConv);
  DUMP_TYPE(FuncFrame);
  DUMP_TYPE(FuncValue);
  DUMP_TYPE(FuncDetail);
  DUMP_TYPE(FuncSignature);
  DUMP_TYPE(FuncArgsAssignment);
  INFO("");

  // Builder/Compiler/x86 sections are compiled out when the corresponding
  // features are disabled at build time.
#ifndef ASMJIT_NO_BUILDER
  INFO("Size of builder classes:");
  DUMP_TYPE(BaseBuilder);
  DUMP_TYPE(BaseNode);
  DUMP_TYPE(InstNode);
  DUMP_TYPE(InstExNode);
  DUMP_TYPE(AlignNode);
  DUMP_TYPE(LabelNode);
  DUMP_TYPE(EmbedDataNode);
  DUMP_TYPE(EmbedLabelNode);
  DUMP_TYPE(ConstPoolNode);
  DUMP_TYPE(CommentNode);
  DUMP_TYPE(SentinelNode);
  INFO("");
#endif

#ifndef ASMJIT_NO_COMPILER
  INFO("Size of compiler classes:");
  DUMP_TYPE(BaseCompiler);
  DUMP_TYPE(FuncNode);
  DUMP_TYPE(FuncRetNode);
  DUMP_TYPE(FuncCallNode);
  INFO("");
#endif

#ifdef ASMJIT_BUILD_X86
  INFO("Size of x86-specific classes:");
  DUMP_TYPE(x86::Assembler);
#ifndef ASMJIT_NO_BUILDER
  DUMP_TYPE(x86::Builder);
#endif
#ifndef ASMJIT_NO_COMPILER
  DUMP_TYPE(x86::Compiler);
#endif
  DUMP_TYPE(x86::InstDB::InstInfo);
  DUMP_TYPE(x86::InstDB::CommonInfo);
  DUMP_TYPE(x86::InstDB::OpSignature);
  DUMP_TYPE(x86::InstDB::InstSignature);
  INFO("");
#endif

#undef DUMP_TYPE
}
287
288 // ============================================================================
289 // [Main]
290 // ============================================================================
291
// Callback passed to BrokenAPI::run() in main(); dumps host CPU information
// and type sizes so they appear at the top of the test log.
static void onBeforeRun(void) noexcept {
  dumpCpu();
  dumpSizeOf();
}
296
// Unit-test entry point: prints library version, host architecture and build
// mode, then delegates to BrokenAPI::run() with onBeforeRun as a callback.
int main(int argc, const char* argv[]) {
#if defined(ASMJIT_BUILD_DEBUG)
  const char buildType[] = "Debug";
#else
  const char buildType[] = "Release";
#endif

  // ASMJIT_LIBRARY_VERSION packs major/minor/patch into one integer:
  // major in the high bits, then minor and patch in one byte each.
  INFO("AsmJit Unit-Test v%u.%u.%u [Arch=%s] [Mode=%s]\n\n",
    unsigned((ASMJIT_LIBRARY_VERSION >> 16)       ),
    unsigned((ASMJIT_LIBRARY_VERSION >>  8) & 0xFF),
    unsigned((ASMJIT_LIBRARY_VERSION      ) & 0xFF),
    hostArch(),
    buildType
  );

  return BrokenAPI::run(argc, argv, onBeforeRun);
}
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9
10 #include "./asmjit.h"
11
12 using namespace asmjit;
13
14 // Signature of the generated function.
using SumIntsFunc = void (*)(int* dst, const int* a, const int* b);
16
17 // This function works with both x86::Assembler and x86::Builder. It shows how
18 // `x86::Emitter` can be used to make your code more generic.
19 static void makeRawFunc(x86::Emitter* emitter) noexcept {
20 // Decide which registers will be mapped to function arguments. Try changing
21 // registers of `dst`, `src_a`, and `src_b` and see what happens in function's
22 // prolog and epilog.
23 x86::Gp dst = emitter->zax();
24 x86::Gp src_a = emitter->zcx();
25 x86::Gp src_b = emitter->zdx();
26
27 // Decide which vector registers to use. We use these to keep the code generic,
28 // you can switch to any other registers when needed.
29 x86::Xmm vec0 = x86::xmm0;
30 x86::Xmm vec1 = x86::xmm1;
31
32 // Create and initialize `FuncDetail` and `FuncFrame`.
33 FuncDetail func;
34 func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConv::kIdHost));
35
36 FuncFrame frame;
37 frame.init(func);
38
39 // Make XMM0 and XMM1 dirty. VEC group includes XMM|YMM|ZMM registers.
40 frame.addDirtyRegs(x86::xmm0, x86::xmm1);
41
42 FuncArgsAssignment args(&func); // Create arguments assignment context.
43 args.assignAll(dst, src_a, src_b); // Assign our registers to arguments.
44 args.updateFuncFrame(frame); // Reflect our args in FuncFrame.
45 frame.finalize();
46
47 // Emit prolog and allocate arguments to registers.
48 emitter->emitProlog(frame);
49 emitter->emitArgsAssignment(frame, args);
50
51 emitter->movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
52 emitter->movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
53
54 emitter->paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
55 emitter->movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
56
57 // Emit epilog and return.
58 emitter->emitEpilog(frame);
59 }
60
61 // This function works with x86::Compiler, provided for comparison.
62 static void makeCompiledFunc(x86::Compiler* cc) noexcept {
63 x86::Gp dst = cc->newIntPtr();
64 x86::Gp src_a = cc->newIntPtr();
65 x86::Gp src_b = cc->newIntPtr();
66
67 x86::Xmm vec0 = cc->newXmm();
68 x86::Xmm vec1 = cc->newXmm();
69
70 cc->addFunc(FuncSignatureT<void, int*, const int*, const int*>(CallConv::kIdHost));
71 cc->setArg(0, dst);
72 cc->setArg(1, src_a);
73 cc->setArg(2, src_b);
74
75 cc->movdqu(vec0, x86::ptr(src_a));
76 cc->movdqu(vec1, x86::ptr(src_b));
77 cc->paddd(vec0, vec1);
78 cc->movdqu(x86::ptr(dst), vec0);
79 cc->endFunc();
80 }
81
82 static uint32_t testFunc(JitRuntime& rt, uint32_t emitterType) noexcept {
83 FileLogger logger(stdout);
84
85 CodeHolder code;
86 code.init(rt.codeInfo());
87 code.setLogger(&logger);
88
89 Error err = kErrorOk;
90 switch (emitterType) {
91 case BaseEmitter::kTypeAssembler: {
92 printf("Using x86::Assembler:\n");
93 x86::Assembler a(&code);
94 makeRawFunc(a.as<x86::Emitter>());
95 break;
96 }
97
98 case BaseEmitter::kTypeBuilder: {
99 printf("Using x86::Builder:\n");
100 x86::Builder cb(&code);
101 makeRawFunc(cb.as<x86::Emitter>());
102
103 err = cb.finalize();
104 if (err) {
105 printf("x86::Builder::finalize() failed: %s\n", DebugUtils::errorAsString(err));
106 return 1;
107 }
108 break;
109 }
110
111 case BaseEmitter::kTypeCompiler: {
112 printf("Using x86::Compiler:\n");
113 x86::Compiler cc(&code);
114 makeCompiledFunc(&cc);
115
116 err = cc.finalize();
117 if (err) {
118 printf("x86::Compiler::finalize() failed: %s\n", DebugUtils::errorAsString(err));
119 return 1;
120 }
121 break;
122 }
123 }
124
125 // Add the code generated to the runtime.
126 SumIntsFunc fn;
127 err = rt.add(&fn, &code);
128
129 if (err) {
130 printf("JitRuntime::add() failed: %s\n", DebugUtils::errorAsString(err));
131 return 1;
132 }
133
134 // Execute the generated function.
135 int inA[4] = { 4, 3, 2, 1 };
136 int inB[4] = { 1, 5, 2, 8 };
137 int out[4];
138 fn(out, inA, inB);
139
140 // Should print {5 8 4 9}.
141 printf("Result = { %d %d %d %d }\n\n", out[0], out[1], out[2], out[3]);
142
143 rt.release(fn);
144 return !(out[0] == 5 && out[1] == 8 && out[2] == 4 && out[3] == 9);
145 }
146
147 int main(int argc, char* argv[]) {
148 ASMJIT_UNUSED(argc);
149 ASMJIT_UNUSED(argv);
150
151 unsigned nFailed = 0;
152 JitRuntime rt;
153
154 nFailed += testFunc(rt, BaseEmitter::kTypeAssembler);
155 nFailed += testFunc(rt, BaseEmitter::kTypeBuilder);
156 nFailed += testFunc(rt, BaseEmitter::kTypeCompiler);
157
158 if (!nFailed)
159 printf("[PASSED] All tests passed\n");
160 else
161 printf("[FAILED] %u %s failed\n", nFailed, nFailed == 1 ? "test" : "tests");
162
163 return nFailed ? 1 : 0;
164 }
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 #include <setjmp.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10
11 #include "./asmjit.h"
12 #include "./asmjit_test_misc.h"
13
14 #ifdef _MSC_VER
15 // Interaction between '_setjmp' and C++ object destruction is non-portable.
16 #pragma warning(disable: 4611)
17 #endif
18
19 using namespace asmjit;
20
21 // ============================================================================
22 // [CmdLine]
23 // ============================================================================
24
// Minimal command-line helper used to check for the presence of flags.
class CmdLine {
public:
  CmdLine(int argc, const char* const* argv) noexcept
    : _argc(argc),
      _argv(argv) {}

  // Returns true if `arg` matches any argument exactly. Index 0 (the
  // program name) is intentionally skipped.
  bool hasArg(const char* arg) noexcept {
    bool found = false;
    for (int i = 1; i < _argc && !found; i++)
      found = strcmp(_argv[i], arg) == 0;
    return found;
  }

  int _argc;
  const char* const* _argv;
};
41
42 // ============================================================================
43 // [SimpleErrorHandler]
44 // ============================================================================
45
46 class SimpleErrorHandler : public ErrorHandler {
47 public:
48 SimpleErrorHandler() : _err(kErrorOk) {}
49 virtual void handleError(Error err, const char* message, BaseEmitter* origin) {
50 ASMJIT_UNUSED(origin);
51 _err = err;
52 _message.assignString(message);
53 }
54
55 Error _err;
56 String _message;
57 };
58
59 // ============================================================================
60 // [X86Test]
61 // ============================================================================
62
//! Base test interface for testing `x86::Compiler`.
//!
//! Each concrete test implements `compile()` to emit code through the
//! compiler and `run()` to execute the JIT-compiled function, filling
//! `result` and `expect` with printable representations for reporting.
class X86Test {
public:
  X86Test(const char* name = nullptr) { _name.assignString(name); }
  virtual ~X86Test() {}

  //! Returns the human-readable test name (used in log output).
  inline const char* name() const { return _name.data(); }

  //! Emits the test's code into the given compiler.
  virtual void compile(x86::Compiler& c) = 0;
  //! Runs the compiled `func`; returns true on success.
  virtual bool run(void* func, String& result, String& expect) = 0;

  String _name;
};
76
77 // ============================================================================
78 // [X86TestApp]
79 // ============================================================================
80
81 class X86TestApp {
82 public:
83 Zone _zone;
84 ZoneAllocator _allocator;
85 ZoneVector<X86Test*> _tests;
86
87 unsigned _nFailed;
88 size_t _outputSize;
89
90 bool _verbose;
91 bool _dumpAsm;
92
93 X86TestApp() noexcept
94 : _zone(8096 - Zone::kBlockOverhead),
95 _allocator(&_zone),
96 _nFailed(0),
97 _outputSize(0),
98 _verbose(false),
99 _dumpAsm(false) {}
100
101 ~X86TestApp() noexcept {
102 for (X86Test* test : _tests)
103 delete test;
104 }
105
106 Error add(X86Test* test) noexcept{
107 return _tests.append(&_allocator, test);
108 }
109
110 template<class T>
111 inline void addT() { T::add(*this); }
112
113 int handleArgs(int argc, const char* const* argv);
114 void showInfo();
115 int run();
116 };
117
118 int X86TestApp::handleArgs(int argc, const char* const* argv) {
119 CmdLine cmd(argc, argv);
120
121 if (cmd.hasArg("--verbose")) _verbose = true;
122 if (cmd.hasArg("--dump-asm")) _dumpAsm = true;
123
124 return 0;
125 }
126
// Prints the test-suite banner (library version, architecture) and the state
// of the command-line options parsed by handleArgs().
void X86TestApp::showInfo() {
  printf("AsmJit Compiler Test-Suite v%u.%u.%u [Arch=%s]:\n",
    unsigned((ASMJIT_LIBRARY_VERSION >> 16)       ),
    unsigned((ASMJIT_LIBRARY_VERSION >>  8) & 0xFF),
    unsigned((ASMJIT_LIBRARY_VERSION      ) & 0xFF),
    sizeof(void*) == 8 ? "X64" : "X86");
  printf("  [%s] Verbose (use --verbose to turn verbose output ON)\n", _verbose ? "x" : " ");
  printf("  [%s] DumpAsm (use --dump-asm to turn assembler dumps ON)\n", _dumpAsm ? "x" : " ");
  printf("\n");
}
137
// Runs all registered tests, printing a per-test [OK]/[FAILED] status and a
// final summary. Returns 0 if every test passed, 1 otherwise.
int X86TestApp::run() {
#ifndef ASMJIT_NO_LOGGING
  // Shared format flags for both loggers (verbose stream and capture buffer).
  uint32_t kFormatFlags = FormatOptions::kFlagMachineCode |
                          FormatOptions::kFlagExplainImms |
                          FormatOptions::kFlagRegCasts |
                          FormatOptions::kFlagAnnotations |
                          FormatOptions::kFlagDebugPasses |
                          FormatOptions::kFlagDebugRA ;

  FileLogger fileLogger(stdout);
  fileLogger.addFlags(kFormatFlags);

  StringLogger stringLogger;
  stringLogger.addFlags(kFormatFlags);
#endif

  for (X86Test* test : _tests) {
    // Each test gets a fresh runtime and code holder so failures can't leak
    // state into the next test.
    JitRuntime runtime;
    CodeHolder code;
    SimpleErrorHandler errorHandler;

    code.init(runtime.codeInfo());
    code.setErrorHandler(&errorHandler);

#ifndef ASMJIT_NO_LOGGING
    // Verbose mode streams directly to stdout; otherwise logging is captured
    // into `stringLogger` and replayed only if the test fails.
    if (_verbose) {
      code.setLogger(&fileLogger);
    }
    else {
      stringLogger.clear();
      code.setLogger(&stringLogger);
    }
#endif

    printf("[Test] %s", test->name());

#ifndef ASMJIT_NO_LOGGING
    if (_verbose) printf("\n");
#endif

    x86::Compiler cc(&code);
    test->compile(cc);

    void* func = nullptr;
    // An error reported through the error handler during compile() takes
    // precedence; only finalize if compilation itself succeeded.
    Error err = errorHandler._err;

    if (!err)
      err = cc.finalize();

#ifndef ASMJIT_NO_LOGGING
    if (_dumpAsm) {
      if (!_verbose) printf("\n");

      String sb;
      cc.dump(sb, kFormatFlags);
      printf("%s", sb.data());
    }
#endif

    if (err == kErrorOk)
      err = runtime.add(&func, &code);

    if (_verbose)
      fflush(stdout);

    if (err == kErrorOk) {
      // Track total machine-code size across all tests for the summary.
      _outputSize += code.codeSize();

      StringTmp<128> result;
      StringTmp<128> expect;

      if (test->run(func, result, expect)) {
        if (!_verbose) printf(" [OK]\n");
      }
      else {
        if (!_verbose) printf(" [FAILED]\n");

#ifndef ASMJIT_NO_LOGGING
        // Replay the captured log so the failure context is visible.
        if (!_verbose) printf("%s", stringLogger.data());
#endif

        printf("[Status]\n");
        printf("  Returned: %s\n", result.data());
        printf("  Expected: %s\n", expect.data());

        _nFailed++;
      }

      if (_dumpAsm)
        printf("\n");

      runtime.release(func);
    }
    else {
      // Compilation or runtime.add() failed - report the recorded error.
      if (!_verbose) printf(" [FAILED]\n");

#ifndef ASMJIT_NO_LOGGING
      if (!_verbose) printf("%s", stringLogger.data());
#endif

      printf("[Status]\n");
      printf("  ERROR 0x%08X: %s\n", unsigned(err), errorHandler._message.data());

      _nFailed++;
    }
  }

  if (_nFailed == 0)
    printf("\n[PASSED] All %u tests passed\n", unsigned(_tests.size()));
  else
    printf("\n[FAILED] %u %s of %u failed\n", _nFailed, _nFailed == 1 ? "test" : "tests", unsigned(_tests.size()));

  printf("  OutputSize=%zu\n", _outputSize);

  return _nFailed == 0 ? 0 : 1;
}
254
255 // ============================================================================
256 // [X86Test_AlignBase]
257 // ============================================================================
258
// Verifies that stack memory allocated via `newStack()` honors the requested
// alignment for functions with 0..16 int arguments, with and without a
// preserved frame pointer.
class X86Test_AlignBase : public X86Test {
public:
  X86Test_AlignBase(uint32_t argCount, uint32_t alignment, bool preserveFP)
    : _argCount(argCount),
      _alignment(alignment),
      _preserveFP(preserveFP) {
    _name.assignFormat("AlignBase {NumArgs=%u Alignment=%u PreserveFP=%c}", argCount, alignment, preserveFP ? 'Y' : 'N');
  }

  // Registers all combinations: 0..16 args x {16, 32} alignment x {FP, noFP}.
  static void add(X86TestApp& app) {
    for (uint32_t i = 0; i <= 16; i++) {
      for (uint32_t a = 16; a <= 32; a += 16) {
        app.add(new X86Test_AlignBase(i, a, true));
        app.add(new X86Test_AlignBase(i, a, false));
      }
    }
  }

  virtual void compile(x86::Compiler& cc) {
    uint32_t i;
    uint32_t argCount = _argCount;

    // Build the signature `int func(int, int, ...)` with argCount arguments.
    FuncSignatureBuilder signature(CallConv::kIdHost);
    signature.setRetT<int>();
    for (i = 0; i < argCount; i++)
      signature.addArgT<int>();

    cc.addFunc(signature);
    if (_preserveFP)
      cc.func()->frame().setPreservedFP();

    x86::Gp gpVar = cc.newIntPtr("gpVar");
    x86::Gp gpSum;
    x86::Mem stack = cc.newStack(_alignment, _alignment);

    // Do a sum of arguments to verify a possible relocation when misaligned.
    if (argCount) {
      for (i = 0; i < argCount; i++) {
        x86::Gp gpArg = cc.newInt32("gpArg%u", i);
        cc.setArg(i, gpArg);

        if (i == 0)
          gpSum = gpArg;
        else
          cc.add(gpSum, gpArg);
      }
    }

    // Check alignment of the allocated stack slot: masking its address with
    // `_alignment - 1` must yield zero if properly aligned.
    cc.lea(gpVar, stack);
    cc.and_(gpVar, _alignment - 1);

    // Fold the argument sum into the result; a misaligned stack or a wrong
    // sum both make the returned value differ from the expected one.
    if (argCount)
      cc.or_(gpVar.r32(), gpSum);

    cc.ret(gpVar);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef unsigned int U;

    // One function-pointer type per supported argument count.
    typedef U (*Func0)();
    typedef U (*Func1)(U);
    typedef U (*Func2)(U, U);
    typedef U (*Func3)(U, U, U);
    typedef U (*Func4)(U, U, U, U);
    typedef U (*Func5)(U, U, U, U, U);
    typedef U (*Func6)(U, U, U, U, U, U);
    typedef U (*Func7)(U, U, U, U, U, U, U);
    typedef U (*Func8)(U, U, U, U, U, U, U, U);
    typedef U (*Func9)(U, U, U, U, U, U, U, U, U);
    typedef U (*Func10)(U, U, U, U, U, U, U, U, U, U);
    typedef U (*Func11)(U, U, U, U, U, U, U, U, U, U, U);
    typedef U (*Func12)(U, U, U, U, U, U, U, U, U, U, U, U);
    typedef U (*Func13)(U, U, U, U, U, U, U, U, U, U, U, U, U);
    typedef U (*Func14)(U, U, U, U, U, U, U, U, U, U, U, U, U, U);
    typedef U (*Func15)(U, U, U, U, U, U, U, U, U, U, U, U, U, U, U);
    typedef U (*Func16)(U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U);

    unsigned int resultRet = 0;
    unsigned int expectRet = 0;

    // Call with arguments 1..N; the expected result is their sum (plus zero
    // alignment bits, which would show up in the high nibble if wrong).
    switch (_argCount) {
      case 0:
        resultRet = ptr_as_func<Func0>(_func)();
        expectRet = 0;
        break;
      case 1:
        resultRet = ptr_as_func<Func1>(_func)(1);
        expectRet = 1;
        break;
      case 2:
        resultRet = ptr_as_func<Func2>(_func)(1, 2);
        expectRet = 1 + 2;
        break;
      case 3:
        resultRet = ptr_as_func<Func3>(_func)(1, 2, 3);
        expectRet = 1 + 2 + 3;
        break;
      case 4:
        resultRet = ptr_as_func<Func4>(_func)(1, 2, 3, 4);
        expectRet = 1 + 2 + 3 + 4;
        break;
      case 5:
        resultRet = ptr_as_func<Func5>(_func)(1, 2, 3, 4, 5);
        expectRet = 1 + 2 + 3 + 4 + 5;
        break;
      case 6:
        resultRet = ptr_as_func<Func6>(_func)(1, 2, 3, 4, 5, 6);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6;
        break;
      case 7:
        resultRet = ptr_as_func<Func7>(_func)(1, 2, 3, 4, 5, 6, 7);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7;
        break;
      case 8:
        resultRet = ptr_as_func<Func8>(_func)(1, 2, 3, 4, 5, 6, 7, 8);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8;
        break;
      case 9:
        resultRet = ptr_as_func<Func9>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9;
        break;
      case 10:
        resultRet = ptr_as_func<Func10>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10;
        break;
      case 11:
        resultRet = ptr_as_func<Func11>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11;
        break;
      case 12:
        resultRet = ptr_as_func<Func12>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12;
        break;
      case 13:
        resultRet = ptr_as_func<Func13>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13;
        break;
      case 14:
        resultRet = ptr_as_func<Func14>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14;
        break;
      case 15:
        resultRet = ptr_as_func<Func15>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15;
        break;
      case 16:
        resultRet = ptr_as_func<Func16>(_func)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        expectRet = 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16;
        break;
    }

    // High nibble carries the alignment check; low 28 bits carry the sum.
    result.assignFormat("ret={%u, %u}", resultRet >> 28, resultRet & 0x0FFFFFFFu);
    expect.assignFormat("ret={%u, %u}", expectRet >> 28, expectRet & 0x0FFFFFFFu);

    return resultRet == expectRet;
  }

  uint32_t _argCount;  // Number of int arguments of the generated function.
  uint32_t _alignment; // Requested stack size and alignment (16 or 32).
  bool _preserveFP;    // Whether the frame pointer is preserved.
};
424
425 // ============================================================================
426 // [X86Test_NoCode]
427 // ============================================================================
428
429 class X86Test_NoCode : public X86Test {
430 public:
431 X86Test_NoCode() : X86Test("NoCode") {}
432
433 static void add(X86TestApp& app) {
434 app.add(new X86Test_NoCode());
435 }
436
437 virtual void compile(x86::Compiler& cc) {
438 cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost));
439 cc.endFunc();
440 }
441
442 virtual bool run(void* _func, String& result, String& expect) {
443 ASMJIT_UNUSED(result);
444 ASMJIT_UNUSED(expect);
445
446 typedef void(*Func)(void);
447 Func func = ptr_as_func<Func>(_func);
448
449 func();
450 return true;
451 }
452 };
453
454 // ============================================================================
// [X86Test_NoAlign]
456 // ============================================================================
457
458 class X86Test_NoAlign : public X86Test {
459 public:
460 X86Test_NoAlign() : X86Test("NoAlign") {}
461
462 static void add(X86TestApp& app) {
463 app.add(new X86Test_NoAlign());
464 }
465
466 virtual void compile(x86::Compiler& cc) {
467 cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost));
468 cc.align(kAlignCode, 0);
469 cc.align(kAlignCode, 1);
470 cc.endFunc();
471 }
472
473 virtual bool run(void* _func, String& result, String& expect) {
474 ASMJIT_UNUSED(result);
475 ASMJIT_UNUSED(expect);
476
477 typedef void (*Func)(void);
478 Func func = ptr_as_func<Func>(_func);
479
480 func();
481 return true;
482 }
483 };
484
485 // ============================================================================
486 // [X86Test_JumpMerge]
487 // ============================================================================
488
// Tests that several labels bound at the same position are merged into one
// basic-block by the CFG builder.
class X86Test_JumpMerge : public X86Test {
public:
  X86Test_JumpMerge() : X86Test("JumpMerge") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_JumpMerge());
  }

  virtual void compile(x86::Compiler& cc) {
    // Generated semantics:
    //   void func(int* dst, int val) { *dst = (val <= 2) ? 0 : val; }
    cc.addFunc(FuncSignatureT<void, int*, int>(CallConv::kIdHost));

    Label L0 = cc.newLabel();
    Label L1 = cc.newLabel();
    Label L2 = cc.newLabel();
    Label LEnd = cc.newLabel();

    x86::Gp dst = cc.newIntPtr("dst");
    x86::Gp val = cc.newInt32("val");

    cc.setArg(0, dst);
    cc.setArg(1, val);

    // Each of 0/1/2 jumps to a different label bound at the same spot.
    cc.cmp(val, 0);
    cc.je(L2);

    cc.cmp(val, 1);
    cc.je(L1);

    cc.cmp(val, 2);
    cc.je(L0);

    cc.mov(x86::dword_ptr(dst), val);
    cc.jmp(LEnd);

    // On purpose. This tests whether the CFG constructs a single basic-block
    // from multiple labels next to each other.
    cc.bind(L0);
    cc.bind(L1);
    cc.bind(L2);
    cc.mov(x86::dword_ptr(dst), 0);

    cc.bind(LEnd);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void(*Func)(int*, int);
    Func func = ptr_as_func<Func>(_func);

    // Inputs 0..2 take the merged-label path (store 0); 3..4 store `val`.
    int arr[5] = { -1, -1, -1, -1, -1 };
    int exp[5] = { 0, 0, 0, 3, 4 };

    for (int i = 0; i < 5; i++)
      func(&arr[i], i);

    result.assignFormat("ret={%d, %d, %d, %d, %d}", arr[0], arr[1], arr[2], arr[3], arr[4]);
    expect.assignFormat("ret={%d, %d, %d, %d, %d}", exp[0], exp[1], exp[2], exp[3], exp[4]);

    return result == expect;
  }
};
550
551 // ============================================================================
552 // [X86Test_JumpCross]
553 // ============================================================================
554
// Tests cross jumps - labels are bound in a different order than targeted,
// so control flow weaves through the body: entry -> L2 -> L1 -> L3 -> ret.
class X86Test_JumpCross : public X86Test {
public:
  X86Test_JumpCross() : X86Test("JumpCross") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_JumpCross());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost));

    Label L1 = cc.newLabel();
    Label L2 = cc.newLabel();
    Label L3 = cc.newLabel();

    cc.jmp(L2);

    cc.bind(L1);
    cc.jmp(L3);

    cc.bind(L2);
    cc.jmp(L1);

    cc.bind(L3);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    ASMJIT_UNUSED(result);
    ASMJIT_UNUSED(expect);

    // No observable output; success means the call returns cleanly.
    typedef void (*Func)(void);
    Func func = ptr_as_func<Func>(_func);

    func();
    return true;
  }
};
593
594 // ============================================================================
595 // [X86Test_JumpMany]
596 // ============================================================================
597
598 class X86Test_JumpMany : public X86Test {
599 public:
600 X86Test_JumpMany() : X86Test("JumpMany") {}
601
602 static void add(X86TestApp& app) {
603 app.add(new X86Test_JumpMany());
604 }
605
606 virtual void compile(x86::Compiler& cc) {
607 cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));
608 for (uint32_t i = 0; i < 1000; i++) {
609 Label L = cc.newLabel();
610 cc.jmp(L);
611 cc.bind(L);
612 }
613
614 x86::Gp ret = cc.newInt32("ret");
615 cc.xor_(ret, ret);
616 cc.ret(ret);
617 cc.endFunc();
618 }
619
620 virtual bool run(void* _func, String& result, String& expect) {
621 typedef int (*Func)(void);
622
623 Func func = ptr_as_func<Func>(_func);
624
625 int resultRet = func();
626 int expectRet = 0;
627
628 result.assignFormat("ret={%d}", resultRet);
629 expect.assignFormat("ret={%d}", expectRet);
630
631 return resultRet == expectRet;
632 }
633 };
634
635 // ============================================================================
636 // [X86Test_JumpUnreachable1]
637 // ============================================================================
638
// Tests that the compiler tolerates unreachable code and labels that are
// never fallen into; the generated function must still be valid and return
// normally.
class X86Test_JumpUnreachable1 : public X86Test {
public:
  X86Test_JumpUnreachable1() : X86Test("JumpUnreachable1") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_JumpUnreachable1());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost));

    Label L_1 = cc.newLabel();
    Label L_2 = cc.newLabel();
    Label L_3 = cc.newLabel();
    Label L_4 = cc.newLabel();
    Label L_5 = cc.newLabel();
    Label L_6 = cc.newLabel();
    Label L_7 = cc.newLabel();

    x86::Gp v0 = cc.newUInt32("v0");
    x86::Gp v1 = cc.newUInt32("v1");

    // Entry: two labels bound together, then jump straight to the exit.
    cc.bind(L_2);
    cc.bind(L_3);

    cc.jmp(L_1);

    // Everything below is unreachable from the entry block - on purpose.
    cc.bind(L_5);
    cc.mov(v0, 0);

    cc.bind(L_6);
    cc.jmp(L_3);
    cc.mov(v1, 1);
    cc.jmp(L_1);

    cc.bind(L_4);
    cc.jmp(L_2);
    cc.bind(L_7);
    cc.add(v0, v1);

    // Exit block, aligned to 16 bytes.
    cc.align(kAlignCode, 16);
    cc.bind(L_1);
    cc.ret();
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(void);
    Func func = ptr_as_func<Func>(_func);

    // No observable output; just verify the function returns.
    func();

    result.appendString("ret={}");
    expect.appendString("ret={}");

    return true;
  }
};
697
698 // ============================================================================
699 // [X86Test_JumpUnreachable2]
700 // ============================================================================
701
// Tests handling of an unreachable conditional loop: the entry jumps straight
// to L_1 (return), so the body at L_2 - including its self-jump - can never
// execute and its registers v0/v1 are only used in dead code.
class X86Test_JumpUnreachable2 : public X86Test {
public:
  X86Test_JumpUnreachable2() : X86Test("JumpUnreachable2") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_JumpUnreachable2());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void>(CallConv::kIdHost));

    Label L_1 = cc.newLabel();
    Label L_2 = cc.newLabel();

    x86::Gp v0 = cc.newUInt32("v0");
    x86::Gp v1 = cc.newUInt32("v1");

    cc.jmp(L_1);
    // Unreachable loop body - deliberately emitted.
    cc.bind(L_2);
    cc.mov(v0, 1);
    cc.mov(v1, 2);
    cc.cmp(v0, v1);
    cc.jz(L_2);
    cc.jmp(L_1);

    cc.bind(L_1);
    cc.ret();
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(void);
    Func func = ptr_as_func<Func>(_func);

    // No observable output; just verify the function returns.
    func();

    result.appendString("ret={}");
    expect.appendString("ret={}");

    return true;
  }
};
744
745 // ============================================================================
746 // [X86Test_AllocBase]
747 // ============================================================================
748
749 class X86Test_AllocBase : public X86Test {
750 public:
751 X86Test_AllocBase() : X86Test("AllocBase") {}
752
753 static void add(X86TestApp& app) {
754 app.add(new X86Test_AllocBase());
755 }
756
757 virtual void compile(x86::Compiler& cc) {
758 cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));
759
760 x86::Gp v0 = cc.newInt32("v0");
761 x86::Gp v1 = cc.newInt32("v1");
762 x86::Gp v2 = cc.newInt32("v2");
763 x86::Gp v3 = cc.newInt32("v3");
764 x86::Gp v4 = cc.newInt32("v4");
765
766 cc.xor_(v0, v0);
767
768 cc.mov(v1, 1);
769 cc.mov(v2, 2);
770 cc.mov(v3, 3);
771 cc.mov(v4, 4);
772
773 cc.add(v0, v1);
774 cc.add(v0, v2);
775 cc.add(v0, v3);
776 cc.add(v0, v4);
777
778 cc.ret(v0);
779 cc.endFunc();
780 }
781
782 virtual bool run(void* _func, String& result, String& expect) {
783 typedef int (*Func)(void);
784 Func func = ptr_as_func<Func>(_func);
785
786 int resultRet = func();
787 int expectRet = 1 + 2 + 3 + 4;
788
789 result.assignFormat("ret=%d", resultRet);
790 expect.assignFormat("ret=%d", expectRet);
791
792 return resultRet == expectRet;
793 }
794 };
795
796 // ============================================================================
797 // [X86Test_AllocMany1]
798 // ============================================================================
799
// Allocates kCount (8) virtual registers, sums them twice (once adding, once
// subtracting) and stores both results through the two pointer arguments.
class X86Test_AllocMany1 : public X86Test {
public:
  X86Test_AllocMany1() : X86Test("AllocMany1") {}

  enum { kCount = 8 };

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocMany1());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, int*, int*>(CallConv::kIdHost));

    x86::Gp a0 = cc.newIntPtr("a0");
    x86::Gp a1 = cc.newIntPtr("a1");

    cc.setArg(0, a0);
    cc.setArg(1, a1);

    // Create some variables.
    x86::Gp t = cc.newInt32("t");
    x86::Gp x[kCount];

    uint32_t i;

    // Setup variables (use mov with reg/imm to see if register allocator works).
    for (i = 0; i < kCount; i++) x[i] = cc.newInt32("x%u", i);
    for (i = 0; i < kCount; i++) cc.mov(x[i], int(i + 1));

    // Make sum (addition).
    cc.xor_(t, t);
    for (i = 0; i < kCount; i++) cc.add(t, x[i]);

    // Store result to a given pointer in first argument.
    cc.mov(x86::dword_ptr(a0), t);

    // Clear t.
    cc.xor_(t, t);

    // Make sum (subtraction).
    for (i = 0; i < kCount; i++) cc.sub(t, x[i]);

    // Store result to a given pointer in second argument.
    cc.mov(x86::dword_ptr(a1), t);

    // End of function.
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(int*, int*);
    Func func = ptr_as_func<Func>(_func);

    int resultX;
    int resultY;

    int expectX = 36;   // 1 + 2 + ... + 8.
    int expectY = -36;  // -(1 + 2 + ... + 8).

    func(&resultX, &resultY);

    result.assignFormat("ret={x=%d, y=%d}", resultX, resultY);
    expect.assignFormat("ret={x=%d, y=%d}", expectX, expectY);

    return resultX == expectX && resultY == expectY;
  }
};
867
868 // ============================================================================
869 // [X86Test_AllocMany2]
870 // ============================================================================
871
872 class X86Test_AllocMany2 : public X86Test {
873 public:
874 X86Test_AllocMany2() : X86Test("AllocMany2") {}
875
876 static void add(X86TestApp& app) {
877 app.add(new X86Test_AllocMany2());
878 }
879
880 virtual void compile(x86::Compiler& cc) {
881 cc.addFunc(FuncSignatureT<void, uint32_t*>(CallConv::kIdHost));
882
883 x86::Gp a = cc.newIntPtr("a");
884 x86::Gp v[32];
885
886 uint32_t i;
887 cc.setArg(0, a);
888
889 for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) v[i] = cc.newInt32("v%d", i);
890 for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) cc.xor_(v[i], v[i]);
891
892 x86::Gp x = cc.newInt32("x");
893 Label L = cc.newLabel();
894
895 cc.mov(x, 32);
896 cc.bind(L);
897 for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) cc.add(v[i], i);
898
899 cc.dec(x);
900 cc.jnz(L);
901 for (i = 0; i < ASMJIT_ARRAY_SIZE(v); i++) cc.mov(x86::dword_ptr(a, int(i * 4)), v[i]);
902
903 cc.endFunc();
904 }
905
906 virtual bool run(void* _func, String& result, String& expect) {
907 typedef void (*Func)(uint32_t*);
908 Func func = ptr_as_func<Func>(_func);
909
910 uint32_t i;
911 uint32_t resultBuf[32];
912 uint32_t expectBuf[32];
913
914 for (i = 0; i < ASMJIT_ARRAY_SIZE(resultBuf); i++)
915 expectBuf[i] = i * 32;
916 func(resultBuf);
917
918 for (i = 0; i < ASMJIT_ARRAY_SIZE(resultBuf); i++) {
919 if (i != 0) {
920 result.appendChar(',');
921 expect.appendChar(',');
922 }
923
924 result.appendFormat("%u", resultBuf[i]);
925 expect.appendFormat("%u", expectBuf[i]);
926 }
927
928 return result == expect;
929 }
930 };
931
932 // ============================================================================
933 // [X86Test_AllocImul1]
934 // ============================================================================
935
// Tests the widening `imul` form exposed by the compiler: the high half of
// the product is written into an extra explicit destination register.
class X86Test_AllocImul1 : public X86Test {
public:
  X86Test_AllocImul1() : X86Test("AllocImul1") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocImul1());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, int*, int*, int, int>(CallConv::kIdHost));

    x86::Gp dstHi = cc.newIntPtr("dstHi");
    x86::Gp dstLo = cc.newIntPtr("dstLo");

    x86::Gp vHi = cc.newInt32("vHi");
    x86::Gp vLo = cc.newInt32("vLo");
    x86::Gp src = cc.newInt32("src");

    cc.setArg(0, dstHi);
    cc.setArg(1, dstLo);
    cc.setArg(2, vLo);
    cc.setArg(3, src);

    // Widening multiply: vHi:vLo = vLo * src.
    cc.imul(vHi, vLo, src);

    cc.mov(x86::dword_ptr(dstHi), vHi);
    cc.mov(x86::dword_ptr(dstLo), vLo);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(int*, int*, int, int);
    Func func = ptr_as_func<Func>(_func);

    int v0 = 4;
    int v1 = 4;

    int resultHi;
    int resultLo;

    int expectHi = 0;       // 4 * 4 cannot overflow 32 bits, so high half is 0.
    int expectLo = v0 * v1;

    func(&resultHi, &resultLo, v0, v1);

    result.assignFormat("hi=%d, lo=%d", resultHi, resultLo);
    expect.assignFormat("hi=%d, lo=%d", expectHi, expectLo);

    return resultHi == expectHi && resultLo == expectLo;
  }
};
987
988 // ============================================================================
989 // [X86Test_AllocImul2]
990 // ============================================================================
991
// Repeats a widening `imul` four times, creating fresh virtual registers on
// every iteration, and accumulates both product halves into the dst buffer.
class X86Test_AllocImul2 : public X86Test {
public:
  X86Test_AllocImul2() : X86Test("AllocImul2") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocImul2());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, int*, const int*>(CallConv::kIdHost));

    x86::Gp dst = cc.newIntPtr("dst");
    x86::Gp src = cc.newIntPtr("src");

    cc.setArg(0, dst);
    cc.setArg(1, src);

    for (unsigned int i = 0; i < 4; i++) {
      // New virtual registers each iteration - stresses the allocator.
      x86::Gp x = cc.newInt32("x");
      x86::Gp y = cc.newInt32("y");
      x86::Gp hi = cc.newInt32("hi");

      cc.mov(x, x86::dword_ptr(src, 0));
      cc.mov(y, x86::dword_ptr(src, 4));

      // hi:x = x * y; accumulate both halves into dst[0] / dst[1].
      cc.imul(hi, x, y);
      cc.add(x86::dword_ptr(dst, 0), hi);
      cc.add(x86::dword_ptr(dst, 4), x);
    }

    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(int*, const int*);
    Func func = ptr_as_func<Func>(_func);

    int src[2] = { 4, 9 };
    int resultRet[2] = { 0, 0 };
    int expectRet[2] = { 0, (4 * 9) * 4 }; // High half stays 0; low half accumulates 4 times.

    func(resultRet, src);

    result.assignFormat("ret={%d, %d}", resultRet[0], resultRet[1]);
    expect.assignFormat("ret={%d, %d}", expectRet[0], expectRet[1]);

    return resultRet[0] == expectRet[0] && resultRet[1] == expectRet[1];
  }
};
1041
1042 // ============================================================================
1043 // [X86Test_AllocIdiv1]
1044 // ============================================================================
1045
// Tests `idiv` through the compiler interface, which takes an explicit
// high:low dividend pair; `dummy` supplies the zeroed high half.
class X86Test_AllocIdiv1 : public X86Test {
public:
  X86Test_AllocIdiv1() : X86Test("AllocIdiv1") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocIdiv1());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));

    x86::Gp a = cc.newInt32("a");
    x86::Gp b = cc.newInt32("b");
    x86::Gp dummy = cc.newInt32("dummy");

    cc.setArg(0, a);
    cc.setArg(1, b);

    // Zero the high half, then divide: a = (dummy:a) / b.
    cc.xor_(dummy, dummy);
    cc.idiv(dummy, a, b);

    cc.ret(a);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int);
    Func func = ptr_as_func<Func>(_func);

    int v0 = 2999;
    int v1 = 245;

    int resultRet = func(v0, v1);
    int expectRet = 2999 / 245; // 12.

    result.assignFormat("result=%d", resultRet);
    expect.assignFormat("result=%d", expectRet);

    return resultRet == expectRet;
  }
};
1087
1088 // ============================================================================
1089 // [X86Test_AllocSetz]
1090 // ============================================================================
1091
1092 class X86Test_AllocSetz : public X86Test {
1093 public:
1094 X86Test_AllocSetz() : X86Test("AllocSetz") {}
1095
1096 static void add(X86TestApp& app) {
1097 app.add(new X86Test_AllocSetz());
1098 }
1099
1100 virtual void compile(x86::Compiler& cc) {
1101 cc.addFunc(FuncSignatureT<void, int, int, char*>(CallConv::kIdHost));
1102
1103 x86::Gp src0 = cc.newInt32("src0");
1104 x86::Gp src1 = cc.newInt32("src1");
1105 x86::Gp dst0 = cc.newIntPtr("dst0");
1106
1107 cc.setArg(0, src0);
1108 cc.setArg(1, src1);
1109 cc.setArg(2, dst0);
1110
1111 cc.cmp(src0, src1);
1112 cc.setz(x86::byte_ptr(dst0));
1113
1114 cc.endFunc();
1115 }
1116
1117 virtual bool run(void* _func, String& result, String& expect) {
1118 typedef void (*Func)(int, int, char*);
1119 Func func = ptr_as_func<Func>(_func);
1120
1121 char resultBuf[4];
1122 char expectBuf[4] = { 1, 0, 0, 1 };
1123
1124 func(0, 0, &resultBuf[0]); // We are expecting 1 (0 == 0).
1125 func(0, 1, &resultBuf[1]); // We are expecting 0 (0 != 1).
1126 func(1, 0, &resultBuf[2]); // We are expecting 0 (1 != 0).
1127 func(1, 1, &resultBuf[3]); // We are expecting 1 (1 == 1).
1128
1129 result.assignFormat("out={%d, %d, %d, %d}", resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3]);
1130 expect.assignFormat("out={%d, %d, %d, %d}", expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3]);
1131
1132 return resultBuf[0] == expectBuf[0] &&
1133 resultBuf[1] == expectBuf[1] &&
1134 resultBuf[2] == expectBuf[2] &&
1135 resultBuf[3] == expectBuf[3] ;
1136 }
1137 };
1138
1139 // ============================================================================
1140 // [X86Test_AllocShlRor]
1141 // ============================================================================
1142
// Tests variable-count `shl` and `ror`, instructions whose shift count must
// be allocated into CL.
class X86Test_AllocShlRor : public X86Test {
public:
  X86Test_AllocShlRor() : X86Test("AllocShlRor") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocShlRor());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, int*, int, int, int>(CallConv::kIdHost));

    x86::Gp dst = cc.newIntPtr("dst");
    x86::Gp var = cc.newInt32("var");
    x86::Gp vShlParam = cc.newInt32("vShlParam");
    x86::Gp vRorParam = cc.newInt32("vRorParam");

    cc.setArg(0, dst);
    cc.setArg(1, var);
    cc.setArg(2, vShlParam);
    cc.setArg(3, vRorParam);

    // *dst = (var << vShlParam) rotated right by vRorParam.
    cc.shl(var, vShlParam);
    cc.ror(var, vRorParam);

    cc.mov(x86::dword_ptr(dst), var);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(int*, int, int, int);
    Func func = ptr_as_func<Func>(_func);

    int v0 = 0x000000FF;

    int resultRet;
    int expectRet = 0x0000FF00; // (0xFF << 16) rotated right by 8.

    func(&resultRet, v0, 16, 8);

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return resultRet == expectRet;
  }
};
1188
1189 // ============================================================================
1190 // [X86Test_AllocGpbLo]
1191 // ============================================================================
1192
// Stresses allocation of GPB-LO (8-bit) register views: 32 virtual 32-bit
// registers are repeatedly mixed through `movzx` from their `r8()` views,
// then everything is summed and returned.
class X86Test_AllocGpbLo1 : public X86Test {
public:
  X86Test_AllocGpbLo1() : X86Test("AllocGpbLo1") {}

  enum { kCount = 32 };

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocGpbLo1());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<uint32_t, uint32_t*>(CallConv::kIdHost));

    x86::Gp rPtr = cc.newUIntPtr("rPtr");
    x86::Gp rSum = cc.newUInt32("rSum");

    cc.setArg(0, rPtr);

    x86::Gp x[kCount];
    uint32_t i;

    for (i = 0; i < kCount; i++) {
      x[i] = cc.newUInt32("x%u", i);
    }

    // Init pseudo-regs with values from our array.
    for (i = 0; i < kCount; i++) {
      cc.mov(x[i], x86::dword_ptr(rPtr, int(i * 4)));
    }

    for (i = 2; i < kCount; i++) {
      // Add and truncate to 8 bit; no purpose, just mess with jit.
      cc.add (x[i ], x[i-1]);
      cc.movzx(x[i ], x[i ].r8());
      cc.movzx(x[i-2], x[i-1].r8());
      cc.movzx(x[i-1], x[i-2].r8());
    }

    // Sum up all computed values.
    cc.mov(rSum, 0);
    for (i = 0; i < kCount; i++) {
      cc.add(rSum, x[i]);
    }

    // Return the sum.
    cc.ret(rSum);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef uint32_t (*Func)(uint32_t*);
    Func func = ptr_as_func<Func>(_func);

    uint32_t i;
    uint32_t buf[kCount];
    uint32_t resultRet;
    uint32_t expectRet;

    // First compute the expected sum in C, mirroring the JIT-ed computation.
    expectRet = 0;
    for (i = 0; i < kCount; i++) {
      buf[i] = 1;
    }

    for (i = 2; i < kCount; i++) {
      buf[i ]+= buf[i-1];
      buf[i ] = buf[i ] & 0xFF;
      buf[i-2] = buf[i-1] & 0xFF;
      buf[i-1] = buf[i-2] & 0xFF;
    }

    for (i = 0; i < kCount; i++) {
      expectRet += buf[i];
    }

    // Re-initialize the buffer before running the JIT-ed function.
    for (i = 0; i < kCount; i++) {
      buf[i] = 1;
    }
    resultRet = func(buf);

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return resultRet == expectRet;
  }
};
1278
1279 // ============================================================================
1280 // [X86Test_AllocGpbLo2]
1281 // ============================================================================
1282
1283 class X86Test_AllocGpbLo2 : public X86Test {
1284 public:
1285 X86Test_AllocGpbLo2() : X86Test("AllocGpbLo2") {}
1286
1287 static void add(X86TestApp& app) {
1288 app.add(new X86Test_AllocGpbLo2());
1289 }
1290
1291 virtual void compile(x86::Compiler& cc) {
1292 cc.addFunc(FuncSignatureT<uint32_t, uint32_t>(CallConv::kIdHost));
1293
1294 x86::Gp v = cc.newUInt32("v");
1295 cc.setArg(0, v);
1296 cc.mov(v.r8(), 0xFF);
1297 cc.ret(v);
1298 cc.endFunc();
1299 }
1300
1301 virtual bool run(void* _func, String& result, String& expect) {
1302 typedef uint32_t (*Func)(uint32_t);
1303 Func func = ptr_as_func<Func>(_func);
1304
1305 uint32_t resultRet = func(0x12345678u);
1306 uint32_t expectRet = 0x123456FFu;
1307
1308 result.assignFormat("ret=%d", resultRet);
1309 expect.assignFormat("ret=%d", expectRet);
1310
1311 return resultRet == expectRet;
1312 }
1313 };
1314
1315 // ============================================================================
1316 // [X86Test_AllocRepMovsb]
1317 // ============================================================================
1318
// Tests a `rep`-prefixed byte string copy, which implicitly binds its
// destination, source and counter operands to fixed physical registers.
class X86Test_AllocRepMovsb : public X86Test {
public:
  X86Test_AllocRepMovsb() : X86Test("AllocRepMovsb") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocRepMovsb());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, void*, void*, size_t>(CallConv::kIdHost));

    x86::Gp dst = cc.newIntPtr("dst");
    x86::Gp src = cc.newIntPtr("src");
    x86::Gp cnt = cc.newIntPtr("cnt");

    cc.setArg(0, dst);
    cc.setArg(1, src);
    cc.setArg(2, cnt);

    // Equivalent of memcpy(dst, src, cnt) via `rep movsb`.
    cc.rep(cnt).movs(x86::byte_ptr(dst), x86::byte_ptr(src));
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(void*, void*, size_t);
    Func func = ptr_as_func<Func>(_func);

    char dst[20] = { 0 };
    char src[20] = "Hello AsmJit!";
    // Copy including the NUL terminator.
    func(dst, src, strlen(src) + 1);

    result.assignFormat("ret=\"%s\"", dst);
    expect.assignFormat("ret=\"%s\"", src);

    return result == expect;
  }
};
1356
1357 // ============================================================================
1358 // [X86Test_AllocIfElse1]
1359 // ============================================================================
1360
1361 class X86Test_AllocIfElse1 : public X86Test {
1362 public:
1363 X86Test_AllocIfElse1() : X86Test("AllocIfElse1") {}
1364
1365 static void add(X86TestApp& app) {
1366 app.add(new X86Test_AllocIfElse1());
1367 }
1368
1369 virtual void compile(x86::Compiler& cc) {
1370 cc.addFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));
1371
1372 x86::Gp v1 = cc.newInt32("v1");
1373 x86::Gp v2 = cc.newInt32("v2");
1374
1375 Label L_1 = cc.newLabel();
1376 Label L_2 = cc.newLabel();
1377
1378 cc.setArg(0, v1);
1379 cc.setArg(1, v2);
1380
1381 cc.cmp(v1, v2);
1382 cc.jg(L_1);
1383
1384 cc.mov(v1, 1);
1385 cc.jmp(L_2);
1386
1387 cc.bind(L_1);
1388 cc.mov(v1, 2);
1389
1390 cc.bind(L_2);
1391 cc.ret(v1);
1392 cc.endFunc();
1393 }
1394
1395 virtual bool run(void* _func, String& result, String& expect) {
1396 typedef int (*Func)(int, int);
1397 Func func = ptr_as_func<Func>(_func);
1398
1399 int a = func(0, 1);
1400 int b = func(1, 0);
1401
1402 result.appendFormat("ret={%d, %d}", a, b);
1403 expect.appendFormat("ret={%d, %d}", 1, 2);
1404
1405 return result == expect;
1406 }
1407 };
1408
1409 // ============================================================================
1410 // [X86Test_AllocIfElse2]
1411 // ============================================================================
1412
// Same if/else as AllocIfElse1, but the basic blocks are emitted out of
// order (a trampoline to the exit comes first), forcing the allocator to
// handle non-linear control flow.
class X86Test_AllocIfElse2 : public X86Test {
public:
  X86Test_AllocIfElse2() : X86Test("AllocIfElse2") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocIfElse2());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));

    x86::Gp v1 = cc.newInt32("v1");
    x86::Gp v2 = cc.newInt32("v2");

    Label L_1 = cc.newLabel();
    Label L_2 = cc.newLabel();
    Label L_3 = cc.newLabel();
    Label L_4 = cc.newLabel();

    cc.setArg(0, v1);
    cc.setArg(1, v2);

    // Skip over the L_2 trampoline that just forwards to the exit (L_4).
    cc.jmp(L_1);
    cc.bind(L_2);
    cc.jmp(L_4);
    cc.bind(L_1);

    cc.cmp(v1, v2);
    cc.jg(L_3);

    // Then-branch: v1 = 1.
    cc.mov(v1, 1);
    cc.jmp(L_2);

    // Else-branch: v1 = 2.
    cc.bind(L_3);
    cc.mov(v1, 2);
    cc.jmp(L_2);

    cc.bind(L_4);

    cc.ret(v1);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int);
    Func func = ptr_as_func<Func>(_func);

    int a = func(0, 1); // 0 <= 1 -> 1.
    int b = func(1, 0); // 1 >  0 -> 2.

    result.appendFormat("ret={%d, %d}", a, b);
    expect.appendFormat("ret={%d, %d}", 1, 2);

    return result == expect;
  }
};
1469
1470 // ============================================================================
1471 // [X86Test_AllocIfElse3]
1472 // ============================================================================
1473
// If/else where the then-branch contains a small counting loop that leaves
// v1 == 1; the else-branch simply sets v1 = 2.
class X86Test_AllocIfElse3 : public X86Test {
public:
  X86Test_AllocIfElse3() : X86Test("AllocIfElse3") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocIfElse3());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));

    x86::Gp v1 = cc.newInt32("v1");
    x86::Gp v2 = cc.newInt32("v2");
    x86::Gp counter = cc.newInt32("counter");

    Label L_1 = cc.newLabel();
    Label L_Loop = cc.newLabel();
    Label L_Exit = cc.newLabel();

    cc.setArg(0, v1);
    cc.setArg(1, v2);

    cc.cmp(v1, v2);
    cc.jg(L_1);

    cc.mov(counter, 0);

    // Loop while counter <= 1; the last value copied into v1 is 1.
    cc.bind(L_Loop);
    cc.mov(v1, counter);

    cc.inc(counter);
    cc.cmp(counter, 1);
    cc.jle(L_Loop);
    cc.jmp(L_Exit);

    // Else-branch.
    cc.bind(L_1);
    cc.mov(v1, 2);

    cc.bind(L_Exit);
    cc.ret(v1);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int);
    Func func = ptr_as_func<Func>(_func);

    int a = func(0, 1); // Loop branch  -> 1.
    int b = func(1, 0); // Else branch  -> 2.

    result.appendFormat("ret={%d, %d}", a, b);
    expect.appendFormat("ret={%d, %d}", 1, 2);

    return result == expect;
  }
};
1530
1531 // ============================================================================
1532 // [X86Test_AllocIfElse4]
1533 // ============================================================================
1534
// If/else where both branches contain a loop sharing a single counter; note
// the counter is initialized before the arguments are assigned.
class X86Test_AllocIfElse4 : public X86Test {
public:
  X86Test_AllocIfElse4() : X86Test("AllocIfElse4") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocIfElse4());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));

    x86::Gp v1 = cc.newInt32("v1");
    x86::Gp v2 = cc.newInt32("v2");
    x86::Gp counter = cc.newInt32("counter");

    Label L_1 = cc.newLabel();
    Label L_Loop1 = cc.newLabel();
    Label L_Loop2 = cc.newLabel();
    Label L_Exit = cc.newLabel();

    // Counter is written before setArg() below.
    cc.mov(counter, 0);

    cc.setArg(0, v1);
    cc.setArg(1, v2);

    cc.cmp(v1, v2);
    cc.jg(L_1);

    // Then-branch: loop while counter <= 1, leaving v1 == 1.
    cc.bind(L_Loop1);
    cc.mov(v1, counter);

    cc.inc(counter);
    cc.cmp(counter, 1);
    cc.jle(L_Loop1);
    cc.jmp(L_Exit);

    // Else-branch: loop while counter <= 2, leaving v1 == 2.
    cc.bind(L_1);
    cc.bind(L_Loop2);
    cc.mov(v1, counter);
    cc.inc(counter);
    cc.cmp(counter, 2);
    cc.jle(L_Loop2);

    cc.bind(L_Exit);
    cc.ret(v1);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int);
    Func func = ptr_as_func<Func>(_func);

    int a = func(0, 1); // Loop1 -> 1.
    int b = func(1, 0); // Loop2 -> 2.

    result.appendFormat("ret={%d, %d}", a, b);
    expect.appendFormat("ret={%d, %d}", 1, 2);

    return result == expect;
  }
};
1596
1597 // ============================================================================
1598 // [X86Test_AllocInt8]
1599 // ============================================================================
1600
1601 class X86Test_AllocInt8 : public X86Test {
1602 public:
1603 X86Test_AllocInt8() : X86Test("AllocInt8") {}
1604
1605 static void add(X86TestApp& app) {
1606 app.add(new X86Test_AllocInt8());
1607 }
1608
1609 virtual void compile(x86::Compiler& cc) {
1610 x86::Gp x = cc.newInt8("x");
1611 x86::Gp y = cc.newInt32("y");
1612
1613 cc.addFunc(FuncSignatureT<int, char>(CallConv::kIdHost));
1614 cc.setArg(0, x);
1615
1616 cc.movsx(y, x);
1617
1618 cc.ret(y);
1619 cc.endFunc();
1620 }
1621
1622 virtual bool run(void* _func, String& result, String& expect) {
1623 typedef int (*Func)(char);
1624 Func func = ptr_as_func<Func>(_func);
1625
1626 int resultRet = func(-13);
1627 int expectRet = -13;
1628
1629 result.assignFormat("ret=%d", resultRet);
1630 expect.assignFormat("ret=%d", expectRet);
1631
1632 return result == expect;
1633 }
1634 };
1635
1636 // ============================================================================
1637 // [X86Test_AllocUnhandledArg]
1638 // ============================================================================
1639
1640 class X86Test_AllocUnhandledArg : public X86Test {
1641 public:
1642 X86Test_AllocUnhandledArg() : X86Test("AllocUnhandledArg") {}
1643
1644 static void add(X86TestApp& app) {
1645 app.add(new X86Test_AllocUnhandledArg());
1646 }
1647
1648 virtual void compile(x86::Compiler& cc) {
1649 cc.addFunc(FuncSignatureT<int, int, int, int>(CallConv::kIdHost));
1650
1651 x86::Gp x = cc.newInt32("x");
1652 cc.setArg(2, x);
1653 cc.ret(x);
1654
1655 cc.endFunc();
1656 }
1657
1658 virtual bool run(void* _func, String& result, String& expect) {
1659 typedef int (*Func)(int, int, int);
1660 Func func = ptr_as_func<Func>(_func);
1661
1662 int resultRet = func(42, 155, 199);
1663 int expectRet = 199;
1664
1665 result.assignFormat("ret={%d}", resultRet);
1666 expect.assignFormat("ret={%d}", expectRet);
1667
1668 return result == expect;
1669 }
1670 };
1671
1672 // ============================================================================
1673 // [X86Test_AllocArgsIntPtr]
1674 // ============================================================================
1675
// Passes eight pointers (all aimed at the same 9-byte buffer in run());
// pointer i is advanced by (i + 1) bytes and the byte it then addresses is
// incremented by (i + 1), producing {0, 1, 2, ..., 8}.
class X86Test_AllocArgsIntPtr : public X86Test {
public:
  X86Test_AllocArgsIntPtr() : X86Test("AllocArgsIntPtr") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocArgsIntPtr());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, void*, void*, void*, void*, void*, void*, void*, void*>(CallConv::kIdHost));

    uint32_t i;
    x86::Gp var[8];

    for (i = 0; i < 8; i++) {
      var[i] = cc.newIntPtr("var%u", i);
      cc.setArg(i, var[i]);
    }

    // Advance each pointer by (i + 1) bytes.
    for (i = 0; i < 8; i++) {
      cc.add(var[i], int(i + 1));
    }

    // Move some data into buffer provided by arguments so we can verify if it
    // really works without looking into assembler output.
    for (i = 0; i < 8; i++) {
      cc.add(x86::byte_ptr(var[i]), int(i + 1));
    }

    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(void*, void*, void*, void*, void*, void*, void*, void*);
    Func func = ptr_as_func<Func>(_func);

    uint8_t resultBuf[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    uint8_t expectBuf[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };

    // All eight arguments point at the same buffer.
    func(resultBuf, resultBuf, resultBuf, resultBuf,
         resultBuf, resultBuf, resultBuf, resultBuf);

    result.assignFormat("buf={%d, %d, %d, %d, %d, %d, %d, %d, %d}",
      resultBuf[0], resultBuf[1], resultBuf[2], resultBuf[3],
      resultBuf[4], resultBuf[5], resultBuf[6], resultBuf[7],
      resultBuf[8]);
    expect.assignFormat("buf={%d, %d, %d, %d, %d, %d, %d, %d, %d}",
      expectBuf[0], expectBuf[1], expectBuf[2], expectBuf[3],
      expectBuf[4], expectBuf[5], expectBuf[6], expectBuf[7],
      expectBuf[8]);

    return result == expect;
  }
};
1730
1731 // ============================================================================
1732 // [X86Test_AllocArgsFloat]
1733 // ============================================================================
1734
// Passes seven float arguments plus an output pointer; sums the floats with
// `addss` into xv[0] and stores the result through the pointer.
class X86Test_AllocArgsFloat : public X86Test {
public:
  X86Test_AllocArgsFloat() : X86Test("AllocArgsFloat") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocArgsFloat());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, float, float, float, float, float, float, float, void*>(CallConv::kIdHost));

    uint32_t i;

    x86::Gp p = cc.newIntPtr("p");
    x86::Xmm xv[7];

    for (i = 0; i < 7; i++) {
      xv[i] = cc.newXmmSs("xv%u", i);
      cc.setArg(i, xv[i]);
    }

    cc.setArg(7, p);

    // Accumulate everything into xv[0].
    cc.addss(xv[0], xv[1]);
    cc.addss(xv[0], xv[2]);
    cc.addss(xv[0], xv[3]);
    cc.addss(xv[0], xv[4]);
    cc.addss(xv[0], xv[5]);
    cc.addss(xv[0], xv[6]);

    cc.movss(x86::ptr(p), xv[0]);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(float, float, float, float, float, float, float, float*);
    Func func = ptr_as_func<Func>(_func);

    float resultRet;
    // Small integral values - the sum is exact in float arithmetic.
    float expectRet = 1.0f + 2.0f + 3.0f + 4.0f + 5.0f + 6.0f + 7.0f;

    func(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, &resultRet);

    result.assignFormat("ret={%g}", resultRet);
    expect.assignFormat("ret={%g}", expectRet);

    return resultRet == expectRet;
  }
};
1784
1785 // ============================================================================
1786 // [X86Test_AllocArgsDouble]
1787 // ============================================================================
1788
// Passes seven double arguments plus an output pointer; sums the doubles
// with `addsd` into xv[0] and stores the result through the pointer.
class X86Test_AllocArgsDouble : public X86Test {
public:
  X86Test_AllocArgsDouble() : X86Test("AllocArgsDouble") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocArgsDouble());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<void, double, double, double, double, double, double, double, void*>(CallConv::kIdHost));

    uint32_t i;

    x86::Gp p = cc.newIntPtr("p");
    x86::Xmm xv[7];

    for (i = 0; i < 7; i++) {
      xv[i] = cc.newXmmSd("xv%u", i);
      cc.setArg(i, xv[i]);
    }

    cc.setArg(7, p);

    // Accumulate everything into xv[0].
    cc.addsd(xv[0], xv[1]);
    cc.addsd(xv[0], xv[2]);
    cc.addsd(xv[0], xv[3]);
    cc.addsd(xv[0], xv[4]);
    cc.addsd(xv[0], xv[5]);
    cc.addsd(xv[0], xv[6]);

    cc.movsd(x86::ptr(p), xv[0]);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(double, double, double, double, double, double, double, double*);
    Func func = ptr_as_func<Func>(_func);

    double resultRet;
    // Small integral values - the sum is exact in double arithmetic.
    double expectRet = 1.0 + 2.0 + 3.0 + 4.0 + 5.0 + 6.0 + 7.0;

    func(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, &resultRet);

    result.assignFormat("ret={%g}", resultRet);
    expect.assignFormat("ret={%g}", expectRet);

    return resultRet == expectRet;
  }
};
1838
1839 // ============================================================================
1840 // [X86Test_AllocRetFloat1]
1841 // ============================================================================
1842
1843 class X86Test_AllocRetFloat1 : public X86Test {
1844 public:
1845 X86Test_AllocRetFloat1() : X86Test("AllocRetFloat1") {}
1846
1847 static void add(X86TestApp& app) {
1848 app.add(new X86Test_AllocRetFloat1());
1849 }
1850
1851 virtual void compile(x86::Compiler& cc) {
1852 cc.addFunc(FuncSignatureT<float, float>(CallConv::kIdHost));
1853
1854 x86::Xmm x = cc.newXmmSs("x");
1855 cc.setArg(0, x);
1856 cc.ret(x);
1857
1858 cc.endFunc();
1859 }
1860
1861 virtual bool run(void* _func, String& result, String& expect) {
1862 typedef float (*Func)(float);
1863 Func func = ptr_as_func<Func>(_func);
1864
1865 float resultRet = func(42.0f);
1866 float expectRet = 42.0f;
1867
1868 result.assignFormat("ret={%g}", resultRet);
1869 expect.assignFormat("ret={%g}", expectRet);
1870
1871 return resultRet == expectRet;
1872 }
1873 };
1874
1875 // ============================================================================
1876 // [X86Test_AllocRetFloat2]
1877 // ============================================================================
1878
1879 class X86Test_AllocRetFloat2 : public X86Test {
1880 public:
1881 X86Test_AllocRetFloat2() : X86Test("AllocRetFloat2") {}
1882
1883 static void add(X86TestApp& app) {
1884 app.add(new X86Test_AllocRetFloat2());
1885 }
1886
1887 virtual void compile(x86::Compiler& cc) {
1888 cc.addFunc(FuncSignatureT<float, float, float>(CallConv::kIdHost));
1889
1890 x86::Xmm x = cc.newXmmSs("x");
1891 x86::Xmm y = cc.newXmmSs("y");
1892
1893 cc.setArg(0, x);
1894 cc.setArg(1, y);
1895
1896 cc.addss(x, y);
1897 cc.ret(x);
1898
1899 cc.endFunc();
1900 }
1901
1902 virtual bool run(void* _func, String& result, String& expect) {
1903 typedef float (*Func)(float, float);
1904 Func func = ptr_as_func<Func>(_func);
1905
1906 float resultRet = func(1.0f, 2.0f);
1907 float expectRet = 1.0f + 2.0f;
1908
1909 result.assignFormat("ret={%g}", resultRet);
1910 expect.assignFormat("ret={%g}", expectRet);
1911
1912 return resultRet == expectRet;
1913 }
1914 };
1915
1916 // ============================================================================
1917 // [X86Test_AllocRetDouble1]
1918 // ============================================================================
1919
1920 class X86Test_AllocRetDouble1 : public X86Test {
1921 public:
1922 X86Test_AllocRetDouble1() : X86Test("AllocRetDouble1") {}
1923
1924 static void add(X86TestApp& app) {
1925 app.add(new X86Test_AllocRetDouble1());
1926 }
1927
1928 virtual void compile(x86::Compiler& cc) {
1929 cc.addFunc(FuncSignatureT<double, double>(CallConv::kIdHost));
1930
1931 x86::Xmm x = cc.newXmmSd("x");
1932 cc.setArg(0, x);
1933 cc.ret(x);
1934
1935 cc.endFunc();
1936 }
1937
1938 virtual bool run(void* _func, String& result, String& expect) {
1939 typedef double (*Func)(double);
1940 Func func = ptr_as_func<Func>(_func);
1941
1942 double resultRet = func(42.0);
1943 double expectRet = 42.0;
1944
1945 result.assignFormat("ret={%g}", resultRet);
1946 expect.assignFormat("ret={%g}", expectRet);
1947
1948 return resultRet == expectRet;
1949 }
1950 };
1951 // ============================================================================
1952 // [X86Test_AllocRetDouble2]
1953 // ============================================================================
1954
1955 class X86Test_AllocRetDouble2 : public X86Test {
1956 public:
1957 X86Test_AllocRetDouble2() : X86Test("AllocRetDouble2") {}
1958
1959 static void add(X86TestApp& app) {
1960 app.add(new X86Test_AllocRetDouble2());
1961 }
1962
1963 virtual void compile(x86::Compiler& cc) {
1964 cc.addFunc(FuncSignatureT<double, double, double>(CallConv::kIdHost));
1965
1966 x86::Xmm x = cc.newXmmSd("x");
1967 x86::Xmm y = cc.newXmmSd("y");
1968
1969 cc.setArg(0, x);
1970 cc.setArg(1, y);
1971
1972 cc.addsd(x, y);
1973 cc.ret(x);
1974
1975 cc.endFunc();
1976 }
1977
1978 virtual bool run(void* _func, String& result, String& expect) {
1979 typedef double (*Func)(double, double);
1980 Func func = ptr_as_func<Func>(_func);
1981
1982 double resultRet = func(1.0, 2.0);
1983 double expectRet = 1.0 + 2.0;
1984
1985 result.assignFormat("ret={%g}", resultRet);
1986 expect.assignFormat("ret={%g}", expectRet);
1987
1988 return resultRet == expectRet;
1989 }
1990 };
1991
1992 // ============================================================================
1993 // [X86Test_AllocStack]
1994 // ============================================================================
1995
// Tests allocation and indexed addressing of a local stack area: the
// generated function fills a 256-byte stack buffer with the byte sequence
// [0..255] and then sums it.
class X86Test_AllocStack : public X86Test {
public:
  X86Test_AllocStack() : X86Test("AllocStack") {}

  // Size of the stack buffer in bytes.
  enum { kSize = 256 };

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocStack());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));

    // 256 bytes with 1-byte alignment; the operand size is narrowed to a
    // single byte so the memory operand matches the byte stores/loads below.
    x86::Mem stack = cc.newStack(kSize, 1);
    stack.setSize(1);

    x86::Gp i = cc.newIntPtr("i");
    x86::Gp a = cc.newInt32("a");
    x86::Gp b = cc.newInt32("b");

    Label L_1 = cc.newLabel();
    Label L_2 = cc.newLabel();

    // Fill stack by sequence [0, 1, 2, 3 ... 255].
    cc.xor_(i, i);

    // Clone the stack operand and attach `i` as an index register so the
    // loops below can address `stack[i]`.
    x86::Mem stackWithIndex = stack.clone();
    stackWithIndex.setIndex(i, 0);

    cc.bind(L_1);
    cc.mov(stackWithIndex, i.r8()); // Store the low byte of `i`.
    cc.inc(i);
    cc.cmp(i, 255);
    cc.jle(L_1);

    // Sum sequence in stack.
    cc.xor_(i, i);
    cc.xor_(a, a);

    cc.bind(L_2);
    cc.movzx(b, stackWithIndex); // Zero-extended byte load.
    cc.add(a, b);
    cc.inc(i);
    cc.cmp(i, 255);
    cc.jle(L_2);

    cc.ret(a);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(void);
    Func func = ptr_as_func<Func>(_func);

    int resultRet = func();
    int expectRet = 32640; // Sum of 0..255.

    result.assignInt(resultRet);
    expect.assignInt(expectRet);

    return resultRet == expectRet;
  }
};
2059
2060 // ============================================================================
2061 // [X86Test_AllocMemcpy]
2062 // ============================================================================
2063
// Tests a generated dword-by-dword memory copy: a simple loop that copies
// `cnt` 32-bit elements from `src` to `dst`.
class X86Test_AllocMemcpy : public X86Test {
public:
  X86Test_AllocMemcpy() : X86Test("AllocMemcpy") {}

  // Number of dwords copied by `run()`.
  enum { kCount = 32 };

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocMemcpy());
  }

  // Emits `void f(uint32_t* dst, const uint32_t* src, size_t cnt)`.
  virtual void compile(x86::Compiler& cc) {
    x86::Gp dst = cc.newIntPtr("dst");
    x86::Gp src = cc.newIntPtr("src");
    x86::Gp cnt = cc.newUIntPtr("cnt");

    Label L_Loop = cc.newLabel(); // Create base labels we use
    Label L_Exit = cc.newLabel(); // in our function.

    cc.addFunc(FuncSignatureT<void, uint32_t*, const uint32_t*, size_t>(CallConv::kIdHost));
    cc.setArg(0, dst);
    cc.setArg(1, src);
    cc.setArg(2, cnt);

    cc.test(cnt, cnt); // Exit if the size is zero.
    cc.jz(L_Exit);

    cc.bind(L_Loop); // Bind the loop label here.

    x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes).
    cc.mov(tmp, x86::dword_ptr(src));
    cc.mov(x86::dword_ptr(dst), tmp);

    cc.add(src, 4); // Increment dst/src pointers.
    cc.add(dst, 4);

    cc.dec(cnt); // Loop until cnt isn't zero.
    cc.jnz(L_Loop);

    cc.bind(L_Exit); // Bind the exit label here.
    cc.endFunc(); // End of function.
  }

  // Copies kCount dwords and verifies dst matches src element by element.
  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(uint32_t*, const uint32_t*, size_t);
    Func func = ptr_as_func<Func>(_func);

    uint32_t i;

    uint32_t dstBuffer[kCount];
    uint32_t srcBuffer[kCount];

    for (i = 0; i < kCount; i++) {
      dstBuffer[i] = 0;
      srcBuffer[i] = i;
    }

    func(dstBuffer, srcBuffer, kCount);

    result.assignString("buf={");
    expect.assignString("buf={");

    for (i = 0; i < kCount; i++) {
      if (i != 0) {
        result.appendString(", ");
        expect.appendString(", ");
      }

      result.appendFormat("%u", unsigned(dstBuffer[i]));
      expect.appendFormat("%u", unsigned(srcBuffer[i]));
    }

    result.appendString("}");
    expect.appendString("}");

    return result == expect;
  }
};
2141
2142 // ============================================================================
2143 // [X86Test_AllocExtraBlock]
2144 // ============================================================================
2145
// Tests emitting an out-of-line code block: the subtraction path is placed
// at the end of the function body by temporarily repositioning the cursor.
class X86Test_AllocExtraBlock : public X86Test {
public:
  X86Test_AllocExtraBlock() : X86Test("AllocExtraBlock") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocExtraBlock());
  }

  // Emits the equivalent of `int f(int cond, int a, int b)` returning
  // `a + b` when cond == 0 and `a - b` otherwise.
  virtual void compile(x86::Compiler& cc) {
    x86::Gp cond = cc.newInt32("cond");
    x86::Gp ret = cc.newInt32("ret");
    x86::Gp a = cc.newInt32("a");
    x86::Gp b = cc.newInt32("b");

    cc.addFunc(FuncSignatureT<int, int, int, int>(CallConv::kIdHost));
    cc.setArg(0, cond);
    cc.setArg(1, a);
    cc.setArg(2, b);

    Label L_Ret = cc.newLabel();
    Label L_Extra = cc.newLabel();

    cc.test(cond, cond);
    cc.jnz(L_Extra); // Taken branch jumps forward to the out-of-line block.

    cc.mov(ret, a);
    cc.add(ret, b);

    cc.bind(L_Ret);
    cc.ret(ret);

    // Emit code sequence at the end of the function: move the cursor just
    // before the function's end node, emit the L_Extra block there, then
    // restore the previous cursor position so emission continues normally.
    BaseNode* prevCursor = cc.setCursor(cc.func()->endNode()->prev());
    cc.bind(L_Extra);
    cc.mov(ret, a);
    cc.sub(ret, b);
    cc.jmp(L_Ret);
    cc.setCursor(prevCursor);

    cc.endFunc();
  }

  // Exercises both branches (cond == 0 and cond != 0).
  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int, int);
    Func func = ptr_as_func<Func>(_func);

    int ret1 = func(0, 4, 5);
    int ret2 = func(1, 4, 5);

    int exp1 = 4 + 5;
    int exp2 = 4 - 5;

    result.assignFormat("ret={%d, %d}", ret1, ret2);
    expect.assignFormat("ret={%d, %d}", exp1, exp2);

    return result == expect;
  }
};
2204
2205 // ============================================================================
2206 // [X86Test_AllocAlphaBlend]
2207 // ============================================================================
2208
// Tests a generated SRC-over alpha-blend kernel (emitted by the shared
// helper asmtest::generateAlphaBlend) against a scalar C++ reference.
class X86Test_AllocAlphaBlend : public X86Test {
public:
  X86Test_AllocAlphaBlend() : X86Test("AllocAlphaBlend") {}

  // Number of 32-bit pixels processed; deliberately not a multiple of 4 so
  // the kernel's scalar head/tail handling is exercised as well.
  enum { kCount = 17 };

  static void add(X86TestApp& app) {
    app.add(new X86Test_AllocAlphaBlend());
  }

  // Scalar reference: 32-bit packed SRC-over, i.e. dst * (255 - srcAlpha)
  // approximated in 8-bit fixed point (with +0x80 rounding), plus src.
  static uint32_t blendSrcOver(uint32_t d, uint32_t s) {
    uint32_t saInv = ~s >> 24; // 255 - source alpha (byte 3 of `s`).

    uint32_t d_20 = (d ) & 0x00FF00FF; // Even bytes (channels 0 and 2).
    uint32_t d_31 = (d >> 8) & 0x00FF00FF; // Odd bytes (channels 1 and 3).

    d_20 *= saInv;
    d_31 *= saInv;

    // Divide by 255 via the `(x + (x >> 8) + 0x80) >> 8` approximation;
    // d_31 stays shifted left by 8 so the two halves recombine by addition.
    d_20 = ((d_20 + ((d_20 >> 8) & 0x00FF00FFu) + 0x00800080u) & 0xFF00FF00u) >> 8;
    d_31 = ((d_31 + ((d_31 >> 8) & 0x00FF00FFu) + 0x00800080u) & 0xFF00FF00u);

    return d_20 + d_31 + s;
  }

  virtual void compile(x86::Compiler& cc) {
    asmtest::generateAlphaBlend(cc);
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(void*, const void*, size_t);
    Func func = ptr_as_func<Func>(_func);

    static const uint32_t dstConstData[] = { 0x00000000, 0x10101010, 0x20100804, 0x30200003, 0x40204040, 0x5000004D, 0x60302E2C, 0x706F6E6D, 0x807F4F2F, 0x90349001, 0xA0010203, 0xB03204AB, 0xC023AFBD, 0xD0D0D0C0, 0xE0AABBCC, 0xFFFFFFFF, 0xF8F4F2F1 };
    static const uint32_t srcConstData[] = { 0xE0E0E0E0, 0xA0008080, 0x341F1E1A, 0xFEFEFEFE, 0x80302010, 0x49490A0B, 0x998F7798, 0x00000000, 0x01010101, 0xA0264733, 0xBAB0B1B9, 0xFF000000, 0xDAB0A0C1, 0xE0BACFDA, 0x99887766, 0xFFFFFF80, 0xEE0A5FEC };

    // Over-allocate by 3 dwords (12 bytes) so a 16-byte aligned view can be
    // carved out of each buffer below.
    uint32_t _dstBuffer[kCount + 3];
    uint32_t _srcBuffer[kCount + 3];

    // Has to be aligned.
    uint32_t* dstBuffer = (uint32_t*)Support::alignUp<intptr_t>((intptr_t)_dstBuffer, 16);
    uint32_t* srcBuffer = (uint32_t*)Support::alignUp<intptr_t>((intptr_t)_srcBuffer, 16);

    memcpy(dstBuffer, dstConstData, sizeof(dstConstData));
    memcpy(srcBuffer, srcConstData, sizeof(srcConstData));

    uint32_t i;
    uint32_t expBuffer[kCount];

    // Compute the expected pixels with the scalar reference first.
    for (i = 0; i < kCount; i++) {
      expBuffer[i] = blendSrcOver(dstBuffer[i], srcBuffer[i]);
    }

    func(dstBuffer, srcBuffer, kCount);

    result.assignString("buf={");
    expect.assignString("buf={");

    for (i = 0; i < kCount; i++) {
      if (i != 0) {
        result.appendString(", ");
        expect.appendString(", ");
      }

      result.appendFormat("%08X", unsigned(dstBuffer[i]));
      expect.appendFormat("%08X", unsigned(expBuffer[i]));
    }

    result.appendString("}");
    expect.appendString("}");

    return result == expect;
  }
};
2283
2284 // ============================================================================
2285 // [X86Test_FuncCallBase1]
2286 // ============================================================================
2287
2288 class X86Test_FuncCallBase1 : public X86Test {
2289 public:
2290 X86Test_FuncCallBase1() : X86Test("FuncCallBase1") {}
2291
2292 static void add(X86TestApp& app) {
2293 app.add(new X86Test_FuncCallBase1());
2294 }
2295
2296 virtual void compile(x86::Compiler& cc) {
2297 x86::Gp v0 = cc.newInt32("v0");
2298 x86::Gp v1 = cc.newInt32("v1");
2299 x86::Gp v2 = cc.newInt32("v2");
2300
2301 cc.addFunc(FuncSignatureT<int, int, int, int>(CallConv::kIdHost));
2302 cc.setArg(0, v0);
2303 cc.setArg(1, v1);
2304 cc.setArg(2, v2);
2305
2306 // Just do something.
2307 cc.shl(v0, 1);
2308 cc.shl(v1, 1);
2309 cc.shl(v2, 1);
2310
2311 // Call a function.
2312 FuncCallNode* call = cc.call(imm((void*)calledFunc), FuncSignatureT<int, int, int, int>(CallConv::kIdHost));
2313 call->setArg(0, v2);
2314 call->setArg(1, v1);
2315 call->setArg(2, v0);
2316 call->setRet(0, v0);
2317
2318 cc.ret(v0);
2319 cc.endFunc();
2320 }
2321
2322 virtual bool run(void* _func, String& result, String& expect) {
2323 typedef int (*Func)(int, int, int);
2324 Func func = ptr_as_func<Func>(_func);
2325
2326 int resultRet = func(3, 2, 1);
2327 int expectRet = 36;
2328
2329 result.assignFormat("ret=%d", resultRet);
2330 expect.assignFormat("ret=%d", expectRet);
2331
2332 return resultRet == expectRet;
2333 }
2334
2335 static int calledFunc(int a, int b, int c) { return (a + b) * c; }
2336 };
2337
2338 // ============================================================================
2339 // [X86Test_FuncCallBase2]
2340 // ============================================================================
2341
// Tests that two stack allocations of the same size receive distinct
// addresses and survive calls into external C functions (memcpy/memcmp).
class X86Test_FuncCallBase2 : public X86Test {
public:
  X86Test_FuncCallBase2() : X86Test("FuncCallBase2") {}

  enum { kSize = 256 }; // NOTE(review): not referenced by this test.

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallBase2());
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));

    const int kTokenSize = 32;

    // Two separate 32-byte, 32-byte-aligned stack slots.
    x86::Mem s1 = cc.newStack(kTokenSize, 32);
    x86::Mem s2 = cc.newStack(kTokenSize, 32);

    x86::Gp p1 = cc.newIntPtr("p1");
    x86::Gp p2 = cc.newIntPtr("p2");

    x86::Gp ret = cc.newInt32("ret");
    Label L_Exit = cc.newLabel();

    // Exactly kTokenSize bytes including the NUL terminator.
    static const char token[kTokenSize] = "-+:|abcdefghijklmnopqrstuvwxyz|";
    FuncCallNode* call;

    cc.lea(p1, s1);
    cc.lea(p2, s2);

    // Try to corrupt the stack if wrongly allocated.
    call = cc.call(imm((void*)memcpy), FuncSignatureT<void*, void*, void*, size_t>(CallConv::kIdHostCDecl));
    call->setArg(0, p1);
    call->setArg(1, imm(token));
    call->setArg(2, imm(kTokenSize));
    call->setRet(0, p1);

    call = cc.call(imm((void*)memcpy), FuncSignatureT<void*, void*, void*, size_t>(CallConv::kIdHostCDecl));
    call->setArg(0, p2);
    call->setArg(1, imm(token));
    call->setArg(2, imm(kTokenSize));
    call->setRet(0, p2);

    // Compare the two slots; both were filled with the same token.
    call = cc.call(imm((void*)memcmp), FuncSignatureT<int, void*, void*, size_t>(CallConv::kIdHostCDecl));
    call->setArg(0, p1);
    call->setArg(1, p2);
    call->setArg(2, imm(kTokenSize));
    call->setRet(0, ret);

    // This should be 0 on success, however, if both `p1` and `p2` were
    // allocated in the same address this check will still pass.
    cc.cmp(ret, 0);
    cc.jnz(L_Exit);

    // Checks whether `p1` and `p2` are different (must be).
    cc.xor_(ret, ret);
    cc.cmp(p1, p2);
    cc.setz(ret.r8());

    cc.bind(L_Exit);
    cc.ret(ret);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(void);
    Func func = ptr_as_func<Func>(_func);

    int resultRet = func();
    int expectRet = 0; // Must be zero, stack addresses must be different.

    result.assignInt(resultRet);
    expect.assignInt(expectRet);

    return resultRet == expectRet;
  }
};
2419
2420 // ============================================================================
2421 // [X86Test_FuncCallStd]
2422 // ============================================================================
2423
2424 class X86Test_FuncCallStd : public X86Test {
2425 public:
2426 X86Test_FuncCallStd() : X86Test("FuncCallStd") {}
2427
2428 static void add(X86TestApp& app) {
2429 app.add(new X86Test_FuncCallStd());
2430 }
2431
2432 virtual void compile(x86::Compiler& cc) {
2433 x86::Gp x = cc.newInt32("x");
2434 x86::Gp y = cc.newInt32("y");
2435 x86::Gp z = cc.newInt32("z");
2436
2437 cc.addFunc(FuncSignatureT<int, int, int, int>(CallConv::kIdHost));
2438 cc.setArg(0, x);
2439 cc.setArg(1, y);
2440 cc.setArg(2, z);
2441
2442 FuncCallNode* call = cc.call(
2443 imm((void*)calledFunc),
2444 FuncSignatureT<int, int, int, int>(CallConv::kIdHostStdCall));
2445 call->setArg(0, x);
2446 call->setArg(1, y);
2447 call->setArg(2, z);
2448 call->setRet(0, x);
2449
2450 cc.ret(x);
2451 cc.endFunc();
2452 }
2453
2454 virtual bool run(void* _func, String& result, String& expect) {
2455 typedef int (*Func)(int, int, int);
2456 Func func = ptr_as_func<Func>(_func);
2457
2458 int resultRet = func(1, 42, 3);
2459 int expectRet = calledFunc(1, 42, 3);
2460
2461 result.assignFormat("ret=%d", resultRet);
2462 expect.assignFormat("ret=%d", expectRet);
2463
2464 return resultRet == expectRet;
2465 }
2466
2467 // STDCALL function that is called inside the generated one.
2468 static int ASMJIT_STDCALL calledFunc(int a, int b, int c) noexcept {
2469 return (a + b) * c;
2470 }
2471 };
2472
2473 // ============================================================================
2474 // [X86Test_FuncCallFast]
2475 // ============================================================================
2476
2477 class X86Test_FuncCallFast : public X86Test {
2478 public:
2479 X86Test_FuncCallFast() : X86Test("FuncCallFast") {}
2480
2481 static void add(X86TestApp& app) {
2482 app.add(new X86Test_FuncCallFast());
2483 }
2484
2485 virtual void compile(x86::Compiler& cc) {
2486 x86::Gp var = cc.newInt32("var");
2487
2488 cc.addFunc(FuncSignatureT<int, int>(CallConv::kIdHost));
2489 cc.setArg(0, var);
2490
2491 FuncCallNode* call;
2492 call = cc.call(
2493 imm((void*)calledFunc),
2494 FuncSignatureT<int, int>(CallConv::kIdHostFastCall));
2495 call->setArg(0, var);
2496 call->setRet(0, var);
2497
2498 call = cc.call(
2499 imm((void*)calledFunc),
2500 FuncSignatureT<int, int>(CallConv::kIdHostFastCall));
2501 call->setArg(0, var);
2502 call->setRet(0, var);
2503
2504 cc.ret(var);
2505 cc.endFunc();
2506 }
2507
2508 virtual bool run(void* _func, String& result, String& expect) {
2509 typedef int (*Func)(int);
2510 Func func = ptr_as_func<Func>(_func);
2511
2512 int resultRet = func(9);
2513 int expectRet = (9 * 9) * (9 * 9);
2514
2515 result.assignFormat("ret=%d", resultRet);
2516 expect.assignFormat("ret=%d", expectRet);
2517
2518 return resultRet == expectRet;
2519 }
2520
2521 // FASTCALL function that is called inside the generated one.
2522 static int ASMJIT_FASTCALL calledFunc(int a) noexcept {
2523 return a * a;
2524 }
2525 };
2526
2527 // ============================================================================
2528 // [X86Test_FuncCallLight]
2529 // ============================================================================
2530
// Tests the "LightCall" internal calling convention, which passes and
// returns XMM registers directly. A CDecl wrapper computes, per 16-bit lane,
// (a + b) * (c + d) by calling a small XMM->XMM add helper twice.
class X86Test_FuncCallLight : public X86Test {
public:
  X86Test_FuncCallLight() : X86Test("FuncCallLight") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallLight());
  }

  virtual void compile(x86::Compiler& cc) {
    // Public entry point (CDecl) and internal helper (LightCall2) signatures.
    FuncSignatureT<void, const void*, const void*, const void*, const void*, void*> funcSig(CallConv::kIdHostCDecl);
    FuncSignatureT<x86::Xmm, x86::Xmm, x86::Xmm> fastSig(CallConv::kIdHostLightCall2);

    // Both functions are created up-front so the wrapper can reference the
    // helper's label before the helper's body is emitted.
    FuncNode* func = cc.newFunc(funcSig);
    FuncNode* fast = cc.newFunc(fastSig);

    {
      x86::Gp aPtr = cc.newIntPtr("aPtr");
      x86::Gp bPtr = cc.newIntPtr("bPtr");
      x86::Gp cPtr = cc.newIntPtr("cPtr");
      x86::Gp dPtr = cc.newIntPtr("dPtr");
      x86::Gp pOut = cc.newIntPtr("pOut");

      x86::Xmm aXmm = cc.newXmm("aXmm");
      x86::Xmm bXmm = cc.newXmm("bXmm");
      x86::Xmm cXmm = cc.newXmm("cXmm");
      x86::Xmm dXmm = cc.newXmm("dXmm");

      cc.addFunc(func);

      cc.setArg(0, aPtr);
      cc.setArg(1, bPtr);
      cc.setArg(2, cPtr);
      cc.setArg(3, dPtr);
      cc.setArg(4, pOut);

      // Load the four 8x16-bit vectors (unaligned loads).
      cc.movups(aXmm, x86::ptr(aPtr));
      cc.movups(bXmm, x86::ptr(bPtr));
      cc.movups(cXmm, x86::ptr(cPtr));
      cc.movups(dXmm, x86::ptr(dPtr));

      x86::Xmm xXmm = cc.newXmm("xXmm");
      x86::Xmm yXmm = cc.newXmm("yXmm");

      // x = a + b (via the LightCall helper, called through its label).
      FuncCallNode* call1 = cc.call(fast->label(), fastSig);
      call1->setArg(0, aXmm);
      call1->setArg(1, bXmm);
      call1->setRet(0, xXmm);

      // y = c + d.
      FuncCallNode* call2 = cc.call(fast->label(), fastSig);
      call2->setArg(0, cXmm);
      call2->setArg(1, dXmm);
      call2->setRet(0, yXmm);

      // out = x * y (packed 16-bit multiply, low halves).
      cc.pmullw(xXmm, yXmm);
      cc.movups(x86::ptr(pOut), xXmm);

      cc.endFunc();
    }

    // The helper: returns the packed 16-bit sum of its two XMM arguments.
    {
      x86::Xmm aXmm = cc.newXmm("aXmm");
      x86::Xmm bXmm = cc.newXmm("bXmm");

      cc.addFunc(fast);
      cc.setArg(0, aXmm);
      cc.setArg(1, bXmm);
      cc.paddw(aXmm, bXmm);
      cc.ret(aXmm);
      cc.endFunc();
    }
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef void (*Func)(const void*, const void*, const void*, const void*, void*);

    Func func = ptr_as_func<Func>(_func);

    int16_t a[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
    int16_t b[8] = { 7, 6, 5, 4, 3, 2, 1, 0 };
    int16_t c[8] = { 1, 3, 9, 7, 5, 4, 2, 1 };
    int16_t d[8] = { 2, 0,-6,-4,-2,-1, 1, 2 };

    int16_t o[8];
    int oExp = 7 * 3; // Every lane: (a[i] + b[i]) == 7 and (c[i] + d[i]) == 3.

    func(a, b, c, d, o);

    result.assignFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", o[0], o[1], o[2], o[3], o[4], o[5], o[6], o[7]);
    expect.assignFormat("ret={%02X %02X %02X %02X %02X %02X %02X %02X}", oExp, oExp, oExp, oExp, oExp, oExp, oExp, oExp);

    return result == expect;
  }
};
2624
2625 // ============================================================================
2626 // [X86Test_FuncCallManyArgs]
2627 // ============================================================================
2628
// Tests a call with ten integer arguments — enough to require both register
// and stack argument passing on all supported calling conventions.
class X86Test_FuncCallManyArgs : public X86Test {
public:
  X86Test_FuncCallManyArgs() : X86Test("FuncCallManyArgs") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallManyArgs());
  }

  // Reference callee; also reused by X86Test_FuncCallImmArgs.
  static int calledFunc(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) {
    return (a * b * c * d * e) + (f * g * h * i * j);
  }

  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));

    // Prepare: each argument lives in its own virtual register.
    x86::Gp va = cc.newInt32("va");
    x86::Gp vb = cc.newInt32("vb");
    x86::Gp vc = cc.newInt32("vc");
    x86::Gp vd = cc.newInt32("vd");
    x86::Gp ve = cc.newInt32("ve");
    x86::Gp vf = cc.newInt32("vf");
    x86::Gp vg = cc.newInt32("vg");
    x86::Gp vh = cc.newInt32("vh");
    x86::Gp vi = cc.newInt32("vi");
    x86::Gp vj = cc.newInt32("vj");

    cc.mov(va, 0x03);
    cc.mov(vb, 0x12);
    cc.mov(vc, 0xA0);
    cc.mov(vd, 0x0B);
    cc.mov(ve, 0x2F);
    cc.mov(vf, 0x02);
    cc.mov(vg, 0x0C);
    cc.mov(vh, 0x12);
    cc.mov(vi, 0x18);
    cc.mov(vj, 0x1E);

    // Call function.
    FuncCallNode* call = cc.call(
      imm((void*)calledFunc),
      FuncSignatureT<int, int, int, int, int, int, int, int, int, int, int>(CallConv::kIdHost));
    call->setArg(0, va);
    call->setArg(1, vb);
    call->setArg(2, vc);
    call->setArg(3, vd);
    call->setArg(4, ve);
    call->setArg(5, vf);
    call->setArg(6, vg);
    call->setArg(7, vh);
    call->setArg(8, vi);
    call->setArg(9, vj);
    call->setRet(0, va);

    cc.ret(va);
    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(void);
    Func func = ptr_as_func<Func>(_func);

    int resultRet = func();
    // Expected value comes from calling the reference directly with the
    // same constants the generated code loads.
    int expectRet = calledFunc(0x03, 0x12, 0xA0, 0x0B, 0x2F, 0x02, 0x0C, 0x12, 0x18, 0x1E);

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return resultRet == expectRet;
  }
};
2700
2701 // ============================================================================
2702 // [X86Test_FuncCallDuplicateArgs]
2703 // ============================================================================
2704
2705 class X86Test_FuncCallDuplicateArgs : public X86Test {
2706 public:
2707 X86Test_FuncCallDuplicateArgs() : X86Test("FuncCallDuplicateArgs") {}
2708
2709 static void add(X86TestApp& app) {
2710 app.add(new X86Test_FuncCallDuplicateArgs());
2711 }
2712
2713 static int calledFunc(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) {
2714 return (a * b * c * d * e) + (f * g * h * i * j);
2715 }
2716
2717 virtual void compile(x86::Compiler& cc) {
2718 cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));
2719
2720 // Prepare.
2721 x86::Gp a = cc.newInt32("a");
2722 cc.mov(a, 3);
2723
2724 // Call function.
2725 FuncCallNode* call = cc.call(
2726 imm((void*)calledFunc),
2727 FuncSignatureT<int, int, int, int, int, int, int, int, int, int, int>(CallConv::kIdHost));
2728 call->setArg(0, a);
2729 call->setArg(1, a);
2730 call->setArg(2, a);
2731 call->setArg(3, a);
2732 call->setArg(4, a);
2733 call->setArg(5, a);
2734 call->setArg(6, a);
2735 call->setArg(7, a);
2736 call->setArg(8, a);
2737 call->setArg(9, a);
2738 call->setRet(0, a);
2739
2740 cc.ret(a);
2741 cc.endFunc();
2742 }
2743
2744 virtual bool run(void* _func, String& result, String& expect) {
2745 typedef int (*Func)(void);
2746 Func func = ptr_as_func<Func>(_func);
2747
2748 int resultRet = func();
2749 int expectRet = calledFunc(3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
2750
2751 result.assignFormat("ret=%d", resultRet);
2752 expect.assignFormat("ret=%d", expectRet);
2753
2754 return resultRet == expectRet;
2755 }
2756 };
2757
2758 // ============================================================================
2759 // [X86Test_FuncCallImmArgs]
2760 // ============================================================================
2761
2762 class X86Test_FuncCallImmArgs : public X86Test {
2763 public:
2764 X86Test_FuncCallImmArgs() : X86Test("FuncCallImmArgs") {}
2765
2766 static void add(X86TestApp& app) {
2767 app.add(new X86Test_FuncCallImmArgs());
2768 }
2769
2770 virtual void compile(x86::Compiler& cc) {
2771 cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));
2772
2773 // Prepare.
2774 x86::Gp rv = cc.newInt32("rv");
2775
2776 // Call function.
2777 FuncCallNode* call = cc.call(
2778 imm((void*)X86Test_FuncCallManyArgs::calledFunc),
2779 FuncSignatureT<int, int, int, int, int, int, int, int, int, int, int>(CallConv::kIdHost));
2780
2781 call->setArg(0, imm(0x03));
2782 call->setArg(1, imm(0x12));
2783 call->setArg(2, imm(0xA0));
2784 call->setArg(3, imm(0x0B));
2785 call->setArg(4, imm(0x2F));
2786 call->setArg(5, imm(0x02));
2787 call->setArg(6, imm(0x0C));
2788 call->setArg(7, imm(0x12));
2789 call->setArg(8, imm(0x18));
2790 call->setArg(9, imm(0x1E));
2791 call->setRet(0, rv);
2792
2793 cc.ret(rv);
2794 cc.endFunc();
2795 }
2796
2797 virtual bool run(void* _func, String& result, String& expect) {
2798 typedef int (*Func)(void);
2799 Func func = ptr_as_func<Func>(_func);
2800
2801 int resultRet = func();
2802 int expectRet = X86Test_FuncCallManyArgs::calledFunc(0x03, 0x12, 0xA0, 0x0B, 0x2F, 0x02, 0x0C, 0x12, 0x18, 0x1E);
2803
2804 result.assignFormat("ret=%d", resultRet);
2805 expect.assignFormat("ret=%d", expectRet);
2806
2807 return resultRet == expectRet;
2808 }
2809 };
2810
2811 // ============================================================================
2812 // [X86Test_FuncCallPtrArgs]
2813 // ============================================================================
2814
2815 class X86Test_FuncCallPtrArgs : public X86Test {
2816 public:
2817 X86Test_FuncCallPtrArgs() : X86Test("FuncCallPtrArgs") {}
2818
2819 static void add(X86TestApp& app) {
2820 app.add(new X86Test_FuncCallPtrArgs());
2821 }
2822
2823 static int calledFunc(void* a, void* b, void* c, void* d, void* e, void* f, void* g, void* h, void* i, void* j) {
2824 return int((intptr_t)a) +
2825 int((intptr_t)b) +
2826 int((intptr_t)c) +
2827 int((intptr_t)d) +
2828 int((intptr_t)e) +
2829 int((intptr_t)f) +
2830 int((intptr_t)g) +
2831 int((intptr_t)h) +
2832 int((intptr_t)i) +
2833 int((intptr_t)j) ;
2834 }
2835
2836 virtual void compile(x86::Compiler& cc) {
2837 cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));
2838
2839 // Prepare.
2840 x86::Gp rv = cc.newInt32("rv");
2841
2842 // Call function.
2843 FuncCallNode* call = cc.call(
2844 imm((void*)calledFunc),
2845 FuncSignatureT<int, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*>(CallConv::kIdHost));
2846
2847 call->setArg(0, imm(0x01));
2848 call->setArg(1, imm(0x02));
2849 call->setArg(2, imm(0x03));
2850 call->setArg(3, imm(0x04));
2851 call->setArg(4, imm(0x05));
2852 call->setArg(5, imm(0x06));
2853 call->setArg(6, imm(0x07));
2854 call->setArg(7, imm(0x08));
2855 call->setArg(8, imm(0x09));
2856 call->setArg(9, imm(0x0A));
2857 call->setRet(0, rv);
2858
2859 cc.ret(rv);
2860 cc.endFunc();
2861 }
2862
2863 virtual bool run(void* _func, String& result, String& expect) {
2864 typedef int (*Func)(void);
2865 Func func = ptr_as_func<Func>(_func);
2866
2867 int resultRet = func();
2868 int expectRet = 55;
2869
2870 result.assignFormat("ret=%d", resultRet);
2871 expect.assignFormat("ret=%d", expectRet);
2872
2873 return resultRet == expectRet;
2874 }
2875 };
2876
2877 // ============================================================================
2878 // [X86Test_FuncCallRefArgs]
2879 // ============================================================================
2880
2881 class X86Test_FuncCallRefArgs : public X86Test {
2882 public:
2883 X86Test_FuncCallRefArgs() : X86Test("FuncCallRefArgs") {}
2884
2885 static void add(X86TestApp& app) {
2886 app.add(new X86Test_FuncCallRefArgs());
2887 }
2888
2889 static int calledFunc(int& a, int& b, int& c, int& d) {
2890 a += a;
2891 b += b;
2892 c += c;
2893 d += d;
2894 return a + b + c + d;
2895 }
2896
2897 virtual void compile(x86::Compiler& cc) {
2898 cc.addFunc(FuncSignatureT<int, int&, int&, int&, int&>(CallConv::kIdHost));
2899
2900 // Prepare.
2901 x86::Gp arg1 = cc.newInt32();
2902 x86::Gp arg2 = cc.newInt32();
2903 x86::Gp arg3 = cc.newInt32();
2904 x86::Gp arg4 = cc.newInt32();
2905 x86::Gp rv = cc.newInt32("rv");
2906
2907 cc.setArg(0, arg1);
2908 cc.setArg(1, arg2);
2909 cc.setArg(2, arg3);
2910 cc.setArg(3, arg4);
2911
2912 // Call function.
2913 FuncCallNode* call = cc.call(
2914 imm((void*)calledFunc),
2915 FuncSignatureT<int, int&, int&, int&, int&>(CallConv::kIdHost));
2916
2917 call->setArg(0, arg1);
2918 call->setArg(1, arg2);
2919 call->setArg(2, arg3);
2920 call->setArg(3, arg4);
2921 call->setRet(0, rv);
2922
2923 cc.ret(rv);
2924 cc.endFunc();
2925 }
2926
2927 virtual bool run(void* _func, String& result, String& expect) {
2928 typedef int (*Func)(int&, int&, int&, int&);
2929 Func func = ptr_as_func<Func>(_func);
2930
2931 int inputs[4] = { 1, 2, 3, 4 };
2932 int outputs[4] = { 2, 4, 6, 8 };
2933 int resultRet = func(inputs[0], inputs[1], inputs[2], inputs[3]);
2934 int expectRet = 20;
2935
2936 result.assignFormat("ret={%08X %08X %08X %08X %08X}", resultRet, inputs[0], inputs[1], inputs[2], inputs[3]);
2937 expect.assignFormat("ret={%08X %08X %08X %08X %08X}", expectRet, outputs[0], outputs[1], outputs[2], outputs[3]);
2938
2939 return resultRet == expectRet;
2940 }
2941 };
2942
2943 // ============================================================================
2944 // [X86Test_FuncCallFloatAsXmmRet]
2945 // ============================================================================
2946
2947 class X86Test_FuncCallFloatAsXmmRet : public X86Test {
2948 public:
2949 X86Test_FuncCallFloatAsXmmRet() : X86Test("FuncCallFloatAsXmmRet") {}
2950
2951 static void add(X86TestApp& app) {
2952 app.add(new X86Test_FuncCallFloatAsXmmRet());
2953 }
2954
2955 static float calledFunc(float a, float b) {
2956 return a * b;
2957 }
2958
2959 virtual void compile(x86::Compiler& cc) {
2960 cc.addFunc(FuncSignatureT<float, float, float>(CallConv::kIdHost));
2961
2962 x86::Xmm a = cc.newXmmSs("a");
2963 x86::Xmm b = cc.newXmmSs("b");
2964 x86::Xmm ret = cc.newXmmSs("ret");
2965
2966 cc.setArg(0, a);
2967 cc.setArg(1, b);
2968
2969 // Call function.
2970 FuncCallNode* call = cc.call(
2971 imm((void*)calledFunc),
2972 FuncSignatureT<float, float, float>(CallConv::kIdHost));
2973 call->setArg(0, a);
2974 call->setArg(1, b);
2975 call->setRet(0, ret);
2976
2977 cc.ret(ret);
2978 cc.endFunc();
2979 }
2980
2981 virtual bool run(void* _func, String& result, String& expect) {
2982 typedef float (*Func)(float, float);
2983 Func func = ptr_as_func<Func>(_func);
2984
2985 float resultRet = func(15.5f, 2.0f);
2986 float expectRet = calledFunc(15.5f, 2.0f);
2987
2988 result.assignFormat("ret=%g", resultRet);
2989 expect.assignFormat("ret=%g", expectRet);
2990
2991 return resultRet == expectRet;
2992 }
2993 };
2994
2995 // ============================================================================
2996 // [X86Test_FuncCallDoubleAsXmmRet]
2997 // ============================================================================
2998
2999 class X86Test_FuncCallDoubleAsXmmRet : public X86Test {
3000 public:
3001 X86Test_FuncCallDoubleAsXmmRet() : X86Test("FuncCallDoubleAsXmmRet") {}
3002
3003 static void add(X86TestApp& app) {
3004 app.add(new X86Test_FuncCallDoubleAsXmmRet());
3005 }
3006
3007 static double calledFunc(double a, double b) {
3008 return a * b;
3009 }
3010
3011 virtual void compile(x86::Compiler& cc) {
3012 cc.addFunc(FuncSignatureT<double, double, double>(CallConv::kIdHost));
3013
3014 x86::Xmm a = cc.newXmmSd("a");
3015 x86::Xmm b = cc.newXmmSd("b");
3016 x86::Xmm ret = cc.newXmmSd("ret");
3017
3018 cc.setArg(0, a);
3019 cc.setArg(1, b);
3020
3021 FuncCallNode* call = cc.call(
3022 imm((void*)calledFunc),
3023 FuncSignatureT<double, double, double>(CallConv::kIdHost));
3024 call->setArg(0, a);
3025 call->setArg(1, b);
3026 call->setRet(0, ret);
3027
3028 cc.ret(ret);
3029 cc.endFunc();
3030 }
3031
3032 virtual bool run(void* _func, String& result, String& expect) {
3033 typedef double (*Func)(double, double);
3034 Func func = ptr_as_func<Func>(_func);
3035
3036 double resultRet = func(15.5, 2.0);
3037 double expectRet = calledFunc(15.5, 2.0);
3038
3039 result.assignFormat("ret=%g", resultRet);
3040 expect.assignFormat("ret=%g", expectRet);
3041
3042 return resultRet == expectRet;
3043 }
3044 };
3045
3046 // ============================================================================
3047 // [X86Test_FuncCallConditional]
3048 // ============================================================================
3049
class X86Test_FuncCallConditional : public X86Test {
public:
  X86Test_FuncCallConditional() : X86Test("FuncCallConditional") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallConditional());
  }

  // Generates `int f(int x, int y, int op)` that dispatches on `op`:
  //   op == 0 -> returns calledFuncAdd(x, y)
  //   op == 1 -> returns calledFuncMul(x, y)
  //   other   -> returns 0
  // Each path uses a fresh virtual register for the result so the register
  // allocator has to handle function calls on conditional branches.
  virtual void compile(x86::Compiler& cc) {
    x86::Gp x = cc.newInt32("x");
    x86::Gp y = cc.newInt32("y");
    x86::Gp op = cc.newInt32("op");

    FuncCallNode* call;
    x86::Gp result;

    cc.addFunc(FuncSignatureT<int, int, int, int>(CallConv::kIdHost));
    cc.setArg(0, x);
    cc.setArg(1, y);
    cc.setArg(2, op);

    Label opAdd = cc.newLabel();
    Label opMul = cc.newLabel();

    cc.cmp(op, 0);
    cc.jz(opAdd);
    cc.cmp(op, 1);
    cc.jz(opMul);

    // Fallthrough: unknown operation -> return 0.
    result = cc.newInt32("result_0");
    cc.mov(result, 0);
    cc.ret(result);

    // Add branch: result = calledFuncAdd(x, y).
    cc.bind(opAdd);
    result = cc.newInt32("result_1");

    call = cc.call((uint64_t)calledFuncAdd, FuncSignatureT<int, int, int>(CallConv::kIdHost));
    call->setArg(0, x);
    call->setArg(1, y);
    call->setRet(0, result);
    cc.ret(result);

    // Mul branch: result = calledFuncMul(x, y).
    cc.bind(opMul);
    result = cc.newInt32("result_2");

    call = cc.call((uint64_t)calledFuncMul, FuncSignatureT<int, int, int>(CallConv::kIdHost));
    call->setArg(0, x);
    call->setArg(1, y);
    call->setRet(0, result);

    cc.ret(result);
    cc.endFunc();
  }

  // Exercises both the add and the mul paths and compares against the
  // native helpers.
  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int, int);
    Func func = ptr_as_func<Func>(_func);

    int arg1 = 4;
    int arg2 = 8;

    int resultAdd = func(arg1, arg2, 0);
    int expectAdd = calledFuncAdd(arg1, arg2);

    int resultMul = func(arg1, arg2, 1);
    int expectMul = calledFuncMul(arg1, arg2);

    result.assignFormat("ret={add=%d, mul=%d}", resultAdd, resultMul);
    expect.assignFormat("ret={add=%d, mul=%d}", expectAdd, expectMul);

    return (resultAdd == expectAdd) && (resultMul == expectMul);
  }

  static int calledFuncAdd(int x, int y) { return x + y; }
  static int calledFuncMul(int x, int y) { return x * y; }
};
3126
3127 // ============================================================================
3128 // [X86Test_FuncCallMultiple]
3129 // ============================================================================
3130
class X86Test_FuncCallMultiple : public X86Test {
public:
  X86Test_FuncCallMultiple() : X86Test("FuncCallMultiple") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallMultiple());
  }

  // Fastcall callee: returns pInt[index]. The FASTCALL convention here must
  // match the kIdHostFastCall signatures used in compile() below.
  static int ASMJIT_FASTCALL calledFunc(int* pInt, int index) {
    return pInt[index];
  }

  // Generates `int f(int* buf)` that performs eight calls (two per loop
  // iteration): each element of buf[0..3] is added into acc0 and subtracted
  // from acc1, so acc0 + acc1 is always 0 regardless of the buffer contents.
  virtual void compile(x86::Compiler& cc) {
    unsigned int i;

    x86::Gp buf = cc.newIntPtr("buf");
    x86::Gp acc0 = cc.newInt32("acc0");
    x86::Gp acc1 = cc.newInt32("acc1");

    cc.addFunc(FuncSignatureT<int, int*>(CallConv::kIdHost));
    cc.setArg(0, buf);

    cc.mov(acc0, 0);
    cc.mov(acc1, 0);

    for (i = 0; i < 4; i++) {
      x86::Gp ret = cc.newInt32("ret");
      x86::Gp ptr = cc.newIntPtr("ptr");
      x86::Gp idx = cc.newInt32("idx");
      FuncCallNode* call;

      cc.mov(ptr, buf);
      cc.mov(idx, int(i));

      // acc0 += buf[i]
      call = cc.call((uint64_t)calledFunc, FuncSignatureT<int, int*, int>(CallConv::kIdHostFastCall));
      call->setArg(0, ptr);
      call->setArg(1, idx);
      call->setRet(0, ret);

      cc.add(acc0, ret);

      cc.mov(ptr, buf);
      cc.mov(idx, int(i));

      // acc1 -= buf[i]
      call = cc.call((uint64_t)calledFunc, FuncSignatureT<int, int*, int>(CallConv::kIdHostFastCall));
      call->setArg(0, ptr);
      call->setArg(1, idx);
      call->setRet(0, ret);

      cc.sub(acc1, ret);
    }

    cc.add(acc0, acc1);
    cc.ret(acc0);
    cc.endFunc();
  }

  // The additions and subtractions cancel out, so the expected result is 0.
  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int*);
    Func func = ptr_as_func<Func>(_func);

    int buffer[4] = { 127, 87, 23, 17 };

    int resultRet = func(buffer);
    int expectRet = 0;

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return resultRet == expectRet;
  }
};
3203
3204 // ============================================================================
3205 // [X86Test_FuncCallRecursive]
3206 // ============================================================================
3207
class X86Test_FuncCallRecursive : public X86Test {
public:
  X86Test_FuncCallRecursive() : X86Test("FuncCallRecursive") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallRecursive());
  }

  // Generates a recursive factorial: the function calls itself through its
  // own entry label (`func->label()`).
  virtual void compile(x86::Compiler& cc) {
    x86::Gp val = cc.newInt32("val");
    Label skip = cc.newLabel();

    FuncNode* func = cc.addFunc(FuncSignatureT<int, int>(CallConv::kIdHost));
    cc.setArg(0, val);

    // Base case: val <= 1 returns val unchanged.
    cc.cmp(val, 1);
    cc.jle(skip);

    x86::Gp tmp = cc.newInt32("tmp");
    cc.mov(tmp, val);
    cc.dec(tmp);

    // Recursive call: tmp = f(val - 1).
    FuncCallNode* call = cc.call(func->label(), FuncSignatureT<int, int>(CallConv::kIdHost));
    call->setArg(0, tmp);
    call->setRet(0, tmp);
    // Widening multiply (hi, lo, src): the anonymous register receives the
    // high half, `val` keeps the low 32 bits of val * tmp.
    cc.mul(cc.newInt32(), val, tmp);

    cc.bind(skip);
    cc.ret(val);
    cc.endFunc();
  }

  // f(5) must equal 5! == 120.
  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int);
    Func func = ptr_as_func<Func>(_func);

    int resultRet = func(5);
    int expectRet = 1 * 2 * 3 * 4 * 5;

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return resultRet == expectRet;
  }
};
3253
3254 // ============================================================================
3255 // [X86Test_FuncCallVarArg1]
3256 // ============================================================================
3257
class X86Test_FuncCallVarArg1 : public X86Test {
public:
  X86Test_FuncCallVarArg1() : X86Test("FuncCallVarArg1") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallVarArg1());
  }

  // Generates `int f(int, int, int, int)` that forwards all four arguments
  // to the variadic `calledFunc` (with the count 4 as the fixed argument)
  // and returns the sum it computes.
  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int, int, int, int, int>(CallConv::kIdHost));

    x86::Gp a0 = cc.newInt32("a0");
    x86::Gp a1 = cc.newInt32("a1");
    x86::Gp a2 = cc.newInt32("a2");
    x86::Gp a3 = cc.newInt32("a3");

    cc.setArg(0, a0);
    cc.setArg(1, a1);
    cc.setArg(2, a2);
    cc.setArg(3, a3);

    // We call `int func(size_t, ...)`
    // - The `vaIndex` must be 1 (first argument after size_t).
    // - The full signature of varargs (int, int, int, int) must follow.
    FuncCallNode* call = cc.call(
      imm((void*)calledFunc),
      FuncSignatureT<int, size_t, int, int, int, int>(CallConv::kIdHost, 1));
    call->setArg(0, imm(4));
    call->setArg(1, a0);
    call->setArg(2, a1);
    call->setArg(3, a2);
    call->setArg(4, a3);
    call->setRet(0, a0);

    cc.ret(a0);
    cc.endFunc();
  }

  // f(1, 2, 3, 4) must return 1 + 2 + 3 + 4 == 10.
  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int, int, int);
    Func func = ptr_as_func<Func>(_func);

    int resultRet = func(1, 2, 3, 4);
    int expectRet = 1 + 2 + 3 + 4;

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return resultRet == expectRet;
  }

  // Standard C variadic sum of `n` ints.
  static int calledFunc(size_t n, ...) {
    int sum = 0;
    va_list ap;
    va_start(ap, n);
    for (size_t i = 0; i < n; i++) {
      int arg = va_arg(ap, int);
      sum += arg;
    }
    va_end(ap);
    return sum;
  }
};
3321
3322 // ============================================================================
3323 // [X86Test_FuncCallVarArg2]
3324 // ============================================================================
3325
class X86Test_FuncCallVarArg2 : public X86Test {
public:
  X86Test_FuncCallVarArg2() : X86Test("FuncCallVarArg2") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallVarArg2());
  }

  // Same as FuncCallVarArg1, but with doubles — variadic doubles are passed
  // differently from ints on most ABIs, which is what this test covers.
  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<double, double, double, double, double>(CallConv::kIdHost));

    x86::Xmm a0 = cc.newXmmSd("a0");
    x86::Xmm a1 = cc.newXmmSd("a1");
    x86::Xmm a2 = cc.newXmmSd("a2");
    x86::Xmm a3 = cc.newXmmSd("a3");

    cc.setArg(0, a0);
    cc.setArg(1, a1);
    cc.setArg(2, a2);
    cc.setArg(3, a3);

    // We call `double func(size_t, ...)`
    // - The `vaIndex` must be 1 (first argument after size_t).
    // - The full signature of varargs (double, double, double, double) must follow.
    FuncCallNode* call = cc.call(
      imm((void*)calledFunc),
      FuncSignatureT<double, size_t, double, double, double, double>(CallConv::kIdHost, 1));
    call->setArg(0, imm(4));
    call->setArg(1, a0);
    call->setArg(2, a1);
    call->setArg(3, a2);
    call->setArg(4, a3);
    call->setRet(0, a0);

    cc.ret(a0);
    cc.endFunc();
  }

  // f(1.0, 2.0, 3.0, 4.0) must return 10.0.
  virtual bool run(void* _func, String& result, String& expect) {
    typedef double (*Func)(double, double, double, double);
    Func func = ptr_as_func<Func>(_func);

    double resultRet = func(1.0, 2.0, 3.0, 4.0);
    double expectRet = 1.0 + 2.0 + 3.0 + 4.0;

    result.assignFormat("ret=%f", resultRet);
    expect.assignFormat("ret=%f", expectRet);

    return resultRet == expectRet;
  }

  // Standard C variadic sum of `n` doubles.
  static double calledFunc(size_t n, ...) {
    double sum = 0;
    va_list ap;
    va_start(ap, n);
    for (size_t i = 0; i < n; i++) {
      double arg = va_arg(ap, double);
      sum += arg;
    }
    va_end(ap);
    return sum;
  }
};
3389
3390 // ============================================================================
3391 // [X86Test_FuncCallMisc1]
3392 // ============================================================================
3393
3394 class X86Test_FuncCallMisc1 : public X86Test {
3395 public:
3396 X86Test_FuncCallMisc1() : X86Test("FuncCallMisc1") {}
3397
3398 static void add(X86TestApp& app) {
3399 app.add(new X86Test_FuncCallMisc1());
3400 }
3401
3402 static void dummy(int, int) {}
3403
3404 virtual void compile(x86::Compiler& cc) {
3405 cc.addFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));
3406
3407 x86::Gp a = cc.newInt32("a");
3408 x86::Gp b = cc.newInt32("b");
3409 x86::Gp r = cc.newInt32("r");
3410
3411 cc.setArg(0, a);
3412 cc.setArg(1, b);
3413
3414 FuncCallNode* call = cc.call(
3415 imm((void*)dummy),
3416 FuncSignatureT<void, int, int>(CallConv::kIdHost));
3417 call->setArg(0, a);
3418 call->setArg(1, b);
3419
3420 cc.lea(r, x86::ptr(a, b));
3421 cc.ret(r);
3422
3423 cc.endFunc();
3424 }
3425
3426 virtual bool run(void* _func, String& result, String& expect) {
3427 typedef int (*Func)(int, int);
3428 Func func = ptr_as_func<Func>(_func);
3429
3430 int resultRet = func(44, 199);
3431 int expectRet = 243;
3432
3433 result.assignFormat("ret=%d", resultRet);
3434 expect.assignFormat("ret=%d", expectRet);
3435
3436 return resultRet == expectRet;
3437 }
3438 };
3439
3440 // ============================================================================
3441 // [X86Test_FuncCallMisc2]
3442 // ============================================================================
3443
3444 class X86Test_FuncCallMisc2 : public X86Test {
3445 public:
3446 X86Test_FuncCallMisc2() : X86Test("FuncCallMisc2") {}
3447
3448 static void add(X86TestApp& app) {
3449 app.add(new X86Test_FuncCallMisc2());
3450 }
3451
3452 virtual void compile(x86::Compiler& cc) {
3453 cc.addFunc(FuncSignatureT<double, const double*>(CallConv::kIdHost));
3454
3455 x86::Gp p = cc.newIntPtr("p");
3456 x86::Xmm arg = cc.newXmmSd("arg");
3457 x86::Xmm ret = cc.newXmmSd("ret");
3458
3459 cc.setArg(0, p);
3460 cc.movsd(arg, x86::ptr(p));
3461
3462 FuncCallNode* call = cc.call(
3463 imm((void*)op),
3464 FuncSignatureT<double, double>(CallConv::kIdHost));
3465 call->setArg(0, arg);
3466 call->setRet(0, ret);
3467
3468 cc.ret(ret);
3469 cc.endFunc();
3470 }
3471
3472 virtual bool run(void* _func, String& result, String& expect) {
3473 typedef double (*Func)(const double*);
3474 Func func = ptr_as_func<Func>(_func);
3475
3476 double arg = 2;
3477
3478 double resultRet = func(&arg);
3479 double expectRet = op(arg);
3480
3481 result.assignFormat("ret=%g", resultRet);
3482 expect.assignFormat("ret=%g", expectRet);
3483
3484 return resultRet == expectRet;
3485 }
3486
3487 static double op(double a) { return a * a; }
3488 };
3489
3490 // ============================================================================
3491 // [X86Test_FuncCallMisc3]
3492 // ============================================================================
3493
class X86Test_FuncCallMisc3 : public X86Test {
public:
  X86Test_FuncCallMisc3() : X86Test("FuncCallMisc3") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallMisc3());
  }

  // Like FuncCallMisc2, but negates the call's result before returning it,
  // i.e. returns -op(*p). `arg` is redefined after the call consumed it,
  // presumably to exercise virtual-register reuse across a call.
  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<double, const double*>(CallConv::kIdHost));

    x86::Gp p = cc.newIntPtr("p");
    x86::Xmm arg = cc.newXmmSd("arg");
    x86::Xmm ret = cc.newXmmSd("ret");

    cc.setArg(0, p);
    cc.movsd(arg, x86::ptr(p));

    FuncCallNode* call = cc.call(
      imm((void*)op),
      FuncSignatureT<double, double>(CallConv::kIdHost));
    call->setArg(0, arg);
    call->setRet(0, ret);

    // Negation idiom: zero `arg` then compute arg = 0.0 - ret.
    cc.xorps(arg, arg);
    cc.subsd(arg, ret);

    cc.ret(arg);
    cc.endFunc();
  }

  // f(&x) must equal -op(x).
  virtual bool run(void* _func, String& result, String& expect) {
    typedef double (*Func)(const double*);
    Func func = ptr_as_func<Func>(_func);

    double arg = 2;

    double resultRet = func(&arg);
    double expectRet = -op(arg);

    result.assignFormat("ret=%g", resultRet);
    expect.assignFormat("ret=%g", expectRet);

    return resultRet == expectRet;
  }

  static double op(double a) { return a * a; }
};
3542
3543 // ============================================================================
3544 // [X86Test_FuncCallMisc4]
3545 // ============================================================================
3546
3547 class X86Test_FuncCallMisc4 : public X86Test {
3548 public:
3549 X86Test_FuncCallMisc4() : X86Test("FuncCallMisc4") {}
3550
3551 static void add(X86TestApp& app) {
3552 app.add(new X86Test_FuncCallMisc4());
3553 }
3554
3555 virtual void compile(x86::Compiler& cc) {
3556 FuncSignatureBuilder funcPrototype;
3557 funcPrototype.setCallConv(CallConv::kIdHost);
3558 funcPrototype.setRet(Type::kIdF64);
3559 cc.addFunc(funcPrototype);
3560
3561 FuncSignatureBuilder callPrototype;
3562 callPrototype.setCallConv(CallConv::kIdHost);
3563 callPrototype.setRet(Type::kIdF64);
3564 FuncCallNode* call = cc.call(imm((void*)calledFunc), callPrototype);
3565
3566 x86::Xmm ret = cc.newXmmSd("ret");
3567 call->setRet(0, ret);
3568 cc.ret(ret);
3569
3570 cc.endFunc();
3571 }
3572
3573 virtual bool run(void* _func, String& result, String& expect) {
3574 typedef double (*Func)(void);
3575 Func func = ptr_as_func<Func>(_func);
3576
3577 double resultRet = func();
3578 double expectRet = 3.14;
3579
3580 result.assignFormat("ret=%g", resultRet);
3581 expect.assignFormat("ret=%g", expectRet);
3582
3583 return resultRet == expectRet;
3584 }
3585
3586 static double calledFunc() { return 3.14; }
3587 };
3588
3589 // ============================================================================
3590 // [X86Test_FuncCallMisc5]
3591 // ============================================================================
3592
// The register allocator should clobber the register used by the `call` itself.
class X86Test_FuncCallMisc5 : public X86Test {
public:
  X86Test_FuncCallMisc5() : X86Test("FuncCallMisc5") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_FuncCallMisc5());
  }

  // Fills every allocatable GP register with 1, performs an indirect call
  // through `pFn`, then sums the registers and returns the total — all the
  // values must survive the call even though `pFn` itself is clobbered.
  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));

    x86::Gp pFn = cc.newIntPtr("pFn");
    x86::Gp vars[16];

    uint32_t i, regCount = cc.gpCount();
    ASMJIT_ASSERT(regCount <= ASMJIT_ARRAY_SIZE(vars));

    cc.mov(pFn, imm((void*)calledFunc));

    // Skip the stack and frame pointers; their `vars` slots stay
    // default-constructed (invalid) and are filtered out after the call.
    for (i = 0; i < regCount; i++) {
      if (i == x86::Gp::kIdBp || i == x86::Gp::kIdSp)
        continue;

      vars[i] = cc.newInt32("%%%u", unsigned(i));
      cc.mov(vars[i], 1);
    }

    cc.call(pFn, FuncSignatureT<void>(CallConv::kIdHost));
    for (i = 1; i < regCount; i++)
      if (vars[i].isValid())
        cc.add(vars[0], vars[i]);
    cc.ret(vars[0]);

    cc.endFunc();
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(void);
    Func func = ptr_as_func<Func>(_func);

    int resultRet = func();
    // Every GP register except SP/BP holds 1, so the sum is regCount - 2:
    // 8 - 2 == 6 on 32-bit targets, 16 - 2 == 14 on 64-bit targets.
    int expectRet = sizeof(void*) == 4 ? 6 : 14;

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return resultRet == expectRet;
  }

  static void calledFunc() {}
};
3645
3646 // ============================================================================
3647 // [X86Test_MiscLocalConstPool]
3648 // ============================================================================
3649
3650 class X86Test_MiscLocalConstPool : public X86Test {
3651 public:
3652 X86Test_MiscLocalConstPool() : X86Test("MiscLocalConstPool") {}
3653
3654 static void add(X86TestApp& app) {
3655 app.add(new X86Test_MiscLocalConstPool());
3656 }
3657
3658 virtual void compile(x86::Compiler& cc) {
3659 cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));
3660
3661 x86::Gp v0 = cc.newInt32("v0");
3662 x86::Gp v1 = cc.newInt32("v1");
3663
3664 x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeLocal, 200);
3665 x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeLocal, 33);
3666
3667 cc.mov(v0, c0);
3668 cc.mov(v1, c1);
3669 cc.add(v0, v1);
3670
3671 cc.ret(v0);
3672 cc.endFunc();
3673 }
3674
3675 virtual bool run(void* _func, String& result, String& expect) {
3676 typedef int (*Func)(void);
3677 Func func = ptr_as_func<Func>(_func);
3678
3679 int resultRet = func();
3680 int expectRet = 233;
3681
3682 result.assignFormat("ret=%d", resultRet);
3683 expect.assignFormat("ret=%d", expectRet);
3684
3685 return resultRet == expectRet;
3686 }
3687 };
3688
3689 // ============================================================================
3690 // [X86Test_MiscGlobalConstPool]
3691 // ============================================================================
3692
3693 class X86Test_MiscGlobalConstPool : public X86Test {
3694 public:
3695 X86Test_MiscGlobalConstPool() : X86Test("MiscGlobalConstPool") {}
3696
3697 static void add(X86TestApp& app) {
3698 app.add(new X86Test_MiscGlobalConstPool());
3699 }
3700
3701 virtual void compile(x86::Compiler& cc) {
3702 cc.addFunc(FuncSignatureT<int>(CallConv::kIdHost));
3703
3704 x86::Gp v0 = cc.newInt32("v0");
3705 x86::Gp v1 = cc.newInt32("v1");
3706
3707 x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeGlobal, 200);
3708 x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeGlobal, 33);
3709
3710 cc.mov(v0, c0);
3711 cc.mov(v1, c1);
3712 cc.add(v0, v1);
3713
3714 cc.ret(v0);
3715 cc.endFunc();
3716 }
3717
3718 virtual bool run(void* _func, String& result, String& expect) {
3719 typedef int (*Func)(void);
3720 Func func = ptr_as_func<Func>(_func);
3721
3722 int resultRet = func();
3723 int expectRet = 233;
3724
3725 result.assignFormat("ret=%d", resultRet);
3726 expect.assignFormat("ret=%d", expectRet);
3727
3728 return resultRet == expectRet;
3729 }
3730 };
3731
3732 // ============================================================================
3733 // [X86Test_MiscMultiRet]
3734 // ============================================================================
3735
struct X86Test_MiscMultiRet : public X86Test {
  X86Test_MiscMultiRet() : X86Test("MiscMultiRet") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_MiscMultiRet());
  }

  // Generates `int f(int op, int a, int b)` with multiple `ret` sites —
  // one per arithmetic operation (add/sub/mul/div) plus a zero fallback.
  virtual void compile(x86::Compiler& cc) {
    cc.addFunc(FuncSignatureT<int, int, int, int>(CallConv::kIdHost));

    x86::Gp op = cc.newInt32("op");
    x86::Gp a = cc.newInt32("a");
    x86::Gp b = cc.newInt32("b");

    Label L_Zero = cc.newLabel();
    Label L_Add = cc.newLabel();
    Label L_Sub = cc.newLabel();
    Label L_Mul = cc.newLabel();
    Label L_Div = cc.newLabel();

    cc.setArg(0, op);
    cc.setArg(1, a);
    cc.setArg(2, b);

    cc.cmp(op, 0);
    cc.jz(L_Add);

    cc.cmp(op, 1);
    cc.jz(L_Sub);

    cc.cmp(op, 2);
    cc.jz(L_Mul);

    cc.cmp(op, 3);
    cc.jz(L_Div);

    // Unknown op (and division by zero, see below) returns 0.
    cc.bind(L_Zero);
    cc.xor_(a, a);
    cc.ret(a);

    cc.bind(L_Add);
    cc.add(a, b);
    cc.ret(a);

    cc.bind(L_Sub);
    cc.sub(a, b);
    cc.ret(a);

    cc.bind(L_Mul);
    cc.imul(a, b);
    cc.ret(a);

    cc.bind(L_Div);
    cc.cmp(b, 0);
    cc.jz(L_Zero);

    // idiv(hi, lo, src) divides zero:a by b; quotient ends up in `a`,
    // remainder in `zero`. NOTE(review): zeroing the high half is only
    // correct for non-negative dividends, which is all this test uses.
    x86::Gp zero = cc.newInt32("zero");
    cc.xor_(zero, zero);
    cc.idiv(zero, a, b);
    cc.ret(a);

    cc.endFunc();
  }

  // Exercises all four operations and compares the formatted outputs.
  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int, int);

    Func func = ptr_as_func<Func>(_func);

    int a = 44;
    int b = 3;

    int r0 = func(0, a, b);
    int r1 = func(1, a, b);
    int r2 = func(2, a, b);
    int r3 = func(3, a, b);
    int e0 = a + b;
    int e1 = a - b;
    int e2 = a * b;
    int e3 = a / b;

    result.assignFormat("ret={%d %d %d %d}", r0, r1, r2, r3);
    expect.assignFormat("ret={%d %d %d %d}", e0, e1, e2, e3);

    return result.eq(expect);
  }
};
3823
3824 // ============================================================================
3825 // [X86Test_MiscMultiFunc]
3826 // ============================================================================
3827
class X86Test_MiscMultiFunc : public X86Test {
public:
  X86Test_MiscMultiFunc() : X86Test("MiscMultiFunc") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_MiscMultiFunc());
  }

  // Compiles two functions into a single code holder: f1(a, b) calls f2
  // through its entry label and returns the result; f2(a, b) returns a + b.
  // Both share the same signature; the harness entry point is expected to
  // be f1, which is added first.
  virtual void compile(x86::Compiler& cc) {
    FuncNode* f1 = cc.newFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));
    FuncNode* f2 = cc.newFunc(FuncSignatureT<int, int, int>(CallConv::kIdHost));

    {
      x86::Gp a = cc.newInt32("a");
      x86::Gp b = cc.newInt32("b");

      cc.addFunc(f1);
      cc.setArg(0, a);
      cc.setArg(1, b);

      // Cross-function call: the target is f2's label, not an address.
      FuncCallNode* call = cc.call(f2->label(), FuncSignatureT<int, int, int>(CallConv::kIdHost));
      call->setArg(0, a);
      call->setArg(1, b);
      call->setRet(0, a);

      cc.ret(a);
      cc.endFunc();
    }

    {
      x86::Gp a = cc.newInt32("a");
      x86::Gp b = cc.newInt32("b");

      cc.addFunc(f2);
      cc.setArg(0, a);
      cc.setArg(1, b);

      cc.add(a, b);
      cc.ret(a);
      cc.endFunc();
    }
  }

  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (*Func)(int, int);

    Func func = ptr_as_func<Func>(_func);

    int resultRet = func(56, 22);
    int expectRet = 56 + 22;

    result.assignFormat("ret=%d", resultRet);
    expect.assignFormat("ret=%d", expectRet);

    return result.eq(expect);
  }
};
3885
3886 // ============================================================================
3887 // [X86Test_MiscUnfollow]
3888 // ============================================================================
3889
// Global jump buffer shared between X86Test_MiscUnfollow::run() (which sets
// it via setjmp) and handler() (which escapes back via longjmp). A global is
// the simplest way to test this, since the handler takes no arguments.
static jmp_buf globalJmpBuf;
3892
class X86Test_MiscUnfollow : public X86Test {
public:
  X86Test_MiscUnfollow() : X86Test("MiscUnfollow") {}

  static void add(X86TestApp& app) {
    app.add(new X86Test_MiscUnfollow());
  }

  // Generates `int f(int a, void* b)`: returns `a` when non-zero, otherwise
  // jumps to the external address in `b`. The jump is marked `unfollow()`
  // so the compiler does not treat its (external) target as part of this
  // function's control flow.
  virtual void compile(x86::Compiler& cc) {
    // NOTE: Fastcall calling convention is the most appropriate here, as all
    // arguments will be passed by registers and there won't be any stack
    // misalignment when we call the `handler()`. This was failing on OSX
    // when targeting 32-bit.
    cc.addFunc(FuncSignatureT<int, int, void*>(CallConv::kIdHostFastCall));

    x86::Gp a = cc.newInt32("a");
    x86::Gp b = cc.newIntPtr("b");
    Label tramp = cc.newLabel();

    cc.setArg(0, a);
    cc.setArg(1, b);

    cc.cmp(a, 0);
    cc.jz(tramp);

    cc.ret(a);

    cc.bind(tramp);
    cc.unfollow().jmp(b);

    cc.endFunc();
  }

  // Calls f(0, handler): the jitted code jumps into handler(), which
  // longjmp()s back here, and resultRet is set to 1 on the second return
  // from setjmp().
  virtual bool run(void* _func, String& result, String& expect) {
    typedef int (ASMJIT_FASTCALL *Func)(int, void*);

    Func func = ptr_as_func<Func>(_func);

    int resultRet = 0;
    int expectRet = 1;

    if (!setjmp(globalJmpBuf))
      resultRet = func(0, (void*)handler);
    else
      resultRet = 1;

    result.assignFormat("ret={%d}", resultRet);
    expect.assignFormat("ret={%d}", expectRet);

    return resultRet == expectRet;
  }

  static void ASMJIT_FASTCALL handler() { longjmp(globalJmpBuf, 1); }
};
3947
3948 // ============================================================================
3949 // [Main]
3950 // ============================================================================
3951
// Registers every compiler test in the order it should run and executes the
// whole suite; the app's exit code is propagated to the OS.
int main(int argc, char* argv[]) {
  X86TestApp app;

  app.handleArgs(argc, argv);
  app.showInfo();

  // Base tests.
  app.addT<X86Test_NoCode>();
  app.addT<X86Test_NoAlign>();
  app.addT<X86Test_AlignBase>();

  // Jump tests.
  app.addT<X86Test_JumpMerge>();
  app.addT<X86Test_JumpCross>();
  app.addT<X86Test_JumpMany>();
  app.addT<X86Test_JumpUnreachable1>();
  app.addT<X86Test_JumpUnreachable2>();

  // Alloc tests.
  app.addT<X86Test_AllocBase>();
  app.addT<X86Test_AllocMany1>();
  app.addT<X86Test_AllocMany2>();
  app.addT<X86Test_AllocImul1>();
  app.addT<X86Test_AllocImul2>();
  app.addT<X86Test_AllocIdiv1>();
  app.addT<X86Test_AllocSetz>();
  app.addT<X86Test_AllocShlRor>();
  app.addT<X86Test_AllocGpbLo1>();
  app.addT<X86Test_AllocGpbLo2>();
  app.addT<X86Test_AllocRepMovsb>();
  app.addT<X86Test_AllocIfElse1>();
  app.addT<X86Test_AllocIfElse2>();
  app.addT<X86Test_AllocIfElse3>();
  app.addT<X86Test_AllocIfElse4>();
  app.addT<X86Test_AllocInt8>();
  app.addT<X86Test_AllocUnhandledArg>();
  app.addT<X86Test_AllocArgsIntPtr>();
  app.addT<X86Test_AllocArgsFloat>();
  app.addT<X86Test_AllocArgsDouble>();
  app.addT<X86Test_AllocRetFloat1>();
  app.addT<X86Test_AllocRetFloat2>();
  app.addT<X86Test_AllocRetDouble1>();
  app.addT<X86Test_AllocRetDouble2>();
  app.addT<X86Test_AllocStack>();
  app.addT<X86Test_AllocMemcpy>();
  app.addT<X86Test_AllocExtraBlock>();
  app.addT<X86Test_AllocAlphaBlend>();

  // Function call tests.
  app.addT<X86Test_FuncCallBase1>();
  app.addT<X86Test_FuncCallBase2>();
  app.addT<X86Test_FuncCallStd>();
  app.addT<X86Test_FuncCallFast>();
  app.addT<X86Test_FuncCallLight>();
  app.addT<X86Test_FuncCallManyArgs>();
  app.addT<X86Test_FuncCallDuplicateArgs>();
  app.addT<X86Test_FuncCallImmArgs>();
  app.addT<X86Test_FuncCallPtrArgs>();
  app.addT<X86Test_FuncCallRefArgs>();
  app.addT<X86Test_FuncCallFloatAsXmmRet>();
  app.addT<X86Test_FuncCallDoubleAsXmmRet>();
  app.addT<X86Test_FuncCallConditional>();
  app.addT<X86Test_FuncCallMultiple>();
  app.addT<X86Test_FuncCallRecursive>();
  app.addT<X86Test_FuncCallVarArg1>();
  app.addT<X86Test_FuncCallVarArg2>();
  app.addT<X86Test_FuncCallMisc1>();
  app.addT<X86Test_FuncCallMisc2>();
  app.addT<X86Test_FuncCallMisc3>();
  app.addT<X86Test_FuncCallMisc4>();
  app.addT<X86Test_FuncCallMisc5>();

  // Miscellaneous tests.
  app.addT<X86Test_MiscLocalConstPool>();
  app.addT<X86Test_MiscGlobalConstPool>();
  app.addT<X86Test_MiscMultiRet>();
  app.addT<X86Test_MiscMultiFunc>();
  app.addT<X86Test_MiscUnfollow>();

  return app.run();
}
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
// This is a working example that demonstrates how multiple sections can be
// used in a JIT-based code generator. It also shows the tooling that the
// user is expected to provide when the feature is used. It's important to
// handle the following cases:
10 //
11 // - Assign offsets to sections when the code generation is finished.
12 // - Tell the CodeHolder to resolve unresolved links and check whether
13 // all links were resolved.
14 // - Relocate the code
15 // - Copy the code to the location you want.
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20
21 #include "./asmjit.h"
22
23 using namespace asmjit;
24
// The generated function is very simple: it only accesses the built-in data
// (from the .data section) at the index provided by its first argument. The
// data is inlined into the resulting function, so we can use this array to
// verify that the function returns correct values.
static const uint8_t dataArray[] = { 2, 9, 4, 7, 1, 3, 8, 5, 6, 0 };
30
31 static void fail(const char* message, Error err) {
32 printf("%s: %s\n", message, DebugUtils::errorAsString(err));
33 exit(1);
34 }
35
int main(int argc, char* argv[]) {
  ASMJIT_UNUSED(argc);
  ASMJIT_UNUSED(argv);

  // Code will be generated for the host architecture and executed in-process.
  CodeInfo codeInfo(ArchInfo::kIdHost);
  JitAllocator allocator;

  // Log the generated assembly to stdout (code lines indented by 2 spaces).
  FileLogger logger(stdout);
  logger.setIndentation(FormatOptions::kIndentationCode, 2);

  CodeHolder code;
  code.init(codeInfo);
  code.setLogger(&logger);

  // Create an additional `.data` section next to the default `.text` section.
  // SIZE_MAX presumably means the name is null-terminated; the trailing `8`
  // looks like the section alignment — TODO confirm against CodeHolder docs.
  Section* dataSection;
  Error err = code.newSection(&dataSection, ".data", SIZE_MAX, 0, 8);

  if (err) {
    fail("Failed to create a .data section", err);
  }
  else {
    printf("Generating code:\n");
    x86::Assembler a(&code);
    x86::Gp idx = a.zax();
    x86::Gp addr = a.zcx();

    // Label bound in `.data` that marks the start of the embedded array.
    Label data = a.newLabel();

    FuncDetail func;
    func.init(FuncSignatureT<size_t, size_t>(CallConv::kIdHost));

    FuncFrame frame;
    frame.init(func);
    frame.addDirtyRegs(idx, addr);

    // Move the first (and only) argument into `idx`.
    FuncArgsAssignment args(&func);
    args.assignAll(idx);
    args.updateFuncFrame(frame);
    frame.finalize();

    a.emitProlog(frame);
    a.emitArgsAssignment(frame, args);

    // Load dataArray[idx] — `lea` forms a cross-section reference to `data`.
    a.lea(addr, x86::ptr(data));
    a.movzx(idx, x86::byte_ptr(addr, idx));

    a.emitEpilog(frame);

    // Switch to `.data` and embed the array the function indexes into.
    a.section(dataSection);
    a.bind(data);

    a.embed(dataArray, sizeof(dataArray));
  }

  // Manually change the offsets of each section, start at 0. This code is very
  // similar to what `CodeHolder::flatten()` does, however, it's shown here
  // how to do it explicitly.
  printf("\nCalculating section offsets:\n");
  uint64_t offset = 0;
  for (Section* section : code.sections()) {
    offset = Support::alignUp(offset, section->alignment());
    section->setOffset(offset);
    offset += section->realSize();

    printf("  [0x%08X %s] {Id=%u Size=%u}\n",
      uint32_t(section->offset()),
      section->name(),
      section->id(),
      uint32_t(section->realSize()));
  }
  size_t codeSize = size_t(offset);
  printf("  Final code size: %zu\n", codeSize);

  // Resolve cross-section links (if any). On 32-bit X86 this is not necessary
  // as this is handled through relocations as the addressing is different.
  if (code.hasUnresolvedLinks()) {
    printf("\nResolving cross-section links:\n");
    printf("  Before 'resolveUnresolvedLinks()': %zu\n", code.unresolvedLinkCount());

    err = code.resolveUnresolvedLinks();
    if (err)
      fail("Failed to resolve cross-section links", err);
    printf("  After 'resolveUnresolvedLinks()': %zu\n", code.unresolvedLinkCount());
  }

  // Allocate memory for the function and relocate it there.
  void* roPtr;
  void* rwPtr;
  err = allocator.alloc(&roPtr, &rwPtr, codeSize);
  if (err)
    fail("Failed to allocate executable memory", err);

  // Relocate to the base-address of the allocated memory.
  // NOTE(review): the Error returned by relocateToBase() is ignored here —
  // consider checking it like the other calls above.
  code.relocateToBase(uint64_t(uintptr_t(roPtr)));

  // Copy the flattened code into `mem.rw`. There are two ways. You can either copy
  // everything manually by iterating over all sections or use `copyFlattenedData`.
  // This code is similar to what `copyFlattenedData(p, codeSize, 0)` would do:
  for (Section* section : code.sections())
    memcpy(static_cast<uint8_t*>(rwPtr) + size_t(section->offset()), section->data(), section->bufferSize());

  // Execute the function and test whether it works.
  typedef size_t (*Func)(size_t idx);
  Func fn = (Func)roPtr;

  printf("\nTesting the generated function:\n");
  if (fn(0) != dataArray[0] ||
      fn(3) != dataArray[3] ||
      fn(6) != dataArray[6] ||
      fn(9) != dataArray[9] ) {
    printf("  [FAILED] The generated function returned incorrect result(s)\n");
    return 1;
  }
  else {
    printf("  [PASSED] The generated function returned expected results\n");
  }

  allocator.release((void*)fn);
  return 0;
}
0 // [Broken]
1 // Lightweight Unit Testing for C++.
2 //
3 // [License]
4 // Public Domain (Unlicense)
5
6 #include "./broken.h"
7 #include <stdarg.h>
8
9 // ============================================================================
10 // [Broken - Global]
11 // ============================================================================
12
13 // Zero initialized globals.
14 struct BrokenGlobal {
15 // Application arguments.
16 int _argc;
17 const char** _argv;
18
19 // Output file.
20 FILE* _file;
21
22 // Unit tests.
23 BrokenAPI::Unit* _unitList;
24 BrokenAPI::Unit* _unitRunning;
25
26 bool hasArg(const char* a) const noexcept {
27 for (int i = 1; i < _argc; i++)
28 if (strcmp(_argv[i], a) == 0)
29 return true;
30 return false;
31 }
32
33 inline FILE* file() const noexcept { return _file ? _file : stdout; }
34 };
35 static BrokenGlobal _brokenGlobal;
36
37 // ============================================================================
38 // [Broken - API]
39 // ============================================================================
40
// Returns true if the string `a` starts with the string `b`.
static bool BrokenAPI_startsWith(const char* a, const char* b) noexcept {
  size_t i = 0;
  while (b[i] != '\0') {
    if (a[i] != b[i])
      return false;
    i++;
  }
  return true;
}
48
49 //! Compares names and priority of two unit tests.
50 static int BrokenAPI_compareUnits(const BrokenAPI::Unit* a, const BrokenAPI::Unit* b) noexcept {
51 if (a->priority == b->priority)
52 return strcmp(a->name, b->name);
53 else
54 return a->priority > b->priority ? 1 : -1;
55 }
56
// Normalizes a character for filter comparison: lower-cases ASCII letters
// and folds '-' into '_'.
static int BrokenAPI_normalizeFilterChar(int c) noexcept {
  if (c == '-')
    return '_';
  if (c >= 'A' && c <= 'Z')
    return c + ('a' - 'A');
  return c;
}

// Returns true if `a` matches filter `b`: the comparison is case-insensitive,
// treats `-` and `_` as equal, and a `*` in `b` matches everything after it.
static bool BrokenAPI_matchesFilter(const char* a, const char* b) noexcept {
  size_t i = 0;
  for (;;) {
    int cb = (unsigned char)b[i];

    // If filter is defined as wildcard the rest automatically matches.
    if (cb == '*')
      return true;

    int ca = BrokenAPI_normalizeFilterChar((unsigned char)a[i]);
    cb = BrokenAPI_normalizeFilterChar(cb);

    if (ca != cb)
      return false;

    if (ca == '\0')
      return true;

    i++;
  }
}
81
82 static bool BrokenAPI_canRun(BrokenAPI::Unit* unit) noexcept {
83 BrokenGlobal& global = _brokenGlobal;
84
85 int i, argc = global._argc;
86 const char** argv = global._argv;
87
88 const char* unitName = unit->name;
89 bool hasFilter = false;
90
91 for (i = 1; i < argc; i++) {
92 const char* arg = argv[i];
93
94 if (BrokenAPI_startsWith(arg, "--run-") && strcmp(arg, "--run-all") != 0) {
95 hasFilter = true;
96
97 if (BrokenAPI_matchesFilter(unitName, arg + 6))
98 return true;
99 }
100 }
101
102 // If no filter has been specified the default is to run.
103 return !hasFilter;
104 }
105
106 static void BrokenAPI_runUnit(BrokenAPI::Unit* unit) noexcept {
107 BrokenAPI::info("Running %s", unit->name);
108
109 _brokenGlobal._unitRunning = unit;
110 unit->entry();
111 _brokenGlobal._unitRunning = NULL;
112 }
113
// Runs every registered unit test that passes the command-line filter and
// prints a summary. Units are already sorted by (priority, name) — see
// BrokenAPI::add() — so priority groups appear in order.
static void BrokenAPI_runAll() noexcept {
  BrokenAPI::Unit* unit = _brokenGlobal._unitList;

  bool hasUnits = unit != NULL;
  size_t count = 0;
  int currentPriority = 0;

  while (unit != NULL) {
    if (BrokenAPI_canRun(unit)) {
      // Print a "[[Priority=N]]" banner whenever the priority changes.
      // NOTE: since currentPriority starts at 0, a leading group of
      // priority-0 units gets no banner.
      if (currentPriority != unit->priority) {
        if (count)
          INFO("");
        INFO("[[Priority=%d]]", unit->priority);
      }

      currentPriority = unit->priority;
      BrokenAPI_runUnit(unit);
      count++;
    }
    unit = unit->next;
  }

  // A failing test exits the process (see BrokenAPI::fail), so reaching this
  // point means every executed test passed.
  if (count) {
    INFO("\nSuccess:");
    INFO("  All tests passed!");
  }
  else {
    INFO("\nWarning:");
    INFO("  No units %s!", hasUnits ? "matched the filter" : "defined");
  }
}
145
146 static void BrokenAPI_listAll() noexcept {
147 BrokenAPI::Unit* unit = _brokenGlobal._unitList;
148
149 if (unit != NULL) {
150 INFO("Units:");
151 do {
152 INFO(" %s [priority=%d]", unit->name, unit->priority);
153 unit = unit->next;
154 } while (unit != NULL);
155 }
156 else {
157 INFO("Warning:");
158 INFO(" No units defined!");
159 }
160 }
161
// Returns true if `name` was passed on the command line (exact match).
bool BrokenAPI::hasArg(const char* name) noexcept {
  return _brokenGlobal.hasArg(name);
}
165
// Registers `unit` by inserting it into the global singly-linked list, kept
// sorted by (priority, name) via BrokenAPI_compareUnits().
void BrokenAPI::add(Unit* unit) noexcept {
  Unit** pPrev = &_brokenGlobal._unitList;
  Unit* current = *pPrev;

  // C++ static initialization doesn't guarantee anything. We sort all units by
  // name so the execution will always happen in deterministic order.
  while (current != NULL) {
    // Stop at the first unit that should come after (or compares equal to)
    // the new one; `pPrev` then points at the link to patch.
    if (BrokenAPI_compareUnits(current, unit) >= 0)
      break;

    pPrev = &current->next;
    current = *pPrev;
  }

  // Splice the new unit in front of `current`.
  *pPrev = unit;
  unit->next = current;
}
183
184 void BrokenAPI::setOutputFile(FILE* file) noexcept {
185 BrokenGlobal& global = _brokenGlobal;
186
187 global._file = file;
188 }
189
// Entry point of the test framework: parses the recognized command-line
// options, runs the selected unit tests and returns the process exit code
// (0 on success; failing tests terminate via exit(1) in BrokenAPI::fail).
int BrokenAPI::run(int argc, const char* argv[], Entry onBeforeRun, Entry onAfterRun) {
  BrokenGlobal& global = _brokenGlobal;

  // Stash the arguments so filters can be evaluated later (BrokenAPI_canRun).
  global._argc = argc;
  global._argv = argv;

  if (global.hasArg("--help")) {
    INFO("Options:");
    INFO("  --help    - print this usage");
    INFO("  --list    - list all tests");
    INFO("  --run-... - run a test(s), trailing wildcards supported");
    INFO("  --run-all - run all tests (default)");
    return 0;
  }

  if (global.hasArg("--list")) {
    BrokenAPI_listAll();
    return 0;
  }

  // Optional user hook executed before any test runs.
  if (onBeforeRun)
    onBeforeRun();

  // We don't care about filters here, it's implemented by `runAll`.
  BrokenAPI_runAll();

  // Optional user hook executed after all tests passed.
  if (onAfterRun)
    onAfterRun();

  return 0;
}
221
// Prints `prefix` followed by the formatted message, guaranteeing a trailing
// newline, using a single vfprintf() call (important for multithreaded code,
// see below). An empty/null `fmt` prints just a newline.
static void BrokenAPI_printMessage(const char* prefix, const char* fmt, va_list ap) noexcept {
  BrokenGlobal& global = _brokenGlobal;
  FILE* dst = global.file();

  if (!fmt || fmt[0] == '\0') {
    fprintf(dst, "\n");
  }
  else {
    // This looks scary, but we really want to use only a single call to vfprintf()
    // in multithreaded code. So we change the format a bit if necessary.
    enum : unsigned { kBufferSize = 512 };
    char staticBuffer[512];

    size_t fmtSize = strlen(fmt);
    size_t prefixSize = strlen(prefix);

    // The stack buffer must hold prefix + fmt + optional '\n' + '\0', hence
    // the `- 2` slack; otherwise fall back to the heap.
    char* fmtBuf = staticBuffer;
    if (fmtSize > kBufferSize - 2 - prefixSize)
      fmtBuf = static_cast<char*>(malloc(fmtSize + prefixSize + 2));

    if (!fmtBuf) {
      fprintf(dst, "%sCannot allocate buffer for vfprintf()\n", prefix);
    }
    else {
      // Build "<prefix><fmt>" and append '\n' only when not already present.
      memcpy(fmtBuf, prefix, prefixSize);
      memcpy(fmtBuf + prefixSize, fmt, fmtSize);

      fmtSize += prefixSize;
      if (fmtBuf[fmtSize - 1] != '\n')
        fmtBuf[fmtSize++] = '\n';
      fmtBuf[fmtSize] = '\0';

      vfprintf(dst, fmtBuf, ap);

      if (fmtBuf != staticBuffer)
        free(fmtBuf);
    }
  }

  fflush(dst);
}
263
// Logs an informational message; a newline is appended automatically when
// missing. Messages emitted while a unit test runs are indented by 2 spaces.
void BrokenAPI::info(const char* fmt, ...) noexcept {
  BrokenGlobal& global = _brokenGlobal;

  va_list ap;
  va_start(ap, fmt);
  BrokenAPI_printMessage(global._unitRunning ? "  " : "", fmt, ap);
  va_end(ap);
}
272
// Reports an EXPECT() failure — the failed expression, an optional formatted
// reason, and the source location — then terminates the process with exit(1).
void BrokenAPI::fail(const char* file, int line, const char* expression, const char* fmt, ...) noexcept {
  BrokenGlobal& global = _brokenGlobal;
  FILE* dst = global.file();

  fprintf(dst, "  FAILED: %s\n", expression);

  // The reason is optional; EXPECT() without a message passes fmt == nullptr.
  if (fmt) {
    va_list ap;
    va_start(ap, fmt);
    BrokenAPI_printMessage("  REASON: ", fmt, ap);
    va_end(ap);
  }

  fprintf(dst, "  SOURCE: %s (Line: %d)\n", file, line);
  fflush(dst);

  exit(1);
}
0 // [Broken]
1 // Lightweight Unit Testing for C++.
2 //
3 // [License]
4 // Public Domain (Unlicense) or Zlib.
5
6 #ifndef BROKEN_INTERNAL_H
7 #define BROKEN_INTERNAL_H
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <utility>
13
// Hide everything when using Doxygen. Ideally this could be protected by a
// macro, but there is no globally and widely used one across multiple projects.
16
17 //! \cond
18
19 // ============================================================================
20 // [Broken - API]
21 // ============================================================================
22
//! Public interface of the Broken test framework: unit registration, the
//! test runner, logging, and the helpers backing the EXPECT() macro.
struct BrokenAPI {
  //! Entry point of a unit test defined by `UNIT` macro.
  typedef void (*Entry)(void);

  enum Flags : unsigned {
    kFlagFinished = 0x1
  };

  //! Test defined by `UNIT` macro. Units form a singly-linked list, kept
  //! sorted by (priority, name) — see `add()`.
  struct Unit {
    Entry entry;
    const char* name;
    int priority;
    unsigned flags;
    Unit* next;
  };

  //! Automatic unit registration by using static initialization.
  struct AutoUnit : Unit {
    inline AutoUnit(Entry entry_, const char* name_, int priority_ = 0, int dummy_ = 0) noexcept {
      // Not used, only to trick `UNIT()` macro.
      (void)dummy_;

      this->entry = entry_;
      this->name = name_;
      this->priority = priority_;
      this->flags = 0;
      this->next = nullptr;
      BrokenAPI::add(this);
    }
  };

  //! Returns true if `name` was passed on the command line.
  static bool hasArg(const char* name) noexcept;

  //! Register a new unit test (called automatically by `AutoUnit` and `UNIT`).
  static void add(Unit* unit) noexcept;

  //! Set the output file to `file` (null restores the default, `stdout`).
  static void setOutputFile(FILE* file) noexcept;

  //! Runs the `Broken` framework: handles `--help`/`--list`/`--run-...`
  //! options, executes the selected unit tests, and returns the process
  //! exit code (0 on success; a failing test terminates the process).
  static int run(int argc, const char* argv[], Entry onBeforeRun = nullptr, Entry onAfterRun = nullptr);

  //! Log message, adds automatically new line if not present.
  static void info(const char* fmt, ...) noexcept;

  //! Called on `EXPECT()` failure.
  static void fail(const char* file, int line, const char* expression, const char* fmt, ...) noexcept;

  //! Used internally by `EXPECT` macro (overload without a custom message).
  template<typename T>
  static inline void expect(const char* file, int line, const char* expression, const T& result) noexcept {
    if (!result)
      fail(file, line, expression, nullptr);
  }

  //! Used internally by `EXPECT` macro (overload with a printf-like message).
  template<typename T, typename... Args>
  static inline void expect(const char* file, int line, const char* expression, const T& result, const char* fmt, Args&&... args) noexcept {
    if (!result)
      fail(file, line, expression, fmt, std::forward<Args>(args)...);
  }
};
88
89 // ============================================================================
90 // [Broken - Macros]
91 // ============================================================================
92
//! Internal macro used by `UNIT()`: declares the test entry point and a
//! static `AutoUnit` whose constructor registers it before `main()` runs.
#define BROKEN_UNIT_INTERNAL(NAME, PRIORITY) \
  static void unit_##NAME##_entry(void); \
  static ::BrokenAPI::AutoUnit unit_##NAME##_autoinit(unit_##NAME##_entry, #NAME, PRIORITY); \
  static void unit_##NAME##_entry(void)

//! Stringifies the expression used by EXPECT().
#define BROKEN_STRINFIGY_EXPRESSION_INTERNAL(EXP, ...) #EXP

//! \def UNIT(NAME [, PRIORITY])
//!
//! Define a unit test with an optional priority.
//!
//! `NAME` can only contain ASCII characters, numbers and underscore. It has
//! the same rules as identifiers in C and C++.
//!
//! `PRIORITY` specifies the order in which unit tests are run. A lesser value
//! increases the priority. At the moment all units are first sorted by
//! priority and then by name - this makes the run always deterministic.
#define UNIT(NAME, ...) BROKEN_UNIT_INTERNAL(NAME, __VA_ARGS__ + 0)

//! #define INFO(FORMAT [, ...])
//!
//! Informative message printed to `stdout`.
#define INFO(...) ::BrokenAPI::info(__VA_ARGS__)

//! #define EXPECT(EXP [, FORMAT [, ...]])
//!
//! Expect `EXP` to be true or evaluates to true, fail otherwise.
#define EXPECT(...) ::BrokenAPI::expect(__FILE__, __LINE__, BROKEN_STRINFIGY_EXPRESSION_INTERNAL(__VA_ARGS__), __VA_ARGS__)
123
124 //! \endcond
125
126 #endif // BROKEN_INTERNAL_H
#!/bin/sh

# Configures Debug and Release build directories (../build_dbg, ../build_rel)
# using CMake's default generator. Paths are quoted and `$(...)` is used
# instead of deprecated backticks so the script survives spaces in paths.

CURRENT_DIR="$(pwd)"
BUILD_DIR="build"
BUILD_OPTIONS="-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DASMJIT_TEST=1"

echo "** Configuring ${BUILD_DIR}_dbg [Debug Build] **"
mkdir -p "../${BUILD_DIR}_dbg"
cd "../${BUILD_DIR}_dbg"
# BUILD_OPTIONS stays unquoted on purpose: it must word-split into options.
eval cmake .. -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS} -DASMJIT_SANITIZE=1
cd "${CURRENT_DIR}"

echo "** Configuring ${BUILD_DIR}_rel [Release Build] **"
mkdir -p "../${BUILD_DIR}_rel"
cd "../${BUILD_DIR}_rel"
eval cmake .. -DCMAKE_BUILD_TYPE=Release ${BUILD_OPTIONS}
cd "${CURRENT_DIR}"
#!/bin/sh

# Configures Debug and Release build directories (../build_dbg, ../build_rel)
# using the Ninja generator. Paths are quoted and `$(...)` replaces the
# deprecated backticks so the script survives spaces in paths.

CURRENT_DIR="$(pwd)"
BUILD_DIR="build"
BUILD_OPTIONS="-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DASMJIT_TEST=1"

echo "** Configuring ${BUILD_DIR}_dbg [Debug Build] **"
mkdir -p "../${BUILD_DIR}_dbg"
cd "../${BUILD_DIR}_dbg"
# BUILD_OPTIONS stays unquoted on purpose: it must word-split into options.
eval cmake .. -G"Ninja" -DCMAKE_BUILD_TYPE=Debug ${BUILD_OPTIONS} -DASMJIT_SANITIZE=1
cd "${CURRENT_DIR}"

echo "** Configuring ${BUILD_DIR}_rel [Release Build] **"
mkdir -p "../${BUILD_DIR}_rel"
cd "../${BUILD_DIR}_rel"
eval cmake .. -G"Ninja" -DCMAKE_BUILD_TYPE=Release ${BUILD_OPTIONS}
cd "${CURRENT_DIR}"
#!/bin/sh

# Configures Release build directories instrumented with AddressSanitizer
# (../build_rel_asan) and UndefinedBehaviorSanitizer (../build_rel_ubsan)
# using the Ninja generator. Quoted paths + `$(...)` replace the fragile
# backtick/unquoted style.

CURRENT_DIR="$(pwd)"
BUILD_DIR="build"
BUILD_OPTIONS="-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DASMJIT_TEST=1"

echo "** Configuring '${BUILD_DIR}_rel_asan' [Sanitize=Address] **"
mkdir -p "../${BUILD_DIR}_rel_asan"
cd "../${BUILD_DIR}_rel_asan"
# BUILD_OPTIONS stays unquoted on purpose: it must word-split into options.
eval cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release ${BUILD_OPTIONS} -DASMJIT_SANITIZE=address
cd "${CURRENT_DIR}"

echo "** Configuring '${BUILD_DIR}_rel_ubsan' [Sanitize=Undefined] **"
mkdir -p "../${BUILD_DIR}_rel_ubsan"
cd "../${BUILD_DIR}_rel_ubsan"
eval cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release ${BUILD_OPTIONS} -DASMJIT_SANITIZE=undefined
cd "${CURRENT_DIR}"
@echo off
rem Configures a Visual Studio 2019 x64 build directory (..\build_vs_x64).

set CURRENT_DIR=%CD%
rem Use `set "VAR=value"` so the quotes do not become part of the value.
set "BUILD_DIR=build_vs_x64"

mkdir ..\%BUILD_DIR%
cd ..\%BUILD_DIR%
cmake .. -G"Visual Studio 16" -A x64 -DASMJIT_TEST=1
cd %CURRENT_DIR%
@echo off
rem Configures a Visual Studio 2019 32-bit (Win32) build directory (..\build_vs_x86).

set CURRENT_DIR=%CD%
rem Use `set "VAR=value"` so the quotes do not become part of the value.
set "BUILD_DIR=build_vs_x86"

mkdir ..\%BUILD_DIR%
cd ..\%BUILD_DIR%
cmake .. -G"Visual Studio 16" -A Win32 -DASMJIT_TEST=1
cd %CURRENT_DIR%
#!/bin/sh

# Configures an Xcode build directory (../build_xcode). Quoted paths and
# `$(...)` replace the fragile backtick/unquoted style.

BUILD_DIR="build_xcode"
CURRENT_DIR="$(pwd)"

mkdir -p "../${BUILD_DIR}"
cd "../${BUILD_DIR}"
cmake .. -G"Xcode" -DASMJIT_TEST=1
cd "${CURRENT_DIR}"
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // Zlib - See LICENSE.md file in the package.
5
6 // ============================================================================
7 // tablegen-x86.js
8 //
9 // The purpose of this script is to fetch all instructions' names into a single
10 // string and to optimize common patterns that appear in instruction data. It
11 // prevents relocation of small strings (instruction names) that has to be done
12 // by a linker to make all pointers the binary application/library uses valid.
13 // This approach decreases the final size of AsmJit binary and relocation data.
14 //
15 // NOTE: This script relies on 'asmdb' package. Either install it by using
16 // node.js package manager (npm) or by copying/symlinking the whole asmdb
17 // directory as [asmjit]/tools/asmdb.
18 // ============================================================================
19
20 "use strict";
21
22 const core = require("./tablegen.js");
23 const asmdb = core.asmdb;
24 const kIndent = core.kIndent;
25
26 const Lang = core.Lang;
27 const CxxUtils = core.CxxUtils;
28 const MapUtils = core.MapUtils;
29 const ArrayUtils = core.ArrayUtils;
30 const StringUtils = core.StringUtils;
31 const IndexedArray = core.IndexedArray;
32
33 const hasOwn = Object.prototype.hasOwnProperty;
34 const disclaimer = StringUtils.disclaimer;
35
36 const FAIL = core.FAIL;
37 const DEBUG = core.DEBUG;
38
39 const decToHex = StringUtils.decToHex;
40
41 // ============================================================================
42 // [tablegen.x86.x86isa]
43 // ============================================================================
44
// Create the X86 database and add some special cases recognized by AsmJit.
const x86isa = new asmdb.x86.ISA({
  instructions: [
    // Imul in [reg, imm] form is encoded as [reg, reg, imm].
    ["imul", "r16, ib"    , "RMI"     , "66 6B /r ib"   , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"],
    ["imul", "r32, ib"    , "RMI"     , "6B /r ib"      , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"],
    ["imul", "r64, ib"    , "RMI"     , "REX.W 6B /r ib", "X64 OF=W SF=W ZF=U AF=U PF=U CF=W"],
    ["imul", "r16, iw"    , "RMI"     , "66 69 /r iw"   , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"],
    ["imul", "r32, id"    , "RMI"     , "69 /r id"      , "ANY OF=W SF=W ZF=U AF=U PF=U CF=W"],
    ["imul", "r64, id"    , "RMI"     , "REX.W 69 /r id", "X64 OF=W SF=W ZF=U AF=U PF=U CF=W"]
  ]
});

// Remapped instructions contain mapping between instructions that AsmJit expects
// and instructions provided by asmdb. In general, AsmJit uses string instructions
// (like cmps, movs, etc...) without the suffix, so we just remap these and keep
// all others.
//
// `rep` semantics (consumed by X86TableGen.query): true keeps only REP/REPNE
// forms, false keeps only non-REP forms, null keeps all forms.
const RemappedInsts = {
  __proto__: null,

  "cmpsd": { names: ["cmpsd"]                           , rep: false },
  "movsd": { names: ["movsd"]                           , rep: false },
  "cmps" : { names: ["cmpsb", "cmpsw", "cmpsd", "cmpsq"], rep: true  },
  "movs" : { names: ["movsb", "movsw", "movsd", "movsq"], rep: true  },
  "lods" : { names: ["lodsb", "lodsw", "lodsd", "lodsq"], rep: null  },
  "scas" : { names: ["scasb", "scasw", "scasd", "scasq"], rep: null  },
  "stos" : { names: ["stosb", "stosw", "stosd", "stosq"], rep: null  },
  "ins"  : { names: ["insb" , "insw" , "insd" ]         , rep: null  },
  "outs" : { names: ["outsb", "outsw", "outsd"]         , rep: null  }
};
75
76 // ============================================================================
77 // [tablegen.x86.Filter]
78 // ============================================================================
79
class Filter {
  // Returns instructions that have a unique operand signature; all immediate
  // operands are collapsed into a single "imm" token and AltForm instructions
  // are dropped.
  static unique(instArray) {
    const seen = new Set();
    const result = [];

    for (const inst of instArray) {
      if (inst.attributes.AltForm)
        continue;

      const signature = inst.operands.map((op) => op.isImm() ? "imm" : op.toString()).join(", ");
      if (seen.has(signature))
        continue;

      seen.add(signature);
      result.push(inst);
    }

    return result;
  }

  // Returns instructions that are not flagged as an alternative form.
  static noAltForm(instArray) {
    return instArray.filter((inst) => !inst.attributes.AltForm);
  }

  // Returns instructions available on `arch` ("X86"/"X64"); "ANY" always matches.
  static byArch(instArray, arch) {
    return instArray.filter((inst) => inst.arch === "ANY" || inst.arch === arch);
  }
}
118
119 // ============================================================================
120 // [tablegen.x86.GenUtils]
121 // ============================================================================
122
class GenUtils {
  // Returns "" when the instruction group is available on all architectures,
  // otherwise a "(X86)" / "(X64)" suffix describing the only architecture.
  static cpuArchOf(dbInsts) {
    var anyArch = false;
    var x86Arch = false;
    var x64Arch = false;

    for (var i = 0; i < dbInsts.length; i++) {
      const dbInst = dbInsts[i];
      if (dbInst.arch === "ANY") anyArch = true;
      if (dbInst.arch === "X86") x86Arch = true;
      if (dbInst.arch === "X64") x64Arch = true;
    }

    return anyArch || (x86Arch && x64Arch) ? "" : x86Arch ? "(X86)" : "(X64)";
  }

  // Returns the sorted union of CPU features required by the given dbInsts.
  static cpuFeaturesOf(dbInsts) {
    return ArrayUtils.sorted(dbInsts.unionCpuFeatures());
  }

  // Collects AsmJit instruction flags (Lock, Rep, Vex, Avx512*, ...) from all
  // asmdb instructions of a group and returns them as an array of flag names.
  static flagsOf(dbInsts) {
    // Merges flags `a` + `b` into the single combined flag `c` when both are set.
    function replace(map, a, b, c) {
      if (map[a] && map[b]) {
        delete map[a];
        delete map[b];
        map[c] = true;
      }
    }

    const f = Object.create(null);
    var i, j;

    // bndldx/bndstx are the only instructions using the MIB addressing form.
    var mib = dbInsts.length > 0 && /^(?:bndldx|bndstx)$/.test(dbInsts[0].name);
    if (mib) f.Mib = true;

    var mmx = false;
    var vec = false;

    // First pass: detect MMX / vector instructions from operands (emms,
    // vzeroall, and vzeroupper have no register operands, so special-case them).
    for (i = 0; i < dbInsts.length; i++) {
      const dbInst = dbInsts[i];
      const operands = dbInst.operands;

      if (dbInst.name === "emms")
        mmx = true;

      if (dbInst.name === "vzeroall" || dbInst.name === "vzeroupper")
        vec = true;

      for (j = 0; j < operands.length; j++) {
        const op = operands[j];
        if (op.reg === "mm")
          mmx = true;
        else if (/^(k|xmm|ymm|zmm)$/.test(op.reg)) {
          vec = true;
        }
      }
    }

    if (mmx) f.Mmx = true;
    if (vec) f.Vec = true;

    // Second pass: prefix/attribute-based flags.
    for (i = 0; i < dbInsts.length; i++) {
      const dbInst = dbInsts[i];
      const operands = dbInst.operands;

      if (dbInst.attributes.Lock      ) f.Lock = true;
      if (dbInst.attributes.XAcquire  ) f.XAcquire = true;
      if (dbInst.attributes.XRelease  ) f.XRelease = true;
      // NOTE(review): BND maps to the same `Rep` flag as REP/REPNE here —
      // presumably because the BND prefix is encoded like REP; confirm.
      if (dbInst.attributes.BND       ) f.Rep = true;
      if (dbInst.attributes.REP       ) f.Rep = true;
      if (dbInst.attributes.REPNE     ) f.Rep = true;
      if (dbInst.attributes.RepIgnored) f.RepIgnored = true;

      // FPU instructions get a flag per memory-operand width.
      if (dbInst.fpu) {
        for (var j = 0; j < operands.length; j++) {
          const op = operands[j];
          if (op.memSize === 16) f.FpuM16 = true;
          if (op.memSize === 32) f.FpuM32 = true;
          if (op.memSize === 64) f.FpuM64 = true;
          if (op.memSize === 80) f.FpuM80 = true;
        }
      }

      if (dbInst.vsibReg)
        f.Vsib = true;

      if (dbInst.prefix === "VEX" || dbInst.prefix === "XOP")
        f.Vex = true;

      if (dbInst.prefix === "EVEX") {
        f.Evex = true;

        if (dbInst.kmask) f.Avx512K = true;
        if (dbInst.zmask) f.Avx512Z = true;

        if (dbInst.er) f.Avx512ER = true;
        if (dbInst.sae) f.Avx512SAE = true;

        if (dbInst.broadcast) f["Avx512B" + String(dbInst.elementSize)] = true;
        if (dbInst.tupleType === "T1_4X") f.Avx512T4X = true;
      }
    }

    // Fold individual AVX-512 flags into the combined names AsmJit uses.
    // The order matters: earlier merges feed later ones (e.g. Avx512KZ).
    replace(f, "Avx512K"        , "Avx512Z"     , "Avx512KZ");
    replace(f, "Avx512ER"       , "Avx512SAE"   , "Avx512ER_SAE");
    replace(f, "Avx512KZ"       , "Avx512SAE"   , "Avx512KZ_SAE");
    replace(f, "Avx512KZ"       , "Avx512ER_SAE", "Avx512KZ_ER_SAE");
    replace(f, "Avx512K"        , "Avx512B32"   , "Avx512K_B32");
    replace(f, "Avx512K"        , "Avx512B64"   , "Avx512K_B64");
    replace(f, "Avx512KZ"       , "Avx512B32"   , "Avx512KZ_B32");
    replace(f, "Avx512KZ"       , "Avx512B64"   , "Avx512KZ_B64");
    replace(f, "Avx512KZ_SAE"   , "Avx512B32"   , "Avx512KZ_SAE_B32");
    replace(f, "Avx512KZ_SAE"   , "Avx512B64"   , "Avx512KZ_SAE_B64");
    replace(f, "Avx512KZ_ER_SAE", "Avx512B32"   , "Avx512KZ_ER_SAE_B32");
    replace(f, "Avx512KZ_ER_SAE", "Avx512B64"   , "Avx512KZ_ER_SAE_B64");

    return Object.getOwnPropertyNames(f);
  }

  // Returns true if the operand lists `aOps` (from index `aFrom`) and `bOps`
  // (from index `bFrom`) are equal element-wise (compared via `.data`).
  static eqOps(aOps, aFrom, bOps, bFrom) {
    var x = 0;
    for (;;) {
      const aIndex = x + aFrom;
      const bIndex = x + bFrom;

      const aOut = aIndex >= aOps.length;
      const bOut = bIndex >= bOps.length;

      // Equal only if both lists are exhausted at the same time.
      if (aOut || bOut)
        return !!(aOut && bOut);

      const aOp = aOps[aIndex];
      const bOp = bOps[bIndex];

      if (aOp.data !== bOp.data)
        return false;

      x++;
    }
  }

  // Classifies instruction behavior when both operands are the same register:
  // "RO"/"WO"/"None" — presumably read-only vs write-only semantics of the
  // single-register case (e.g. `xor reg, reg` only writes) — TODO confirm
  // against x86instdb's SingleRegCase usage.
  static singleRegCase(name) {
    switch (name) {
      case "xchg"    :

      case "and"     :
      case "pand"    : case "vpand"  : case "vpandd"  : case "vpandq"   :
      case "andpd"   : case "vandpd" :
      case "andps"   : case "vandps" :

      case "or"      :
      case "por"     : case "vpor"   : case "vpord"   : case "vporq"    :
      case "orpd"    : case "vorpd"  :
      case "orps"    : case "vorps"  :

      case "pminsb"  : case "vpminsb": case "pmaxsb"  : case "vpmaxsb"  :
      case "pminsw"  : case "vpminsw": case "pmaxsw"  : case "vpmaxsw"  :
      case "pminsd"  : case "vpminsd": case "pmaxsd"  : case "vpmaxsd"  :
      case "pminub"  : case "vpminub": case "pmaxub"  : case "vpmaxub"  :
      case "pminuw"  : case "vpminuw": case "pmaxuw"  : case "vpmaxuw"  :
      case "pminud"  : case "vpminud": case "pmaxud"  : case "vpmaxud"  :
        return "RO";

      case "pandn"   : case "vpandn" : case "vpandnd" : case "vpandnq"  :

      case "xor"     :
      case "pxor"    : case "vpxor"  : case "vpxord"  : case "vpxorq"   :
      case "xorpd"   : case "vxorpd" :
      case "xorps"   : case "vxorps" :

      case "sub"     :
      case "sbb"     :
      case "psubb"   : case "vpsubb" :
      case "psubw"   : case "vpsubw" :
      case "psubd"   : case "vpsubd" :
      case "psubq"   : case "vpsubq" :
      case "psubsb"  : case "vpsubsb": case "psubusb" : case "vpsubusb" :
      case "psubsw"  : case "vpsubsw": case "psubusw" : case "vpsubusw" :

      case "vpcmpeqb": case "pcmpeqb": case "vpcmpgtb": case "pcmpgtb"  :
      case "vpcmpeqw": case "pcmpeqw": case "vpcmpgtw": case "pcmpgtw"  :
      case "vpcmpeqd": case "pcmpeqd": case "vpcmpgtd": case "pcmpgtd"  :
      case "vpcmpeqq": case "pcmpeqq": case "vpcmpgtq": case "pcmpgtq"  :

      case "vpcmpb"  : case "vpcmpub":
      case "vpcmpd"  : case "vpcmpud":
      case "vpcmpw"  : case "vpcmpuw":
      case "vpcmpq"  : case "vpcmpuq":
        return "WO";

      default:
        return "None";
    }
  }

  // Maps a fixed-register name to its hardware id; returns -1 when the
  // register name does not denote a fixed physical register.
  static fixedRegOf(reg) {
    switch (reg) {
      case "es"  : return 1;
      case "cs"  : return 2;
      case "ss"  : return 3;
      case "ds"  : return 4;
      case "fs"  : return 5;
      case "gs"  : return 6;
      case "ah"  : return 0;
      case "ch"  : return 1;
      case "dh"  : return 2;
      case "bh"  : return 3;
      case "al"  : case "ax": case "eax": case "rax": case "zax": return 0;
      case "cl"  : case "cx": case "ecx": case "rcx": case "zcx": return 1;
      case "dl"  : case "dx": case "edx": case "rdx": case "zdx": return 2;
      case "bl"  : case "bx": case "ebx": case "rbx": case "zbx": return 3;
      case "spl" : case "sp": case "esp": case "rsp": case "zsp": return 4;
      case "bpl" : case "bp": case "ebp": case "rbp": case "zbp": return 5;
      case "sil" : case "si": case "esi": case "rsi": case "zsi": return 6;
      case "dil" : case "di": case "edi": case "rdi": case "zdi": return 7;
      case "st0" : return 0;
      case "xmm0": return 0;
      case "ymm0": return 0;
      case "zmm0": return 0;
      default:
        return -1;
    }
  }

  // Returns the control-flow type of the instruction group
  // ("Jump" / "Call" / "Branch" / "Return" / "None").
  static controlType(dbInsts) {
    if (dbInsts.checkAttribute("Control", "Jump")) return "Jump";
    if (dbInsts.checkAttribute("Control", "Call")) return "Call";
    if (dbInsts.checkAttribute("Control", "Branch")) return "Branch";
    if (dbInsts.checkAttribute("Control", "Return")) return "Return";
    return "None";
  }
}
355
356 // ============================================================================
357 // [tablegen.x86.X86TableGen]
358 // ============================================================================
359
360 class X86TableGen extends core.TableGen {
  // Creates a table generator operating on the "X86" tables.
  constructor() {
    super("X86");
  }
364
365 // --------------------------------------------------------------------------
366 // [Query]
367 // --------------------------------------------------------------------------
368
369 // Get instructions (dbInsts) having the same name as understood by AsmJit.
370 query(name) {
371 const remapped = RemappedInsts[name];
372 if (!remapped) return x86isa.query(name);
373
374 const dbInsts = x86isa.query(remapped.names);
375 const rep = remapped.rep;
376 if (rep === null) return dbInsts;
377
378 return dbInsts.filter((inst) => {
379 return rep === !!(inst.attributes.REP || inst.attributes.REPNE);
380 });
381 }
382
383 // --------------------------------------------------------------------------
384 // [Parse / Merge]
385 // --------------------------------------------------------------------------
386
  // Parses all INST(...) rows out of x86instdb.cpp and registers each as an
  // instruction record; the autogenerated index fields are recomputed later.
  parse() {
    const data = this.dataOfFile("src/asmjit/x86/x86instdb.cpp");
    // One capture group per INST() argument; the fixed-width opcode columns
    // are matched by ".{26}[^,]*" to tolerate embedded spaces.
    const re = new RegExp(
      "INST\\(" +
        "([A-Za-z0-9_]+)\\s*" + "," +    // [01] Instruction.
        "([^,]+)"             + "," +    // [02] Encoding.
        "(.{26}[^,]*)"        + "," +    // [03] Opcode[0].
        "(.{26}[^,]*)"        + "," +    // [04] Opcode[1].
        // --- autogenerated fields ---
        "([^\\)]+)"           + "," +    // [05] MainOpcodeIndex.
        "([^\\)]+)"           + "," +    // [06] AltOpcodeIndex.
        "([^\\)]+)"           + "," +    // [07] NameIndex.
        "([^\\)]+)"           + "," +    // [08] CommonDataIndex.
        "([^\\)]+)"           + "\\)",   // [09] OperationDataIndex.
      "g");

    var m;
    while ((m = re.exec(data)) !== null) {
      var enum_ = m[1];
      // The "None" sentinel entry has no instruction name.
      var name = enum_ === "None" ? "" : enum_.toLowerCase();
      var encoding = m[2].trim();
      var opcode0 = m[3].trim();
      var opcode1 = m[4].trim();

      const dbInsts = this.query(name);
      if (name && !dbInsts.length)
        FAIL(`Instruction '${name}' not found in asmdb`);

      const flags = GenUtils.flagsOf(dbInsts);
      const controlType = GenUtils.controlType(dbInsts);
      const singleRegCase = GenUtils.singleRegCase(name);

      this.addInst({
        id                : 0,             // Instruction id (numeric value).
        name              : name,          // Instruction name.
        enum              : enum_,         // Instruction enum without `kId` prefix.
        dbInsts           : dbInsts,       // All dbInsts returned from asmdb query.
        encoding          : encoding,      // Instruction encoding.
        opcode0           : opcode0,       // Primary opcode.
        opcode1           : opcode1,       // Secondary opcode.
        flags             : flags,
        signatures        : null,          // Instruction signatures.
        controlType       : controlType,
        singleRegCase     : singleRegCase,

        mainOpcodeValue   : -1,            // Main opcode value (0.255 hex).
        mainOpcodeIndex   : -1,            // Index to InstDB::_mainOpcodeTable.
        altOpcodeIndex    : -1,            // Index to InstDB::_altOpcodeTable.
        nameIndex         : -1,            // Index to InstDB::_nameData.
        commonInfoIndexA  : -1,
        commomInfoIndexB  : -1,            // NOTE: "commom" typo kept — merge() reads this exact key.

        signatureIndex    : -1,
        signatureCount    : -1
      });
    }

    // An empty result means the regexp no longer matches the table format.
    if (this.insts.length === 0)
      FAIL("X86TableGen.parse(): Invalid parsing regexp (no data parsed)");

    console.log("Number of Instructions: " + this.insts.length);
  }
449
450 merge() {
451 var s = StringUtils.format(this.insts, "", true, function(inst) {
452 return "INST(" +
453 String(inst.enum ).padEnd(17) + ", " +
454 String(inst.encoding ).padEnd(19) + ", " +
455 String(inst.opcode0 ).padEnd(26) + ", " +
456 String(inst.opcode1 ).padEnd(26) + ", " +
457 String(inst.mainOpcodeIndex ).padEnd( 3) + ", " +
458 String(inst.altOpcodeIndex ).padEnd( 3) + ", " +
459 String(inst.nameIndex ).padEnd( 5) + ", " +
460 String(inst.commonInfoIndexA).padEnd( 3) + ", " +
461 String(inst.commomInfoIndexB).padEnd( 3) + ")";
462 }) + "\n";
463 this.inject("InstInfo", s, this.insts.length * 8);
464 }
465
466 // --------------------------------------------------------------------------
467 // [Other]
468 // --------------------------------------------------------------------------
469
470 printMissing() {
471 const ignored = MapUtils.arrayToMap([
472 "cmpsb", "cmpsw", "cmpsd", "cmpsq",
473 "lodsb", "lodsw", "lodsd", "lodsq",
474 "movsb", "movsw", "movsd", "movsq",
475 "scasb", "scasw", "scasd", "scasq",
476 "stosb", "stosw", "stosd", "stosq",
477 "insb" , "insw" , "insd" ,
478 "outsb", "outsw", "outsd",
479 "wait" // Maps to `fwait`, which AsmJit uses instead.
480 ]);
481
482 var out = "";
483 x86isa.instructionNames.forEach(function(name) {
484 var dbInsts = x86isa.query(name);
485 if (!this.instMap[name] && ignored[name] !== true) {
486 console.log(`MISSING INSTRUCTION '${name}'`);
487 var inst = this.newInstFromGroup(dbInsts);
488 if (inst) {
489 out += " INST(" +
490 String(inst.enum ).padEnd(17) + ", " +
491 String(inst.encoding ).padEnd(19) + ", " +
492 String(inst.opcode0 ).padEnd(26) + ", " +
493 String(inst.opcode1 ).padEnd(26) + ", " +
494 String("0" ).padEnd( 4) + ", " +
495 String("0" ).padEnd( 3) + ", " +
496 String("0" ).padEnd( 3) + "),\n";
497 }
498 }
499 }, this);
500 console.log(out);
501 }
502
503 newInstFromGroup(dbInsts) {
504 function composeOpCode(obj) {
505 return `${obj.type}(${obj.prefix},${obj.opcode},${obj.o},${obj.l},${obj.w},${obj.ew},${obj.en},${obj.tt})`;
506 }
507
508 function GetAccess(dbInst) {
509 var operands = dbInst.operands;
510 if (!operands.length) return "";
511
512 var op = operands[0];
513 if (op.read && op.write)
514 return "RW";
515 else if (op.read)
516 return "RO";
517 else
518 return "WO";
519 }
520
521 function isVecPrefix(s) {
522 return s === "VEX" || s === "EVEX" || s === "XOP";
523 }
524
525 var dbi = dbInsts[0];
526
527 var id = this.insts.length;
528 var name = dbi.name;
529 var enum_ = name[0].toUpperCase() + name.substr(1);
530
531 var opcode = dbi.opcodeHex;
532 var rm = dbi.rm;
533 var mm = dbi.mm;
534 var pp = dbi.pp;
535 var encoding = dbi.encoding;
536 var isVec = isVecPrefix(dbi.prefix);
537
538 var access = GetAccess(dbi);
539
540 var vexL = undefined;
541 var vexW = undefined;
542 var evexW = undefined;
543
544 for (var i = 0; i < dbInsts.length; i++) {
545 dbi = dbInsts[i];
546
547 if (dbi.prefix === "VEX" || dbi.prefix === "XOP") {
548 var newVexL = String(dbi.l === "128" ? 0 : dbi.l === "256" ? 1 : dbi.l === "512" ? 2 : "_");
549 var newVexW = String(dbi.w === "W0" ? 0 : dbi.w === "W1" ? 1 : "_");
550
551 if (vexL !== undefined && vexL !== newVexL)
552 vexL = "x";
553 else
554 vexL = newVexL;
555 if (vexW !== undefined && vexW !== newVexW)
556 vexW = "x";
557 else
558 vexW = newVexW;
559 }
560
561 if (dbi.prefix === "EVEX") {
562 var newEvexW = String(dbi.w === "W0" ? 0 : dbi.w === "W1" ? 1 : "_");
563 if (evexW !== undefined && evexW !== newEvexW)
564 evexW = "x";
565 else
566 evexW = newEvexW;
567 }
568
569 if (opcode !== dbi.opcodeHex ) { console.log(`ISSUE: Opcode ${opcode} != ${dbi.opcodeHex}`); return null; }
570 if (rm !== dbi.rm ) { console.log(`ISSUE: RM ${rm} != ${dbi.rm}`); return null; }
571 if (mm !== dbi.mm ) { console.log(`ISSUE: MM ${mm} != ${dbi.mm}`); return null; }
572 if (pp !== dbi.pp ) { console.log(`ISSUE: PP ${pp} != ${dbi.pp}`); return null; }
573 if (encoding !== dbi.encoding ) { console.log(`ISSUE: Enc ${encoding} != ${dbi.encoding}`); return null; }
574 if (access !== GetAccess(dbi)) { console.log(`ISSUE: Access ${access} != ${GetAccess(dbi)}`); return null; }
575 if (isVec != isVecPrefix(dbi.prefix)) { console.log(`ISSUE: Vex/Non-Vex mismatch`); return null; }
576 }
577
578 var ppmm = pp.padEnd(2).replace(/ /g, "0") +
579 mm.padEnd(4).replace(/ /g, "0") ;
580
581 var composed = composeOpCode({
582 type : isVec ? "V" : "O",
583 prefix: ppmm,
584 opcode: opcode,
585 o : rm === "r" ? "_" : (rm ? rm : "_"),
586 l : vexL !== undefined ? vexL : "_",
587 w : vexW !== undefined ? vexW : "_",
588 ew : evexW !== undefined ? evexW : "_",
589 en : "_",
590 tt : "_ "
591 });
592
593 return {
594 id : id,
595 name : name,
596 enum : enum_,
597 encoding : encoding,
598 opcode0 : composed,
599 opcode1 : "0",
600 nameIndex : -1,
601 commonInfoIndexA : -1,
602 commomInfoIndexB : -1
603 };
604 }
605
606 // --------------------------------------------------------------------------
607 // [Hooks]
608 // --------------------------------------------------------------------------
609
610 onBeforeRun() {
611 this.load([
612 "src/asmjit/x86/x86globals.h",
613 "src/asmjit/x86/x86instdb.cpp",
614 "src/asmjit/x86/x86instdb.h",
615 "src/asmjit/x86/x86instdb_p.h"
616 ]);
617 this.parse();
618 }
619
620 onAfterRun() {
621 this.merge();
622 this.save();
623 this.dumpTableSizes();
624 this.printMissing();
625 }
626 }
627
628 // ============================================================================
629 // [tablegen.x86.IdEnum]
630 // ============================================================================
631
class IdEnum extends core.IdEnum {
  constructor() {
    super("IdEnum");
  }

  // Builds the doxygen comment for one instruction enum value: CPU features
  // (AVX/FMA family first, AVX512_VL folded into a "+VL" suffix) followed by
  // the target architecture.
  comment(inst) {
    const isAvxLike = (feature) => /^(AVX|FMA)/.test(feature);

    const dbInsts = inst.dbInsts;
    if (!dbInsts.length) return "Invalid instruction id.";

    let text = "";
    const features = GenUtils.cpuFeaturesOf(dbInsts);

    if (features.length) {
      text += "{";
      const avxFeatures = features.filter((f) => isAvxLike(f));
      const otherFeatures = features.filter((f) => !isAvxLike(f));

      // AVX512_VL is rendered as a "+VL" suffix instead of a feature name.
      const vl = avxFeatures.indexOf("AVX512_VL");
      if (vl !== -1) avxFeatures.splice(vl, 1);

      // FMA is always listed first.
      const fma = avxFeatures.indexOf("FMA");
      if (fma !== -1) { avxFeatures.splice(fma, 1); avxFeatures.unshift("FMA"); }

      text += avxFeatures.join("|");
      if (vl !== -1) text += "+VL";

      if (otherFeatures.length)
        text += (avxFeatures.length ? " & " : "") + otherFeatures.join("|");

      text += "}";
    }

    const arch = GenUtils.cpuArchOf(dbInsts);
    if (arch)
      text += (text ? " " : "") + arch;

    return `Instruction '${inst.name}'${(text ? " " + text : "")}.`;
  }
}
675
676 // ============================================================================
677 // [tablegen.x86.NameTable]
678 // ============================================================================
679
// Generates the instruction-name string table (InstDB::_nameData); all logic
// lives in the shared core.NameTable base.
class NameTable extends core.NameTable {
  constructor() {
    super("NameTable");
  }
}
685
686 // ============================================================================
687 // [tablegen.x86.AltOpcodeTable]
688 // ============================================================================
689
// Generates InstDB::_mainOpcodeTable and InstDB::_altOpcodeTable by splitting
// each instruction's opcode strings into a deduplicated table entry plus the
// extracted 8-bit opcode value.
class AltOpcodeTable extends core.Task {
  constructor() {
    super("AltOpcodeTable");
  }

  run() {
    const insts = this.ctx.insts;

    const mainOpcodeTable = new IndexedArray();
    const altOpcodeTable = new IndexedArray();

    // Index 0 is reserved for "no opcode".
    mainOpcodeTable.addIndexed("O(000000,00,0,0,0,0,0,_ )");

    // Returns [opcodeValue, tableIndex]. The two hex digits of the opcode are
    // extracted and zeroed inside the string so that opcodes differing only
    // in that byte share a single table entry.
    function indexOpcode(opcode) {
      if (opcode === "0")
        return ["00", 0];

      // O_FPU(__,__OP,_)
      if (opcode.startsWith("O_FPU(")) {
        const value = opcode.substring(11, 13);
        const remaining = opcode.substring(0, 11) + "00" + opcode.substring(13);
        return [value, mainOpcodeTable.addIndexed(remaining.padEnd(26))];
      }

      // X(______,OP,_,_,_,_,_,_ )
      // Note: O_FPU was fully handled above, so only O/V/E remain here (the
      // original code re-tested "O_FPU(" in this condition; that check was
      // unreachable and has been removed).
      if (opcode.startsWith("O(") || opcode.startsWith("V(") || opcode.startsWith("E(")) {
        const value = opcode.substring(9, 11);
        let remaining = opcode.substring(0, 9) + "00" + opcode.substring(11);

        // Normalize `_`/`x`/`I` placeholders to `0`. The replacement runs
        // twice on purpose: matches include both surrounding commas, so a
        // single /g pass cannot rewrite two adjacent placeholders (",_,_,").
        remaining = remaining.replace(/,[_xI],/g, ",0,");
        remaining = remaining.replace(/,[_xI],/g, ",0,");
        return [value, mainOpcodeTable.addIndexed(remaining.padEnd(26))];
      }

      FAIL(`Failed to process opcode '${opcode}'`);
    }

    // forEach (not map): executed purely for its side effects on `inst`.
    insts.forEach((inst) => {
      const [value, index] = indexOpcode(inst.opcode0);
      inst.mainOpcodeValue = value;
      inst.mainOpcodeIndex = index;
      inst.altOpcodeIndex = altOpcodeTable.addIndexed(inst.opcode1.padEnd(26));
    });
    // console.log(mainOpcodeTable.length);
    // console.log(StringUtils.format(mainOpcodeTable, kIndent, true));

    this.inject("MainOpcodeTable",
                disclaimer(`const uint32_t InstDB::_mainOpcodeTable[] = {\n${StringUtils.format(mainOpcodeTable, kIndent, true)}\n};\n`),
                mainOpcodeTable.length * 4);

    this.inject("AltOpcodeTable",
                disclaimer(`const uint32_t InstDB::_altOpcodeTable[] = {\n${StringUtils.format(altOpcodeTable, kIndent, true)}\n};\n`),
                altOpcodeTable.length * 4);
  }
}
746
747 // ============================================================================
748 // [tablegen.x86.SseToAvxTable]
749 // ============================================================================
750 /*
751 // Removed from asmjit.
752 class InstSseToAvxTable extends core.Task {
753 constructor() {
754 super("InstSseToAvxTable", ["IdEnum"]);
755 }
756
757 run() {
758 const insts = this.ctx.insts;
759
760 const dataTable = new IndexedArray();
761 const indexTable = [];
762
763 function add(data) {
764 return dataTable.addIndexed("{ " + `SseToAvxData::kMode${data.mode}`.padEnd(28) + ", " + String(data.delta).padEnd(4) + " }");
765 }
766
767 // This will receive a zero index, which means that no SseToAvx or AvxToSse translation is possible.
768 const kInvalidIndex = add({ mode: "None", delta: 0 });
769 insts.forEach((inst) => { indexTable.push(kInvalidIndex); });
770
771 insts.forEach((inst) => {
772 // If it's not `kInvalidIndex` it's an AVX instruction that shares the
773 // SseToAvx data. We won't touch it as it already has the index assigned.
774 if (indexTable[inst.id] === kInvalidIndex) {
775 const data = this.calcSseToAvxData(inst.dbInsts);
776 const index = add(data);
777
778 indexTable[inst.id] = index;
779 if (data.delta !== 0)
780 indexTable[this.ctx.instMap["v" + inst.name].id] = index;
781 }
782 });
783
784 this.inject("SseToAvxIndex",
785 disclaimer(`static const uint8_t sseToAvxIndex[] = {\n${StringUtils.format(indexTable, kIndent, -1)}\n};\n`),
786 indexTable.length * 1);
787
788 this.inject("SseToAvxTable",
789 disclaimer(`static const SseToAvxData sseToAvxData[] = {\n${StringUtils.format(dataTable, kIndent, true)}\n};\n`),
790 dataTable.length * 2);
791 }
792
793 filterSseToAvx(dbInsts) {
794 const filtered = [];
795 for (var x = 0; x < dbInsts.length; x++) {
796 const dbInst = dbInsts[x];
797 const ops = dbInst.operands;
798
799 // An SSE instruction never shares its name with an AVX one.
800 if (/^(VEX|XOP|EVEX)$/.test(dbInst.prefix))
801 return [];
802
803 var ok = false;
804 for (var y = 0; y < ops.length; y++) {
805 // There is no AVX instruction that works with MMX regs.
806 if (ops[y].reg === "mm") { ok = false; break; }
807 if (ops[y].reg === "xmm") { ok = true; }
808 }
809
810 if (ok)
811 filtered.push(dbInst);
812 }
813
814 return filtered;
815 }
816
817 calcSseToAvxData(dbInsts) {
818 const data = {
819 mode : "None", // No conversion by default.
820 delta: 0 // 0 if no conversion is possible.
821 };
822
823 const dbSseInsts = this.filterSseToAvx(dbInsts);
824 if (!dbSseInsts.length)
825 return data;
826
827 const sseName = dbSseInsts[0].name;
828 const avxName = "v" + sseName;
829
830 const dbAvxInsts = this.ctx.query(avxName);
831 if (!dbAvxInsts.length) {
832 DEBUG(`SseToAvx: Instruction '${sseName}' has no AVX counterpart`);
833 return data;
834 }
835
836 if (avxName === "vblendvpd" || avxName === "vblendvps" || avxName === "vpblendvb") {
837 // Special cases first.
838 data.mode = "Blend";
839 }
840 else {
841 // Common case, deduce conversion mode by checking both SSE and AVX instructions.
842 const map = Object.create(null);
843 for (var sseIndex = 0; sseIndex < dbSseInsts.length; sseIndex++) {
844 const sseInst = dbSseInsts[sseIndex];
845 var match = false;
846
847 for (var avxIndex = 0; avxIndex < dbAvxInsts.length; avxIndex++) {
848 const avxInst = dbAvxInsts[avxIndex];
849
850 // Select only VEX instructions.
851 if (avxInst.prefix !== "VEX") continue;
852
853 // Check if the AVX version is the same.
854 if (GenUtils.eqOps(avxInst.operands, 0, sseInst.operands, 0)) {
855 map.raw = true;
856 match = true;
857 }
858 else if (avxInst.operands[0].data === "xmm" && GenUtils.eqOps(avxInst.operands, 1, sseInst.operands, 0)) {
859 map.nds = true;
860 match = true;
861 }
862 }
863
864 if (!match) {
865 const signature = sseInst.operands.map(function(op) { return op.data; }).join(", ");
866 console.log(`SseToAvx: Instruction '${sseName}(${signature})' has no AVX counterpart`);
867 return data;
868 }
869 }
870
871 data.mode = (map.raw && !map.nds) ? "Move" : (map.raw && map.nds) ? "MoveIfMem" : "Extend";
872 }
873 data.delta = this.ctx.instMap[avxName].id - this.ctx.instMap[sseName].id;
874 return data;
875 }
876 }
877 */
878
879 // ============================================================================
880 // [tablegen.x86.InstSignatureTable]
881 // ============================================================================
882
// Operand classifiers used by the signature machinery: names of register
// operands and names of explicitly-sized memory operands.
const RegOp = MapUtils.arrayToMap(["al", "ah", "ax", "eax", "rax", "cl", "r8lo", "r8hi", "r16", "r32", "r64", "xmm", "ymm", "zmm", "mm", "k", "sreg", "creg", "dreg", "st", "bnd"]);
const MemOp = MapUtils.arrayToMap(["m8", "m16", "m32", "m48", "m64", "m80", "m128", "m256", "m512", "m1024"]);

// Canonical operand ordering used when serializing flag sets into C++.
const cmpOp = StringUtils.makePriorityCompare([
  "r8lo", "r8hi", "r16", "r32", "r64", "xmm", "ymm", "zmm", "mm", "k", "sreg", "creg", "dreg", "st", "bnd",
  "mem", "vm", "m8", "m16", "m32", "m48", "m64", "m80", "m128", "m256", "m512", "m1024",
  "mib",
  "vm32x", "vm32y", "vm32z", "vm64x", "vm64y", "vm64z",
  "memBase", "memES", "memDS",
  "i4", "u4", "i8", "u8", "i16", "u16", "i32", "u32", "i64", "u64",
  "rel8", "rel32",
  "implicit"
]);

// Operand name -> C++ macro spelling (F = operand flag, M = memory flag).
const OpToAsmJitOp = {
  "implicit": "F(Implicit)",

  "r8lo"    : "F(GpbLo)",
  "r8hi"    : "F(GpbHi)",
  "r16"     : "F(Gpw)",
  "r32"     : "F(Gpd)",
  "r64"     : "F(Gpq)",
  "xmm"     : "F(Xmm)",
  "ymm"     : "F(Ymm)",
  "zmm"     : "F(Zmm)",
  "mm"      : "F(Mm)",
  "k"       : "F(KReg)",
  "sreg"    : "F(SReg)",
  "creg"    : "F(CReg)",
  "dreg"    : "F(DReg)",
  "st"      : "F(St)",
  "bnd"     : "F(Bnd)",

  "mem"     : "F(Mem)",
  "vm"      : "F(Vm)",

  "i4"      : "F(I4)",
  "u4"      : "F(U4)",
  "i8"      : "F(I8)",
  "u8"      : "F(U8)",
  "i16"     : "F(I16)",
  "u16"     : "F(U16)",
  "i32"     : "F(I32)",
  "u32"     : "F(U32)",
  "i64"     : "F(I64)",
  "u64"     : "F(U64)",

  "rel8"    : "F(Rel8)",
  "rel32"   : "F(Rel32)",

  "m8"      : "M(M8)",
  "m16"     : "M(M16)",
  "m32"     : "M(M32)",
  "m48"     : "M(M48)",
  "m64"     : "M(M64)",
  "m80"     : "M(M80)",
  "m128"    : "M(M128)",
  "m256"    : "M(M256)",
  "m512"    : "M(M512)",
  "m1024"   : "M(M1024)",
  "mib"     : "M(Mib)",
  "mAny"    : "M(Any)",
  "vm32x"   : "M(Vm32x)",
  "vm32y"   : "M(Vm32y)",
  "vm32z"   : "M(Vm32z)",
  "vm64x"   : "M(Vm64x)",
  "vm64y"   : "M(Vm64y)",
  "vm64z"   : "M(Vm64z)",

  "memBase" : "M(BaseOnly)",
  "memDS"   : "M(Ds)",
  "memES"   : "M(Es)"
};
956
// Joins the operand names in `a` into an "A | B | C" expression using the C++
// macro spellings from `map`; returns "0" for an empty array. Fails hard on
// an operand that has no mapping.
function StringifyArray(a, map) {
  const parts = [];
  for (let i = 0; i < a.length; i++) {
    const op = a[i];
    if (!hasOwn.call(map, op))
      FAIL(`UNHANDLED OPERAND '${op}'`);
    parts.push(map[op]);
  }
  return parts.length ? parts.join(" | ") : "0";
}
967
// Describes a single operand as a set of boolean flags (register kinds,
// memory sizes, immediates, ...) keyed by operand name.
class OSignature {
  constructor() {
    this.flags = Object.create(null);
  }

  // True when both operands carry exactly the same flag set.
  equals(other) {
    return MapUtils.equals(this.flags, other.flags);
  }

  // Symmetric difference of the two flag sets, or null when they are equal.
  xor(other) {
    const diff = MapUtils.xor(this.flags, other.flags);
    return Object.getOwnPropertyNames(diff).length === 0 ? null : diff;
  }

  // Merges `other` into this operand. Returns false (and merges nothing)
  // when both operands pin physical register indexes of different kinds.
  mergeWith(other) {
    const thisFlags = this.flags;
    const otherFlags = other.flags;

    let indexKind = "";
    let hasReg = false;

    for (const k in thisFlags) {
      const index = asmdb.x86.Utils.regIndexOf(k);
      const kind = asmdb.x86.Utils.regKindOf(k);

      if (kind)
        hasReg = true;

      if (index !== null && index !== -1)
        indexKind = kind;
    }

    if (hasReg) {
      for (const k in otherFlags) {
        const index = asmdb.x86.Utils.regIndexOf(k);
        if (index !== null && index !== -1) {
          const kind = asmdb.x86.Utils.regKindOf(k);
          if (indexKind !== kind) return false;
        }
      }
    }

    // Compatible - union other's flags into this operand.
    for (const k in otherFlags) thisFlags[k] = true;
    return true;
  }

  // Widens register flags implied by reg/mem combinations.
  simplify() {
    const flags = this.flags;

    // 32-bit register or 16-bit memory implies also 16-bit reg.
    if (flags.r32 && flags.m16) {
      flags.r16 = true;
    }

    // 32-bit register or 8-bit memory implies also 16-bit and 8-bit reg.
    if (flags.r32 && flags.m8) {
      flags.r8lo = true;
      flags.r8hi = true;
      flags.r16 = true;
    }
  }

  toString() {
    const flags = this.flags;
    let s = "";

    for (const k in flags) {
      // Access/segment markers are rendered separately (or not at all).
      if (k === "read" || k === "write" || k === "implicit" || k === "memDS" || k === "memES")
        continue;

      let displayed = k;
      if (displayed === "memZAX") displayed = "zax";
      if (displayed === "memZDI") displayed = "zdi";
      if (displayed === "memZSI") displayed = "zsi";
      s += (s ? "|" : "") + displayed;
    }

    if (flags.memDS) s = "ds:[" + s + "]";
    if (flags.memES) s = "es:[" + s + "]";

    if (flags.implicit)
      s = "<" + s + ">";

    return s;
  }

  // Serializes this operand into a `ROW(...)` entry of the C++ signature table.
  toAsmJitOpData() {
    const src = this.flags;

    const mFlags = Object.create(null);
    const mMemFlags = Object.create(null);
    const mExtFlags = Object.create(null);
    let sRegMask = 0;

    // Fixed physical registers map to a generic register-kind flag plus a
    // bit in the physical-id mask (same pairs as the original switch).
    const fixedRegs = Object.assign(Object.create(null), {
      es  : ["sreg", 1], cs  : ["sreg", 2], ss  : ["sreg", 3],
      ds  : ["sreg", 4], fs  : ["sreg", 5], gs  : ["sreg", 6],
      al  : ["r8lo", 0], ah  : ["r8hi", 0], ax  : ["r16", 0], eax : ["r32", 0], rax : ["r64", 0],
      cl  : ["r8lo", 1], ch  : ["r8hi", 1], cx  : ["r16", 1], ecx : ["r32", 1], rcx : ["r64", 1],
      dl  : ["r8lo", 2], dh  : ["r8hi", 2], dx  : ["r16", 2], edx : ["r32", 2], rdx : ["r64", 2],
      bl  : ["r8lo", 3], bh  : ["r8hi", 3], bx  : ["r16", 3], ebx : ["r32", 3], rbx : ["r64", 3],
      si  : ["r16", 6], esi : ["r32", 6], rsi : ["r64", 6],
      di  : ["r16", 7], edi : ["r32", 7], rdi : ["r64", 7],
      st0 : ["st", 0], xmm0: ["xmm", 0], ymm0: ["ymm", 0]
    });

    for (const k in src) {
      switch (k) {
        case "implicit":
        case "r8lo"    :
        case "r8hi"    :
        case "r16"     :
        case "r32"     :
        case "r64"     :
        case "creg"    :
        case "dreg"    :
        case "sreg"    :
        case "bnd"     :
        case "st"      :
        case "k"       :
        case "mm"      :
        case "xmm"     :
        case "ymm"     :
        case "zmm"     : mFlags[k] = true; break;

        case "m8"      :
        case "m16"     :
        case "m32"     :
        case "m48"     :
        case "m64"     :
        case "m80"     :
        case "m128"    :
        case "m256"    :
        case "m512"    :
        case "m1024"   : mFlags.mem = true; mMemFlags[k] = true; break;
        case "mib"     : mFlags.mem = true; mMemFlags.mib = true; break;
        case "mem"     : mFlags.mem = true; mMemFlags.mAny = true; break;

        case "memBase" : mFlags.mem = true; mMemFlags.memBase = true; break;
        case "memDS"   : mFlags.mem = true; mMemFlags.memDS = true; break;
        case "memES"   : mFlags.mem = true; mMemFlags.memES = true; break;
        case "memZAX"  : mFlags.mem = true; sRegMask |= 1 << 0; break;
        case "memZSI"  : mFlags.mem = true; sRegMask |= 1 << 6; break;
        case "memZDI"  : mFlags.mem = true; sRegMask |= 1 << 7; break;

        case "vm32x"   : mFlags.vm = true; mMemFlags.vm32x = true; break;
        case "vm32y"   : mFlags.vm = true; mMemFlags.vm32y = true; break;
        case "vm32z"   : mFlags.vm = true; mMemFlags.vm32z = true; break;
        case "vm64x"   : mFlags.vm = true; mMemFlags.vm64x = true; break;
        case "vm64y"   : mFlags.vm = true; mMemFlags.vm64y = true; break;
        case "vm64z"   : mFlags.vm = true; mMemFlags.vm64z = true; break;

        case "i4"      :
        case "u4"      :
        case "i8"      :
        case "u8"      :
        case "i16"     :
        case "u16"     :
        case "i32"     :
        case "u32"     :
        case "i64"     :
        case "u64"     : mFlags[k] = true; break;

        case "rel8"    :
        case "rel32"   :
          // Relative targets also accept absolute immediates.
          mFlags.i32 = true;
          mFlags.i64 = true;
          mFlags[k] = true;
          break;

        case "rel16"   :
          // rel16 is widened to rel32.
          mFlags.i32 = true;
          mFlags.i64 = true;
          mFlags.rel32 = true;
          break;

        default: {
          const fixed = fixedRegs[k];
          if (fixed) {
            mFlags[fixed[0]] = true;
            sRegMask |= 1 << fixed[1];
          }
          else {
            console.log(`UNKNOWN OPERAND '${k}'`);
          }
          break;
        }
      }
    }

    const sFlags    = StringifyArray(ArrayUtils.sorted(mFlags   , cmpOp), OpToAsmJitOp);
    const sMemFlags = StringifyArray(ArrayUtils.sorted(mMemFlags, cmpOp), OpToAsmJitOp);
    const sExtFlags = StringifyArray(ArrayUtils.sorted(mExtFlags, cmpOp), OpToAsmJitOp);

    return `ROW(${sFlags || 0}, ${sMemFlags || 0}, ${sExtFlags || 0}, ${decToHex(sRegMask, 2)})`;
  }
}
1185
// One instruction signature: an array of OSignature operands together with
// architecture availability and the number of implicit operands.
class ISignature extends Array {
  constructor(name) {
    super();
    this.name = name;
    this.x86 = false;
    this.x64 = false;
    this.implicit = 0; // Number of implicit operands.
  }

  simplify() {
    for (let i = 0; i < this.length; i++)
      this[i].simplify();
  }

  // True when both signatures have equal operands, position by position.
  opEquals(other) {
    const len = this.length;
    if (len !== other.length) return false;

    for (let i = 0; i < len; i++) {
      if (!this[i].equals(other[i]))
        return false;
    }
    return true;
  }

  // Tries to merge `other` into this signature; returns whether it merged.
  mergeWith(other) {
    // Identical architectures merge; so does [X86|X64] absorbing [X64].
    let archOk = this.x86 === other.x86 && this.x64 === other.x64;
    if (!archOk && this.x86 && this.x64 && !other.x86 && other.x64)
      archOk = true;

    // Refuse when arch is incompatible or implicit-operand counts differ.
    if (!archOk || this.implicit !== other.implicit)
      return false;

    // Refuse when operand counts differ.
    const len = this.length;
    if (len !== other.length)
      return false;

    // At most one operand may differ between the two signatures.
    let diffIndex = -1;
    for (let i = 0; i < len; i++) {
      if (this[i].xor(other[i]) === null) continue;
      if (diffIndex !== -1)
        return false;
      diffIndex = i;
    }

    // Bail if the merge at operand-level fails (typo "mergeWidth" fixed).
    if (diffIndex !== -1 && !this[diffIndex].mergeWith(other[diffIndex]))
      return false;

    this.x86 = this.x86 || other.x86;
    this.x64 = this.x64 || other.x64;
    return true;
  }

  toString() {
    return "{" + this.join(", ") + "}";
  }
}
1253
// An array of ISignature rows for a single instruction.
class SignatureArray extends Array {
  // Iterates over all signatures and marks memory operands whose explicit
  // size can be omitted (made implicit) because it is unambiguous given the
  // register operands used alongside them.
  calcImplicitMemSize() {
    // Calculates a hash-value (aka key) of all register operands specified by `regOps` in `inst`.
    function keyOf(inst, regOps) {
      let key = "";
      for (let i = 0; i < inst.length; i++) {
        const op = inst[i];
        if (regOps & (1 << i))
          key += "{" + ArrayUtils.sorted(MapUtils.and(op.flags, RegOp)).join("|") + "}";
      }
      return key || "?";
    }

    for (let aIndex = 0; aIndex < this.length; aIndex++) {
      const aInst = this[aIndex];
      const len = aInst.length;

      let memOp = "";
      let memPos = -1;
      let regOps = 0;
      let i;

      // Check if this instruction signature has a memory operand of explicit size.
      for (i = 0; i < len; i++) {
        const aOp = aInst[i];
        const mem = MapUtils.firstOf(aOp.flags, MemOp);

        if (mem) {
          // Stop if the memory operand has implicit-size or if there is more than one.
          if (aOp.flags.mem || memPos >= 0) {
            memPos = -1;
            break;
          }
          memOp = mem;
          memPos = i;
        }
        else if (MapUtils.anyOf(aOp.flags, RegOp)) {
          // Doesn't consider 'r/m' as we already checked 'm'.
          regOps |= (1 << i);
        }
      }

      if (memPos < 0)
        continue;

      // `sameSizeSet` collects signatures with the exact same explicit memory
      // operand at `memPos` and registers at the positions in `regOps`;
      // `diffSizeSet`/`diffSizeHash` collect signatures with a different
      // memory size but registers at the same positions.
      const sameSizeSet = [aInst];
      const diffSizeSet = [];
      const diffSizeHash = Object.create(null);

      for (let bIndex = 0; bIndex < this.length; bIndex++) {
        const bInst = this[bIndex];
        if (aIndex === bIndex || len !== bInst.length) continue;

        let hasMatch = 1;
        for (i = 0; i < len; i++) {
          if (i === memPos) continue;

          const reg = MapUtils.anyOf(bInst[i].flags, RegOp);
          if (regOps & (1 << i))
            hasMatch &= reg;
          else if (reg)
            hasMatch = 0;
        }

        if (hasMatch) {
          const bOp = bInst[memPos];
          if (bOp.flags.mem) continue;

          const mem = MapUtils.firstOf(bOp.flags, MemOp);
          if (mem === memOp) {
            sameSizeSet.push(bInst);
          }
          else if (mem) {
            const key = keyOf(bInst, regOps);
            diffSizeSet.push(bInst);
            if (!diffSizeHash[key])
              diffSizeHash[key] = [bInst];
            else
              diffSizeHash[key].push(bInst);
          }
        }
      }

      // The memory operand size is implicit when either:
      //   A) `diffSizeSet` is empty - the instruction uses a single memory
      //      size for all register combinations; or
      //   B) `diffSizeSet` contains only register signatures that never
      //      collide with the ones in `sameSizeSet`.
      let implicit = true;

      if (diffSizeSet.length) {
        // Case B: Find collisions in `sameSizeSet` and `diffSizeSet`.
        for (let bIndex = 0; bIndex < sameSizeSet.length; bIndex++) {
          const bInst = sameSizeSet[bIndex];
          const colliding = diffSizeHash[keyOf(bInst, regOps)];
          if (!colliding) continue;

          colliding.forEach((diffInst) => {
            if ((bInst.x86 && !diffInst.x86) || (!bInst.x86 && diffInst.x86)) {
              // If this is X86|ANY instruction and the other is X64, or vice-versa,
              // then keep this implicit as it won't do any harm. These instructions
              // cannot be mixed and it will make implicit the 32-bit one in cases
              // where X64 introduced 64-bit ones like `cvtsi2ss`.
            }
            else {
              implicit = false;
            }
          });
        }
      }

      // Patch all instructions to accept implicit-size memory operand.
      for (let bIndex = 0; bIndex < sameSizeSet.length; bIndex++) {
        const bInst = sameSizeSet[bIndex];
        if (implicit) bInst[memPos].flags.mem = true;

        if (!implicit)
          DEBUG(`${this.name}: Explicit: ${bInst}`);
      }
    }
  }

  simplify() {
    for (let i = 0; i < this.length; i++)
      this[i].simplify();
  }

  // Merges mergeable rows in-place, keeping the first row of each group.
  compact() {
    for (let i = 0; i < this.length; i++) {
      const row = this[i];
      let j = i + 1;
      while (j < this.length) {
        if (row.mergeWith(this[j]))
          this.splice(j, 1);
        else
          j++;
      }
    }
  }

  toString() {
    return `[${this.join(", ")}]`;
  }
}
1414
1415 class InstSignatureTable extends core.Task {
1416 constructor() {
1417 super("InstSignatureTable");
1418
1419 this.maxOpRows = 0;
1420 this.opBlackList = {
1421 "moff8" : true,
1422 "moff16": true,
1423 "moff32": true,
1424 "moff64": true
1425 };
1426 }
1427
// Builds `InstDB::_instSignatureTable[]` and `InstDB::_opSignatureTable[]`:
// collects the operand signatures of every instruction, deduplicates both
// operand rows and whole signature runs, and injects the generated C++
// tables into the output.
run() {
  const insts = this.ctx.insts;

  // First pass - build the signatures of each instruction so that
  // `maxOpRows` (the longest signature list) is known before indexing.
  insts.forEach((inst) => {
    inst.signatures = this.makeSignatures(Filter.noAltForm(inst.dbInsts));
    this.maxOpRows = Math.max(this.maxOpRows, inst.signatures.length);
  });

  // iSignature* - instruction signatures (rows referencing operand rows).
  // iSignatureMap maps row data to every index it occupies in iSignatureArr.
  const iSignatureMap = Object.create(null);
  const iSignatureArr = [];

  // oSignature* - deduplicated operand signatures (`ROW(...)` strings).
  const oSignatureMap = Object.create(null);
  const oSignatureArr = [];

  // Must be first to be assigned to zero.
  // NOTE(review): this entry maps to `[0]` (an array) whereas every other
  // `oSignatureMap` value is a plain index; `String([0])` stringifies to
  // "0" so the lookup below still emits the right text - presumably
  // intentional, but worth confirming.
  const oSignatureNone = "ROW(0, 0, 0, 0xFF)";
  oSignatureMap[oSignatureNone] = [0];
  oSignatureArr.push(oSignatureNone);

  // Returns the start index of an existing run in `iSignatureArr` whose
  // rows match `rows` exactly, 0 for an empty `rows`, or -1 if not found.
  function findSignaturesIndex(rows) {
    const len = rows.length;
    if (!len) return 0;

    // Candidate start positions are all indexes where the first row occurs.
    const indexes = iSignatureMap[rows[0].data];
    if (indexes === undefined) return -1;

    for (var i = 0; i < indexes.length; i++) {
      const index = indexes[i];
      if (index + len > iSignatureArr.length) continue;

      var ok = true;
      for (var j = 0; j < len; j++) {
        if (iSignatureArr[index + j].data !== rows[j].data) {
          ok = false;
          break;
        }
      }

      if (ok)
        return index;
    }

    return -1;
  }

  // Appends `signatures` to `iSignatureArr`, recording the index of every
  // row in `iSignatureMap`, and returns the start index of the run.
  function indexSignatures(signatures) {
    const result = iSignatureArr.length;

    for (var i = 0; i < signatures.length; i++) {
      const signature = signatures[i];
      const idx = iSignatureArr.length;

      if (!hasOwn.call(iSignatureMap, signature.data))
        iSignatureMap[signature.data] = [];

      iSignatureMap[signature.data].push(idx);
      iSignatureArr.push(signature);
    }

    return result;
  }

  // Process instructions from the most signature rows to the fewest so
  // shorter runs have a chance to reuse (overlap) longer runs that were
  // already stored.
  for (var len = this.maxOpRows; len >= 0; len--) {
    insts.forEach((inst) => {
      const signatures = inst.signatures;
      if (signatures.length === len) {
        const signatureEntries = [];
        for (var j = 0; j < len; j++) {
          const signature = signatures[j];

          var signatureEntry = `ROW(${signature.length}, ${signature.x86 ? 1 : 0}, ${signature.x64 ? 1 : 0}, ${signature.implicit}`;
          var signatureComment = signature.toString();

          // Emit the operand-signature index of each operand, deduplicating
          // operand rows via `oSignatureMap` on the fly.
          var x = 0;
          while (x < signature.length) {
            const h = signature[x].toAsmJitOpData();
            var index = -1;
            if (!hasOwn.call(oSignatureMap, h)) {
              index = oSignatureArr.length;
              oSignatureMap[h] = index;
              oSignatureArr.push(h);
            }
            else {
              index = oSignatureMap[h];
            }

            signatureEntry += `, ${String(index).padEnd(3)}`;
            x++;
          }

          // Pad the row to exactly 6 operand slots (index 0 == "none").
          while (x < 6) {
            signatureEntry += `, ${String(0).padEnd(3)}`;
            x++;
          }

          signatureEntry += `)`;
          signatureEntries.push({ data: signatureEntry, comment: signatureComment, refs: 0 });
        }

        var count = signatureEntries.length;
        var index = findSignaturesIndex(signatureEntries);

        if (index === -1)
          index = indexSignatures(signatureEntries);

        // Reference count is tracked on the first row of the run
        // (presumably consumed by `makeCxxArrayWithComment` - confirm).
        iSignatureArr[index].refs++;
        inst.signatureIndex = index;
        inst.signatureCount = count;
      }
    });
  }

  var s = `#define ROW(count, x86, x64, implicit, o0, o1, o2, o3, o4, o5) \\\n` +
          ` { count, (x86 ? uint8_t(InstDB::kModeX86) : uint8_t(0)) | \\\n` +
          ` (x64 ? uint8_t(InstDB::kModeX64) : uint8_t(0)) , \\\n` +
          ` implicit, \\\n` +
          ` 0, \\\n` +
          ` { o0, o1, o2, o3, o4, o5 } \\\n` +
          ` }\n` +
          StringUtils.makeCxxArrayWithComment(iSignatureArr, "const InstDB::InstSignature InstDB::_instSignatureTable[]") +
          `#undef ROW\n` +
          `\n` +
          `#define ROW(flags, mFlags, extFlags, regId) { uint32_t(flags), uint16_t(mFlags), uint8_t(extFlags), uint8_t(regId) }\n` +
          `#define F(VAL) InstDB::kOp##VAL\n` +
          `#define M(VAL) InstDB::kMemOp##VAL\n` +
          StringUtils.makeCxxArray(oSignatureArr, "const InstDB::OpSignature InstDB::_opSignatureTable[]") +
          `#undef M\n` +
          `#undef F\n` +
          `#undef ROW\n`;
  this.inject("InstSignatureTable", disclaimer(s), oSignatureArr.length * 8 + iSignatureArr.length * 8);
}
1559
// Translates asmdb instruction definitions into a `SignatureArray` of
// ISignature rows (one OSignature per operand). Forms containing operands
// asmjit doesn't support are skipped entirely.
makeSignatures(dbInsts) {
  const signatures = new SignatureArray();
  for (var i = 0; i < dbInsts.length; i++) {
    const inst = dbInsts[i];
    const ops = inst.operands;

    // NOTE: This changed from having reg|mem merged into creating two signatures
    // instead. Imagine two instructions in one `dbInsts` array:
    //
    // 1. mov reg, reg/mem
    // 2. mov reg/mem, reg
    //
    // If we merge them and then unmerge, we will have 4 signatures, when iterated:
    //
    // 1a. mov reg, reg
    // 1b. mov reg, mem
    // 2a. mov reg, reg
    // 2b. mov mem, reg
    //
    // So, instead of merging them here, we insert separated signatures and let
    // the tool merge them in a way that can be easily unmerged at runtime into:
    //
    // 1a. mov reg, reg
    // 1b. mov reg, mem
    // 2b. mov mem, reg
    //
    // `modrmCount` is bumped to 2 inside the loop when a reg|mem operand is
    // seen, scheduling a second pass that keeps `mem` instead of `reg`.
    var modrmCount = 1;
    for (var modrm = 0; modrm < modrmCount; modrm++) {
      var row = new ISignature(inst.name);
      row.x86 = (inst.arch === "ANY" || inst.arch === "X86");
      row.x64 = (inst.arch === "ANY" || inst.arch === "X64");

      for (var j = 0; j < ops.length; j++) {
        var iop = ops[j];

        var reg = iop.reg;
        var mem = iop.mem;
        var imm = iop.imm;
        var rel = iop.rel;

        // Terminate if this operand is something asmjit doesn't support
        // and skip all instructions having implicit `imm` operand of `1`,
        // which are handled fine by asmjit.
        if (this.opBlackList[mem] === true || iop.immValue !== null)
          break;

        // Normalize asmdb register/memory operand names to the names used
        // by asmjit's signature tables.
        if (reg === "r8") reg = "r8lo";
        if (reg === "seg") reg = "sreg";
        if (reg === "st(i)") reg = "st";
        if (reg === "st(0)") reg = "st0";

        if (mem === "m32fp") mem = "m32";
        if (mem === "m64fp") mem = "m64";
        if (mem === "m80fp") mem = "m80";
        if (mem === "m80bcd") mem = "m80";
        if (mem === "m80dec") mem = "m80";
        if (mem === "m16int") mem = "m16";
        if (mem === "m32int") mem = "m32";
        if (mem === "m64int") mem = "m64";

        if (mem === "m16_16") mem = "m32";
        if (mem === "m16_32") mem = "m48";
        if (mem === "m16_64") mem = "m80";

        // reg|mem operand: first pass keeps the reg form and schedules a
        // second pass (modrmCount++) which keeps the mem form instead.
        if (reg && mem) {
          if (modrmCount === 1) {
            mem = null;
            modrmCount++;
          }
          else {
            reg = null;
          }
        }

        const op = new OSignature();
        if (iop.implicit) {
          row.implicit++;
          op.flags.implicit = true;
        }

        // Implicit memory operands carry a segment and a base register.
        const seg = iop.memSeg;
        if (seg) {
          if (seg === "ds") op.flags.memDS = true;
          if (seg === "es") op.flags.memES = true;
          if (reg === "reg") { op.flags.memBase = true; }
          if (reg === "r32") { op.flags.memBase = true; }
          if (reg === "r64") { op.flags.memBase = true; }
          if (reg === "zax") { op.flags.memBase = true; op.flags.memZAX = true; }
          if (reg === "zsi") { op.flags.memBase = true; op.flags.memZSI = true; }
          if (reg === "zdi") { op.flags.memBase = true; op.flags.memZDI = true; }
        }
        else if (reg) {
          op.flags[reg] = true;
          if (reg === "r8lo") op.flags.r8hi = true;
        }
        if (mem) {
          op.flags[mem] = true;
          // Exception: Allow LEA to use any memory size.
          if (inst.name === "lea") MapUtils.add(op.flags, MemOp);
        }
        if (imm) {
          if (iop.immSign === "any" || iop.immSign === "signed" ) op.flags["i" + imm] = true;
          if (iop.immSign === "any" || iop.immSign === "unsigned") op.flags["u" + imm] = true;
        }
        if (rel) op.flags["rel" + rel] = true;

        row.push(op);
      }

      // Not equal if we terminated the loop.
      // (`j` is `var`-scoped, so it survives the loop and detects `break`.)
      if (j === ops.length)
        signatures.push(row);
    }
  }

  signatures.calcImplicitMemSize();
  // Simplify/compact is run twice - NOTE(review): presumably the first
  // compaction enables further simplification; confirm before removing.
  signatures.simplify();
  signatures.compact();

  signatures.simplify();
  signatures.compact();

  return signatures;
}
1683 }
1684
1685 // ============================================================================
1686 // [tablegen.x86.InstCommonInfoTableB]
1687 // ============================================================================
1688
// Generates `InstDB::_commonInfoTableB[]` (per-instruction CPU features and
// an index into the shared EFLAGS read/write table) together with the
// deduplicated `InstDB::_rwFlagsInfoTable[]` itself.
class InstCommonInfoTableB extends core.Task {
  constructor() {
    super("InstCommonInfoTableB");
  }

  run() {
    const insts = this.ctx.insts;
    const commonTableB = new IndexedArray();
    const rwInfoTable = new IndexedArray();

    // If the instruction doesn't read any flags it should point to the first index.
    rwInfoTable.addIndexed(`{ 0, 0 }`);

    insts.forEach((inst) => {
      const dbInsts = inst.dbInsts;

      // CPU features required by at least one form of the instruction.
      var features = GenUtils.cpuFeaturesOf(dbInsts).map(function(f) { return `EXT(${f})`; }).join(", ");
      if (!features) features = "0";

      const [r, w] = this.rwFlagsOf(dbInsts);
      const rData = r.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
      const wData = w.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
      const rwDataIndex = rwInfoTable.addIndexed(`{ ${rData}, ${wData} }`);

      // NOTE(review): `commomInfoIndexB` looks like a typo of
      // `commonInfoIndexB`, but its consumers are outside this file, so the
      // property name is kept as-is - confirm all readers before renaming.
      inst.commomInfoIndexB = commonTableB.addIndexed(`{ { ${features} }, ${rwDataIndex}, 0 }`);
    });

    var s = `#define EXT(VAL) uint32_t(Features::k##VAL)\n` +
            `const InstDB::CommonInfoTableB InstDB::_commonInfoTableB[] = {\n${StringUtils.format(commonTableB, kIndent, true)}\n};\n` +
            `#undef EXT\n` +
            `\n` +
            `#define FLAG(VAL) uint32_t(Status::k##VAL)\n` +
            `const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = {\n${StringUtils.format(rwInfoTable, kIndent, true)}\n};\n` +
            `#undef FLAG\n`;
    this.inject("InstCommonInfoTableB", disclaimer(s), commonTableB.length * 8 + rwInfoTable.length * 8);
  }

  // Aggregates EFLAGS / x87 status-word accesses of all forms of an
  // instruction and returns `[readFlags, writeFlags]` - two sorted arrays
  // of flag names.
  rwFlagsOf(dbInsts) {
    const r = Object.create(null);
    const w = Object.create(null);

    for (var i = 0; i < dbInsts.length; i++) {
      const dbInst = dbInsts[i];

      // Mov is a special case, moving to/from control regs makes flags undefined,
      // which we don't want to have in `X86InstDB::operationData`. This is, thus,
      // a special case instruction analyzer must deal with.
      // (The original code performed this exact check twice; once suffices.)
      if (dbInst.name === "mov")
        continue;

      const specialRegs = dbInst.specialRegs;

      for (var specialReg in specialRegs) {
        var flag = "";
        switch (specialReg) {
          case "FLAGS.CF": flag = "CF"; break;
          case "FLAGS.OF": flag = "OF"; break;
          case "FLAGS.SF": flag = "SF"; break;
          case "FLAGS.ZF": flag = "ZF"; break;
          case "FLAGS.AF": flag = "AF"; break;
          case "FLAGS.PF": flag = "PF"; break;
          case "FLAGS.DF": flag = "DF"; break;
          case "FLAGS.IF": flag = "IF"; break;
          //case "FLAGS.TF": flag = "TF"; break;
          case "FLAGS.AC": flag = "AC"; break;
          case "X86SW.C0": flag = "C0"; break;
          case "X86SW.C1": flag = "C1"; break;
          case "X86SW.C2": flag = "C2"; break;
          case "X86SW.C3": flag = "C3"; break;
          default:
            continue;
        }

        // "R" reads; "X" reads and writes; "W"/"U"/"0"/"1" (written,
        // undefined, cleared, set) all count as writes.
        switch (specialRegs[specialReg]) {
          case "R":
            r[flag] = true;
            break;
          case "X":
            r[flag] = true;
            // ... fallthrough ...
          case "W":
          case "U":
          case "0":
          case "1":
            w[flag] = true;
            break;
        }
      }
    }

    return [ArrayUtils.sorted(r), ArrayUtils.sorted(w)];
  }
}
1786
1787 // ============================================================================
1788 // [tablegen.x86.InstRWInfoTable]
1789 // ============================================================================
1790
// Instructions that must never be flagged as memory-ambiguous, even if the
// heuristic in `InstRWInfoTable.rmIsAmbiguous()` would report them as such.
const NOT_MEM_AMBIGUOUS = MapUtils.arrayToMap([
  "call", "movq"
]);
1794
// Generates the read/write info tables (`InstDB::rwInfo*`) describing, for
// every instruction, which operand bytes are read/written and how reg/mem
// operand forms relate to each other.
class InstRWInfoTable extends core.Task {
  constructor() {
    super("InstRWInfoTable");

    // Two entries per instruction are pushed into `rwInfoIndex` - one for
    // the 2-operand forms and one for all other forms (see `run()`).
    this.rwInfoIndex = [];
    this.rwInfoTable = new IndexedArray();
    this.rmInfoTable = new IndexedArray();
    this.opInfoTable = new IndexedArray();

    // Shorthand for empty operand slots in the tables below.
    const _ = null;

    // Instructions whose RW info cannot be derived generically - mapped to
    // a hand-written category handled by C++ code.
    this.rwCategoryByName = {
      "imul" : "Imul",
      "mov" : "Mov",
      "movhpd" : "Movh64",
      "movhps" : "Movh64",
      "vmaskmovpd": "Vmaskmov",
      "vmaskmovps": "Vmaskmov",
      "vmovddup" : "Vmovddup",
      "vmovmskpd" : "Vmovmskpd",
      "vmovmskps" : "Vmovmskps",
      "vpmaskmovd": "Vmaskmov",
      "vpmaskmovq": "Vmaskmov"
    };
    // Categories recognized by matching the exact per-operand RW data of
    // every instruction form. The `VmovN_M` names encode the write:read
    // width ratio (e.g. Vmov1_8 writes 1 unit per 8 units read).
    this.rwCategoryByData = {
      Vmov1_8: [
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 8}, {access: "R", flags: {}, fixed: -1, index: 0, width: 64},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 16}, {access: "R", flags: {}, fixed: -1, index: 0, width:128},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 32}, {access: "R", flags: {}, fixed: -1, index: 0, width:256},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 64}, {access: "R", flags: {}, fixed: -1, index: 0, width:512},_,_,_,_]
      ],
      Vmov1_4: [
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 32}, {access: "R", flags: {}, fixed: -1, index: 0, width:128},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 64}, {access: "R", flags: {}, fixed: -1, index: 0, width:256},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width:128}, {access: "R", flags: {}, fixed: -1, index: 0, width:512},_,_,_,_]
      ],
      Vmov1_2: [
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 64}, {access: "R", flags: {}, fixed: -1, index: 0, width:128},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width:128}, {access: "R", flags: {}, fixed: -1, index: 0, width:256},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width:256}, {access: "R", flags: {}, fixed: -1, index: 0, width:512},_,_,_,_]
      ],
      Vmov2_1: [
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 128}, {access: "R", flags: {}, fixed: -1, index: 0, width: 64},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 256}, {access: "R", flags: {}, fixed: -1, index: 0, width:128},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 512}, {access: "R", flags: {}, fixed: -1, index: 0, width:256},_,_,_,_]
      ],
      Vmov4_1: [
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 128}, {access: "R", flags: {}, fixed: -1, index: 0, width: 32},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 256}, {access: "R", flags: {}, fixed: -1, index: 0, width: 64},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 512}, {access: "R", flags: {}, fixed: -1, index: 0, width:128},_,_,_,_]
      ],
      Vmov8_1: [
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 128}, {access: "R", flags: {}, fixed: -1, index: 0, width: 16},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 256}, {access: "R", flags: {}, fixed: -1, index: 0, width: 32},_,_,_,_],
        [{access: "W", flags: {}, fixed: -1, index: 0, width: 512}, {access: "R", flags: {}, fixed: -1, index: 0, width: 64},_,_,_,_]
      ]
    };
  }

  // Builds all four tables and injects the generated C++ source.
  run() {
    const insts = this.ctx.insts;

    // Index 0 of `rmInfoTable` / `opInfoTable` is reserved for "no info".
    const noRmInfo = CxxUtils.struct(
      "InstDB::RWInfoRm::kCategory" + "None".padEnd(10),
      StringUtils.decToHex(0, 2),
      String(0).padEnd(2),
      CxxUtils.flags({}),
      "0"
    );

    const noOpInfo = CxxUtils.struct(
      "0x0000000000000000u",
      "0x0000000000000000u",
      "0xFF",
      CxxUtils.struct(0),
      "0"
    );

    this.rmInfoTable.addIndexed(noRmInfo);
    this.opInfoTable.addIndexed(noOpInfo);

    insts.forEach((inst) => {
      // Alternate forms would only mess this up, so filter them out.
      const dbInsts = Filter.noAltForm(inst.dbInsts);

      // The best we can do is to divide instructions that have 2 operands and others.
      // This gives us the highest chance of preventing special cases (which were not
      // entirely avoided).
      const o2Insts = dbInsts.filter((inst) => { return inst.operands.length === 2; });
      const oxInsts = dbInsts.filter((inst) => { return inst.operands.length !== 2; });

      const rwInfoArray = [this.rwInfo(o2Insts), this.rwInfo(oxInsts)];
      const rmInfoArray = [this.rmInfo(o2Insts), this.rmInfo(oxInsts)];

      for (var i = 0; i < 2; i++) {
        const rwInfo = rwInfoArray[i];
        const rmInfo = rmInfoArray[i];

        const rwOps = rwInfo.rwOps;
        const rwOpsIndex = [];
        for (var j = 0; j < rwOps.length; j++) {
          const op = rwOps[j];
          if (!op) {
            rwOpsIndex.push(this.opInfoTable.addIndexed(noOpInfo));
            continue;
          }

          const flags = {};
          const opAcc = op.access;

          if (opAcc === "R") flags.Read = true;
          if (opAcc === "W") flags.Write = true;
          if (opAcc === "X") flags.RW = true;
          Lang.merge(flags, op.flags);

          // Bit ranges for the read and written parts of the operand; a -1
          // index/width yields an empty byte mask.
          const rIndex = opAcc === "X" || opAcc === "R" ? op.index : -1;
          const rWidth = opAcc === "X" || opAcc === "R" ? op.width : -1;
          const wIndex = opAcc === "X" || opAcc === "W" ? op.index : -1;
          const wWidth = opAcc === "X" || opAcc === "W" ? op.width : -1;

          const opData = CxxUtils.struct(
            this.byteMaskFromBitRanges([{ start: rIndex, end: rIndex + rWidth - 1 }]) + "u",
            this.byteMaskFromBitRanges([{ start: wIndex, end: wIndex + wWidth - 1 }]) + "u",
            StringUtils.decToHex(op.fixed === -1 ? 0xFF : op.fixed, 2),
            CxxUtils.struct(0),
            CxxUtils.flags(flags, function(flag) { return "OpRWInfo::k" + flag; })
          );

          rwOpsIndex.push(this.opInfoTable.addIndexed(opData));
        }

        const rmData = CxxUtils.struct(
          "InstDB::RWInfoRm::kCategory" + rmInfo.category.padEnd(10),
          StringUtils.decToHex(rmInfo.rmIndexes, 2),
          String(Math.max(rmInfo.memFixed, 0)).padEnd(2),
          CxxUtils.flags({ "InstDB::RWInfoRm::kFlagAmbiguous": Boolean(rmInfo.memAmbiguous) }),
          rmInfo.memExtension === "None" ? "0" : "Features::k" + rmInfo.memExtension
        );

        const rwData = CxxUtils.struct(
          "InstDB::RWInfo::kCategory" + rwInfo.category.padEnd(10),
          String(this.rmInfoTable.addIndexed(rmData)).padEnd(2),
          CxxUtils.struct(...(rwOpsIndex.map(function(item) { return String(item).padEnd(2); })))
        );

        this.rwInfoIndex.push(this.rwInfoTable.addIndexed(rwData));
      }
    });

    var s = "";
    s += "const uint8_t InstDB::rwInfoIndex[Inst::_kIdCount * 2] = {\n" + StringUtils.format(this.rwInfoIndex, kIndent, -1) + "\n};\n";
    s += "\n";
    s += "const InstDB::RWInfo InstDB::rwInfo[] = {\n" + StringUtils.format(this.rwInfoTable, kIndent, true) + "\n};\n";
    s += "\n";
    s += "const InstDB::RWInfoOp InstDB::rwInfoOp[] = {\n" + StringUtils.format(this.opInfoTable, kIndent, true) + "\n};\n";
    s += "\n";
    s += "const InstDB::RWInfoRm InstDB::rwInfoRm[] = {\n" + StringUtils.format(this.rmInfoTable, kIndent, true) + "\n};\n";

    // Approximate byte size of the generated C++ tables (for statistics).
    const size = this.rwInfoIndex.length +
                 this.rwInfoTable.length * 8 +
                 this.rmInfoTable.length * 4 +
                 this.opInfoTable.length * 24;

    this.inject("InstRWInfoTable", disclaimer(s), size);
  }

  // Converts inclusive bit ranges (e.g. `{start: 0, end: 63}`) into a
  // 16-nibble hex string where each bit represents one byte of the operand
  // (bit N set => byte N accessed). Ranges with a negative start are
  // ignored; a range exceeding 512 bits is a fatal error.
  byteMaskFromBitRanges(ranges) {
    const arr = [];
    for (var i = 0; i < 64; i++)
      arr.push(0);

    for (var i = 0; i < ranges.length; i++) {
      const start = ranges[i].start;
      const end = ranges[i].end;

      if (start < 0)
        continue;

      for (var j = start; j <= end; j++) {
        const bytePos = j >> 3;
        if (bytePos < 0 || bytePos >= arr.length)
          FAIL(`Range ${start}:${end} cannot be used to create a byte-mask`);
        arr[bytePos] = 1;
      }
    }

    // Emit nibbles from the most significant to the least significant.
    var s = "0x";
    for (var i = arr.length - 4; i >= 0; i -= 4) {
      const value = (arr[i + 3] << 3) | (arr[i + 2] << 2) | (arr[i + 1] << 1) | arr[i];
      s += value.toString(16).toUpperCase();
    }
    return s;
  }

  // Read/Write Info
  // ---------------

  // Computes the RW category and per-operand RW data shared by all given
  // instruction forms. Returns `null` when the forms cannot be unified
  // (after dumping diagnostic data to the console).
  rwInfo(dbInsts) {
    // Returns an array of 6 empty (null) operand slots.
    function nullOps() {
      return [null, null, null, null, null, null];
    }

    // Converts an asmdb operand to the RW record used by the data-driven
    // category matching; `null` for non reg/mem operands.
    function makeRwFromOp(op) {
      if (!op.isRegOrMem())
        return null;

      return {
        access: op.read && op.write ? "X" : op.read ? "R" : op.write ? "W" : "?",
        flags: {},
        fixed: GenUtils.fixedRegOf(op.reg),
        index: op.rwxIndex,
        width: op.rwxWidth
      };
    }

    // Tries to unify all forms into a single "Generic" record. `step`
    // selects which operand index gets explicit index/width data (-1 means
    // all of them); the caller retries with increasing `step` values.
    function queryRwGeneric(dbInsts, step) {
      var rwOps = nullOps();
      for (var i = 0; i < dbInsts.length; i++) {
        const dbInst = dbInsts[i];
        const operands = dbInst.operands;

        for (var j = 0; j < operands.length; j++) {
          const op = operands[j];
          if (!op.isRegOrMem())
            continue;

          const opSize = op.isReg() ? op.regSize : op.memSize;
          var d = {
            access: op.read && op.write ? "X" : op.read ? "R" : op.write ? "W" : "?",
            flags: {},
            fixed: -1,
            index: -1,
            width: -1
          };

          if (op.isReg())
            d.fixed = GenUtils.fixedRegOf(op.reg);
          else
            d.fixed = GenUtils.fixedRegOf(op.mem);

          if (op.zext)
            d.flags.ZExt = true;

          if ((step === -1 || step === j) || op.rwxIndex !== 0 || op.rwxWidth !== opSize) {
            d.index = op.rwxIndex;
            d.width = op.rwxWidth;
          }

          if (d.fixed !== -1) {
            if (op.memSeg)
              d.flags.MemPhysId = true;
            else
              d.flags.RegPhysId = true;
          }

          if (rwOps[j] === null) {
            rwOps[j] = d;
          }
          else {
            // All forms must agree on everything except `fixed` / `flags`,
            // which are merged.
            if (!Lang.deepEqExcept(rwOps[j], d, { "fixed": true, "flags": true }))
              return null;

            if (rwOps[j].fixed === -1)
              rwOps[j].fixed = d.fixed;
            Lang.merge(rwOps[j].flags, d.flags);
          }
        }
      }
      return { category: "Generic", rwOps };
    }

    // Returns true when every form's RW data matches at least one row of
    // `rwOpsArray` (one of the `rwCategoryByData` tables).
    function queryRwByData(dbInsts, rwOpsArray) {
      for (var i = 0; i < dbInsts.length; i++) {
        const dbInst = dbInsts[i];
        const operands = dbInst.operands;
        const rwOps = nullOps();

        for (var j = 0; j < operands.length; j++)
          rwOps[j] = makeRwFromOp(operands[j])

        var match = 0;
        for (var j = 0; j < rwOpsArray.length; j++)
          match |= Lang.deepEq(rwOps, rwOpsArray[j]);

        if (!match)
          return false;
      }

      return true;
    }

    // Dumps deduplicated RW data of all forms - used for diagnostics only.
    function dumpRwToData(dbInsts) {
      const out = [];
      for (var i = 0; i < dbInsts.length; i++) {
        const dbInst = dbInsts[i];
        const operands = dbInst.operands;
        const rwOps = nullOps();

        for (var j = 0; j < operands.length; j++)
          rwOps[j] = makeRwFromOp(operands[j])

        if (ArrayUtils.deepIndexOf(out, rwOps) !== -1)
          continue;

        out.push(rwOps);
      }
      return out;
    }

    // Some instructions are just special...
    const name = dbInsts.length ? dbInsts[0].name : "";
    if (name in this.rwCategoryByName)
      return { category: this.rwCategoryByName[name], rwOps: nullOps() };

    // Generic rules.
    for (var i = -1; i <= 6; i++) {
      const rwInfo = queryRwGeneric(dbInsts, i);
      if (rwInfo)
        return rwInfo;
    }

    // Specific rules.
    for (var k in this.rwCategoryByData)
      if (queryRwByData(dbInsts, this.rwCategoryByData[k]))
        return { category: k, rwOps: nullOps() };

    // FAILURE: Missing data to categorize this instruction.
    if (name) {
      const items = dumpRwToData(dbInsts)
      console.log(`RW: ${dbInsts.length ? dbInsts[0].name : ""}:`);
      items.forEach((item) => {
        console.log(" " + JSON.stringify(item));
      });
    }

    return null;
  }

  // Reg/Mem Info
  // ------------

  // Computes how the memory form of the instruction relates to its register
  // form (category, replaceable operand indexes, fixed size, ambiguity, and
  // a gating CPU extension).
  rmInfo(dbInsts) {
    const info = {
      category: "None",
      rmIndexes: this.rmReplaceableIndexes(dbInsts),
      memFixed: this.rmFixedSize(dbInsts),
      memAmbiguous: this.rmIsAmbiguous(dbInsts),
      memConsistent: this.rmIsConsistent(dbInsts),
      memExtension: this.rmExtension(dbInsts)
    };

    if (info.memFixed !== -1)
      info.category = "Fixed";
    else if (info.memConsistent)
      info.category = "Consistent";
    else if (info.rmIndexes)
      info.category = this.rmReplaceableCategory(dbInsts);

    return info;
  }

  // Classifies the mem-operand size relative to the largest reg operand
  // across all forms: Fixed (equal), Half, Quarter, or Eighth. Forms that
  // disagree yield a per-instruction special-case category.
  rmReplaceableCategory(dbInsts) {
    var category = null;

    for (var i = 0; i < dbInsts.length; i++) {
      const dbInst = dbInsts[i];
      const operands = dbInst.operands;

      var rs = -1;
      var ms = -1;

      for (var j = 0; j < operands.length; j++) {
        const op = operands[j];
        if (op.isMem())
          ms = op.memSize;
        else if (op.isReg())
          rs = Math.max(rs, op.regSize);
      }

      var c = (rs === -1 ) ? "None" :
              (ms === -1 ) ? "None" :
              (ms === rs ) ? "Fixed" :
              (ms === rs / 2) ? "Half" :
              (ms === rs / 4) ? "Quarter" :
              (ms === rs / 8) ? "Eighth" : "Unknown";

      if (category === null)
        category = c;
      else if (category !== c) {
        if (dbInst.name === "mov" || dbInst.name === "vmovddup")
          return "None"; // Special case
        return StringUtils.capitalize(dbInst.name); // Special case.
      }
    }

    if (category === "Unknown")
      console.log(`Instruction '${dbInsts[0].name}' has no RMInfo category.`);

    return category || "Unknown";
  }

  // Returns a bit-mask of operand indexes whose mem operand also exists as
  // a pure-reg form in some other signature, or 0 when any mem operand has
  // no register counterpart.
  rmReplaceableIndexes(dbInsts) {
    // Builds a bit-mask of operand indexes for which `fn(operand)` is true.
    function maskOf(inst, fn) {
      var m = 0;
      var operands = inst.operands;
      for (var i = 0; i < operands.length; i++)
        if (fn(operands[i]))
          m |= (1 << i);
      return m;
    }

    function getRegIndexes(inst) { return maskOf(inst, function(op) { return op.isReg(); }); };
    function getMemIndexes(inst) { return maskOf(inst, function(op) { return op.isMem(); }); };

    var mask = 0;

    for (var i = 0; i < dbInsts.length; i++) {
      const dbInst = dbInsts[i];

      var mi = getMemIndexes(dbInst);
      var ri = getRegIndexes(dbInst) & ~mi;

      if (!mi)
        continue;

      // A mem operand is replaceable only if some form uses registers at
      // both the mem position and all reg positions of this form.
      const match = dbInsts.some((inst) => {
        var ti = getRegIndexes(inst);
        return ((ri & ti) === ri && (mi & ti) === mi);
      });

      if (!match)
        return 0;
      mask |= mi;
    }

    return mask;
  }

  // Returns the fixed memory-operand size in bytes when every form uses the
  // same mem operand type, otherwise -1.
  rmFixedSize(insts) {
    var savedOp = null;

    for (var i = 0; i < insts.length; i++) {
      const inst = insts[i];
      const operands = inst.operands;

      for (var j = 0; j < operands.length; j++) {
        const op = operands[j];
        if (op.mem) {
          if (savedOp && savedOp.mem !== op.mem)
            return -1;
          savedOp = op;
        }
      }
    }

    // `memSize` is in bits; the C++ table stores bytes.
    return savedOp ? Math.max(savedOp.memSize, 0) / 8 : -1;
  }

  // Returns 1 if the instruction has mem operands and every one of them is
  // a reg|mem operand whose register size equals the memory size, else 0.
  rmIsConsistent(insts) {
    var hasMem = 0;
    for (var i = 0; i < insts.length; i++) {
      const inst = insts[i];
      const operands = inst.operands;
      for (var j = 0; j < operands.length; j++) {
        const op = operands[j];
        if (op.mem) {
          hasMem = 1;
          if (!op.reg)
            return 0;
          if (asmdb.x86.Utils.regSize(op.reg) !== op.memSize)
            return 0;
        }
      }
    }
    return hasMem;
  }

  // Returns a 2-bit mask (bit 0 = X86 mode, bit 1 = X64 mode) of the modes
  // in which the memory operand size cannot be deduced from the remaining
  // operands (two signatures collapse to the same text when mem sizes are
  // erased).
  rmIsAmbiguous(dbInsts) {
    function isAmbiguous(dbInsts) {
      const memMap = {};
      const immMap = {};

      for (var i = 0; i < dbInsts.length; i++) {
        const dbInst = dbInsts[i];
        const operands = dbInst.operands;

        var memStr = "";
        var immStr = "";
        var hasMem = false;
        var hasImm = false;

        for (var j = 0; j < operands.length; j++) {
          const op = operands[j];
          if (j) {
            memStr += ", ";
            immStr += ", ";
          }

          if (op.isImm()) {
            immStr += "imm";
            hasImm = true;
          }
          else {
            immStr += op.toString();
          }

          if (op.mem) {
            memStr += "m";
            hasMem = true;
          }
          else {
            memStr += op.isImm() ? "imm" : op.toString();
          }
        }

        // Forms differing only in immediate width are not ambiguous.
        if (hasImm) {
          if (immMap[immStr] === true)
            continue;
          immMap[immStr] = true;
        }

        // Two distinct forms producing the same erased signature => ambiguous.
        if (hasMem) {
          if (memMap[memStr] === true)
            return 1;
          memMap[memStr] = true;
        }
      }
      return 0;
    }

    const uniqueInsts = Filter.unique(dbInsts);

    // Special cases.
    if (!dbInsts.length)
      return 0;

    if (NOT_MEM_AMBIGUOUS[dbInsts[0].name])
      return 0;

    return (isAmbiguous(Filter.byArch(uniqueInsts, "X86")) << 0) |
           (isAmbiguous(Filter.byArch(uniqueInsts, "X64")) << 1) ;
  }

  // Returns the name of the CPU extension that gates the memory form of a
  // few special instructions ("None" for everything else).
  rmExtension(dbInsts) {
    if (!dbInsts.length)
      return "None";

    const name = dbInsts[0].name;
    switch (name) {
      case "pextrw":
        return "SSE4_1";

      case "vpslldq":
      case "vpsrldq":
        return "AVX512_BW";

      default:
        return "None";
    }
  }
}
2355
2356 // ============================================================================
2357 // [tablegen.x86.InstCommonTable]
2358 // ============================================================================
2359
// Generates `InstDB::_commonInfoTable[]` - one deduplicated row per
// instruction combining its flags, signature range, control type, and
// single-register case. Depends on all tasks that compute those fields.
class InstCommonTable extends core.Task {
  constructor() {
    super("InstCommonTable", [
      "IdEnum",
      "NameTable",
      "InstSignatureTable",
      "InstCommonInfoTableB",
      "InstRWInfoTable"
    ]);
  }

  run() {
    const insts = this.ctx.insts;
    const table = new IndexedArray();

    insts.forEach((inst) => {
      // Flags are emitted as an OR-ed list of `F(...)` macros ("0" if none).
      const instFlags = inst.flags.map(function(flag) { return `F(${flag})`; }).join("|") || "0";

      // Each row's fields are padded to fixed columns for readability.
      const fields = [
        String(instFlags).padEnd(54),
        String(inst.signatureIndex).padEnd(3),
        String(inst.signatureCount).padEnd(2),
        `CONTROL(${inst.controlType})`.padEnd(16),
        `SINGLE_REG(${inst.singleRegCase})`.padEnd(16),
        "0"
      ];
      inst.commonInfoIndexA = table.addIndexed("{ " + fields.join(", ") + " }");
    });

    const lines = [
      "#define F(VAL) InstDB::kFlag##VAL",
      "#define CONTROL(VAL) Inst::kControl##VAL",
      "#define SINGLE_REG(VAL) InstDB::kSingleReg##VAL",
      `const InstDB::CommonInfo InstDB::_commonInfoTable[] = {\n${StringUtils.format(table, kIndent, true)}\n};`,
      "#undef SINGLE_REG",
      "#undef CONTROL",
      "#undef F"
    ];
    this.inject("InstCommonTable", disclaimer(lines.join("\n") + "\n"), table.length * 8);
  }
}
2399
2400 // ============================================================================
2401 // [Main]
2402 // ============================================================================
2403
// Create the X86 table generator, register every table-generation task in
// dependency order, and run them all.
const tableGen = new X86TableGen();

tableGen
  .addTask(new IdEnum())
  .addTask(new NameTable())
  .addTask(new AltOpcodeTable())
  .addTask(new InstSignatureTable())
  .addTask(new InstCommonInfoTableB())
  .addTask(new InstRWInfoTable())
  .addTask(new InstCommonTable())
  .run();
0 // [AsmJit]
1 // Machine Code Generation for C++.
2 //
3 // [License]
4 // ZLIB - See LICENSE.md file in the package.
5
6 // ============================================================================
7 // tablegen.js
8 //
9 // Provides core foundation for generating tables that AsmJit requires. This
10 // file should provide everything table generators need in general.
11 // ============================================================================
12
13 "use strict";
14
15 const VERBOSE = false;
16
17 // ============================================================================
18 // [Imports]
19 // ============================================================================
20
21 const fs = require("fs");
22 const hasOwn = Object.prototype.hasOwnProperty;
23
// Resolves the 'asmdb' dependency: a local checkout takes precedence over
// the globally installed package.
function loadAsmdb() {
  // Try to import a local 'asmdb' package, if available.
  try {
    return require("./asmdb");
  }
  catch (ex) {
    // Only a missing local module is acceptable; anything else is fatal.
    if (ex.code !== "MODULE_NOT_FOUND") {
      console.log(`FATAL ERROR: ${ex.message}`);
      throw ex;
    }
  }

  // Try to import global 'asmdb' package as local package is not available.
  return require("asmdb");
}

const asmdb = loadAsmdb();
exports.asmdb = asmdb;
40
41 // ============================================================================
42 // [Constants]
43 // ============================================================================
44
45 const kIndent = " ";
46 const kJustify = 119;
47 const kAsmJitRoot = "..";
48
49 exports.kIndent = kIndent;
50 exports.kJustify = kJustify;
51 exports.kAsmJitRoot = kAsmJitRoot;
52
53 // ============================================================================
54 // [Debugging]
55 // ============================================================================
56
// Logs `msg` only when `VERBOSE` is enabled; a no-op otherwise.
function DEBUG(msg) {
  if (!VERBOSE)
    return;
  console.log(msg);
}
exports.DEBUG = DEBUG;
62
// Logs a warning message (always printed, regardless of `VERBOSE`).
function WARN(msg) {
  console.log(msg);
}
exports.WARN = WARN;
67
// Reports a fatal error and throws `Error(msg)` - table generation cannot
// continue past a failure, so this never returns. The message is routed to
// stderr (`console.error`) so it stays visible when stdout is redirected.
function FAIL(msg) {
  console.error(`FATAL ERROR: ${msg}`);
  throw new Error(msg);
}
72 exports.FAIL = FAIL;
73
74 // ============================================================================
75 // [Lang]
76 // ============================================================================
77
// Identity helper - returns its argument unchanged.
function nop(value) { return value; }
79
// Generic deep-object helpers shared by the table generators.
class Lang {
  // Recursively merges `b` into `a` (in place) and returns `a`. Values that
  // are objects on both sides are merged; everything else is overwritten.
  static merge(a, b) {
    if (a === b)
      return a;

    for (var k in b) {
      const aVal = a[k];
      const bVal = b[k];

      if (typeof aVal === "object" && typeof bVal === "object")
        Lang.merge(aVal, bVal);
      else
        a[k] = bVal;
    }

    return a;
  }

  // Structural deep equality of two values (primitives, arrays, and plain
  // objects compared key-by-key).
  static deepEq(a, b) {
    if (a === b)
      return true;

    if (typeof a !== typeof b)
      return false;

    if (typeof a !== "object")
      return a === b;

    const aIsArray = Array.isArray(a);
    const bIsArray = Array.isArray(b);

    if (aIsArray || bIsArray) {
      if (aIsArray !== bIsArray)
        return false;

      const len = a.length;
      if (b.length !== len)
        return false;

      for (var i = 0; i < len; i++) {
        if (!Lang.deepEq(a[i], b[i]))
          return false;
      }
      return true;
    }

    // `typeof null === "object"`, so nulls must be handled explicitly.
    if (a === null || b === null)
      return a === b;

    for (var k in a) {
      if (!Object.prototype.hasOwnProperty.call(b, k) || !Lang.deepEq(a[k], b[k]))
        return false;
    }

    for (var k in b) {
      if (!Object.prototype.hasOwnProperty.call(a, k))
        return false;
    }

    return true;
  }

  // Like `deepEq`, but keys listed in `except` are ignored at the top level
  // of two plain objects.
  static deepEqExcept(a, b, except) {
    if (a === b)
      return true;

    if (typeof a !== "object" || typeof b !== "object" || Array.isArray(a) || Array.isArray(b))
      return Lang.deepEq(a, b);

    const hasOwnProp = Object.prototype.hasOwnProperty;

    for (var k in a) {
      if (!hasOwnProp.call(except, k) && (!hasOwnProp.call(b, k) || !Lang.deepEq(a[k], b[k])))
        return false;
    }

    for (var k in b) {
      if (!hasOwnProp.call(except, k) && !hasOwnProp.call(a, k))
        return false;
    }

    return true;
  }
}
154 exports.Lang = Lang;
155
156 // ============================================================================
157 // [StringUtils]
158 // ============================================================================
159
class StringUtils {
  // Coerces `x` to a String (default item mapper for `format()`).
  static asString(x) { return String(x); }

  // Coerces `s` to String and upper-cases its first character.
  // NOTE: unlike `upFirst()`, non-string input is coerced first, so
  // `capitalize(null)` yields "Null" while `upFirst(null)` yields "".
  static capitalize(s) {
    s = String(s);
    return !s ? s : s[0].toUpperCase() + s.substr(1);
  }

  // Strip leading / trailing whitespace, respectively.
  static trimLeft(s) { return s.replace(/^\s+/, ""); }
  static trimRight(s) { return s.replace(/\s+$/, ""); }

  // Upper-cases the first character; any falsy input yields "".
  static upFirst(s) {
    if (!s) return "";
    return s[0].toUpperCase() + s.substr(1);
  }

  // Formats `n` as an upper-case "0x..." literal zero-padded to `nPad`
  // hex digits; negative inputs map to their unsigned 32-bit value
  // (e.g. -1 -> 0xFFFFFFFF).
  static decToHex(n, nPad) {
    var hex = Number(n < 0 ? 0x100000000 + n : n).toString(16);
    while (nPad > hex.length)
      hex = "0" + hex;
    return "0x" + hex.toUpperCase();
  }

  // Formats `array` items (mapped through `mapFn`, default `asString`)
  // for embedding in generated C++ code.
  //
  // `showIndex` selects the layout:
  //   -1  - flow layout: comma-separated items wrapped at ~80 columns;
  //    0  - one item per line, no index comment;
  //   >0  - one item per line followed by a `// #<index>` comment, plus a
  //         `[ref=Nx]` suffix when `array` implements `refCountOf()`
  //         (see IndexedArray).
  static format(array, indent, showIndex, mapFn) {
    if (!mapFn)
      mapFn = StringUtils.asString;

    var s = "";
    var threshold = 80; // Next wrap column for the flow (-1) layout.

    if (showIndex === -1)
      s += indent;

    for (var i = 0; i < array.length; i++) {
      const item = array[i];
      const last = i === array.length - 1;

      if (showIndex !== -1)
        s += indent;

      s += mapFn(item);
      if (showIndex > 0) {
        s += `${last ? " " : ","} // #${i}`;
        if (typeof array.refCountOf === "function")
          s += ` [ref=${array.refCountOf(item)}x]`;
      }
      else if (!last) {
        s += ",";
      }

      if (showIndex === -1) {
        if (s.length >= threshold - 1 && !last) {
          s += "\n" + indent;
          threshold += 80;
        }
        else {
          if (!last) s += " ";
        }
      }
      else {
        if (!last) s += "\n";
      }
    }

    return s;
  }

  // Renders `array` as a C++ array definition: "<code> = { ... };".
  static makeCxxArray(array, code, indent) {
    if (!indent) indent = kIndent;
    return `${code} = {\n${indent}` + array.join(`,\n${indent}`) + `\n};\n`;
  }

  // Like `makeCxxArray()`, but each item is an object with `data`, `refs`
  // and `comment`; emits "data, // #i comment" rows (the "#i" part only
  // when the item has refs).
  static makeCxxArrayWithComment(array, code, indent) {
    if (!indent) indent = kIndent;
    var s = "";
    for (var i = 0; i < array.length; i++) {
      const last = i === array.length - 1;
      s += indent + array[i].data +
           (last ? " // " : ", // ") + (array[i].refs ? "#" + String(i) : "").padEnd(5) + array[i].comment + "\n";
    }
    return `${code} = {\n${s}};\n`;
  }

  // Wraps generated code in "Automatically generated" banner comments.
  static disclaimer(s) {
    return "// ------------------- Automatically generated, do not edit -------------------\n" +
           s +
           "// ----------------------------------------------------------------------------\n";
  }

  // Prefixes every non-empty line of `s` with `indentation`.
  static indent(s, indentation) {
    var lines = s.split(/\r?\n/g);
    if (indentation) {
      for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        if (line) lines[i] = indentation + line;
      }
    }

    return lines.join("\n");
  }

  // Replaces the region of `s` between the `start` and `end` markers with
  // `code`, re-indented to match the indentation found before the `start`
  // marker; FAILs when either marker is missing.
  static inject(s, start, end, code) {
    var iStart = s.indexOf(start);
    var iEnd = s.indexOf(end);

    if (iStart === -1)
      FAIL(`Utils.inject(): Couldn't locate start mark '${start}'`);

    if (iEnd === -1)
      FAIL(`Utils.inject(): Couldn't locate end mark '${end}'`);

    // Count the spaces preceding the start marker to learn its indentation.
    var nIndent = 0;
    while (iStart > 0 && s[iStart-1] === " ") {
      iStart--;
      nIndent++;
    }

    if (nIndent) {
      const indentation = " ".repeat(nIndent);
      code = StringUtils.indent(code, indentation) + indentation;
    }

    return s.substr(0, iStart + start.length + nIndent) + code + s.substr(iEnd);
  }

  // Returns a comparator that orders strings listed in `priorityArray`
  // first (by their position in it) and all remaining strings after,
  // alphabetically.
  static makePriorityCompare(priorityArray) {
    const map = Object.create(null);
    priorityArray.forEach((str, index) => { map[str] = index; });

    return function(a, b) {
      const ax = hasOwn.call(map, a) ? map[a] : Infinity;
      const bx = hasOwn.call(map, b) ? map[b] : Infinity;
      return ax != bx ? ax - bx : a < b ? -1 : a > b ? 1 : 0;
    }
  }
}
exports.StringUtils = StringUtils;
297
298 // ============================================================================
299 // [ArrayUtils]
300 // ============================================================================
301
class ArrayUtils {
  // Minimum of `fn(item)` over `arr` (`fn` defaults to identity);
  // returns `null` for an empty array.
  static min(arr, fn) {
    if (!arr.length)
      return null;

    const project = fn || nop;

    var best = project(arr[0]);
    for (var i = 1; i < arr.length; i++)
      best = Math.min(best, project(arr[i]));
    return best;
  }

  // Maximum of `fn(item)` over `arr` (`fn` defaults to identity);
  // returns `null` for an empty array.
  static max(arr, fn) {
    if (!arr.length)
      return null;

    const project = fn || nop;

    var best = project(arr[0]);
    for (var i = 1; i < arr.length; i++)
      best = Math.max(best, project(arr[i]));
    return best;
  }

  // Returns a sorted copy: an array input is sliced, an object input
  // yields its own property names.
  static sorted(obj, cmp) {
    const result = Array.isArray(obj) ? obj.slice() : Object.getOwnPropertyNames(obj);
    result.sort(cmp);
    return result;
  }

  // Index of the first element deep-equal to `what`, -1 when none matches.
  static deepIndexOf(arr, what) {
    return arr.findIndex((item) => Lang.deepEq(item, what));
  }
}
342 exports.ArrayUtils = ArrayUtils;
343
344 // ============================================================================
345 // [MapUtils]
346 // ============================================================================
347
class MapUtils {
  // Shallow-copies `map` into a fresh null-prototype object.
  static clone(map) {
    return Object.assign(Object.create(null), map);
  }

  // Builds a null-prototype map keyed by the items of `arr`; every key is
  // assigned `value` (defaults to `true`).
  static arrayToMap(arr, value) {
    if (value === undefined)
      value = true;

    const out = Object.create(null);
    for (const item of arr)
      out[item] = value;
    return out;
  }

  // True when `a` and `b` contain exactly the same set of keys.
  static equals(a, b) {
    for (var k in a)
      if (!hasOwn.call(b, k))
        return false;

    for (var k in b)
      if (!hasOwn.call(a, k))
        return false;

    return true;
  }

  // Returns the first key of `flags` also present in `map`, or undefined.
  static firstOf(map, flags) {
    for (var k in flags)
      if (hasOwn.call(map, k))
        return k;
    return undefined;
  }

  // True when at least one key of `flags` is present in `map`.
  static anyOf(map, flags) {
    return MapUtils.firstOf(map, flags) !== undefined;
  }

  // Copies all keys of `b` into `a` (in-place union) and returns `a`.
  static add(a, b) {
    for (var k in b)
      a[k] = b[k];
    return a;
  }

  // Key-set intersection of `a` and `b`; values become `true`.
  static and(a, b) {
    const out = Object.create(null);
    for (var k in a)
      if (hasOwn.call(b, k))
        out[k] = true;
    return out;
  }

  // Key-set symmetric difference of `a` and `b`; values become `true`.
  static xor(a, b) {
    const out = Object.create(null);
    for (var k in a)
      if (!hasOwn.call(b, k))
        out[k] = true;
    for (var k in b)
      if (!hasOwn.call(a, k))
        out[k] = true;
    return out;
  }
}
404 exports.MapUtils = MapUtils;
405
406 // ============================================================================
407 // [CxxUtils]
408 // ============================================================================
409
class CxxUtils {
  // Renders a map of booleans as a C++ "A | B | C" flag expression; keys
  // with falsy values are skipped and an empty result becomes "0". `fn`
  // (defaults to identity) maps each key to its C++ spelling.
  static flags(obj, fn) {
    const mapper = fn || nop;

    const parts = [];
    for (var k in obj)
      if (obj[k])
        parts.push(mapper(k));

    return parts.length ? parts.join(" | ") : "0";
  }

  // Renders arguments as a C++ aggregate initializer: "{ a, b, c }".
  static struct(...args) {
    return `{ ${args.join(", ")} }`;
  }
}
427 exports.CxxUtils = CxxUtils;
428
429 // ============================================================================
430 // [IndexedString]
431 // ============================================================================
432
433 // IndexedString is mostly used to merge all instruction names into a single
434 // string with external index. It's designed mostly for generating C++ tables.
435 //
436 // Consider the following cases in C++:
437 //
438 // a) static const char* const* instNames = { "add", "mov", "vpunpcklbw" };
439 //
440 // b) static const char instNames[] = { "add\0" "mov\0" "vpunpcklbw\0" };
441 // static const uint16_t instNameIndex[] = { 0, 4, 8 };
442 //
443 // The latter (b) has an advantage that it doesn't have to be relocated by the
444 // linker, which saves a lot of space in the resulting binary and a lot of CPU
445 // cycles (and memory) when the linker loads it. AsmJit supports thousands of
446 // instructions so each optimization like this makes it smaller and faster to
447 // load.
class IndexedString {
  constructor() {
    this.map = Object.create(null); // Key -> offset into the merged string (-1 until index()).
    this.array = [];                // Keys that get their own storage (non-suffix keys), sorted.
    this.size = -1;                 // Total size of the merged string; -1 until index() runs.
  }

  // Registers a string; its offset is resolved later by `index()`.
  add(s) {
    this.map[s] = -1;
  }

  // Computes the final layout (suffix compression): a key that is a
  // suffix of a longer key gets no storage of its own - it points into
  // the tail of the longer key instead. Only the remaining keys are
  // stored (sorted) in `this.array`.
  index() {
    const map = this.map;
    const array = this.array;
    const partialMap = Object.create(null);

    var k, kp;
    var i, len;

    // Create a map that will contain all keys and partial keys.
    // Each suffix `kp` maps to the longest registered key ending in `kp`.
    for (k in map) {
      if (!k) {
        partialMap[k] = k;
      }
      else {
        for (i = 0, len = k.length; i < len; i++) {
          kp = k.substr(i);
          if (!hasOwn.call(partialMap, kp) || partialMap[kp].length < len)
            partialMap[kp] = k;
        }
      }
    }

    // Create an array that will only contain keys that are needed
    // (keys that are not a proper suffix of some longer key).
    for (k in map)
      if (partialMap[k] === k)
        array.push(k);
    array.sort();

    // Create valid offsets to the `array` (+1 per key for the NUL byte).
    var offMap = Object.create(null);
    var offset = 0;

    for (i = 0, len = array.length; i < len; i++) {
      k = array[i];

      offMap[k] = offset;
      offset += k.length + 1;
    }
    this.size = offset;

    // Assign valid offsets to `map`: a suffix points into the tail of
    // the stored key that contains it.
    for (kp in map) {
      k = partialMap[kp];
      map[kp] = offMap[k] + k.length - kp.length;
    }
  }

  // Formats the stored keys as C++ string-literal lines ("a\0" "b\0" ...,
  // the last literal ending with '";'), wrapped at `justify` columns.
  // NOTE(review): if the very first literal alone exceeds `justify`, the
  // output starts with an empty line - harmless for the generated tables,
  // but worth confirming before reusing elsewhere.
  format(indent, justify) {
    if (this.size === -1)
      FAIL(`IndexedString.format(): not indexed yet, call index()`);

    const array = this.array;
    if (!justify) justify = 0;

    var i;
    var s = "";
    var line = "";

    for (i = 0; i < array.length; i++) {
      const item = "\"" + array[i] + ((i !== array.length - 1) ? "\\0\"" : "\";");
      const newl = line + (line ? " " : indent) + item;

      if (newl.length <= justify) {
        line = newl;
        continue;
      }
      else {
        s += line + "\n";
        line = indent + item;
      }
    }

    return s + line;
  }

  // Total byte size of the merged string (including NUL terminators).
  getSize() {
    if (this.size === -1)
      FAIL(`IndexedString.getSize(): Not indexed yet, call index()`);
    return this.size;
  }

  // Byte offset of key `k` within the merged string; FAILs on unknown keys.
  getIndex(k) {
    if (this.size === -1)
      FAIL(`IndexedString.getIndex(): Not indexed yet, call index()`);

    if (!hasOwn.call(this.map, k))
      FAIL(`IndexedString.getIndex(): Key '${k}' not found.`);

    return this.map[k];
  }
}
exports.IndexedString = IndexedString;
551
552 // ============================================================================
553 // [IndexedArray]
554 // ============================================================================
555
556 // IndexedArray is an Array replacement that allows to index each item inserted
557 // to it. Its main purpose is to avoid data duplication, if an item passed to
558 // `addIndexed()` is already within the Array then it's not inserted and the
559 // existing index is returned instead.
// Maps an item to its lookup key: strings are used verbatim, anything
// else is keyed by its JSON representation.
function IndexedArray_keyOf(item) {
  if (typeof item === "string")
    return item;
  return JSON.stringify(item);
}

class IndexedArray extends Array {
  constructor() {
    super();
    this._index = Object.create(null); // key -> { data: index, refCount }.
  }

  // How many times `item` was passed to `addIndexed()` (0 when never).
  refCountOf(item) {
    const entry = this._index[IndexedArray_keyOf(item)];
    return entry ? entry.refCount : 0;
  }

  // Inserts `item` unless an equal one is already stored; returns the
  // item's index either way and bumps its reference count.
  addIndexed(item) {
    const key = IndexedArray_keyOf(item);
    const entry = this._index[key];

    if (entry) {
      entry.refCount++;
      return entry.data;
    }

    const id = this.length;
    this._index[key] = { data: id, refCount: 1 };
    this.push(item);
    return id;
  }
}
595 exports.IndexedArray = IndexedArray;
596
597 // ============================================================================
598 // [Task]
599 // ============================================================================
600
601 // A base runnable task that can access the TableGen through `this.ctx`.
class Task {
  // `name` - unique task name, used for dependency resolution.
  // `deps` - names of tasks that must run before this one (see
  //          TableGen.runTasks()).
  constructor(name, deps) {
    this.ctx = null;        // Owning TableGen; assigned by TableGen.addTask().
    this.name = name || "";
    this.deps = deps || [];
  }

  // Convenience forwarder to TableGen.inject(); returns `this` for chaining.
  inject(key, str, size) {
    this.ctx.inject(key, str, size);
    return this;
  }

  // Entry point - concrete tasks must override this.
  run() {
    FAIL("Task.run(): Must be reimplemented");
  }
}
exports.Task = Task;
619
620 // ============================================================================
621 // [TableGen]
622 // ============================================================================
623
624 // Main context used to load, generate, and store instruction tables. The idea
625 // is to be extensible, so it stores 'Task's to be executed with minimal deps
626 // management.
class TableGen {
  // `arch` - architecture identifier (e.g. "x86"), made available to tasks.
  constructor(arch) {
    this.arch = arch;
    this.files = Object.create(null);      // File name -> { prev, data }.
    this.tableSizes = Object.create(null); // Injection key -> generated table size.

    this.tasks = [];
    this.taskMap = Object.create(null);

    this.insts = [];
    this.instMap = Object.create(null);

    this.aliases = [];
    // BUG FIX: this field was initialized as `this.aliasMem`, but
    // `addAlias()` writes to `this.aliasMap`, so every `addAlias()` call
    // threw a TypeError. The field name now matches its use.
    this.aliasMap = Object.create(null);
  }

  // --------------------------------------------------------------------------
  // [File Management]
  // --------------------------------------------------------------------------

  // Loads every file of `fileList` (relative to kAsmJitRoot), normalizing
  // CRLF to LF. The original content is kept in `prev` so save() can
  // detect modifications.
  load(fileList) {
    for (var i = 0; i < fileList.length; i++) {
      const file = fileList[i];
      const path = kAsmJitRoot + "/" + file;
      const data = fs.readFileSync(path, "utf8").replace(/\r\n/g, "\n");

      this.files[file] = {
        prev: data,
        data: data
      };
    }
    return this;
  }

  // Writes back every modified file, preserving the previous content in a
  // ".backup" file next to it.
  save() {
    for (var file in this.files) {
      const obj = this.files[file];
      if (obj.data !== obj.prev) {
        const path = kAsmJitRoot + "/" + file;
        console.log(`MODIFIED '${file}'`);

        fs.writeFileSync(path + ".backup", obj.prev, "utf8");
        fs.writeFileSync(path, obj.data, "utf8");
      }
    }
  }

  // Returns the current content of a loaded file; FAILs when not loaded.
  dataOfFile(file) {
    const obj = this.files[file];
    if (!obj)
      FAIL(`TableGen.dataOfFile(): File '${file}' not loaded`);
    return obj.data;
  }

  // Replaces the text between the "// ${key:Begin}" and "// ${key:End}"
  // markers in whichever loaded file contains them; optionally records
  // `size` for dumpTableSizes().
  inject(key, str, size) {
    const begin = "// ${" + key + ":Begin}\n";
    const end = "// ${" + key + ":End}\n";

    var done = false;
    for (var file in this.files) {
      const obj = this.files[file];
      const data = obj.data;

      if (data.indexOf(begin) !== -1) {
        obj.data = StringUtils.inject(data, begin, end, str);
        done = true;
        break;
      }
    }

    if (!done)
      FAIL(`TableGen.inject(): Cannot find '${key}'`);

    if (size)
      this.tableSizes[key] = size;

    return this;
  }

  // --------------------------------------------------------------------------
  // [Task Management]
  // --------------------------------------------------------------------------

  // Registers a named task; all of its dependencies must be added first.
  // (Diagnostics previously said "addModule"/"Module" - fixed to match
  // the actual method and terminology.)
  addTask(task) {
    if (!task.name)
      FAIL(`TableGen.addTask(): Task must have a name`);

    if (this.taskMap[task.name])
      FAIL(`TableGen.addTask(): Task '${task.name}' already added`);

    task.deps.forEach((dependency) => {
      if (!this.taskMap[dependency])
        FAIL(`TableGen.addTask(): Dependency '${dependency}' of task '${task.name}' doesn't exist`);
    });

    this.tasks.push(task);
    this.taskMap[task.name] = task;

    task.ctx = this;
    return this;
  }

  // Runs all registered tasks, repeatedly executing every task whose
  // dependencies have already completed; throws when no progress can be
  // made (cyclic or unsatisfiable dependency).
  runTasks() {
    const tasks = this.tasks;
    const tasksDone = Object.create(null);

    var pending = tasks.length;
    while (pending) {
      const oldPending = pending;
      const arrPending = [];

      for (var i = 0; i < tasks.length; i++) {
        const task = tasks[i];
        if (tasksDone[task.name])
          continue;

        if (task.deps.every((dependency) => { return tasksDone[dependency] === true; })) {
          task.run();
          tasksDone[task.name] = true;
          pending--;
        }
        else {
          arrPending.push(task.name);
        }
      }

      if (oldPending === pending)
        throw new Error(`TableGen.runTasks(): Tasks '${arrPending.join("|")}' stuck (cyclic dependency?)`);
    }
  }

  // --------------------------------------------------------------------------
  // [Instruction Management]
  // --------------------------------------------------------------------------

  // Registers an instruction object, assigning it a sequential `id`.
  addInst(inst) {
    if (this.instMap[inst.name])
      FAIL(`TableGen.addInst(): Instruction '${inst.name}' already added`);

    inst.id = this.insts.length;
    this.insts.push(inst);
    this.instMap[inst.name] = inst;

    return this;
  }

  // Registers `alias` as an alternative name of instruction `name`.
  addAlias(alias, name) {
    this.aliases.push(alias);
    this.aliasMap[alias] = name;

    return this;
  }

  // --------------------------------------------------------------------------
  // [Run]
  // --------------------------------------------------------------------------

  run() {
    this.onBeforeRun();
    this.runTasks();
    this.onAfterRun();
  }

  // --------------------------------------------------------------------------
  // [Other]
  // --------------------------------------------------------------------------

  // Prints the size of each generated table recorded via inject().
  dumpTableSizes() {
    const sizes = this.tableSizes;

    var pad = 26;
    var total = 0;

    for (var name in sizes) {
      const size = sizes[name];
      total += size;
      console.log(("Size of " + name).padEnd(pad) + ": " + size);
    }

    console.log("Size of all tables".padEnd(pad) + ": " + total);
  }

  // --------------------------------------------------------------------------
  // [Hooks]
  // --------------------------------------------------------------------------

  // Overridable hooks executed by run() around runTasks().
  onBeforeRun() {}
  onAfterRun() {}
}
816 exports.TableGen = TableGen;
817
818 // ============================================================================
819 // [IdEnum]
820 // ============================================================================
821
class IdEnum extends Task {
  constructor(name, deps) {
    super(name || "IdEnum", deps);
  }

  // Returns the doxygen comment text for one instruction; concrete
  // subclasses must override this.
  comment(name) {
    FAIL("IdEnum.comment(): Must be reimplemented");
  }

  // Emits the `kId...` enumerator list (the first entry gets "= 0", the
  // list ends with `_kIdCount`) and injects it at the "InstId" marker.
  run() {
    const insts = this.ctx.insts;

    var out = "";
    for (var i = 0; i < insts.length; i++) {
      const inst = insts[i];

      var line = `kId${inst.enum}${i === 0 ? " = 0" : ""},`;
      const text = this.comment(inst);

      if (text)
        line = line.padEnd(37) + "//!< " + text;

      out += line + "\n";
    }
    out += "_kIdCount\n";

    return this.ctx.inject("InstId", out);
  }
}
exports.IdEnum = IdEnum;
852
853 // ============================================================================
854 // [NameTable]
855 // ============================================================================
856
class NameTable extends Task {
  constructor(name, deps) {
    super(name || "NameTable", deps);
  }

  // Generates and injects:
  //   - "NameLimits": `kMaxNameSize` (length of the longest name);
  //   - "NameData": `InstDB::_nameData` (all instruction names merged into
  //     one suffix-compressed string, see IndexedString) and
  //     `InstDB::instNameIndex[26]` ({first, last + 1} instruction ids per
  //     leading letter 'a'..'z').
  // Also stores each instruction's `nameIndex` back onto the instruction.
  run() {
    const arch = this.ctx.arch; // NOTE(review): unused in this method.
    const none = "Inst::kIdNone";

    const insts = this.ctx.insts;
    const instNames = new IndexedString();

    const instFirst = new Array(26);
    const instLast = new Array(26);

    // Pass 1: collect all names and the maximum name length.
    var maxLength = 0;
    for (var i = 0; i < insts.length; i++) {
      const inst = insts[i];
      instNames.add(inst.name);
      maxLength = Math.max(maxLength, inst.name.length);
    }
    instNames.index();

    // Pass 2: resolve each name's offset and the per-letter id ranges.
    // NOTE(review): the {first, last} ranges are only contiguous if
    // `insts` ids are grouped by leading letter (i.e. sorted by name) -
    // confirm against the caller.
    for (var i = 0; i < insts.length; i++) {
      const inst = insts[i];
      const name = inst.name;
      const nameIndex = instNames.getIndex(name);

      const index = name.charCodeAt(0) - 'a'.charCodeAt(0); // Bucket by leading letter.
      if (index < 0 || index >= 26)
        FAIL(`TableGen.generateNameData(): Invalid lookup character '${name[0]}' of '${name}'`);

      inst.nameIndex = nameIndex;
      if (instFirst[index] === undefined)
        instFirst[index] = `Inst::kId${inst.enum}`;
      instLast[index] = `Inst::kId${inst.enum}`;
    }

    var s = "";
    s += `const char InstDB::_nameData[] =\n${instNames.format(kIndent, kJustify)}\n`;
    s += `\n`;
    s += `const InstDB::InstNameIndex InstDB::instNameIndex[26] = {\n`;
    for (var i = 0; i < instFirst.length; i++) {
      const firstId = instFirst[i] || none; // Letters with no instruction map to kIdNone.
      const lastId = instLast[i] || none;

      s += `  { ${String(firstId).padEnd(22)}, ${String(lastId).padEnd(22)} + 1 }`;
      if (i !== 26 - 1)
        s += `,`;
      s += `\n`;
    }
    s += `};\n`;

    this.ctx.inject("NameLimits",
      StringUtils.disclaimer(`enum : uint32_t { kMaxNameSize = ${maxLength} };\n`));

    // Reported size: merged string bytes + 26 index entries
    // (assumed 4 bytes per InstNameIndex entry - TODO confirm).
    return this.ctx.inject("NameData", StringUtils.disclaimer(s), instNames.getSize() + 26 * 4);
  }
}
exports.NameTable = NameTable;
#!/bin/sh

# Regenerates the X86 instruction tables by running the tablegen script.
node ./tablegen-x86.js