|
|
|||
File indexing completed on 2026-05-07 08:53:40
0001 0002 /*---------------------------------------------------------------*/ 0003 /*--- begin libvex_ir.h ---*/ 0004 /*---------------------------------------------------------------*/ 0005 0006 /* 0007 This file is part of Valgrind, a dynamic binary instrumentation 0008 framework. 0009 0010 Copyright (C) 2004-2017 OpenWorks LLP 0011 info@open-works.net 0012 0013 This program is free software; you can redistribute it and/or 0014 modify it under the terms of the GNU General Public License as 0015 published by the Free Software Foundation; either version 3 of the 0016 License, or (at your option) any later version. 0017 0018 This program is distributed in the hope that it will be useful, but 0019 WITHOUT ANY WARRANTY; without even the implied warranty of 0020 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0021 General Public License for more details. 0022 0023 You should have received a copy of the GNU General Public License 0024 along with this program; if not, see <http://www.gnu.org/licenses/>. 0025 0026 The GNU General Public License is contained in the file COPYING. 0027 0028 Neither the names of the U.S. Department of Energy nor the 0029 University of California nor the names of its contributors may be 0030 used to endorse or promote products derived from this software 0031 without prior written permission. 0032 */ 0033 0034 #ifndef __LIBVEX_IR_H 0035 #define __LIBVEX_IR_H 0036 0037 #include "libvex_basictypes.h" 0038 0039 0040 /*---------------------------------------------------------------*/ 0041 /*--- High-level IR description ---*/ 0042 /*---------------------------------------------------------------*/ 0043 0044 /* Vex IR is an architecture-neutral intermediate representation. 0045 Unlike some IRs in systems similar to Vex, it is not like assembly 0046 language (ie. a list of instructions). Rather, it is more like the 0047 IR that might be used in a compiler. 
0048 0049    Code blocks 0050    ~~~~~~~~~~~ 0051    The code is broken into small code blocks ("superblocks", type: 0052    'IRSB').  Each code block typically represents from 1 to perhaps 50 0053    instructions.  IRSBs are single-entry, multiple-exit code blocks. 0054    Each IRSB contains three things: 0055    - a type environment, which indicates the type of each temporary 0056      value present in the IRSB 0057    - a list of statements, which represent code 0058    - a jump that exits from the end of the IRSB 0059    Because the blocks are multiple-exit, there can be additional 0060    conditional exit statements that cause control to leave the IRSB 0061    before the final exit.  Also because of this, IRSBs can cover 0062    multiple non-consecutive sequences of code (up to 3).  These are 0063    recorded in the type VexGuestExtents (see libvex.h). 0064 0065    Statements and expressions 0066    ~~~~~~~~~~~~~~~~~~~~~~~~~~ 0067    Statements (type 'IRStmt') represent operations with side-effects, 0068    eg.  guest register writes, stores, and assignments to temporaries. 0069    Expressions (type 'IRExpr') represent operations without 0070    side-effects, eg.  arithmetic operations, loads, constants. 0071    Expressions can contain sub-expressions, forming expression trees, 0072    eg. (3 + (4 * load(addr1))). 0073 0074    Storage of guest state 0075    ~~~~~~~~~~~~~~~~~~~~~~ 0076    The "guest state" contains the guest registers of the guest machine 0077    (ie. the machine that we are simulating).  It is stored by default 0078    in a block of memory supplied by the user of the VEX library, 0079    generally referred to as the guest state (area).  To operate on 0080    these registers, one must first read ("Get") them from the guest 0081    state into a temporary value.  Afterwards, one can write ("Put") 0082    them back into the guest state. 
0083 0084 Get and Put are characterised by a byte offset into the guest 0085 state, a small integer which effectively gives the identity of the 0086 referenced guest register, and a type, which indicates the size of 0087 the value to be transferred. 0088 0089 The basic "Get" and "Put" operations are sufficient to model normal 0090 fixed registers on the guest. Selected areas of the guest state 0091 can be treated as a circular array of registers (type: 0092 'IRRegArray'), which can be indexed at run-time. This is done with 0093 the "GetI" and "PutI" primitives. This is necessary to describe 0094 rotating register files, for example the x87 FPU stack, SPARC 0095 register windows, and the Itanium register files. 0096 0097 Examples, and flattened vs. unflattened code 0098 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 0099 For example, consider this x86 instruction: 0100 0101 addl %eax, %ebx 0102 0103 One Vex IR translation for this code would be this: 0104 0105 ------ IMark(0x24F275, 7, 0) ------ 0106 t3 = GET:I32(0) # get %eax, a 32-bit integer 0107 t2 = GET:I32(12) # get %ebx, a 32-bit integer 0108 t1 = Add32(t3,t2) # addl 0109 PUT(0) = t1 # put %eax 0110 0111 (For simplicity, this ignores the effects on the condition codes, and 0112 the update of the instruction pointer.) 0113 0114 The "IMark" is an IR statement that doesn't represent actual code. 0115 Instead it indicates the address and length of the original 0116 instruction. The numbers 0 and 12 are offsets into the guest state 0117 for %eax and %ebx. The full list of offsets for an architecture 0118 <ARCH> can be found in the type VexGuest<ARCH>State in the file 0119 VEX/pub/libvex_guest_<ARCH>.h. 
0120 0121    The five statements in this example are: 0122    - the IMark 0123    - three assignments to temporaries 0124    - one register write (put) 0125 0126    The six expressions in this example are: 0127    - two register reads (gets) 0128    - one arithmetic (add) operation 0129    - three temporaries (two nested within the Add32, one in the PUT) 0130 0131    The above IR is "flattened", ie. all sub-expressions are "atoms", 0132    either constants or temporaries.  An equivalent, unflattened version 0133    would be: 0134 0135      PUT(0) = Add32(GET:I32(0), GET:I32(12)) 0136 0137    IR is guaranteed to be flattened at instrumentation-time.  This makes 0138    instrumentation easier.  Equivalent flattened and unflattened IR 0139    typically results in the same generated code. 0140 0141    Another example, this one showing loads and stores: 0142 0143      addl %edx,4(%eax) 0144 0145    This becomes (again ignoring condition code and instruction pointer 0146    updates): 0147 0148      ------ IMark(0x4000ABA, 3, 0) ------ 0149      t3 = Add32(GET:I32(0),0x4:I32) 0150      t2 = LDle:I32(t3) 0151      t1 = GET:I32(8) 0152      t0 = Add32(t2,t1) 0153      STle(t3) = t0 0154 0155    The "le" in "LDle" and "STle" is short for "little-endian". 0156 0157    No need for deallocations 0158    ~~~~~~~~~~~~~~~~~~~~~~~~~ 0159    Although there are allocation functions for various data structures 0160    in this file, there are no deallocation functions.  This is because 0161    Vex uses a memory allocation scheme that automatically reclaims the 0162    memory used by allocated structures once translation is completed. 0163    This makes things easier for tools that instrument/transform code 0164    blocks. 0165 0166    SSAness and typing 0167    ~~~~~~~~~~~~~~~~~~ 0168    The IR is fully typed.  For every IRSB (IR block) it is possible to 0169    say unambiguously whether or not it is correctly typed. 0170    Incorrectly typed IR has no meaning and VEX will refuse to 0171    process it.  At various points during processing VEX typechecks the 0172    IR and aborts if any violations are found. 
This seems overkill but 0173 makes it a great deal easier to build a reliable JIT. 0174 0175 IR also has the SSA property. SSA stands for Static Single 0176 Assignment, and what it means is that each IR temporary may be 0177 assigned to only once. This idea became widely used in compiler 0178 construction in the mid to late 90s. It makes many IR-level 0179 transformations/code improvements easier, simpler and faster. 0180 Whenever it typechecks an IR block, VEX also checks the SSA 0181 property holds, and will abort if not so. So SSAness is 0182 mechanically and rigidly enforced. 0183 */ 0184 0185 /*---------------------------------------------------------------*/ 0186 /*--- Type definitions for the IR ---*/ 0187 /*---------------------------------------------------------------*/ 0188 0189 /* General comments about naming schemes: 0190 0191 All publically visible functions contain the name of the primary 0192 type on which they operate (IRFoo, IRBar, etc). Hence you should 0193 be able to identify these functions by grepping for "IR[A-Z]". 0194 0195 For some type 'IRFoo': 0196 0197 - ppIRFoo is the printing method for IRFoo, printing it to the 0198 output channel specified in the LibVEX_Initialise call. 0199 0200 - eqIRFoo is a structural equality predicate for IRFoos. 0201 0202 - deepCopyIRFoo is a deep copy constructor for IRFoos. 0203 It recursively traverses the entire argument tree and 0204 produces a complete new tree. All types have a deep copy 0205 constructor. 0206 0207 - shallowCopyIRFoo is the shallow copy constructor for IRFoos. 0208 It creates a new top-level copy of the supplied object, 0209 but does not copy any sub-objects. Only some types have a 0210 shallow copy constructor. 0211 */ 0212 0213 /* ------------------ Types ------------------ */ 0214 0215 /* A type indicates the size of a value, and whether it's an integer, a 0216 float, or a vector (SIMD) value. 
*/ 0217 typedef 0218 enum { 0219 Ity_INVALID=0x1100, 0220 Ity_I1, 0221 Ity_I8, 0222 Ity_I16, 0223 Ity_I32, 0224 Ity_I64, 0225 Ity_I128, /* 128-bit scalar */ 0226 Ity_F16, /* 16 bit float */ 0227 Ity_F32, /* IEEE 754 float */ 0228 Ity_F64, /* IEEE 754 double */ 0229 Ity_D32, /* 32-bit Decimal floating point */ 0230 Ity_D64, /* 64-bit Decimal floating point */ 0231 Ity_D128, /* 128-bit Decimal floating point */ 0232 Ity_F128, /* 128-bit floating point; implementation defined */ 0233 Ity_V128, /* 128-bit SIMD */ 0234 Ity_V256 /* 256-bit SIMD */ 0235 } 0236 IRType; 0237 0238 /* Pretty-print an IRType */ 0239 extern void ppIRType ( IRType ); 0240 0241 /* Get the size (in bytes) of an IRType */ 0242 extern Int sizeofIRType ( IRType ); 0243 0244 /* Translate 1/2/4/8 into Ity_I{8,16,32,64} respectively. Asserts on 0245 any other input. */ 0246 extern IRType integerIRTypeOfSize ( Int szB ); 0247 0248 0249 /* ------------------ Endianness ------------------ */ 0250 0251 /* IREndness is used in load IRExprs and store IRStmts. */ 0252 typedef 0253 enum { 0254 Iend_LE=0x1200, /* little endian */ 0255 Iend_BE /* big endian */ 0256 } 0257 IREndness; 0258 0259 0260 /* ------------------ Constants ------------------ */ 0261 0262 /* IRConsts are used within 'Const' and 'Exit' IRExprs. */ 0263 0264 /* The various kinds of constant. */ 0265 typedef 0266 enum { 0267 Ico_U1=0x1300, 0268 Ico_U8, 0269 Ico_U16, 0270 Ico_U32, 0271 Ico_U64, 0272 Ico_U128, /* 128-bit restricted integer constant, 0273 same encoding scheme as V128 */ 0274 Ico_F32, /* 32-bit IEEE754 floating */ 0275 Ico_F32i, /* 32-bit unsigned int to be interpreted literally 0276 as a IEEE754 single value. */ 0277 Ico_F64, /* 64-bit IEEE754 floating */ 0278 Ico_F64i, /* 64-bit unsigned int to be interpreted literally 0279 as a IEEE754 double value. 
*/ 0280 Ico_V128, /* 128-bit restricted vector constant, with 1 bit 0281 (repeated 8 times) for each of the 16 x 1-byte lanes */ 0282 Ico_V256 /* 256-bit restricted vector constant, with 1 bit 0283 (repeated 8 times) for each of the 32 x 1-byte lanes */ 0284 } 0285 IRConstTag; 0286 0287 /* A constant. Stored as a tagged union. 'tag' indicates what kind of 0288 constant this is. 'Ico' is the union that holds the fields. If an 0289 IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant, 0290 and its value can be accessed with 'c.Ico.U32'. */ 0291 typedef 0292 struct _IRConst { 0293 IRConstTag tag; 0294 union { 0295 Bool U1; 0296 UChar U8; 0297 UShort U16; 0298 UInt U32; 0299 ULong U64; 0300 UShort U128; 0301 Float F32; 0302 UInt F32i; 0303 Double F64; 0304 ULong F64i; 0305 UShort V128; /* 16-bit value; see Ico_V128 comment above */ 0306 UInt V256; /* 32-bit value; see Ico_V256 comment above */ 0307 } Ico; 0308 } 0309 IRConst; 0310 0311 /* IRConst constructors */ 0312 extern IRConst* IRConst_U1 ( Bool ); 0313 extern IRConst* IRConst_U8 ( UChar ); 0314 extern IRConst* IRConst_U16 ( UShort ); 0315 extern IRConst* IRConst_U32 ( UInt ); 0316 extern IRConst* IRConst_U64 ( ULong ); 0317 extern IRConst* IRConst_U128 ( UShort ); 0318 extern IRConst* IRConst_F32 ( Float ); 0319 extern IRConst* IRConst_F32i ( UInt ); 0320 extern IRConst* IRConst_F64 ( Double ); 0321 extern IRConst* IRConst_F64i ( ULong ); 0322 extern IRConst* IRConst_V128 ( UShort ); 0323 extern IRConst* IRConst_V256 ( UInt ); 0324 0325 /* Deep-copy an IRConst */ 0326 extern IRConst* deepCopyIRConst ( const IRConst* ); 0327 0328 /* Pretty-print an IRConst */ 0329 extern void ppIRConst ( const IRConst* ); 0330 0331 /* Compare two IRConsts for equality */ 0332 extern Bool eqIRConst ( const IRConst*, const IRConst* ); 0333 0334 0335 /* ------------------ Call targets ------------------ */ 0336 0337 /* Describes a helper function to call. 
The name part is purely for 0338 pretty printing and not actually used. regparms=n tells the back 0339 end that the callee has been declared 0340 "__attribute__((regparm(n)))", although indirectly using the 0341 VEX_REGPARM(n) macro. On some targets (x86) the back end will need 0342 to construct a non-standard sequence to call a function declared 0343 like this. 0344 0345 mcx_mask is a sop to Memcheck. It indicates which args should be 0346 considered 'always defined' when lazily computing definedness of 0347 the result. Bit 0 of mcx_mask corresponds to args[0], bit 1 to 0348 args[1], etc. If a bit is set, the corresponding arg is excluded 0349 (hence "x" in "mcx") from definedness checking. 0350 */ 0351 0352 typedef 0353 struct { 0354 Int regparms; 0355 const HChar* name; 0356 void* addr; 0357 UInt mcx_mask; 0358 } 0359 IRCallee; 0360 0361 /* Create an IRCallee. */ 0362 extern IRCallee* mkIRCallee ( Int regparms, const HChar* name, void* addr ); 0363 0364 /* Deep-copy an IRCallee. */ 0365 extern IRCallee* deepCopyIRCallee ( const IRCallee* ); 0366 0367 /* Pretty-print an IRCallee. */ 0368 extern void ppIRCallee ( const IRCallee* ); 0369 0370 0371 /* ------------------ Guest state arrays ------------------ */ 0372 0373 /* This describes a section of the guest state that we want to 0374 be able to index at run time, so as to be able to describe 0375 indexed or rotating register files on the guest. 
*/ 0376 typedef 0377 struct { 0378 Int base; /* guest state offset of start of indexed area */ 0379 IRType elemTy; /* type of each element in the indexed area */ 0380 Int nElems; /* number of elements in the indexed area */ 0381 } 0382 IRRegArray; 0383 0384 extern IRRegArray* mkIRRegArray ( Int, IRType, Int ); 0385 0386 extern IRRegArray* deepCopyIRRegArray ( const IRRegArray* ); 0387 0388 extern void ppIRRegArray ( const IRRegArray* ); 0389 extern Bool eqIRRegArray ( const IRRegArray*, const IRRegArray* ); 0390 0391 0392 /* ------------------ Temporaries ------------------ */ 0393 0394 /* This represents a temporary, eg. t1. The IR optimiser relies on the 0395 fact that IRTemps are 32-bit ints. Do not change them to be ints of 0396 any other size. */ 0397 typedef UInt IRTemp; 0398 0399 /* Pretty-print an IRTemp. */ 0400 extern void ppIRTemp ( IRTemp ); 0401 0402 #define IRTemp_INVALID ((IRTemp)0xFFFFFFFF) 0403 0404 0405 /* --------------- Primops (arity 1,2,3 and 4) --------------- */ 0406 0407 /* Primitive operations that are used in Unop, Binop, Triop and Qop 0408 IRExprs. Once we take into account integer, floating point and SIMD 0409 operations of all the different sizes, there are quite a lot of them. 0410 Most instructions supported by the architectures that Vex supports 0411 (x86, PPC, etc) are represented. Some more obscure ones (eg. cpuid) 0412 are not; they are instead handled with dirty helpers that emulate 0413 their functionality. Such obscure ones are thus not directly visible 0414 in the IR, but their effects on guest state (memory and registers) 0415 are made visible via the annotations in IRDirty structures. 0416 0417 2018-Dec-27: some of int<->fp conversion operations have been renamed so as 0418 to have a trailing _DEP, meaning "deprecated". This is because they don't 0419 specify a rounding mode to be used for the conversion and so are 0420 underspecified. 
Their use should be replaced with equivalents that do 0421 specify a rounding mode, either as a first argument or using a suffix on the 0422 name, that indicates the rounding mode to use. 0423 */ 0424 typedef 0425 enum { 0426 /* -- Do not change this ordering. The IR generators rely on 0427 (eg) Iop_Add64 == IopAdd8 + 3. -- */ 0428 0429 Iop_INVALID=0x1400, 0430 Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64, 0431 Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64, 0432 /* Signless mul. MullS/MullU is elsewhere. */ 0433 Iop_Mul8, Iop_Mul16, Iop_Mul32, Iop_Mul64, 0434 Iop_Or8, Iop_Or16, Iop_Or32, Iop_Or64, 0435 Iop_And8, Iop_And16, Iop_And32, Iop_And64, 0436 Iop_Xor8, Iop_Xor16, Iop_Xor32, Iop_Xor64, 0437 /* Bitwise shift ops 0438 Semantics as per C standard: 0439 If the value of the right operand is negative or is greater 0440 than or equal to the width of the left operand, the behaviour is 0441 undefined. 0442 For Shl: The result of E1 << E2 is E1 left-shifted E2 bit positions. 0443 Vacated bits are filled with zeroes. 0444 For Shr: The result of E1 >> E2 is E1 right-shifted E2 bit positions. 0445 Vacated bits are filled with zeroes. 0446 For Sar: The result of E1 >> E2 is E1 right-shifted E2 bit positions. 0447 Vacated bits are filled with the most significant bit of E1 prior 0448 to shifting. */ 0449 Iop_Shl8, Iop_Shl16, Iop_Shl32, Iop_Shl64, 0450 Iop_Shr8, Iop_Shr16, Iop_Shr32, Iop_Shr64, 0451 Iop_Sar8, Iop_Sar16, Iop_Sar32, Iop_Sar64, 0452 /* Integer comparisons. */ 0453 Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32, Iop_CmpEQ64, 0454 Iop_CmpNE8, Iop_CmpNE16, Iop_CmpNE32, Iop_CmpNE64, 0455 /* Tags for unary ops */ 0456 Iop_Not8, Iop_Not16, Iop_Not32, Iop_Not64, 0457 0458 /* Exactly like CmpEQ8/16/32/64, but carrying the additional 0459 hint that these compute the success/failure of a CAS 0460 operation, and hence are almost certainly applied to two 0461 copies of the same value, which in turn has implications for 0462 Memcheck's instrumentation. 
*/ 0463 Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64, 0464 Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64, 0465 0466 /* Exactly like CmpNE8/16/32/64, but carrying the additional 0467 hint that these needs expensive definedness tracking. */ 0468 Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64, 0469 0470 /* -- Ordering not important after here. -- */ 0471 0472 /* Widening multiplies */ 0473 Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64, 0474 Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64, 0475 0476 /* Counting bits */ 0477 /* Count leading/trailing zeroes, with "natural" semantics for the 0478 case where the input is zero: then the result is the number of bits 0479 in the word. */ 0480 Iop_ClzNat64, Iop_ClzNat32, 0481 Iop_CtzNat64, Iop_CtzNat32, 0482 /* Population count -- compute the number of 1 bits in the argument. */ 0483 Iop_PopCount64, Iop_PopCount32, 0484 0485 /* Standard integer comparisons */ 0486 Iop_CmpLT32S, Iop_CmpLT64S, 0487 Iop_CmpLE32S, Iop_CmpLE64S, 0488 Iop_CmpLT32U, Iop_CmpLT64U, 0489 Iop_CmpLE32U, Iop_CmpLE64U, 0490 0491 /* As a sop to Valgrind-Memcheck, the following are useful. */ 0492 Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64, 0493 Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */ 0494 Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /* \x -> x | -x */ 0495 Iop_Max32U, /* unsigned max */ 0496 0497 /* PowerPC-style 3-way integer comparisons. Without them it is 0498 difficult to simulate PPC efficiently. 0499 op(x,y) | x < y = 0x8 else 0500 | x > y = 0x4 else 0501 | x == y = 0x2 0502 */ 0503 Iop_CmpORD32U, Iop_CmpORD64U, 0504 Iop_CmpORD32S, Iop_CmpORD64S, 0505 0506 /* Division */ 0507 /* Semantics of division as per C standard: 0508 If the value of the divisor is zero, the behaviour is undefined. 0509 When integers are divided, the result of division is the algebraic 0510 quotient with any fractional part discarded. In other words: 0511 truncation towards zero. 
If the quotient a/b is representable, 0512 the expression (a/b)*b + a%b shall equal a; otherwise, the behaviour 0513 of division and modulo operation is undefined. */ 0514 Iop_DivU32, // :: I32,I32 -> I32 (simple div, no mod) 0515 Iop_DivS32, // ditto, signed 0516 Iop_DivU64, // :: I64,I64 -> I64 (simple div, no mod) 0517 Iop_DivS64, // ditto, signed 0518 Iop_DivU128, // :: I128,I128 -> I128 (simple div, no mod) 0519 Iop_DivS128, // ditto, signed 0520 0521 Iop_DivU32E, // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) 0522 // concat with 32 0's (low)) 0523 Iop_DivS32E, // ditto, signed 0524 Iop_DivU64E, // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) 0525 // concat with 64 0's (low)) 0526 Iop_DivS64E, // ditto, signed 0527 Iop_DivU128E, // :: I128,I128 -> I128 (dividend is 128-bit arg (hi) 0528 // concat with 128 0's (low)) 0529 Iop_DivS128E, // ditto, signed 0530 0531 Iop_DivModU64to32, // :: I64,I32 -> I64 0532 // of which lo half is div and hi half is mod 0533 Iop_DivModS64to32, // ditto, signed 0534 0535 Iop_DivModU128to64, // :: I128,I64 -> I128 0536 // of which lo half is div and hi half is mod 0537 Iop_DivModS128to64, // ditto, signed 0538 0539 Iop_DivModS64to64, // :: I64,I64 -> I128 0540 // of which lo half is div and hi half is mod 0541 Iop_DivModU64to64, // :: I64,I64 -> I128 0542 // of which lo half is div and hi half is mod 0543 Iop_DivModS32to32, // :: I32,I32 -> I64 0544 // of which lo half is div and hi half is mod 0545 Iop_DivModU32to32, // :: I32,I32 -> I64 0546 // of which lo half is div and hi half is mod 0547 0548 Iop_ModU128, // :: I128,I128 -> I128 normal modulo operation 0549 Iop_ModS128, // ditto, signed 0550 0551 /* Integer conversions. Some of these are redundant (eg 0552 Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but 0553 having a complete set reduces the typical dynamic size of IR 0554 and makes the instruction selectors easier to write. 
*/ 0555 0556 /* Widening conversions */ 0557 Iop_8Uto16, Iop_8Uto32, Iop_8Uto64, 0558 Iop_16Uto32, Iop_16Uto64, 0559 Iop_32Uto64, 0560 Iop_8Sto16, Iop_8Sto32, Iop_8Sto64, 0561 Iop_16Sto32, Iop_16Sto64, 0562 Iop_32Sto64, 0563 0564 /* Narrowing conversions */ 0565 Iop_64to8, Iop_32to8, Iop_64to16, 0566 /* 8 <-> 16 bit conversions */ 0567 Iop_16to8, // :: I16 -> I8, low half 0568 Iop_16HIto8, // :: I16 -> I8, high half 0569 Iop_8HLto16, // :: (I8,I8) -> I16 0570 /* 16 <-> 32 bit conversions */ 0571 Iop_32to16, // :: I32 -> I16, low half 0572 Iop_32HIto16, // :: I32 -> I16, high half 0573 Iop_16HLto32, // :: (I16,I16) -> I32 0574 /* 32 <-> 64 bit conversions */ 0575 Iop_64to32, // :: I64 -> I32, low half 0576 Iop_64HIto32, // :: I64 -> I32, high half 0577 Iop_32HLto64, // :: (I32,I32) -> I64 0578 /* 64 <-> 128 bit conversions */ 0579 Iop_128to64, // :: I128 -> I64, low half 0580 Iop_128HIto64, // :: I128 -> I64, high half 0581 Iop_64HLto128, // :: (I64,I64) -> I128 0582 /* 1-bit stuff */ 0583 Iop_Not1, /* :: Ity_Bit -> Ity_Bit */ 0584 Iop_And1, /* :: (Ity_Bit, Ity_Bit) -> Ity_Bit. Evaluates both args! */ 0585 Iop_Or1, /* :: (Ity_Bit, Ity_Bit) -> Ity_Bit. Evaluates both args! */ 0586 Iop_32to1, /* :: Ity_I32 -> Ity_Bit, just select bit[0] */ 0587 Iop_64to1, /* :: Ity_I64 -> Ity_Bit, just select bit[0] */ 0588 Iop_1Uto8, /* :: Ity_Bit -> Ity_I8, unsigned widen */ 0589 Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */ 0590 Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */ 0591 Iop_1Sto8, /* :: Ity_Bit -> Ity_I8, signed widen */ 0592 Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */ 0593 Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */ 0594 Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */ 0595 0596 /* ------ Floating point. We try to be IEEE754 compliant. ------ */ 0597 0598 /* --- Simple stuff as mandated by 754. --- */ 0599 0600 /* Binary operations, with rounding. 
*/ 0601 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 0602 Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, 0603 0604 /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */ 0605 Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, 0606 0607 /* Variants of the above which produce a 64-bit result but which 0608 round their result to a IEEE float range first. */ 0609 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 0610 Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32, 0611 0612 /* Unary operations, without rounding. */ 0613 /* :: F64 -> F64 */ 0614 Iop_NegF64, Iop_AbsF64, 0615 0616 /* :: F32 -> F32 */ 0617 Iop_NegF32, Iop_AbsF32, 0618 0619 /* :: F16 -> F16 */ 0620 Iop_NegF16, Iop_AbsF16, 0621 0622 /* Unary operations, with rounding. */ 0623 /* :: IRRoundingMode(I32) x F64 -> F64 */ 0624 Iop_SqrtF64, 0625 0626 /* :: IRRoundingMode(I32) x F32 -> F32 */ 0627 Iop_SqrtF32, 0628 0629 /* :: IRRoundingMode(I32) x F16 -> F16 */ 0630 Iop_SqrtF16, 0631 0632 /* :: IRRoundingMode(I32) x F16 x F16 -> F16 */ 0633 Iop_SubF16, Iop_AddF16, 0634 0635 /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following: 0636 0x45 Unordered 0637 0x01 LT 0638 0x00 GT 0639 0x40 EQ 0640 This just happens to be the Intel encoding. The values 0641 are recorded in the type IRCmpF64Result. 0642 */ 0643 /* :: F64 x F64 -> IRCmpF64Result(I32) */ 0644 Iop_CmpF64, 0645 Iop_CmpF32, 0646 Iop_CmpF16, 0647 Iop_CmpF128, 0648 0649 /* --- Int to/from FP conversions. --- */ 0650 0651 /* For the most part, these take a first argument :: Ity_I32 (as 0652 IRRoundingMode) which is an indication of the rounding mode 0653 to use, as per the following encoding ("the standard 0654 encoding"): 0655 00b to nearest (the default) 0656 01b to -infinity 0657 10b to +infinity 0658 11b to zero 0659 This just happens to be the Intel encoding. 
For reference only, 0660 the PPC encoding is: 0661 00b to nearest (the default) 0662 01b to zero 0663 10b to +infinity 0664 11b to -infinity 0665 Any PPC -> IR front end will have to translate these PPC 0666 encodings, as encoded in the guest state, to the standard 0667 encodings, to pass to the primops. 0668 For reference only, the ARM VFP encoding is: 0669 00b to nearest 0670 01b to +infinity 0671 10b to -infinity 0672 11b to zero 0673 Again, this will have to be converted to the standard encoding 0674 to pass to primops. 0675 0676 If one of these conversions gets an out-of-range condition, 0677 or a NaN, as an argument, the result is host-defined. On x86 0678 the "integer indefinite" value 0x80..00 is produced. On PPC 0679 it is either 0x80..00 or 0x7F..FF depending on the sign of 0680 the argument. 0681 0682 On ARMvfp, when converting to a signed integer result, the 0683 overflow result is 0x80..00 for negative args and 0x7F..FF 0684 for positive args. For unsigned integer results it is 0685 0x00..00 and 0xFF..FF respectively. 0686 0687 Rounding is required whenever the destination type cannot 0688 represent exactly all values of the source type. 
0689 */ 0690 Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */ 0691 Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */ 0692 Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */ 0693 Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */ 0694 0695 Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */ 0696 0697 Iop_I32StoF64, /* signed I32 -> F64 */ 0698 Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */ 0699 Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */ 0700 Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */ 0701 0702 Iop_I32UtoF32, /* IRRoundingMode(I32) x unsigned I32 -> F32 */ 0703 Iop_I32UtoF64, /* unsigned I32 -> F64 */ 0704 0705 Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */ 0706 Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */ 0707 Iop_F32toI32U, /* IRRoundingMode(I32) x F32 -> unsigned I32 */ 0708 Iop_F32toI64U, /* IRRoundingMode(I32) x F32 -> unsigned I64 */ 0709 0710 Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */ 0711 Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */ 0712 0713 /* Conversion between floating point formats */ 0714 Iop_F32toF64, /* F32 -> F64 */ 0715 Iop_F64toF32, /* IRRoundingMode(I32) x F64 -> F32 */ 0716 0717 /* Reinterpretation. Take an F32/64/128 and produce an I32/64/128 0718 with the same bit pattern, or vice versa. 
*/ 0719 Iop_ReinterpV128asI128, Iop_ReinterpI128asV128, 0720 Iop_ReinterpF128asI128, Iop_ReinterpI128asF128, 0721 Iop_ReinterpF64asI64, Iop_ReinterpI64asF64, 0722 Iop_ReinterpF32asI32, Iop_ReinterpI32asF32, 0723 0724 /* Support for 128-bit floating point */ 0725 Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */ 0726 Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */ 0727 Iop_F128LOtoF64,/* F128 -> low half of F128 into a F64 register */ 0728 0729 /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */ 0730 Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128, 0731 Iop_MAddF128, // (A * B) + C 0732 Iop_MSubF128, // (A * B) - C 0733 Iop_NegMAddF128, // -((A * B) + C) 0734 Iop_NegMSubF128, // -((A * B) - C) 0735 0736 /* :: F128 -> F128 */ 0737 Iop_NegF128, Iop_AbsF128, 0738 0739 /* :: IRRoundingMode(I32) x F128 -> F128 */ 0740 Iop_SqrtF128, 0741 0742 Iop_I32StoF128, /* signed I32 -> F128 */ 0743 Iop_I64StoF128, /* signed I64 -> F128 */ 0744 Iop_I32UtoF128, /* unsigned I32 -> F128 */ 0745 Iop_I64UtoF128, /* unsigned I64 -> F128 */ 0746 Iop_F32toF128, /* F32 -> F128 */ 0747 Iop_F64toF128, /* F64 -> F128 */ 0748 Iop_I128UtoF128, /* unsigned I128 -> F128 */ 0749 Iop_I128StoF128, /* signed I128 -> F128 */ 0750 0751 Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */ 0752 Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */ 0753 Iop_F128toI32U, /* IRRoundingMode(I32) x F128 -> unsigned I32 */ 0754 Iop_F128toI64U, /* IRRoundingMode(I32) x F128 -> unsigned I64 */ 0755 Iop_F128toI128S,/* IRRoundingMode(I32) x F128 -> signed I128 */ 0756 Iop_F128toF64, /* IRRoundingMode(I32) x F128 -> F64 */ 0757 Iop_F128toF32, /* IRRoundingMode(I32) x F128 -> F32 */ 0758 Iop_RndF128, /* IRRoundingMode(I32) x F128 -> F128 */ 0759 0760 /* Truncate to the specified value, source and result 0761 * are stroed in a F128 register. 
0762 */ 0763 Iop_TruncF128toI32S, /* truncate F128 -> I32 */ 0764 Iop_TruncF128toI32U, /* truncate F128 -> I32 */ 0765 Iop_TruncF128toI64U, /* truncate F128 -> I64 */ 0766 Iop_TruncF128toI64S, /* truncate F128 -> I64 */ 0767 Iop_TruncF128toI128U, /* truncate F128 -> I128 */ 0768 Iop_TruncF128toI128S, /* truncate F128 -> I128 */ 0769 0770 /* --- guest x86/amd64 specifics, not mandated by 754. --- */ 0771 0772 /* Binary ops, with rounding. */ 0773 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 0774 Iop_AtanF64, /* FPATAN, arctan(arg1/arg2) */ 0775 Iop_Yl2xF64, /* FYL2X, arg1 * log2(arg2) */ 0776 Iop_Yl2xp1F64, /* FYL2XP1, arg1 * log2(arg2+1.0) */ 0777 Iop_PRemF64, /* FPREM, non-IEEE remainder(arg1/arg2) */ 0778 Iop_PRemC3210F64, /* C3210 flags resulting from FPREM, :: I32 */ 0779 Iop_PRem1F64, /* FPREM1, IEEE remainder(arg1/arg2) */ 0780 Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */ 0781 Iop_ScaleF64, /* FSCALE, arg1 * (2^RoundTowardsZero(arg2)) */ 0782 /* Note that on x86 guest, PRem1{C3210} has the same behaviour 0783 as the IEEE mandated RemF64, except it is limited in the 0784 range of its operand. Hence the partialness. */ 0785 0786 /* Unary ops, with rounding. */ 0787 /* :: IRRoundingMode(I32) x F64 -> F64 */ 0788 Iop_SinF64, /* FSIN */ 0789 Iop_CosF64, /* FCOS */ 0790 Iop_TanF64, /* FTAN */ 0791 Iop_2xm1F64, /* (2^arg - 1.0) */ 0792 Iop_RoundF128toInt, /* F128 value to nearest integral value (still 0793 as F128) */ 0794 Iop_RoundF64toInt, /* F64 value to nearest integral value (still 0795 as F64) */ 0796 Iop_RoundF64toIntA0, /* As Iop_RoundF64toInt but ties to above zero*/ 0797 0798 Iop_RoundF64toIntE, /* As Iop_RoundF64toInt but ties to even */ 0799 Iop_RoundF32toInt, /* F32 value to nearest integral value (still 0800 as F32) */ 0801 Iop_RoundF32toIntA0, /* As Iop_RoundF32toInt but ties to above zero*/ 0802 Iop_RoundF32toIntE, /* As Iop_RoundF32toInt but ties to even */ 0803 0804 /* --- guest s390 specifics, not mandated by 754. 
--- */ 0805 0806 /* Fused multiply-add/sub */ 0807 /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32 0808 (computes arg2 * arg3 +/- arg4) */ 0809 Iop_MAddF32, Iop_MSubF32, 0810 0811 /* --- guest ppc32/64 specifics, not mandated by 754. --- */ 0812 0813 /* Ternary operations, with rounding. */ 0814 /* Fused multiply-add/sub, with 112-bit intermediate 0815 precision for ppc. 0816 Also used to implement fused multiply-add/sub for s390. */ 0817 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 0818 (computes arg2 * arg3 +/- arg4) */ 0819 Iop_MAddF64, Iop_MSubF64, 0820 0821 /* Variants of the above which produce a 64-bit result but which 0822 round their result to a IEEE float range first. */ 0823 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */ 0824 Iop_MAddF64r32, Iop_MSubF64r32, 0825 0826 /* :: F64 -> F64 */ 0827 Iop_RSqrtEst5GoodF64, /* reciprocal square root estimate, 5 good bits */ 0828 Iop_RoundF64toF64_NEAREST, /* frin */ 0829 Iop_RoundF64toF64_NegINF, /* frim */ 0830 Iop_RoundF64toF64_PosINF, /* frip */ 0831 Iop_RoundF64toF64_ZERO, /* friz */ 0832 0833 /* :: F64 -> F32 */ 0834 Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */ 0835 0836 /* :: IRRoundingMode(I32) x F64 -> F64 */ 0837 Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */ 0838 /* NB: pretty much the same as Iop_F64toF32, except no change 0839 of type. */ 0840 0841 /* --- guest arm64 specifics, not mandated by 754. --- */ 0842 0843 Iop_RecpExpF64, /* FRECPX d :: IRRoundingMode(I32) x F64 -> F64 */ 0844 Iop_RecpExpF32, /* FRECPX s :: IRRoundingMode(I32) x F32 -> F32 */ 0845 0846 /* --------- Possibly required by IEEE 754-2008. 
--------- */ 0847 0848 Iop_MaxNumF64, /* max, F64, numerical operand if other is a qNaN */ 0849 Iop_MinNumF64, /* min, F64, ditto */ 0850 Iop_MaxNumF32, /* max, F32, ditto */ 0851 Iop_MinNumF32, /* min, F32, ditto */ 0852 0853 /* ------------------ 16-bit scalar FP ------------------ */ 0854 0855 Iop_F16toF64, /* F16 -> F64 */ 0856 Iop_F64toF16, /* IRRoundingMode(I32) x F64 -> F16 */ 0857 0858 Iop_F16toF32, /* F16 -> F32 */ 0859 Iop_F32toF16, /* IRRoundingMode(I32) x F32 -> F16 */ 0860 0861 /* ------------------ 32-bit SIMD Integer ------------------ */ 0862 0863 /* 32x1 saturating add/sub (ok, well, not really SIMD :) */ 0864 Iop_QAdd32S, 0865 Iop_QSub32S, 0866 0867 /* 16x2 add/sub, also signed/unsigned saturating variants */ 0868 Iop_Add16x2, Iop_Sub16x2, 0869 Iop_QAdd16Sx2, Iop_QAdd16Ux2, 0870 Iop_QSub16Sx2, Iop_QSub16Ux2, 0871 0872 /* 16x2 signed/unsigned halving add/sub. For each lane, these 0873 compute bits 16:1 of (eg) sx(argL) + sx(argR), 0874 or zx(argL) - zx(argR) etc. */ 0875 Iop_HAdd16Ux2, Iop_HAdd16Sx2, 0876 Iop_HSub16Ux2, Iop_HSub16Sx2, 0877 0878 /* 8x4 add/sub, also signed/unsigned saturating variants */ 0879 Iop_Add8x4, Iop_Sub8x4, 0880 Iop_QAdd8Sx4, Iop_QAdd8Ux4, 0881 Iop_QSub8Sx4, Iop_QSub8Ux4, 0882 0883 /* 8x4 signed/unsigned halving add/sub. For each lane, these 0884 compute bits 8:1 of (eg) sx(argL) + sx(argR), 0885 or zx(argL) - zx(argR) etc. */ 0886 Iop_HAdd8Ux4, Iop_HAdd8Sx4, 0887 Iop_HSub8Ux4, Iop_HSub8Sx4, 0888 0889 /* 8x4 sum of absolute unsigned differences. 
*/ 0890 Iop_Sad8Ux4, 0891 0892 /* MISC (vector integer cmp != 0) */ 0893 Iop_CmpNEZ16x2, Iop_CmpNEZ8x4, 0894 0895 /* Byte swap in a 32-bit word */ 0896 Iop_Reverse8sIn32_x1, 0897 0898 /* ------------------ 64-bit SIMD FP ------------------------ */ 0899 0900 /* Conversion to/from int */ 0901 // Deprecated: these don't specify a rounding mode 0902 Iop_I32UtoF32x2_DEP, Iop_I32StoF32x2_DEP, /* I32x2 -> F32x2 */ 0903 0904 Iop_F32toI32Ux2_RZ, Iop_F32toI32Sx2_RZ, /* F32x2 -> I32x2 */ 0905 0906 /* Fixed32 format is floating-point number with fixed number of fraction 0907 bits. The number of fraction bits is passed as a second argument of 0908 type I8. */ 0909 Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */ 0910 Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */ 0911 0912 /* Binary operations */ 0913 Iop_Max32Fx2, Iop_Min32Fx2, 0914 /* Pairwise Min and Max. See integer pairwise operations for more 0915 details. */ 0916 Iop_PwMax32Fx2, Iop_PwMin32Fx2, 0917 /* Note: For the following compares, the arm front-end assumes a 0918 nan in a lane of either argument returns zero for that lane. */ 0919 Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2, 0920 0921 /* Vector Reciprocal Estimate finds an approximate reciprocal of each 0922 element in the operand vector, and places the results in the destination 0923 vector. */ 0924 Iop_RecipEst32Fx2, 0925 0926 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2). 0927 Note, that if one of the arguments is zero and another one is infinity 0928 of arbitrary sign the result of the operation is 2.0. */ 0929 Iop_RecipStep32Fx2, 0930 0931 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal 0932 square root of each element in the operand vector. */ 0933 Iop_RSqrtEst32Fx2, 0934 0935 /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. 
0936 Note, that if one of the arguments is zero and another one is infinity 0937 of arbitrary sign the result of the operation is 1.5. */ 0938 Iop_RSqrtStep32Fx2, 0939 0940 /* Unary */ 0941 Iop_Neg32Fx2, Iop_Abs32Fx2, 0942 0943 /* ------------------ 64-bit SIMD Integer. ------------------ */ 0944 0945 /* MISC (vector integer cmp != 0) */ 0946 Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2, 0947 0948 /* ADDITION (normal / unsigned sat / signed sat) */ 0949 Iop_Add8x8, Iop_Add16x4, Iop_Add32x2, 0950 Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1, 0951 Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1, 0952 0953 /* PAIRWISE operations */ 0954 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = 0955 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ 0956 Iop_PwAdd8x8, Iop_PwAdd16x4, Iop_PwAdd32x2, 0957 Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2, 0958 Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2, 0959 Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2, 0960 Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2, 0961 /* Longening variant is unary. The resulting vector contains two times 0962 less elements than operand, but they are two times wider. 0963 Example: 0964 Iop_PAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] 0965 where a+b and c+d are unsigned 32-bit values. */ 0966 Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2, 0967 Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2, 0968 0969 /* SUBTRACTION (normal / unsigned sat / signed sat) */ 0970 Iop_Sub8x8, Iop_Sub16x4, Iop_Sub32x2, 0971 Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1, 0972 Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1, 0973 0974 /* ABSOLUTE VALUE */ 0975 Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2, 0976 0977 /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */ 0978 Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2, 0979 Iop_Mul32Fx2, 0980 Iop_MulHi16Ux4, 0981 Iop_MulHi16Sx4, 0982 /* Polynomial multiplication treats its arguments as coefficients of 0983 polynomials over {0, 1}.
*/ 0984 Iop_PolynomialMul8x8, 0985 0986 /* Vector Saturating Doubling Multiply Returning High Half and 0987 Vector Saturating Rounding Doubling Multiply Returning High Half */ 0988 /* These IROp's multiply corresponding elements in two vectors, double 0989 the results, and place the most significant half of the final results 0990 in the destination vector. The results are truncated or rounded. If 0991 any of the results overflow, they are saturated. */ 0992 Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2, 0993 Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2, 0994 0995 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ 0996 Iop_Avg8Ux8, 0997 Iop_Avg16Ux4, 0998 0999 /* MIN/MAX */ 1000 Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2, 1001 Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2, 1002 Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2, 1003 Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2, 1004 1005 /* COMPARISON */ 1006 Iop_CmpEQ8x8, Iop_CmpEQ16x4, Iop_CmpEQ32x2, 1007 Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2, 1008 Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2, 1009 1010 /* COUNT ones / leading zeroes / leading sign bits (not including topmost 1011 bit) */ 1012 Iop_Cnt8x8, 1013 Iop_Clz8x8, Iop_Clz16x4, Iop_Clz32x2, 1014 Iop_Cls8x8, Iop_Cls16x4, Iop_Cls32x2, 1015 Iop_Clz64x2, 1016 1017 /*Vector COUNT trailing zeros */ 1018 Iop_Ctz8x16, Iop_Ctz16x8, Iop_Ctz32x4, Iop_Ctz64x2, 1019 1020 /* VECTOR x VECTOR SHIFT / ROTATE */ 1021 Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2, 1022 Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2, 1023 Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2, 1024 Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1, 1025 1026 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ 1027 Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2, 1028 Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2, 1029 Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2, 1030 1031 /* VECTOR x VECTOR SATURATING SHIFT */ 1032 Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1, 1033 Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1, 1034 /* VECTOR x INTEGER SATURATING SHIFT */ 1035 
Iop_QShlNsatSU8x8, Iop_QShlNsatSU16x4, 1036 Iop_QShlNsatSU32x2, Iop_QShlNsatSU64x1, 1037 Iop_QShlNsatUU8x8, Iop_QShlNsatUU16x4, 1038 Iop_QShlNsatUU32x2, Iop_QShlNsatUU64x1, 1039 Iop_QShlNsatSS8x8, Iop_QShlNsatSS16x4, 1040 Iop_QShlNsatSS32x2, Iop_QShlNsatSS64x1, 1041 1042 /* NARROWING (binary) 1043 -- narrow 2xI64 into 1xI64, hi half from left arg */ 1044 /* For saturated narrowing, I believe there are 4 variants of 1045 the basic arithmetic operation, depending on the signedness 1046 of argument and result. Here are examples that exemplify 1047 what I mean: 1048 1049 QNarrow16Uto8U ( UShort x ) if (x >u 255) x = 255; 1050 return x[7:0]; 1051 1052 QNarrow16Sto8S ( Short x ) if (x <s -128) x = -128; 1053 if (x >s 127) x = 127; 1054 return x[7:0]; 1055 1056 QNarrow16Uto8S ( UShort x ) if (x >u 127) x = 127; 1057 return x[7:0]; 1058 1059 QNarrow16Sto8U ( Short x ) if (x <s 0) x = 0; 1060 if (x >s 255) x = 255; 1061 return x[7:0]; 1062 */ 1063 Iop_QNarrowBin16Sto8Ux8, 1064 Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4, 1065 Iop_NarrowBin16to8x8, Iop_NarrowBin32to16x4, 1066 1067 /* INTERLEAVING */ 1068 /* Interleave lanes from low or high halves of 1069 operands. Most-significant result lane is from the left 1070 arg. */ 1071 Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2, 1072 Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2, 1073 /* Interleave odd/even lanes of operands. Most-significant result lane 1074 is from the left arg. Note that Interleave{Odd,Even}Lanes32x2 are 1075 identical to Interleave{HI,LO}32x2 and so are omitted.*/ 1076 Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8, 1077 Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4, 1078 1079 /* CONCATENATION -- build a new value by concatenating either 1080 the even or odd lanes of both operands. Note that 1081 Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2 1082 and so are omitted. 
*/ 1083 Iop_CatOddLanes8x8, Iop_CatOddLanes16x4, 1084 Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4, 1085 1086 /* GET / SET elements of VECTOR 1087 GET is binop (I64, I8) -> I<elem_size> 1088 SET is triop (I64, I8, I<elem_size>) -> I64 */ 1089 /* Note: the arm back-end handles only constant second argument */ 1090 Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2, 1091 Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2, 1092 1093 /* DUPLICATING -- copy value to all lanes */ 1094 Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2, 1095 1096 /* SLICE -- produces the lowest 64 bits of (arg1:arg2) >> (8 * arg3). 1097 arg3 is a shift amount in bytes and may be between 0 and 8 1098 inclusive. When 0, the result is arg2; when 8, the result is arg1. 1099 Not all back ends handle all values. The arm32 and arm64 back 1100 ends handle only immediate arg3 values. */ 1101 Iop_Slice64, // (I64, I64, I8) -> I64 1102 1103 /* REVERSE the order of chunks in vector lanes. Chunks must be 1104 smaller than the vector lanes (obviously) and so may be 8-, 16- and 1105 32-bit in size. Note that the degenerate case, 1106 Iop_Reverse8sIn64_x1, is a simply a vanilla byte-swap. */ 1107 /* Examples: 1108 Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g] 1109 Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] 1110 Reverse8sIn64_x1([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */ 1111 Iop_Reverse8sIn16_x4, 1112 Iop_Reverse8sIn32_x2, Iop_Reverse16sIn32_x2, 1113 Iop_Reverse8sIn64_x1, Iop_Reverse16sIn64_x1, Iop_Reverse32sIn64_x1, 1114 1115 /* PERMUTING -- copy src bytes to dst, 1116 as indexed by control vector bytes: 1117 for i in 0 .. 7 . result[i] = argL[ argR[i] ] 1118 argR[i] values may only be in the range 0 .. 7, else behaviour 1119 is undefined. That is, argR[i][7:3] must be zero. */ 1120 Iop_Perm8x8, 1121 1122 /* PERMUTING with optional zeroing: 1123 for i in 0 .. 7 . result[i] = if argR[i] bit 7 is set 1124 then zero else argL[ argR[i] ] 1125 argR[i][6:3] must be zero, else behaviour is undefined. 
1126 */ 1127 Iop_PermOrZero8x8, 1128 1129 /* MISC CONVERSION -- get high bits of each byte lane, a la 1130 x86/amd64 pmovmskb */ 1131 Iop_GetMSBs8x8, /* I64 -> I8 */ 1132 1133 /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate 1134 See floating-point equivalents for details. */ 1135 Iop_RecipEst32Ux2, Iop_RSqrtEst32Ux2, 1136 1137 /* ------------------ Decimal Floating Point ------------------ */ 1138 1139 /* ARITHMETIC INSTRUCTIONS 64-bit 1140 ---------------------------------- 1141 IRRoundingMode(I32) X D64 X D64 -> D64 1142 */ 1143 Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64, 1144 1145 /* ARITHMETIC INSTRUCTIONS 128-bit 1146 ---------------------------------- 1147 IRRoundingMode(I32) X D128 X D128 -> D128 1148 */ 1149 Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128, 1150 1151 /* SHIFT SIGNIFICAND INSTRUCTIONS 1152 * The DFP significand is shifted by the number of digits specified 1153 * by the U8 operand. Digits shifted out of the leftmost digit are 1154 * lost. Zeros are supplied to the vacated positions on the right. 1155 * The sign of the result is the same as the sign of the original 1156 * operand. 
1157 * 1158 * D64 x U8 -> D64 left shift and right shift respectively */ 1159 Iop_ShlD64, Iop_ShrD64, 1160 1161 /* D128 x U8 -> D128 left shift and right shift respectively */ 1162 Iop_ShlD128, Iop_ShrD128, 1163 1164 1165 /* FORMAT CONVERSION INSTRUCTIONS 1166 * D32 -> D64 1167 */ 1168 Iop_D32toD64, 1169 1170 /* D64 -> D128 */ 1171 Iop_D64toD128, 1172 1173 /* I32S -> D128 */ 1174 Iop_I32StoD128, 1175 1176 /* I32U -> D128 */ 1177 Iop_I32UtoD128, 1178 1179 /* I64S -> D128 */ 1180 Iop_I64StoD128, 1181 1182 /* I64U -> D128 */ 1183 Iop_I64UtoD128, 1184 1185 /* IRRoundingMode(I32) x I128S -> D128 */ 1186 Iop_I128StoD128, 1187 1188 /* IRRoundingMode(I32) x D64 -> D32 */ 1189 Iop_D64toD32, 1190 1191 /* IRRoundingMode(I32) x D128 -> D64 */ 1192 Iop_D128toD64, 1193 1194 /* I32S -> D64 */ 1195 Iop_I32StoD64, 1196 1197 /* I32U -> D64 */ 1198 Iop_I32UtoD64, 1199 1200 /* IRRoundingMode(I32) x I64 -> D64 */ 1201 Iop_I64StoD64, 1202 1203 /* IRRoundingMode(I32) x I64 -> D64 */ 1204 Iop_I64UtoD64, 1205 1206 /* IRRoundingMode(I32) x D64 -> I32 */ 1207 Iop_D64toI32S, 1208 1209 /* IRRoundingMode(I32) x D64 -> I32 */ 1210 Iop_D64toI32U, 1211 1212 /* IRRoundingMode(I32) x D64 -> I64 */ 1213 Iop_D64toI64S, 1214 1215 /* IRRoundingMode(I32) x D64 -> I64 */ 1216 Iop_D64toI64U, 1217 1218 /* IRRoundingMode(I32) x D128 -> I32 */ 1219 Iop_D128toI32S, 1220 1221 /* IRRoundingMode(I32) x D128 -> I32 */ 1222 Iop_D128toI32U, 1223 1224 /* IRRoundingMode(I32) x D128 -> I64 */ 1225 Iop_D128toI64S, 1226 1227 /* IRRoundingMode(I32) x D128 -> I64 */ 1228 Iop_D128toI64U, 1229 1230 /* IRRoundingMode(I32) x D128 -> I128 */ 1231 Iop_D128toI128S, 1232 1233 /* IRRoundingMode(I32) x F32 -> D32 */ 1234 Iop_F32toD32, 1235 1236 /* IRRoundingMode(I32) x F32 -> D64 */ 1237 Iop_F32toD64, 1238 1239 /* IRRoundingMode(I32) x F32 -> D128 */ 1240 Iop_F32toD128, 1241 1242 /* IRRoundingMode(I32) x F64 -> D32 */ 1243 Iop_F64toD32, 1244 1245 /* IRRoundingMode(I32) x F64 -> D64 */ 1246 Iop_F64toD64, 1247 1248 /* 
IRRoundingMode(I32) x F64 -> D128 */ 1249 Iop_F64toD128, 1250 1251 /* IRRoundingMode(I32) x F128 -> D32 */ 1252 Iop_F128toD32, 1253 1254 /* IRRoundingMode(I32) x F128 -> D64 */ 1255 Iop_F128toD64, 1256 1257 /* IRRoundingMode(I32) x F128 -> D128 */ 1258 Iop_F128toD128, 1259 1260 /* IRRoundingMode(I32) x D32 -> F32 */ 1261 Iop_D32toF32, 1262 1263 /* IRRoundingMode(I32) x D32 -> F64 */ 1264 Iop_D32toF64, 1265 1266 /* IRRoundingMode(I32) x D32 -> F128 */ 1267 Iop_D32toF128, 1268 1269 /* IRRoundingMode(I32) x D64 -> F32 */ 1270 Iop_D64toF32, 1271 1272 /* IRRoundingMode(I32) x D64 -> F64 */ 1273 Iop_D64toF64, 1274 1275 /* IRRoundingMode(I32) x D64 -> F128 */ 1276 Iop_D64toF128, 1277 1278 /* IRRoundingMode(I32) x D128 -> F32 */ 1279 Iop_D128toF32, 1280 1281 /* IRRoundingMode(I32) x D128 -> F64 */ 1282 Iop_D128toF64, 1283 1284 /* IRRoundingMode(I32) x D128 -> F128 */ 1285 Iop_D128toF128, 1286 1287 /* ROUNDING INSTRUCTIONS 1288 * IRRoundingMode(I32) x D64 -> D64 1289 * The D64 operand, if a finite number, is rounded to a 1290 * floating point integer value, i.e. no fractional part. 1291 */ 1292 Iop_RoundD64toInt, 1293 1294 /* IRRoundingMode(I32) x D128 -> D128 */ 1295 Iop_RoundD128toInt, 1296 1297 /* COMPARE INSTRUCTIONS 1298 * D64 x D64 -> IRCmpD64Result(I32) */ 1299 Iop_CmpD64, 1300 1301 /* D128 x D128 -> IRCmpD128Result(I32) */ 1302 Iop_CmpD128, 1303 1304 /* COMPARE BIASED EXPONENT INSTRUCTIONS 1305 * D64 x D64 -> IRCmpD64Result(I32) */ 1306 Iop_CmpExpD64, 1307 1308 /* D128 x D128 -> IRCmpD128Result(I32) */ 1309 Iop_CmpExpD128, 1310 1311 /* QUANTIZE AND ROUND INSTRUCTIONS 1312 * The source operand is converted and rounded to the form with the 1313 * immediate exponent specified by the rounding and exponent parameter. 1314 * 1315 * The second operand is converted and rounded to the form 1316 * of the first operand's exponent and then rounded based on the specified 1317 * rounding mode parameter.
1318 * 1319 * IRRoundingMode(I32) x D64 x D64-> D64 */ 1320 Iop_QuantizeD64, 1321 1322 /* IRRoundingMode(I32) x D128 x D128 -> D128 */ 1323 Iop_QuantizeD128, 1324 1325 /* IRRoundingMode(I32) x I8 x D64 -> D64 1326 * The Decimal Floating point operand is rounded to the requested 1327 * significance given by the I8 operand as specified by the rounding 1328 * mode. 1329 */ 1330 Iop_SignificanceRoundD64, 1331 1332 /* IRRoundingMode(I32) x I8 x D128 -> D128 */ 1333 Iop_SignificanceRoundD128, 1334 1335 /* EXTRACT AND INSERT INSTRUCTIONS 1336 * D64 -> I64 1337 * The exponent of the D32 or D64 operand is extracted. The 1338 * extracted exponent is converted to a 64-bit signed binary integer. 1339 */ 1340 Iop_ExtractExpD64, 1341 1342 /* D128 -> I64 */ 1343 Iop_ExtractExpD128, 1344 1345 /* D64 -> I64 1346 * The number of significand digits of the D64 operand is extracted. 1347 * The number is stored as a 64-bit signed binary integer. 1348 */ 1349 Iop_ExtractSigD64, 1350 1351 /* D128 -> I64 */ 1352 Iop_ExtractSigD128, 1353 1354 /* I64 x D64 -> D64 1355 * The exponent is specified by the first I64 operand the signed 1356 * significand is given by the second I64 value. The result is a D64 1357 * value consisting of the specified significand and exponent whose 1358 * sign is that of the specified significand. 1359 */ 1360 Iop_InsertExpD64, 1361 1362 /* I64 x D128 -> D128 */ 1363 Iop_InsertExpD128, 1364 1365 /* Support for 128-bit DFP type */ 1366 Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64, 1367 1368 /* I64 -> I64 1369 * Convert 50-bit densely packed BCD string to 60 bit BCD string 1370 */ 1371 Iop_DPBtoBCD, 1372 1373 /* I64 -> I64 1374 * Convert 60 bit BCD string to 50-bit densely packed BCD string 1375 */ 1376 Iop_BCDtoDPB, 1377 1378 /* BCD arithmetic instructions, (V128, V128) -> V128 1379 * The BCD format is the same as that used in the BCD<->DPB conversion 1380 * routines, except using 124 digits (vs 60) plus the trailing 4-bit 1381 * signed code. 
*/ 1382 Iop_BCDAdd, Iop_BCDSub, 1383 1384 /* Conversion signed 128-bit integer to signed BCD 128-bit */ 1385 Iop_I128StoBCD128, 1386 1387 /* Conversion signed BCD 128-bit to 128-bit integer */ 1388 Iop_BCD128toI128S, 1389 1390 /* Conversion I64 -> D64 */ 1391 Iop_ReinterpI64asD64, 1392 1393 /* Conversion D64 -> I64 */ 1394 Iop_ReinterpD64asI64, 1395 1396 /* ------------------ 128-bit SIMD FP. ------------------ */ 1397 1398 /* --- 16x8 vector FP --- */ 1399 1400 /* binary :: IRRoundingMode(I32) x V128 -> V128 */ 1401 Iop_Sqrt16Fx8, 1402 1403 /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */ 1404 Iop_Add16Fx8, Iop_Sub16Fx8, 1405 1406 /* binary */ 1407 Iop_CmpLT16Fx8, Iop_CmpLE16Fx8, Iop_CmpEQ16Fx8, 1408 1409 /* unary */ 1410 Iop_Abs16Fx8, 1411 Iop_Neg16Fx8, 1412 1413 /* --- 32x4 vector FP --- */ 1414 1415 /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */ 1416 Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4, 1417 1418 /* binary */ 1419 Iop_Max32Fx4, Iop_Min32Fx4, 1420 Iop_Add32Fx2, Iop_Sub32Fx2, 1421 /* Note: For the following compares, the ppc and arm front-ends assume a 1422 nan in a lane of either argument returns zero for that lane. */ 1423 Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4, 1424 Iop_CmpGT32Fx4, Iop_CmpGE32Fx4, 1425 1426 /* Pairwise Max and Min. See integer pairwise operations for details. */ 1427 Iop_PwMax32Fx4, Iop_PwMin32Fx4, 1428 1429 /* unary */ 1430 Iop_Abs32Fx4, 1431 Iop_Neg32Fx4, 1432 1433 /* binary :: IRRoundingMode(I32) x V128 -> V128 */ 1434 Iop_Sqrt32Fx4, 1435 1436 /* Vector Reciprocal Estimate finds an approximate reciprocal of each 1437 element in the operand vector, and places the results in the 1438 destination vector. */ 1439 Iop_RecipEst32Fx4, 1440 1441 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2). 1442 Note, that if one of the arguments is zero and another one is infinity 1443 of arbitrary sign the result of the operation is 2.0. 
*/ 1444 Iop_RecipStep32Fx4, 1445 1446 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal 1447 square root of each element in the operand vector. */ 1448 Iop_RSqrtEst32Fx4, 1449 1450 /* Scaling of vector with a power of 2 (wd[i] <- ws[i] * 2^wt[i]) */ 1451 Iop_Scale2_32Fx4, 1452 1453 /* Vector floating-point base 2 logarithm */ 1454 Iop_Log2_32Fx4, 1455 1456 /* Vector floating-point exponential 2^x */ 1457 Iop_Exp2_32Fx4, 1458 1459 /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. 1460 Note, that if one of the arguments is zero and another one is infinity 1461 of arbitrary sign the result of the operation is 1.5. */ 1462 Iop_RSqrtStep32Fx4, 1463 1464 /* --- Int to/from FP conversion --- */ 1465 /* Unlike the standard fp conversions, these irops take no 1466 rounding mode argument. Instead the irop trailers _R{M,P,N,Z} 1467 indicate the mode: {-inf, +inf, nearest, zero} respectively. */ 1468 1469 // These carry no rounding mode and are therefore deprecated 1470 Iop_I32UtoF32x4_DEP, Iop_I32StoF32x4_DEP, /* I32x4 -> F32x4 */ 1471 1472 Iop_I32StoF32x4, /* IRRoundingMode(I32) x V128 -> V128 */ 1473 Iop_F32toI32Sx4, /* IRRoundingMode(I32) x V128 -> V128 */ 1474 1475 Iop_F32toI32Ux4_RZ, Iop_F32toI32Sx4_RZ, /* F32x4 -> I32x4 */ 1476 Iop_QF32toI32Ux4_RZ, Iop_QF32toI32Sx4_RZ, /* F32x4 -> I32x4 (saturating) */ 1477 Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */ 1478 Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */ 1479 /* Fixed32 format is floating-point number with fixed number of fraction 1480 bits. The number of fraction bits is passed as a second argument of 1481 type I8. */ 1482 Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */ 1483 Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */ 1484 1485 /* --- Single to/from half conversion --- */ 1486 /* FIXME: what kind of rounding in F32x4 -> F16x4 case?
*/ 1487 // FIXME these carry no rounding mode 1488 Iop_F32toF16x4_DEP, /* F32x4(==V128) -> F16x4(==I64), NO ROUNDING MODE */ 1489 Iop_F32toF16x4, /* IRRoundingMode(I32) x V128 -> I64 */ 1490 Iop_F16toF32x4, /* F16x4 -> F32x4 */ 1491 1492 /* -- Double to/from half conversion -- */ 1493 Iop_F64toF16x2_DEP, // F64x2 -> F16x2, NO ROUNDING MODE 1494 Iop_F16toF64x2, 1495 1496 /* Values from two registers converted in smaller type and put in one 1497 IRRoundingMode(I32) x (F32x4 | F32x4) -> Q16x8 */ 1498 Iop_F32x4_2toQ16x8, 1499 1500 1501 /* --- 32x4 lowest-lane-only scalar FP --- */ 1502 1503 /* In binary cases, upper 3/4 is copied from first operand. In 1504 unary cases, upper 3/4 is copied from the operand. */ 1505 1506 /* binary */ 1507 Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4, 1508 Iop_Max32F0x4, Iop_Min32F0x4, 1509 Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4, 1510 1511 /* unary */ 1512 Iop_RecipEst32F0x4, Iop_Sqrt32F0x4, Iop_RSqrtEst32F0x4, 1513 1514 /* --- 64x2 vector FP --- */ 1515 1516 /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */ 1517 Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2, 1518 1519 /* binary */ 1520 Iop_Max64Fx2, Iop_Min64Fx2, 1521 Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2, 1522 1523 /* unary */ 1524 Iop_Abs64Fx2, 1525 Iop_Neg64Fx2, 1526 1527 /* binary :: IRRoundingMode(I32) x V128 -> V128 */ 1528 Iop_Sqrt64Fx2, 1529 1530 /* Scaling of vector with a power of 2 (wd[i] <- ws[i] * 2^wt[i]) */ 1531 Iop_Scale2_64Fx2, 1532 1533 /* Vector floating-point base 2 logarithm */ 1534 Iop_Log2_64Fx2, 1535 1536 /* see 32Fx4 variants for description */ 1537 Iop_RecipEst64Fx2, // unary 1538 Iop_RecipStep64Fx2, // binary 1539 Iop_RSqrtEst64Fx2, // unary 1540 Iop_RSqrtStep64Fx2, // binary 1541 1542 1543 /* Values from two registers converted in smaller type and put in one 1544 IRRoundingMode(I32) x (F64x2 | F64x2) -> Q32x4 */ 1545 Iop_F64x2_2toQ32x4, 1546 1547 /* --- 64x2 lowest-lane-only 
scalar FP --- */ 1548 1549 /* In binary cases, upper half is copied from first operand. In 1550 unary cases, upper half is copied from the operand. */ 1551 1552 /* binary */ 1553 Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2, 1554 Iop_Max64F0x2, Iop_Min64F0x2, 1555 Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2, 1556 1557 /* unary */ 1558 Iop_Sqrt64F0x2, 1559 1560 /* --- pack / unpack --- */ 1561 1562 /* 64 <-> 128 bit vector */ 1563 Iop_V128to64, // :: V128 -> I64, low half 1564 Iop_V128HIto64, // :: V128 -> I64, high half 1565 Iop_64HLtoV128, // :: (I64,I64) -> V128 1566 1567 Iop_64UtoV128, 1568 Iop_SetV128lo64, 1569 1570 /* Copies lower 64/32/16/8 bits, zeroes out the rest. */ 1571 Iop_ZeroHI64ofV128, // :: V128 -> V128 1572 Iop_ZeroHI96ofV128, // :: V128 -> V128 1573 Iop_ZeroHI112ofV128, // :: V128 -> V128 1574 Iop_ZeroHI120ofV128, // :: V128 -> V128 1575 1576 /* 32 <-> 128 bit vector */ 1577 Iop_32UtoV128, 1578 Iop_V128to32, // :: V128 -> I32, lowest lane 1579 Iop_SetV128lo32, // :: (V128,I32) -> V128 1580 1581 /* ------------------ 128-bit SIMD Integer. ------------------ */ 1582 1583 /* BITWISE OPS */ 1584 Iop_NotV128, 1585 Iop_AndV128, Iop_OrV128, Iop_XorV128, 1586 1587 /* VECTOR SHIFT (shift amt :: Ity_I8) */ 1588 Iop_ShlV128, Iop_ShrV128, Iop_SarV128, 1589 1590 /* MISC (vector integer cmp != 0) */ 1591 Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2, 1592 Iop_CmpNEZ128x1, 1593 1594 /* ADDITION (normal / U->U sat / S->S sat) */ 1595 Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, Iop_Add128x1, 1596 Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2, 1597 Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2, 1598 1599 /* ADDITION, ARM64 specific saturating variants. */ 1600 /* Unsigned widen left arg, signed widen right arg, add, saturate S->S. 1601 This corresponds to SUQADD. 
*/ 1602 Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8, 1603 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2, 1604 /* Signed widen left arg, unsigned widen right arg, add, saturate U->U. 1605 This corresponds to USQADD. */ 1606 Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8, 1607 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2, 1608 1609 /* SUBTRACTION (normal / unsigned sat / signed sat) */ 1610 Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2, Iop_Sub128x1, 1611 Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2, 1612 Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2, 1613 1614 /* MULTIPLICATION (normal / high half of signed/unsigned) */ 1615 Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, 1616 Iop_MulHi8Ux16, Iop_MulHi16Ux8, Iop_MulHi32Ux4, 1617 Iop_MulHi8Sx16, Iop_MulHi16Sx8, Iop_MulHi32Sx4, 1618 /* (widening signed/unsigned of even lanes, with lowest lane=zero) */ 1619 Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven32Ux4, 1620 Iop_MullEven8Sx16, Iop_MullEven16Sx8, Iop_MullEven32Sx4, 1621 1622 /* Widening multiplies, all of the form (I64, I64) -> V128 */ 1623 Iop_Mull8Ux8, Iop_Mull8Sx8, 1624 Iop_Mull16Ux4, Iop_Mull16Sx4, 1625 Iop_Mull32Ux2, Iop_Mull32Sx2, 1626 1627 /* Signed doubling saturating widening multiplies, (I64, I64) -> V128 */ 1628 Iop_QDMull16Sx4, Iop_QDMull32Sx2, 1629 1630 /* Vector Saturating Doubling Multiply Returning High Half and 1631 Vector Saturating Rounding Doubling Multiply Returning High Half. 1632 These IROps multiply corresponding elements in two vectors, double 1633 the results, and place the most significant half of the final results 1634 in the destination vector. The results are truncated or rounded. If 1635 any of the results overflow, they are saturated. 
To be more precise, 1636 for each lane, the computed result is: 1637 QDMulHi: 1638 hi-half( sign-extend(laneL) *q sign-extend(laneR) *q 2 ) 1639 QRDMulHi: 1640 hi-half( sign-extend(laneL) *q sign-extend(laneR) *q 2 1641 +q (1 << (lane-width-in-bits - 1)) ) 1642 */ 1643 Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, /* (V128, V128) -> V128 */ 1644 Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, /* (V128, V128) -> V128 */ 1645 1646 /* Polynomial multiplication treats its arguments as 1647 coefficients of polynomials over {0, 1}. */ 1648 Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */ 1649 Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */ 1650 1651 /* Vector Polynomial multiplication add. (V128, V128) -> V128 1652 1653 *** Below is the algorithm for the instructions. These Iops could 1654 be emulated to get this functionality, but the emulation would 1655 be long and messy. 1656 1657 Example for polynomial multiply add for vector of bytes 1658 do i = 0 to 15 1659 prod[i].bit[0:14] <- 0 1660 srcA <- VR[argL].byte[i] 1661 srcB <- VR[argR].byte[i] 1662 do j = 0 to 7 1663 do k = 0 to j 1664 gbit <- srcA.bit[k] & srcB.bit[j-k] 1665 prod[i].bit[j] <- prod[i].bit[j] ^ gbit 1666 end 1667 end 1668 1669 do j = 8 to 14 1670 do k = j-7 to 7 1671 gbit <- (srcA.bit[k] & srcB.bit[j-k]) 1672 prod[i].bit[j] <- prod[i].bit[j] ^ gbit 1673 end 1674 end 1675 end 1676 1677 do i = 0 to 7 1678 VR[dst].hword[i] <- 0b0 || (prod[2×i] ^ prod[2×i+1]) 1679 end 1680 */ 1681 Iop_PolynomialMulAdd8x16, Iop_PolynomialMulAdd16x8, 1682 Iop_PolynomialMulAdd32x4, Iop_PolynomialMulAdd64x2, 1683 1684 /* PAIRWISE operations */ 1685 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = 1686 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ 1687 Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4, 1688 Iop_PwAdd32Fx2, 1689 1690 /* Longening variant is unary. The resulting vector contains two times 1691 less elements than operand, but they are two times wider. 
1692 Example: 1693 Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] 1694 where a+b and c+d are unsigned 32-bit values. */ 1695 Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4, Iop_PwAddL64Ux2, 1696 Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4, 1697 1698 /* This is amd64 PMADDUBSW, (V128, V128) -> V128. For each adjacent pair 1699 of bytes [a,b] in the first arg and [c,d] in the second, computes: 1700 signed/signed sat to 16 bits ( zxTo16(a) * sxTo16(b) 1701 + zxTo16(c) * sxTo16(d) ) 1702 This exists because it's frequently used and there's no reasonably 1703 concise way to express it using other IROps. 1704 */ 1705 Iop_PwExtUSMulQAdd8x16, 1706 1707 /* Other unary pairwise ops */ 1708 1709 /* Vector bit matrix transpose. (V128) -> V128 */ 1710 /* For each doubleword element of the source vector, an 8-bit x 8-bit 1711 * matrix transpose is performed. */ 1712 Iop_PwBitMtxXpose64x2, 1713 1714 /* ABSOLUTE VALUE */ 1715 Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2, 1716 1717 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ 1718 Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, Iop_Avg64Ux2, 1719 Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, Iop_Avg64Sx2, 1720 1721 /* MIN/MAX */ 1722 Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2, 1723 Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2, 1724 Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2, 1725 Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2, 1726 1727 /* COMPARISON */ 1728 Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2, 1729 Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2, 1730 Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2, 1731 1732 /* COUNT ones / leading zeroes / leading sign bits (not including topmost 1733 bit) */ 1734 Iop_Cnt8x16, 1735 Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4, 1736 Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4, 1737 1738 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ 1739 Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2, 1740 Iop_ShrN8x16, 
Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2, 1741 Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2, 1742 1743 /* VECTOR x VECTOR SHIFT / ROTATE */ 1744 /* FIXME: I'm pretty sure the ARM32 front/back ends interpret these 1745 differently from all other targets. The intention is that 1746 the shift amount (2nd arg) is interpreted as unsigned and 1747 only the lowest log2(lane-bits) bits are relevant. But the 1748 ARM32 versions treat the shift amount as an 8 bit signed 1749 number. The ARM32 uses should be replaced by the relevant 1750 vector x vector bidirectional shifts instead. */ 1751 Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2, 1752 Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2, 1753 Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2, 1754 Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2, 1755 Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2, 1756 1757 /* VECTOR x VECTOR SATURATING SHIFT */ 1758 Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2, 1759 Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2, 1760 /* VECTOR x INTEGER SATURATING SHIFT */ 1761 Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8, 1762 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2, 1763 Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8, 1764 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2, 1765 Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8, 1766 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2, 1767 1768 /* VECTOR x VECTOR BIDIRECTIONAL SATURATING (& MAYBE ROUNDING) SHIFT */ 1769 /* All of type (V128, V128) -> V256. */ 1770 /* The least significant 8 bits of each lane of the second 1771 operand are used as the shift amount, and interpreted signedly. 1772 Positive values mean a shift left, negative a shift right. The 1773 result is signedly or unsignedly saturated. There are also 1774 rounding variants, which add 2^(shift_amount-1) to the value before 1775 shifting, but only in the shift-right case. Vacated positions 1776 are filled with zeroes. IOW, it's either SHR or SHL, but not SAR. 
1777 1778 These operations return 129 bits: one bit ("Q") indicating whether 1779 saturation occurred, and the shift result. The result type is V256, 1780 of which the lower V128 is the shift result, and Q occupies the 1781 least significant bit of the upper V128. All other bits of the 1782 upper V128 are zero. */ 1783 // Unsigned saturation, no rounding 1784 Iop_QandUQsh8x16, Iop_QandUQsh16x8, 1785 Iop_QandUQsh32x4, Iop_QandUQsh64x2, 1786 // Signed saturation, no rounding 1787 Iop_QandSQsh8x16, Iop_QandSQsh16x8, 1788 Iop_QandSQsh32x4, Iop_QandSQsh64x2, 1789 1790 // Unsigned saturation, rounding 1791 Iop_QandUQRsh8x16, Iop_QandUQRsh16x8, 1792 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2, 1793 // Signed saturation, rounding 1794 Iop_QandSQRsh8x16, Iop_QandSQRsh16x8, 1795 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2, 1796 1797 /* VECTOR x VECTOR BIDIRECTIONAL (& MAYBE ROUNDING) SHIFT */ 1798 /* All of type (V128, V128) -> V128 */ 1799 /* The least significant 8 bits of each lane of the second 1800 operand are used as the shift amount, and interpreted signedly. 1801 Positive values mean a shift left, negative a shift right. 1802 There are also rounding variants, which add 2^(shift_amount-1) 1803 to the value before shifting, but only in the shift-right case. 1804 1805 For left shifts, the vacated places are filled with zeroes. 1806 For right shifts, the vacated places are filled with zeroes 1807 for the U variants and sign bits for the S variants. */ 1808 // Signed and unsigned, non-rounding 1809 Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2, 1810 Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2, 1811 1812 // Signed and unsigned, rounding 1813 Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2, 1814 Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2, 1815 1816 /* The least significant 8 bits of each lane of the second 1817 operand are used as the shift amount, and interpreted signedly. 1818 Positive values mean a shift left, negative a shift right. 
The 1819 result is signedly or unsignedly saturated. There are also 1820 rounding variants, which add 2^(shift_amount-1) to the value before 1821 shifting, but only in the shift-right case. Vacated positions 1822 are filled with zeroes. IOW, it's either SHR or SHL, but not SAR. 1823 */ 1824 1825 /* VECTOR x SCALAR SATURATING (& MAYBE ROUNDING) NARROWING SHIFT RIGHT */ 1826 /* All of type (V128, I8) -> V128 */ 1827 /* The first argument is shifted right, then narrowed to half the width 1828 by saturating it. The second argument is a scalar shift amount that 1829 applies to all lanes, and must be a value in the range 1 to lane_width. 1830 The shift may be done signedly (Sar variants) or unsignedly (Shr 1831 variants). The saturation is done according to the two signedness 1832 indicators at the end of the name. For example 64Sto32U means a 1833 signed 64 bit value is saturated into an unsigned 32 bit value. 1834 Additionally, the QRS variants do rounding, that is, they add the 1835 value (1 << (shift_amount-1)) to each source lane before shifting. 1836 1837 These operations return 65 bits: one bit ("Q") indicating whether 1838 saturation occurred, and the shift result. The result type is V128, 1839 of which the lower half is the shift result, and Q occupies the 1840 least significant bit of the upper half. All other bits of the 1841 upper half are zero. 
*/ 1842 // No rounding, sat U->U 1843 Iop_QandQShrNnarrow16Uto8Ux8, 1844 Iop_QandQShrNnarrow32Uto16Ux4, Iop_QandQShrNnarrow64Uto32Ux2, 1845 // No rounding, sat S->S 1846 Iop_QandQSarNnarrow16Sto8Sx8, 1847 Iop_QandQSarNnarrow32Sto16Sx4, Iop_QandQSarNnarrow64Sto32Sx2, 1848 // No rounding, sat S->U 1849 Iop_QandQSarNnarrow16Sto8Ux8, 1850 Iop_QandQSarNnarrow32Sto16Ux4, Iop_QandQSarNnarrow64Sto32Ux2, 1851 1852 // Rounding, sat U->U 1853 Iop_QandQRShrNnarrow16Uto8Ux8, 1854 Iop_QandQRShrNnarrow32Uto16Ux4, Iop_QandQRShrNnarrow64Uto32Ux2, 1855 // Rounding, sat S->S 1856 Iop_QandQRSarNnarrow16Sto8Sx8, 1857 Iop_QandQRSarNnarrow32Sto16Sx4, Iop_QandQRSarNnarrow64Sto32Sx2, 1858 // Rounding, sat S->U 1859 Iop_QandQRSarNnarrow16Sto8Ux8, 1860 Iop_QandQRSarNnarrow32Sto16Ux4, Iop_QandQRSarNnarrow64Sto32Ux2, 1861 1862 /* NARROWING (binary) 1863 -- narrow 2xV128 into 1xV128, hi half from left arg */ 1864 /* See comments above w.r.t. U vs S issues in saturated narrowing. */ 1865 Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8, 1866 Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8, 1867 Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8, 1868 Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8, 1869 Iop_QNarrowBin64Sto32Sx4, Iop_QNarrowBin64Uto32Ux4, 1870 Iop_NarrowBin64to32x4, 1871 1872 /* NARROWING (unary) -- narrow V128 into I64 */ 1873 Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2, 1874 /* Saturating narrowing from signed source to signed/unsigned 1875 destination */ 1876 Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2, 1877 Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2, 1878 /* Saturating narrowing from unsigned source to unsigned destination */ 1879 Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2, 1880 1881 /* WIDENING -- sign or zero extend each element of the argument 1882 vector to the twice original size. 
The resulting vector consists of 1883 the same number of elements but each element and the vector itself 1884 are twice as wide. 1885 All operations are I64->V128. 1886 Example 1887 Iop_Widen32Sto64x2( [a, b] ) = [c, d] 1888 where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */ 1889 Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2, 1890 Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2, 1891 1892 /* INTERLEAVING */ 1893 /* Interleave lanes from low or high halves of 1894 operands. Most-significant result lane is from the left 1895 arg. */ 1896 Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, 1897 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2, 1898 Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, 1899 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2, 1900 /* Interleave odd/even lanes of operands. Most-significant result lane 1901 is from the left arg. */ 1902 Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16, 1903 Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8, 1904 Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4, 1905 1906 /* Pack even/odd lanes. */ 1907 Iop_PackOddLanes8x16, Iop_PackEvenLanes8x16, 1908 Iop_PackOddLanes16x8, Iop_PackEvenLanes16x8, 1909 Iop_PackOddLanes32x4, Iop_PackEvenLanes32x4, 1910 1911 /* CONCATENATION -- build a new value by concatenating either 1912 the even or odd lanes of both operands. Note that 1913 Cat{Odd,Even}Lanes64x2 are identical to Interleave{HI,LO}64x2 1914 and so are omitted. */ 1915 Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4, 1916 Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4, 1917 1918 /* GET elements of VECTOR 1919 GET is binop (V128, I8) -> I<elem_size> 1920 SET is triop (V128, I8, I<elem_size>) -> V128 */ 1921 /* Note: the arm back-end handles only constant second argument. 
*/ 1922 Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2, 1923 Iop_SetElem8x16, Iop_SetElem16x8, Iop_SetElem32x4, Iop_SetElem64x2, 1924 1925 /* DUPLICATING -- copy value to all lanes */ 1926 Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4, 1927 1928 /* SLICE -- produces the lowest 128 bits of (arg1:arg2) >> (8 * arg3). 1929 arg3 is a shift amount in bytes and may be between 0 and 16 1930 inclusive. When 0, the result is arg2; when 16, the result is arg1. 1931 Not all back ends handle all values. The arm64 back 1932 end handles only immediate arg3 values. */ 1933 Iop_SliceV128, // (V128, V128, I8) -> V128 1934 1935 /* REVERSE the order of chunks in vector lanes. Chunks must be 1936 smaller than the vector lanes (obviously) and so may be 8-, 1937 16- and 32-bit in size. See definitions of 64-bit SIMD 1938 versions above for examples. */ 1939 Iop_Reverse8sIn16_x8, 1940 Iop_Reverse8sIn32_x4, Iop_Reverse16sIn32_x4, 1941 Iop_Reverse8sIn64_x2, Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2, 1942 Iop_Reverse1sIn8_x16, /* Reverse bits in each byte lane. */ 1943 1944 /* PERMUTING -- copy src bytes to dst, 1945 as indexed by control vector bytes: 1946 for i in 0 .. 15 . result[i] = argL[ argR[i] ] 1947 argR[i] values may only be in the range 0 .. 15, else behaviour 1948 is undefined. That is, argR[i][7:4] must be zero. */ 1949 Iop_Perm8x16, 1950 Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */ 1951 1952 /* PERMUTING with optional zeroing: 1953 for i in 0 .. 15 . result[i] = if argR[i] bit 7 is set 1954 then zero else argL[ argR[i] ] 1955 argR[i][6:4] must be zero, else behaviour is undefined. 
   */
   Iop_PermOrZero8x16,

   /* same, but Triop (argL consists of two 128-bit parts) */
   /* correct range for argR values is 0..31 */
   /* (V128, V128, V128) -> V128 */
   /* (ArgL_first, ArgL_second, ArgR) -> result */
   Iop_Perm8x16x2,

   /* MISC CONVERSION -- get high bits of each byte lane, a la
      x86/amd64 pmovmskb */
   Iop_GetMSBs8x16, /* V128 -> I16 */

   /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
      See floating-point equivalents for details. */
   Iop_RecipEst32Ux4, Iop_RSqrtEst32Ux4,

   /* 128-bit multiply by 10 instruction, result is lower 128-bits */
   Iop_MulI128by10,

   /* 128-bit multiply by 10 instruction, result is carry out from the MSB */
   Iop_MulI128by10Carry,

   /* 128-bit multiply by 10 instruction, result is lower 128-bits of the
    * source times 10 plus the carry in
    */
   Iop_MulI128by10E,

   /* 128-bit multiply by 10 instruction, result is carry out from the MSB
    * of the source times 10 plus the carry in
    */
   Iop_MulI128by10ECarry,

   /* 128-bit carry out from ((U64 * U64 -> U128) + (U64 * U64 -> U128)) */
   Iop_2xMultU64Add128CarryOut,

   /* ------------------ 256-bit SIMD Integer.
------------------ */ 1993 1994 /* Pack/unpack */ 1995 Iop_V256to64_0, // V256 -> I64, extract least significant lane 1996 Iop_V256to64_1, 1997 Iop_V256to64_2, 1998 Iop_V256to64_3, // V256 -> I64, extract most significant lane 1999 2000 Iop_64x4toV256, // (I64,I64,I64,I64)->V256 2001 // first arg is most significant lane 2002 2003 Iop_V256toV128_0, // V256 -> V128, less significant lane 2004 Iop_V256toV128_1, // V256 -> V128, more significant lane 2005 Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif 2006 2007 Iop_AndV256, 2008 Iop_OrV256, 2009 Iop_XorV256, 2010 Iop_NotV256, 2011 2012 /* MISC (vector integer cmp != 0) */ 2013 Iop_CmpNEZ8x32, Iop_CmpNEZ16x16, Iop_CmpNEZ32x8, Iop_CmpNEZ64x4, 2014 2015 Iop_Add8x32, Iop_Add16x16, Iop_Add32x8, Iop_Add64x4, 2016 Iop_Sub8x32, Iop_Sub16x16, Iop_Sub32x8, Iop_Sub64x4, 2017 2018 Iop_CmpEQ8x32, Iop_CmpEQ16x16, Iop_CmpEQ32x8, Iop_CmpEQ64x4, 2019 Iop_CmpGT8Sx32, Iop_CmpGT16Sx16, Iop_CmpGT32Sx8, Iop_CmpGT64Sx4, 2020 2021 Iop_ShlN16x16, Iop_ShlN32x8, Iop_ShlN64x4, 2022 Iop_ShrN16x16, Iop_ShrN32x8, Iop_ShrN64x4, 2023 Iop_SarN16x16, Iop_SarN32x8, 2024 2025 Iop_Max8Sx32, Iop_Max16Sx16, Iop_Max32Sx8, 2026 Iop_Max8Ux32, Iop_Max16Ux16, Iop_Max32Ux8, 2027 Iop_Min8Sx32, Iop_Min16Sx16, Iop_Min32Sx8, 2028 Iop_Min8Ux32, Iop_Min16Ux16, Iop_Min32Ux8, 2029 2030 Iop_Mul16x16, Iop_Mul32x8, 2031 Iop_MulHi16Ux16, Iop_MulHi16Sx16, 2032 2033 Iop_QAdd8Ux32, Iop_QAdd16Ux16, 2034 Iop_QAdd8Sx32, Iop_QAdd16Sx16, 2035 Iop_QSub8Ux32, Iop_QSub16Ux16, 2036 Iop_QSub8Sx32, Iop_QSub16Sx16, 2037 2038 Iop_Avg8Ux32, Iop_Avg16Ux16, 2039 2040 Iop_Perm32x8, 2041 2042 /* (V128, V128) -> V128 */ 2043 Iop_CipherV128, Iop_CipherLV128, Iop_CipherSV128, 2044 Iop_NCipherV128, Iop_NCipherLV128, 2045 2046 /* Hash instructions, Federal Information Processing Standards 2047 * Publication 180-3 Secure Hash Standard. */ 2048 /* (V128, I8) -> V128; The I8 input arg is (ST | SIX), where ST and 2049 * SIX are fields from the insn. 
See ISA 2.07 description of 2050 * vshasigmad and vshasigmaw insns.*/ 2051 Iop_SHA512, Iop_SHA256, 2052 2053 /* ------------------ 256-bit SIMD FP. ------------------ */ 2054 2055 /* ternary :: IRRoundingMode(I32) x V256 x V256 -> V256 */ 2056 Iop_Add64Fx4, Iop_Sub64Fx4, Iop_Mul64Fx4, Iop_Div64Fx4, 2057 Iop_Add32Fx8, Iop_Sub32Fx8, Iop_Mul32Fx8, Iop_Div32Fx8, 2058 2059 Iop_I32StoF32x8, /* IRRoundingMode(I32) x V256 -> V256 */ 2060 Iop_F32toI32Sx8, /* IRRoundingMode(I32) x V256 -> V256 */ 2061 2062 Iop_F32toF16x8, /* IRRoundingMode(I32) x V256 -> V128 */ 2063 Iop_F16toF32x8, /* F16x8(==V128) -> F32x8(==V256) */ 2064 2065 Iop_Sqrt32Fx8, 2066 Iop_Sqrt64Fx4, 2067 Iop_RSqrtEst32Fx8, 2068 Iop_RecipEst32Fx8, 2069 2070 Iop_Max32Fx8, Iop_Min32Fx8, 2071 Iop_Max64Fx4, Iop_Min64Fx4, 2072 Iop_Rotx32, Iop_Rotx64, 2073 Iop_LAST /* must be the last enumerator */ 2074 } 2075 IROp; 2076 2077 /* Pretty-print an op. */ 2078 extern void ppIROp ( IROp ); 2079 2080 /* For a given operand return the types of its arguments and its result. */ 2081 extern void typeOfPrimop ( IROp op, 2082 /*OUTs*/ IRType* t_dst, IRType* t_arg1, 2083 IRType* t_arg2, IRType* t_arg3, IRType* t_arg4 ); 2084 2085 /* Might the given primop trap (eg, attempt integer division by zero)? If in 2086 doubt returns True. However, the vast majority of primops will never 2087 trap. */ 2088 extern Bool primopMightTrap ( IROp op ); 2089 2090 /* Encoding of IEEE754-specified rounding modes. 2091 Note, various front and back ends rely on the actual numerical 2092 values of these, so do not change them. 
*/
typedef
   enum {
      Irrm_NEAREST              = 0,  // Round to nearest, ties to even
      Irrm_NegINF               = 1,  // Round to negative infinity
      Irrm_PosINF               = 2,  // Round to positive infinity
      Irrm_ZERO                 = 3,  // Round toward zero
      Irrm_NEAREST_TIE_AWAY_0   = 4,  // Round to nearest, ties away from 0
      Irrm_PREPARE_SHORTER      = 5,  // Round to prepare for shorter
                                      // precision
      Irrm_AWAY_FROM_ZERO       = 6,  // Round to away from 0
      Irrm_NEAREST_TIE_TOWARD_0 = 7,  // Round to nearest, ties towards 0
      Irrm_INVALID              = 8   // Invalid mode
   }
   IRRoundingMode;

/* Binary floating point comparison result values.
   This is also derived from what IA32 does.  (The bit patterns look
   like the x87 FCOM condition-code encoding -- TODO(review): confirm
   against the IA32 manuals before relying on that.) */
typedef
   enum {
      Ircr_UN = 0x45,
      Ircr_LT = 0x01,
      Ircr_GT = 0x00,
      Ircr_EQ = 0x40
   }
   IRCmpFResult;

/* The same result encoding is used for all binary FP widths. */
typedef IRCmpFResult IRCmpF32Result;
typedef IRCmpFResult IRCmpF64Result;
typedef IRCmpFResult IRCmpF128Result;

/* Decimal floating point result values (same encoding again). */
typedef IRCmpFResult IRCmpDResult;
typedef IRCmpDResult IRCmpD64Result;
typedef IRCmpDResult IRCmpD128Result;

/* ------------------ Expressions ------------------ */

typedef struct _IRQop   IRQop;   /* forward declaration */
typedef struct _IRTriop IRTriop; /* forward declaration */


/* The different kinds of expressions.  Their meaning is explained below
   in the comments for IRExpr. */
typedef
   enum {
      Iex_Binder=0x1900,
      Iex_Get,
      Iex_GetI,
      Iex_RdTmp,
      Iex_Qop,
      Iex_Triop,
      Iex_Binop,
      Iex_Unop,
      Iex_Load,
      Iex_Const,
      Iex_ITE,
      Iex_CCall,
      Iex_VECRET,
      Iex_GSPTR
   }
   IRExprTag;

/* An expression.  Stored as a tagged union.  'tag' indicates what kind
   of expression this is.  'Iex' is the union that holds the fields.
If
   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
   expression, and the fields can be accessed with
   'e.Iex.Load.<fieldname>'.

   For each kind of expression, we show what it looks like when
   pretty-printed with ppIRExpr().
*/
typedef
   struct _IRExpr
   IRExpr;

struct _IRExpr {
   IRExprTag tag;
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int    offset;    /* Offset into the guest state */
         IRType ty;        /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.  One
         example is the x87 FP register stack.

         The part of the guest state to be treated as a circular array
         is described in the IRRegArray 'descr' field.  It holds the
         offset of the first element in the array, the type of each
         element, and the number of elements.

         The array index is indicated rather indirectly, in a way
         which makes optimisation easy: as the sum of variable part
         (the 'ix' field) and a constant offset (the 'bias' field).

         Since the indexing is circular, the actual array index to use
         is computed as (ix + bias) % num-of-elems-in-the-array.

         Here's an example.  The description

            (96:8xF64)[t39,-7]

         describes an array of 8 F64-typed values, the
         guest-state-offset of the first being 96.  This array is
         being indexed at (t39 - 7) % 8.

         It is important to get the array size/type exactly correct
         since IR optimisation looks closely at such info in order to
         establish aliasing/non-aliasing between separate GetI and
         PutI events, which is used to establish when they can be
         reordered, etc.  Putting incorrect info in will lead to
         obscure IR optimisation bugs.

            ppIRExpr output: GETI<descr>[<ix>,<bias>]
                         eg. GETI(128:8xI8)[t1,0]
      */
      struct {
         IRRegArray* descr; /* Part of guest state treated as circular */
         IRExpr*     ix;    /* Variable part of index into array */
         Int         bias;  /* Constant offset part of index into array */
      } GetI;

      /* The value held by a temporary.
         ppIRExpr output: t<tmp>, eg. t1
      */
      struct {
         IRTemp tmp;       /* The temporary number */
      } RdTmp;

      /* A quaternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
                      eg. MAddF64r32(t1, t2, t3, t4)
      */
      struct {
         IRQop* details;
      } Qop;

      /* A ternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
                      eg. MulF64(1, 2.0, 3.0)
      */
      struct {
         IRTriop* details;
      } Triop;

      /* A binary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
      } Binop;

      /* A unary operation.
         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
      */
      struct {
         IROp    op;       /* op-code */
         IRExpr* arg;      /* operand */
      } Unop;

      /* A load from memory -- a normal load, not a load-linked.
         Load-Linkeds (and Store-Conditionals) are instead represented
         by IRStmt.LLSC since Load-Linkeds have side effects and so
         are not semantically valid IRExpr's.
         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
      */
      struct {
         IREndness end;    /* Endian-ness of the load */
         IRType    ty;     /* Type of the loaded value */
         IRExpr*   addr;   /* Address being loaded from */
      } Load;

      /* A constant-valued expression.
         ppIRExpr output: <con>, eg. 0x4:I32
      */
      struct {
         IRConst* con;     /* The constant itself */
      } Const;

      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.
         The stated return IRType, and the implied argument types,
         must match that of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         * it must not assume that arguments are being evaluated in a
           particular order.  The order of evaluation is unspecified.

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         In principle it would be allowable to have the arg vector
         contain an IRExpr_VECRET(), although not IRExpr_GSPTR(). However,
         at the moment there is no requirement for clean helper calls to
         be able to return V128 or V256 values.  Hence this is not allowed.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;    /* Function to call. */
         IRType    retty;  /* Type of return value. */
         IRExpr**  args;   /* Vector of argument expressions. */
      } CCall;

      /* A ternary if-then-else operator.  It returns iftrue if cond is
         nonzero, iffalse otherwise.  Note that it is STRICT, ie. both
         iftrue and iffalse are evaluated in all cases.

         ppIRExpr output: ITE(<cond>,<iftrue>,<iffalse>),
                      eg. ITE(t6,t7,t8)
      */
      struct {
         IRExpr* cond;     /* Condition */
         IRExpr* iftrue;   /* True expression */
         IRExpr* iffalse;  /* False expression */
      } ITE;
   } Iex;
};

/* Expression auxiliaries: a ternary expression. */
struct _IRTriop {
   IROp op;          /* op-code   */
   IRExpr* arg1;     /* operand 1 */
   IRExpr* arg2;     /* operand 2 */
   IRExpr* arg3;     /* operand 3 */
};

/* Expression auxiliaries: a quaternary expression. */
struct _IRQop {
   IROp op;          /* op-code   */
   IRExpr* arg1;     /* operand 1 */
   IRExpr* arg2;     /* operand 2 */
   IRExpr* arg3;     /* operand 3 */
   IRExpr* arg4;     /* operand 4 */
};


/* Two special kinds of IRExpr, which can ONLY be used in
   argument lists for dirty helper calls (IRDirty.args) and in NO
   OTHER PLACES.  And then only in very limited ways.  */

/* Denotes an argument which (in the helper) takes a pointer to a
   (naturally aligned) V128 or V256, into which the helper is expected
   to write its result.
Use of IRExpr_VECRET() is strictly
   controlled.  If the helper returns a V128 or V256 value then
   IRExpr_VECRET() must appear exactly once in the arg list, although
   it can appear anywhere, and the helper must have a C 'void' return
   type.  If the helper returns any other type, IRExpr_VECRET() may
   not appear in the argument list.  */

/* Denotes a void* argument which is passed to the helper, which at
   run time will point to the thread's guest state area.  This can
   only appear at most once in an argument list, and it may not appear
   at all in argument lists for clean helper calls.  */

/* True iff 'e' is one of the two special dirty-call-only argument
   markers described above. */
static inline Bool is_IRExpr_VECRET_or_GSPTR ( const IRExpr* e ) {
   return e->tag == Iex_VECRET || e->tag == Iex_GSPTR;
}


/* Expression constructors. */
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
                               IRExpr* arg3, IRExpr* arg4 );
extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
                               IRExpr* arg2, IRExpr* arg3 );
extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
extern IRExpr* IRExpr_Const  ( IRConst* con );
extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
extern IRExpr* IRExpr_ITE    ( IRExpr* cond, IRExpr* iftrue, IRExpr* iffalse );
extern IRExpr* IRExpr_VECRET ( void );
extern IRExpr* IRExpr_GSPTR  ( void );

/* Deep-copy an IRExpr. */
extern IRExpr* deepCopyIRExpr ( const IRExpr* );

/* Pretty-print an IRExpr. */
extern void ppIRExpr ( const IRExpr* );

/* Fold an IRExpr.  Return folded result.
 */
extern IRExpr* foldIRExpr ( IRExpr**, IRExpr* );

/* NULL-terminated IRExpr vector constructors, suitable for
   use as arg lists in clean/dirty helper calls. */
extern IRExpr** mkIRExprVec_0 ( void );
extern IRExpr** mkIRExprVec_1 ( IRExpr* );
extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr* );
extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_9 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_13 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                 IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                 IRExpr*, IRExpr*, IRExpr*, IRExpr*, IRExpr* );

/* IRExpr vector copiers:
   - shallowCopy: shallow-copy (ie. create a new vector that shares the
     elements with the original).
   - deepCopy: deep-copy (ie. create a completely new vector). */
extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
extern IRExpr** deepCopyIRExprVec ( IRExpr *const * );

/* Make a constant expression from the given host word taking into
   account (of course) the host word size. */
extern IRExpr* mkIRExpr_HWord ( HWord );

/* Convenience function for constructing clean helper calls. */
extern
IRExpr* mkIRExprCCall ( IRType retty,
                        Int regparms, const HChar* name, void* addr,
                        IRExpr** args );


/* Convenience functions for atoms (IRExprs which are either Iex_RdTmp or
 * Iex_Const).
 */
/* An atom is a leaf expression: a temporary read or a constant. */
static inline Bool isIRAtom ( const IRExpr* e ) {
   return e->tag == Iex_RdTmp || e->tag == Iex_Const;
}

/* Are these two IR atoms identical?  Causes an assertion
   failure if they are passed non-atoms. */
extern Bool eqIRAtom ( const IRExpr*, const IRExpr* );


/* ------------------ Jump kinds ------------------ */

/* This describes hints which can be passed to the dispatcher at guest
   control-flow transfer points.

   Re Ijk_InvalICache and Ijk_FlushDCache: the guest state _must_ have
   two pseudo-registers, guest_CMSTART and guest_CMLEN, which specify
   the start and length of the region to be invalidated.  CM stands
   for "Cache Management".  These are both the size of a guest word.
   It is the responsibility of the relevant toIR.c to ensure that
   these are filled in with suitable values before issuing a jump of
   kind Ijk_InvalICache or Ijk_FlushDCache.

   Ijk_InvalICache requests invalidation of translations taken from
   the requested range.  Ijk_FlushDCache requests flushing of the D
   cache for the specified range.

   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
   pseudo-register guest_EMNOTE, which is 32-bits regardless of the
   host or guest word size.  That register should be made to hold a
   VexEmNote value to indicate the reason for the exit.

   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
   cannot continue) and so the jump destination can be anything.

   Re Ijk_Sys_ (syscall jumps): the guest state must have a
   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
   word.  Front ends should set this to be the IP at the most recently
   executed kernel-entering (system call) instruction.  This makes it
   very much easier (viz, actually possible at all) to back up the
   guest to restart a syscall that has been interrupted by a signal.

   Re Ijk_Extension: the guest state must have the pseudo-register
   guest_IP_AT_SYSCALL, which is also used for Ijk_Sys_*.  Front ends
   must set this to the current instruction address before jumping to
   an extension handler.
*/
typedef
   enum {
      Ijk_INVALID=0x1A00,
      Ijk_Boring,         /* not interesting; just goto next */
      Ijk_Call,           /* guest is doing a call */
      Ijk_Ret,            /* guest is doing a return */
      Ijk_ClientReq,      /* do guest client req before continuing */
      Ijk_Yield,          /* client is yielding to thread scheduler */
      Ijk_EmWarn,         /* report emulation warning before continuing */
      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,       /* current instruction cannot be decoded */
      Ijk_MapFail,        /* Vex-provided address translation failed */
      Ijk_InvalICache,    /* Inval icache for range [CMSTART, +CMLEN) */
      Ijk_FlushDCache,    /* Flush dcache for range [CMSTART, +CMLEN) */
      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
      Ijk_SigILL,         /* current instruction synths SIGILL */
      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
      Ijk_SigBUS,         /* current instruction synths SIGBUS */
      Ijk_SigFPE,         /* current instruction synths generic SIGFPE */
      Ijk_SigFPE_IntDiv,  /* current instruction synths SIGFPE - IntDiv */
      Ijk_SigFPE_IntOvf,  /* current instruction synths SIGFPE - IntOvf */
      /* Unfortunately, various guest-dependent syscall kinds.  They
         all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,    /* amd64/x86 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
      Ijk_Sys_int145,     /* amd64/x86 'int $0x91' */
      Ijk_Sys_int210,     /* amd64/x86 'int $0xD2' */
      Ijk_Sys_sysenter,   /* x86 'sysenter'.  guest_EIP becomes
                             invalid at the point this happens. */
      Ijk_Extension,      /* invoke guest-specific extension */
   }
   IRJumpKind;

/* Pretty-print an IRJumpKind. */
extern void ppIRJumpKind ( IRJumpKind );


/* ------------------ Dirty helper calls ------------------ */

/* A dirty call is a flexible mechanism for calling (possibly
   conditionally) a helper function or procedure.  The helper function
   may read, write or modify client memory, and may read, write or
   modify client state.  It can take arguments and optionally return a
   value.  It may return different results and/or do different things
   when called repeatedly with the same arguments, by means of storing
   private state.

   If a value is returned, it is assigned to the nominated return
   temporary.

   Dirty calls are statements rather than expressions for obvious
   reasons.  If a dirty call is marked as writing guest state, any
   pre-existing values derived from the written parts of the guest
   state are invalid.  Similarly, if the dirty call is stated as
   writing memory, any pre-existing loaded values are invalidated by
   it.

   In order that instrumentation is possible, the call must state, and
   state correctly:

   * Whether it reads, writes or modifies memory, and if so where.

   * Whether it reads, writes or modifies guest state, and if so which
     pieces.  Several pieces may be stated, and their extents must be
     known at translation-time.  Each piece is allowed to repeat some
     number of times at a fixed interval, if required.

   Normally, code is generated to pass just the args to the helper.
   However, if IRExpr_GSPTR() is present in the argument list (at most
   one instance is allowed), then the guest state pointer is passed for
   that arg, so that the callee can access the guest state.
It is 2583 invalid for .nFxState to be zero but IRExpr_GSPTR() to be present, 2584 since .nFxState==0 is a claim that the call does not access guest 2585 state. 2586 2587 IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict. The 2588 arguments and 'mFx' are evaluated REGARDLESS of the guard value. 2589 The order of argument evaluation is unspecified. The guard 2590 expression is evaluated AFTER the arguments and 'mFx' have been 2591 evaluated. 'mFx' is expected (by Memcheck) to be a defined value 2592 even if the guard evaluates to false. 2593 */ 2594 2595 #define VEX_N_FXSTATE 7 /* enough for FXSAVE/FXRSTOR on x86 */ 2596 2597 /* Effects on resources (eg. registers, memory locations) */ 2598 typedef 2599 enum { 2600 Ifx_None=0x1B00, /* no effect */ 2601 Ifx_Read, /* reads the resource */ 2602 Ifx_Write, /* writes the resource */ 2603 Ifx_Modify, /* modifies the resource */ 2604 } 2605 IREffect; 2606 2607 /* Pretty-print an IREffect */ 2608 extern void ppIREffect ( IREffect ); 2609 2610 typedef 2611 struct _IRDirty { 2612 /* What to call, and details of args/results. .guard must be 2613 non-NULL. If .tmp is not IRTemp_INVALID, then the call 2614 returns a result which is placed in .tmp. If at runtime the 2615 guard evaluates to false, .tmp has an 0x555..555 bit pattern 2616 written to it. Hence conditional calls that assign .tmp are 2617 allowed. */ 2618 IRCallee* cee; /* where to call */ 2619 IRExpr* guard; /* :: Ity_Bit. Controls whether call happens */ 2620 /* The args vector may contain IRExpr_GSPTR() and/or 2621 IRExpr_VECRET(), in both cases, at most once. */ 2622 IRExpr** args; /* arg vector, ends in NULL. 
*/ 2623 IRTemp tmp; /* to assign result to, or IRTemp_INVALID if none */ 2624 2625 /* Mem effects; we allow only one R/W/M region to be stated */ 2626 IREffect mFx; /* indicates memory effects, if any */ 2627 IRExpr* mAddr; /* of access, or NULL if mFx==Ifx_None */ 2628 Int mSize; /* of access, or zero if mFx==Ifx_None */ 2629 2630 /* Guest state effects; up to N allowed */ 2631 Int nFxState; /* must be 0 .. VEX_N_FXSTATE */ 2632 struct { 2633 IREffect fx:16; /* read, write or modify? Ifx_None is invalid. */ 2634 UShort offset; 2635 UShort size; 2636 UChar nRepeats; 2637 UChar repeatLen; 2638 } fxState[VEX_N_FXSTATE]; 2639 /* The access can be repeated, as specified by nRepeats and 2640 repeatLen. To describe only a single access, nRepeats and 2641 repeatLen should be zero. Otherwise, repeatLen must be a 2642 multiple of size and greater than size. */ 2643 /* Overall, the parts of the guest state denoted by (offset, 2644 size, nRepeats, repeatLen) is 2645 [offset, +size) 2646 and, if nRepeats > 0, 2647 for (i = 1; i <= nRepeats; i++) 2648 [offset + i * repeatLen, +size) 2649 A convenient way to enumerate all segments is therefore 2650 for (i = 0; i < 1 + nRepeats; i++) 2651 [offset + i * repeatLen, +size) 2652 */ 2653 } 2654 IRDirty; 2655 2656 /* Pretty-print a dirty call */ 2657 extern void ppIRDirty ( const IRDirty* ); 2658 2659 /* Allocate an uninitialised dirty call */ 2660 extern IRDirty* emptyIRDirty ( void ); 2661 2662 /* Deep-copy a dirty call */ 2663 extern IRDirty* deepCopyIRDirty ( const IRDirty* ); 2664 2665 /* A handy function which takes some of the tedium out of constructing 2666 dirty helper calls. The called function impliedly does not return 2667 any value and has a constant-True guard. The call is marked as 2668 accessing neither guest state nor memory (hence the "unsafe" 2669 designation) -- you can change this marking later if need be. A 2670 suitable IRCallee is constructed from the supplied bits. 
*/ 2671 extern 2672 IRDirty* unsafeIRDirty_0_N ( Int regparms, const HChar* name, void* addr, 2673 IRExpr** args ); 2674 2675 /* Similarly, make a zero-annotation dirty call which returns a value, 2676 and assign that to the given temp. */ 2677 extern 2678 IRDirty* unsafeIRDirty_1_N ( IRTemp dst, 2679 Int regparms, const HChar* name, void* addr, 2680 IRExpr** args ); 2681 2682 2683 /* --------------- Memory Bus Events --------------- */ 2684 2685 typedef 2686 enum { 2687 Imbe_Fence=0x1C00, 2688 /* Needed only on ARM. It cancels a reservation made by a 2689 preceding Linked-Load, and needs to be handed through to the 2690 back end, just as LL and SC themselves are. */ 2691 Imbe_CancelReservation 2692 } 2693 IRMBusEvent; 2694 2695 extern void ppIRMBusEvent ( IRMBusEvent ); 2696 2697 2698 /* --------------- Compare and Swap --------------- */ 2699 2700 /* This denotes an atomic compare and swap operation, either 2701 a single-element one or a double-element one. 2702 2703 In the single-element case: 2704 2705 .addr is the memory address. 2706 .end is the endianness with which memory is accessed 2707 2708 If .addr contains the same value as .expdLo, then .dataLo is 2709 written there, else there is no write. In both cases, the 2710 original value at .addr is copied into .oldLo. 2711 2712 Types: .expdLo, .dataLo and .oldLo must all have the same type. 2713 It may be any integral type, viz: I8, I16, I32 or, for 64-bit 2714 guests, I64. 2715 2716 .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must 2717 be NULL. 2718 2719 In the double-element case: 2720 2721 .addr is the memory address. 2722 .end is the endianness with which memory is accessed 2723 2724 The operation is the same: 2725 2726 If .addr contains the same value as .expdHi:.expdLo, then 2727 .dataHi:.dataLo is written there, else there is no write. In 2728 both cases the original value at .addr is copied into 2729 .oldHi:.oldLo. 
2730 2731 Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must 2732 all have the same type, which may be any integral type, viz: I8, 2733 I16, I32 or, for 64-bit guests, I64. 2734 2735 The double-element case is complicated by the issue of 2736 endianness. In all cases, the two elements are understood to be 2737 located adjacently in memory, starting at the address .addr. 2738 2739 If .end is Iend_LE, then the .xxxLo component is at the lower 2740 address and the .xxxHi component is at the higher address, and 2741 each component is itself stored little-endianly. 2742 2743 If .end is Iend_BE, then the .xxxHi component is at the lower 2744 address and the .xxxLo component is at the higher address, and 2745 each component is itself stored big-endianly. 2746 2747 This allows representing more cases than most architectures can 2748 handle. For example, x86 cannot do DCAS on 8- or 16-bit elements. 2749 2750 How to know if the CAS succeeded? 2751 2752 * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo), 2753 then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now 2754 stored at .addr, and the original value there was .oldLo (resp 2755 .oldHi:.oldLo). 2756 2757 * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo), 2758 then the CAS failed, and the original value at .addr was .oldLo 2759 (resp. .oldHi:.oldLo). 2760 2761 Hence it is easy to know whether or not the CAS succeeded. 
2762 */ 2763 typedef 2764 struct { 2765 IRTemp oldHi; /* old value of *addr is written here */ 2766 IRTemp oldLo; 2767 IREndness end; /* endianness of the data in memory */ 2768 IRExpr* addr; /* store address */ 2769 IRExpr* expdHi; /* expected old value at *addr */ 2770 IRExpr* expdLo; 2771 IRExpr* dataHi; /* new value for *addr */ 2772 IRExpr* dataLo; 2773 } 2774 IRCAS; 2775 2776 extern void ppIRCAS ( const IRCAS* cas ); 2777 2778 extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo, 2779 IREndness end, IRExpr* addr, 2780 IRExpr* expdHi, IRExpr* expdLo, 2781 IRExpr* dataHi, IRExpr* dataLo ); 2782 2783 extern IRCAS* deepCopyIRCAS ( const IRCAS* ); 2784 2785 2786 /* ------------------ Circular Array Put ------------------ */ 2787 2788 typedef 2789 struct { 2790 IRRegArray* descr; /* Part of guest state treated as circular */ 2791 IRExpr* ix; /* Variable part of index into array */ 2792 Int bias; /* Constant offset part of index into array */ 2793 IRExpr* data; /* The value to write */ 2794 } IRPutI; 2795 2796 extern void ppIRPutI ( const IRPutI* puti ); 2797 2798 extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix, 2799 Int bias, IRExpr* data ); 2800 2801 extern IRPutI* deepCopyIRPutI ( const IRPutI* ); 2802 2803 2804 /* --------------- Guarded loads and stores --------------- */ 2805 2806 /* Conditional stores are straightforward. They are the same as 2807 normal stores, with an extra 'guard' field :: Ity_I1 that 2808 determines whether or not the store actually happens. If not, 2809 memory is unmodified. 2810 2811 The semantics of this is that 'addr' and 'data' are fully evaluated 2812 even in the case where 'guard' evaluates to zero (false). 2813 */ 2814 typedef 2815 struct { 2816 IREndness end; /* Endianness of the store */ 2817 IRExpr* addr; /* store address */ 2818 IRExpr* data; /* value to write */ 2819 IRExpr* guard; /* Guarding value */ 2820 } 2821 IRStoreG; 2822 2823 /* Conditional loads are a little more complex. 
'addr' is the 2824 address, 'guard' is the guarding condition. If the load takes 2825 place, the loaded value is placed in 'dst'. If it does not take 2826 place, 'alt' is copied to 'dst'. However, the loaded value is not 2827 placed directly in 'dst' -- it is first subjected to the conversion 2828 specified by 'cvt'. 2829 2830 For example, imagine doing a conditional 8-bit load, in which the 2831 loaded value is zero extended to 32 bits. Hence: 2832 * 'dst' and 'alt' must have type I32 2833 * 'cvt' must be a unary op which converts I8 to I32. In this 2834 example, it would be ILGop_8Uto32. 2835 2836 There is no explicit indication of the type at which the load is 2837 done, since that is inferrable from the arg type of 'cvt'. Note 2838 that the types of 'alt' and 'dst' and the result type of 'cvt' must 2839 all be the same. 2840 2841 Semantically, 'addr' is evaluated even in the case where 'guard' 2842 evaluates to zero (false), and 'alt' is evaluated even when 'guard' 2843 evaluates to one (true). That is, 'addr' and 'alt' are always 2844 evaluated. 2845 */ 2846 typedef 2847 enum { 2848 ILGop_INVALID=0x1D00, 2849 ILGop_IdentV128, /* 128 bit vector, no conversion */ 2850 ILGop_Ident64, /* 64 bit, no conversion */ 2851 ILGop_Ident32, /* 32 bit, no conversion */ 2852 ILGop_16Uto32, /* 16 bit load, Z-widen to 32 */ 2853 ILGop_16Sto32, /* 16 bit load, S-widen to 32 */ 2854 ILGop_8Uto32, /* 8 bit load, Z-widen to 32 */ 2855 ILGop_8Sto32 /* 8 bit load, S-widen to 32 */ 2856 } 2857 IRLoadGOp; 2858 2859 typedef 2860 struct { 2861 IREndness end; /* Endianness of the load */ 2862 IRLoadGOp cvt; /* Conversion to apply to the loaded value */ 2863 IRTemp dst; /* Destination (LHS) of assignment */ 2864 IRExpr* addr; /* Address being loaded from */ 2865 IRExpr* alt; /* Value if load is not done. 
*/ 2866 IRExpr* guard; /* Guarding value */ 2867 } 2868 IRLoadG; 2869 2870 extern void ppIRStoreG ( const IRStoreG* sg ); 2871 2872 extern void ppIRLoadGOp ( IRLoadGOp cvt ); 2873 2874 extern void ppIRLoadG ( const IRLoadG* lg ); 2875 2876 extern IRStoreG* mkIRStoreG ( IREndness end, 2877 IRExpr* addr, IRExpr* data, 2878 IRExpr* guard ); 2879 2880 extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt, 2881 IRTemp dst, IRExpr* addr, IRExpr* alt, 2882 IRExpr* guard ); 2883 2884 2885 /* ------------------ Statements ------------------ */ 2886 2887 /* The different kinds of statements. Their meaning is explained 2888 below in the comments for IRStmt. 2889 2890 Those marked META do not represent code, but rather extra 2891 information about the code. These statements can be removed 2892 without affecting the functional behaviour of the code, however 2893 they are required by some IR consumers such as tools that 2894 instrument the code. 2895 */ 2896 2897 typedef 2898 enum { 2899 Ist_NoOp=0x1E00, 2900 Ist_IMark, /* META */ 2901 Ist_AbiHint, /* META */ 2902 Ist_Put, 2903 Ist_PutI, 2904 Ist_WrTmp, 2905 Ist_Store, 2906 Ist_LoadG, 2907 Ist_StoreG, 2908 Ist_CAS, 2909 Ist_LLSC, 2910 Ist_Dirty, 2911 Ist_MBE, 2912 Ist_Exit 2913 } 2914 IRStmtTag; 2915 2916 /* A statement. Stored as a tagged union. 'tag' indicates what kind 2917 of expression this is. 'Ist' is the union that holds the fields. 2918 If an IRStmt 'st' has st.tag equal to Iex_Store, then it's a store 2919 statement, and the fields can be accessed with 2920 'st.Ist.Store.<fieldname>'. 2921 2922 For each kind of statement, we show what it looks like when 2923 pretty-printed with ppIRStmt(). 2924 */ 2925 typedef 2926 struct _IRStmt { 2927 IRStmtTag tag; 2928 union { 2929 /* A no-op (usually resulting from IR optimisation). Can be 2930 omitted without any effect. 2931 2932 ppIRStmt output: IR-NoOp 2933 */ 2934 struct { 2935 } NoOp; 2936 2937 /* META: instruction mark. 
Marks the start of the statements 2938 that represent a single machine instruction (the end of 2939 those statements is marked by the next IMark or the end of 2940 the IRSB). Contains the address and length of the 2941 instruction. 2942 2943 It also contains a delta value. The delta must be 2944 subtracted from a guest program counter value before 2945 attempting to establish, by comparison with the address 2946 and length values, whether or not that program counter 2947 value refers to this instruction. For x86, amd64, ppc32, 2948 ppc64 and arm, the delta value is zero. For Thumb 2949 instructions, the delta value is one. This is because, on 2950 Thumb, guest PC values (guest_R15T) are encoded using the 2951 top 31 bits of the instruction address and a 1 in the lsb; 2952 hence they appear to be (numerically) 1 past the start of 2953 the instruction they refer to. IOW, guest_R15T on ARM 2954 holds a standard ARM interworking address. 2955 2956 ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------, 2957 eg. ------ IMark(0x4000792, 5, 0) ------, 2958 */ 2959 struct { 2960 Addr addr; /* instruction address */ 2961 UInt len; /* instruction length */ 2962 UChar delta; /* addr = program counter as encoded in guest state 2963 - delta */ 2964 } IMark; 2965 2966 /* META: An ABI hint, which says something about this 2967 platform's ABI. 2968 2969 At the moment, the only AbiHint is one which indicates 2970 that a given chunk of address space, [base .. base+len-1], 2971 has become undefined. This is used on amd64-linux and 2972 some ppc variants to pass stack-redzoning hints to whoever 2973 wants to see them. It also indicates the address of the 2974 next (dynamic) instruction that will be executed. This is 2975 to help Memcheck to origin tracking. 2976 2977 ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ====== 2978 eg. 
====== AbiHint(t1, 16, t2) ====== 2979 */ 2980 struct { 2981 IRExpr* base; /* Start of undefined chunk */ 2982 Int len; /* Length of undefined chunk */ 2983 IRExpr* nia; /* Address of next (guest) insn */ 2984 } AbiHint; 2985 2986 /* Write a guest register, at a fixed offset in the guest state. 2987 ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1 2988 */ 2989 struct { 2990 Int offset; /* Offset into the guest state */ 2991 IRExpr* data; /* The value to write */ 2992 } Put; 2993 2994 /* Write a guest register, at a non-fixed offset in the guest 2995 state. See the comment for GetI expressions for more 2996 information. 2997 2998 ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>, 2999 eg. PUTI(64:8xF64)[t5,0] = t1 3000 */ 3001 struct { 3002 IRPutI* details; 3003 } PutI; 3004 3005 /* Assign a value to a temporary. Note that SSA rules require 3006 each tmp is only assigned to once. IR sanity checking will 3007 reject any block containing a temporary which is not assigned 3008 to exactly once. 3009 3010 ppIRStmt output: t<tmp> = <data>, eg. t1 = 3 3011 */ 3012 struct { 3013 IRTemp tmp; /* Temporary (LHS of assignment) */ 3014 IRExpr* data; /* Expression (RHS of assignment) */ 3015 } WrTmp; 3016 3017 /* Write a value to memory. This is a normal store, not a 3018 Store-Conditional. To represent a Store-Conditional, 3019 instead use IRStmt.LLSC. 3020 ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2 3021 */ 3022 struct { 3023 IREndness end; /* Endianness of the store */ 3024 IRExpr* addr; /* store address */ 3025 IRExpr* data; /* value to write */ 3026 } Store; 3027 3028 /* Guarded store. Note that this is defined to evaluate all 3029 expression fields (addr, data) even if the guard evaluates 3030 to false. 3031 ppIRStmt output: 3032 if (<guard>) ST<end>(<addr>) = <data> */ 3033 struct { 3034 IRStoreG* details; 3035 } StoreG; 3036 3037 /* Guarded load. 
Note that this is defined to evaluate all 3038 expression fields (addr, alt) even if the guard evaluates 3039 to false. 3040 ppIRStmt output: 3041 t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */ 3042 struct { 3043 IRLoadG* details; 3044 } LoadG; 3045 3046 /* Do an atomic compare-and-swap operation. Semantics are 3047 described above on a comment at the definition of IRCAS. 3048 3049 ppIRStmt output: 3050 t<tmp> = CAS<end>(<addr> :: <expected> -> <new>) 3051 eg 3052 t1 = CASle(t2 :: t3->Add32(t3,1)) 3053 which denotes a 32-bit atomic increment 3054 of a value at address t2 3055 3056 A double-element CAS may also be denoted, in which case <tmp>, 3057 <expected> and <new> are all pairs of items, separated by 3058 commas. 3059 */ 3060 struct { 3061 IRCAS* details; 3062 } CAS; 3063 3064 /* Either Load-Linked or Store-Conditional, depending on 3065 STOREDATA. 3066 3067 If STOREDATA is NULL then this is a Load-Linked, meaning 3068 that data is loaded from memory as normal, but a 3069 'reservation' for the address is also lodged in the 3070 hardware. 3071 3072 result = Load-Linked(addr, end) 3073 3074 The data transfer type is the type of RESULT (I32, I64, 3075 etc). ppIRStmt output: 3076 3077 result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1) 3078 3079 If STOREDATA is not NULL then this is a Store-Conditional, 3080 hence: 3081 3082 result = Store-Conditional(addr, storedata, end) 3083 3084 The data transfer type is the type of STOREDATA and RESULT 3085 has type Ity_I1. The store may fail or succeed depending 3086 on the state of a previously lodged reservation on this 3087 address. RESULT is written 1 if the store succeeds and 0 3088 if it fails. eg ppIRStmt output: 3089 3090 result = ( ST<end>-Cond(<addr>) = <storedata> ) 3091 eg t3 = ( STbe-Cond(t1, t2) ) 3092 3093 In all cases, the address must be naturally aligned for 3094 the transfer type -- any misaligned addresses should be 3095 caught by a dominating IR check and side exit. 
This 3096 alignment restriction exists because on at least some 3097 LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on 3098 misaligned addresses, and we have to actually generate 3099 stwcx. on the host, and we don't want it trapping on the 3100 host. 3101 3102 Summary of rules for transfer type: 3103 STOREDATA == NULL (LL): 3104 transfer type = type of RESULT 3105 STOREDATA != NULL (SC): 3106 transfer type = type of STOREDATA, and RESULT :: Ity_I1 3107 */ 3108 struct { 3109 IREndness end; 3110 IRTemp result; 3111 IRExpr* addr; 3112 IRExpr* storedata; /* NULL => LL, non-NULL => SC */ 3113 } LLSC; 3114 3115 /* Call (possibly conditionally) a C function that has side 3116 effects (ie. is "dirty"). See the comments above the 3117 IRDirty type declaration for more information. 3118 3119 ppIRStmt output: 3120 t<tmp> = DIRTY <guard> <effects> 3121 ::: <callee>(<args>) 3122 eg. 3123 t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4) 3124 ::: foo{0x380035f4}(t2) 3125 */ 3126 struct { 3127 IRDirty* details; 3128 } Dirty; 3129 3130 /* A memory bus event - a fence, or acquisition/release of the 3131 hardware bus lock. IR optimisation treats all these as fences 3132 across which no memory references may be moved. 3133 ppIRStmt output: MBusEvent-Fence, 3134 MBusEvent-BusLock, MBusEvent-BusUnlock. 3135 */ 3136 struct { 3137 IRMBusEvent event; 3138 } MBE; 3139 3140 /* Conditional exit from the middle of an IRSB. 3141 ppIRStmt output: if (<guard>) goto {<jk>} <dst> 3142 eg. if (t69) goto {Boring} 0x4000AAA:I32 3143 If <guard> is true, the guest state is also updated by 3144 PUT-ing <dst> at <offsIP>. This is done because a 3145 taken exit must update the guest program counter. 3146 */ 3147 struct { 3148 IRExpr* guard; /* Conditional expression */ 3149 IRConst* dst; /* Jump target (constant only) */ 3150 IRJumpKind jk; /* Jump kind */ 3151 Int offsIP; /* Guest state offset for IP */ 3152 } Exit; 3153 } Ist; 3154 } 3155 IRStmt; 3156 3157 /* Statement constructors. 
*/ 3158 extern IRStmt* IRStmt_NoOp ( void ); 3159 extern IRStmt* IRStmt_IMark ( Addr addr, UInt len, UChar delta ); 3160 extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia ); 3161 extern IRStmt* IRStmt_Put ( Int off, IRExpr* data ); 3162 extern IRStmt* IRStmt_PutI ( IRPutI* details ); 3163 extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data ); 3164 extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data ); 3165 extern IRStmt* IRStmt_StoreG ( IREndness end, IRExpr* addr, IRExpr* data, 3166 IRExpr* guard ); 3167 extern IRStmt* IRStmt_LoadG ( IREndness end, IRLoadGOp cvt, IRTemp dst, 3168 IRExpr* addr, IRExpr* alt, IRExpr* guard ); 3169 extern IRStmt* IRStmt_CAS ( IRCAS* details ); 3170 extern IRStmt* IRStmt_LLSC ( IREndness end, IRTemp result, 3171 IRExpr* addr, IRExpr* storedata ); 3172 extern IRStmt* IRStmt_Dirty ( IRDirty* details ); 3173 extern IRStmt* IRStmt_MBE ( IRMBusEvent event ); 3174 extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst, 3175 Int offsIP ); 3176 3177 /* Deep-copy an IRStmt. */ 3178 extern IRStmt* deepCopyIRStmt ( const IRStmt* ); 3179 3180 /* Pretty-print an IRStmt. */ 3181 extern void ppIRStmt ( const IRStmt* ); 3182 3183 3184 /* ------------------ Basic Blocks ------------------ */ 3185 3186 /* Type environments: a bunch of statements, expressions, etc, are 3187 incomplete without an environment indicating the type of each 3188 IRTemp. So this provides one. IR temporaries are really just 3189 unsigned ints and so this provides an array, 0 .. n_types_used-1 of 3190 them. 
3191 */ 3192 typedef 3193 struct { 3194 IRType* types; 3195 Int types_size; 3196 Int types_used; 3197 } 3198 IRTypeEnv; 3199 3200 /* Obtain a new IRTemp */ 3201 extern IRTemp newIRTemp ( IRTypeEnv*, IRType ); 3202 3203 /* Deep-copy a type environment */ 3204 extern IRTypeEnv* deepCopyIRTypeEnv ( const IRTypeEnv* ); 3205 3206 /* Pretty-print a type environment */ 3207 extern void ppIRTypeEnv ( const IRTypeEnv* ); 3208 3209 3210 /* Code blocks, which in proper compiler terminology are superblocks 3211 (single entry, multiple exit code sequences) contain: 3212 3213 - A table giving a type for each temp (the "type environment") 3214 - An expandable array of statements 3215 - An expression of type 32 or 64 bits, depending on the 3216 guest's word size, indicating the next destination if the block 3217 executes all the way to the end, without a side exit 3218 - An indication of any special actions (JumpKind) needed 3219 for this final jump. 3220 - Offset of the IP field in the guest state. This will be 3221 updated before the final jump is done. 3222 3223 "IRSB" stands for "IR Super Block". 3224 */ 3225 typedef 3226 struct { 3227 IRTypeEnv* tyenv; 3228 IRStmt** stmts; 3229 Int stmts_size; 3230 Int stmts_used; 3231 IRExpr* next; 3232 IRJumpKind jumpkind; 3233 Int offsIP; 3234 } 3235 IRSB; 3236 3237 /* Allocate a new, uninitialised IRSB */ 3238 extern IRSB* emptyIRSB ( void ); 3239 3240 /* Deep-copy an IRSB */ 3241 extern IRSB* deepCopyIRSB ( const IRSB* ); 3242 3243 /* Deep-copy an IRSB, except for the statements list, which set to be 3244 a new, empty, list of statements. 
*/ 3245 extern IRSB* deepCopyIRSBExceptStmts ( const IRSB* ); 3246 3247 /* Pretty-print an IRSB */ 3248 extern void ppIRSB ( const IRSB* ); 3249 3250 /* Append an IRStmt to an IRSB */ 3251 extern void addStmtToIRSB ( IRSB*, IRStmt* ); 3252 3253 3254 /*---------------------------------------------------------------*/ 3255 /*--- Helper functions for the IR ---*/ 3256 /*---------------------------------------------------------------*/ 3257 3258 /* For messing with IR type environments */ 3259 extern IRTypeEnv* emptyIRTypeEnv ( void ); 3260 3261 /* What is the type of this expression? */ 3262 extern IRType typeOfIRConst ( const IRConst* ); 3263 extern IRType typeOfIRTemp ( const IRTypeEnv*, IRTemp ); 3264 extern IRType typeOfIRExpr ( const IRTypeEnv*, const IRExpr* ); 3265 3266 /* What are the arg and result type for this IRLoadGOp? */ 3267 extern void typeOfIRLoadGOp ( IRLoadGOp cvt, 3268 /*OUT*/IRType* t_res, 3269 /*OUT*/IRType* t_arg ); 3270 3271 /* Sanity check a BB of IR */ 3272 extern void sanityCheckIRSB ( const IRSB* bb, 3273 const HChar* caller, 3274 Bool require_flatness, 3275 IRType guest_word_size ); 3276 extern Bool isFlatIRStmt ( const IRStmt* ); 3277 extern Bool isFlatIRSB ( const IRSB* ); 3278 3279 /* Is this any value actually in the enumeration 'IRType' ? */ 3280 extern Bool isPlausibleIRType ( IRType ty ); 3281 3282 3283 /*---------------------------------------------------------------*/ 3284 /*--- IR injection ---*/ 3285 /*---------------------------------------------------------------*/ 3286 3287 void vex_inject_ir(IRSB *, IREndness); 3288 3289 3290 #endif /* ndef __LIBVEX_IR_H */ 3291 3292 /*---------------------------------------------------------------*/ 3293 /*--- libvex_ir.h ---*/ 3294 /*---------------------------------------------------------------*/
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|