Warning, /include/llvm/IR/IntrinsicsAArch64.td is written in an unsupported language. File is not indexed.
0001 //===- IntrinsicsAARCH64.td - Defines AARCH64 intrinsics ---*- tablegen -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file defines all of the AARCH64-specific intrinsics.
0010 //
0011 //===----------------------------------------------------------------------===//
0012
let TargetPrefix = "aarch64" in {

// Exclusive load/store monitor intrinsics. The loaded/stored value is always
// modelled as i64 regardless of the actual access width; the pointer may
// point into any address space (llvm_anyptr_ty). The store forms return an
// i32 status result.
def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty],
                                 [IntrNoFree, IntrWillReturn]>;
def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty],
                                  [IntrNoFree, IntrWillReturn]>;
def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty],
                                 [IntrNoFree, IntrWillReturn]>;
def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty],
                                  [IntrNoFree, IntrWillReturn]>;

// Paired exclusive load/store: two i64 halves, default-address-space pointer.
def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty],
                                 [IntrNoFree, IntrWillReturn]>;
def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty],
                                  [IntrNoFree, IntrWillReturn]>;
def int_aarch64_stxp : Intrinsic<[llvm_i32_ty],
                                 [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
                                 [IntrNoFree, IntrWillReturn]>;
def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty],
                                  [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
                                  [IntrNoFree, IntrWillReturn]>;

// Clear the exclusive monitor. Deliberately has no properties (default
// side-effecting intrinsic), so it is never reordered or removed.
def int_aarch64_clrex : Intrinsic<[]>;

// Scalar signed/unsigned divide on any integer type.
def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
                                LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
                                LLVMMatchType<0>], [IntrNoMem]>;

// FJCVTZS: double -> i32 conversion with JavaScript (wrap-around) semantics.
def int_aarch64_fjcvtzs : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;

// Count leading sign bits; the 64-bit form still produces an i32 count.
def int_aarch64_cls: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_cls64: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;

// v8.5-A FRINT{32,64}{Z,X}: round to a value representable in 32/64-bit
// integer range, toward zero (Z) or using the current rounding mode (X).
def int_aarch64_frint32z
    : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0> ],
                            [ IntrNoMem ]>;
def int_aarch64_frint64z
    : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0> ],
                            [ IntrNoMem ]>;
def int_aarch64_frint32x
    : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0> ],
                            [ IntrNoMem ]>;
def int_aarch64_frint64x
    : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0> ],
                            [ IntrNoMem ]>;


//===----------------------------------------------------------------------===//
// HINT

// Emit a HINT instruction with the given immediate operand.
def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;

// BRK/HLT trap instructions; the immediate must be a constant (ImmArg) and
// the intrinsics never return.
def int_aarch64_break : Intrinsic<[], [llvm_i32_ty],
    [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;

def int_aarch64_hlt : Intrinsic<[], [llvm_i32_ty],
    [IntrNoMem, IntrHasSideEffects, IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>;

// PRFM-style prefetch: (addr, rw, target, cache level, stream). All selector
// operands are immediates; the address is only read.
def int_aarch64_prefetch : Intrinsic<[],
    [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
     ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>
    ]>,
    ClangBuiltin<"__builtin_arm_prefetch">;

//===----------------------------------------------------------------------===//
// Data Barrier Instructions

def int_aarch64_dmb : ClangBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">,
                      Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
def int_aarch64_dsb : ClangBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">,
                      Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;
def int_aarch64_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
                      Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>;

// A space-consuming intrinsic primarily for testing block and jump table
// placements. The first argument is the number of bytes this "instruction"
// takes up, the second and return value are essentially chains, used to force
// ordering during ISel.
def int_aarch64_space : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;

//===----------------------------------------------------------------------===//
// Guarded Control Stack

def int_aarch64_chkfeat : ClangBuiltin<"__builtin_arm_chkfeat">,
                          DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
                                                [IntrNoMem]>;

// FIXME: This should be marked as [IntrReadMem, IntrHasSideEffects], as it has
// the side-effect of updating gcspr, but this combination doesn't work
// correctly.
def int_aarch64_gcspopm : ClangBuiltin<"__builtin_arm_gcspopm">,
                          DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
                                                []>;

def int_aarch64_gcsss : ClangBuiltin<"__builtin_arm_gcsss">,
                        DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;

}
0113
0114 //===----------------------------------------------------------------------===//
0115 // Advanced SIMD (NEON)
0116
let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".

// Shared helper classes for the NEON intrinsic definitions below. Naming
// convention: the count is the number of arguments; "Long"/"Wide"/"Narrow"
// describe the result width relative to the operands (via
// LLVMTruncatedType/LLVMExtendedType); "Across" reduces a vector to a scalar;
// "Lane"/"Scalar" take a trailing i32 lane/shift-amount operand.

class AdvSIMD_2Scalar_Float_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem]>;

class AdvSIMD_FPToIntRounding_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;

class AdvSIMD_1IntArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1FloatArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Expand_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Long_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
class AdvSIMD_1IntArg_Narrow_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Narrow_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Int_Across_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
class AdvSIMD_1VectorArg_Float_Across_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

class AdvSIMD_2IntArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem]>;
class AdvSIMD_2FloatArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Compare_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
                          [IntrNoMem]>;
class AdvSIMD_2Arg_FloatCompare_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Long_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Wide_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>, LLVMTruncatedType<0>],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Narrow_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMExtendedType<0>, LLVMExtendedType<0>],
                          [IntrNoMem]>;
class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty],
                          [LLVMExtendedType<0>, llvm_i32_ty],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [llvm_anyvector_ty],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMTruncatedType<0>],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMTruncatedType<0>, llvm_i32_ty],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
                          [IntrNoMem]>;
class AdvSIMD_2VectorArg_Lane_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty],
                          [LLVMMatchType<0>, llvm_anyint_ty, llvm_i32_ty],
                          [IntrNoMem]>;

class AdvSIMD_3IntArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty],
                          [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem]>;
class AdvSIMD_3VectorArg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem]>;
class AdvSIMD_3VectorArg_Scalar_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
                          [IntrNoMem]>;
class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty,
                           LLVMMatchType<1>], [IntrNoMem]>;
class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
                          [IntrNoMem]>;
// Fixed-point <-> floating-point conversions; the trailing i32 is the number
// of fractional bits.
class AdvSIMD_CvtFxToFP_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
                          [IntrNoMem]>;
class AdvSIMD_CvtFPToFx_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
                          [IntrNoMem]>;

class AdvSIMD_1Arg_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>;

class AdvSIMD_Dot_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                          [IntrNoMem]>;

class AdvSIMD_FP16FML_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                          [IntrNoMem]>;

class AdvSIMD_MatMul_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                          [IntrNoMem]>;

class AdvSIMD_FML_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>],
                          [IntrNoMem]>;

class AdvSIMD_BF16FML_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
                          [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
                          [IntrNoMem]>;
}
0251
0252 // Arithmetic ops
0253
// All records in this block default to [IntrNoMem] via the let; records that
// need different properties (e.g. fmulx below) override IntrProperties.
let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
  // Vector Add Across Lanes
  def int_aarch64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
  def int_aarch64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
  def int_aarch64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;

  // Vector Long Add Across Lanes
  def int_aarch64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
  def int_aarch64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;

  // Vector Halving Add
  def int_aarch64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic;

  // Vector Rounding Halving Add
  def int_aarch64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic;

  // Vector Saturating Add
  def int_aarch64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic;

  // Vector Add High-Half
  // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
  // header is no longer supported.
  def int_aarch64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;

  // Vector Rounding Add High-Half
  def int_aarch64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;

  // Vector Saturating Doubling Multiply High
  def int_aarch64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_sqdmulh_lane : AdvSIMD_2VectorArg_Lane_Intrinsic;
  def int_aarch64_neon_sqdmulh_laneq : AdvSIMD_2VectorArg_Lane_Intrinsic;

  // Vector Saturating Rounding Doubling Multiply High
  def int_aarch64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_sqrdmulh_lane : AdvSIMD_2VectorArg_Lane_Intrinsic;
  def int_aarch64_neon_sqrdmulh_laneq : AdvSIMD_2VectorArg_Lane_Intrinsic;

  def int_aarch64_neon_sqrdmlah : AdvSIMD_3IntArg_Intrinsic;
  def int_aarch64_neon_sqrdmlsh : AdvSIMD_3IntArg_Intrinsic;

  // Vector Polynomial Multiply
  def int_aarch64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;

  // Vector Long Multiply
  def int_aarch64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
  def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
  def int_aarch64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;

  // 64-bit polynomial multiply really returns an i128, which is not legal. Fake
  // it with a v16i8.
  def int_aarch64_neon_pmull64 :
        DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;

  // Vector Extending Multiply
  def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic {
    let IntrProperties = [IntrNoMem, Commutative];
  }

  // Vector Saturating Doubling Long Multiply
  def int_aarch64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic;
  def int_aarch64_neon_sqdmulls_scalar
    : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

  // Vector Halving Subtract
  def int_aarch64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic;

  // Vector Saturating Subtract
  def int_aarch64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic;

  // Vector Subtract High-Half
  // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
  // header is no longer supported.
  def int_aarch64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;

  // Vector Rounding Subtract High-Half
  def int_aarch64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;

  // Vector Compare Absolute Greater-than-or-equal
  def int_aarch64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic;

  // Vector Compare Absolute Greater-than
  def int_aarch64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic;

  // Vector Absolute Difference
  def int_aarch64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic;

  // Scalar Absolute Difference
  def int_aarch64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic;

  // Vector Max
  def int_aarch64_neon_smax : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_umax : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_fmax : AdvSIMD_2FloatArg_Intrinsic;
  def int_aarch64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic;

  // Vector Max Across Lanes
  def int_aarch64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
  def int_aarch64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
  def int_aarch64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
  def int_aarch64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;

  // Vector Min
  def int_aarch64_neon_smin : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_umin : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_fmin : AdvSIMD_2FloatArg_Intrinsic;
  def int_aarch64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic;

  // Vector Min/Max Number
  def int_aarch64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic;
  def int_aarch64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic;

  // Vector Min Across Lanes
  def int_aarch64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
  def int_aarch64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
  def int_aarch64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
  def int_aarch64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;

  // Pairwise Add
  def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_faddp : AdvSIMD_2VectorArg_Intrinsic;

  // Long Pairwise Add
  // FIXME: In theory, we shouldn't need intrinsics for saddlp or
  // uaddlp, but tblgen's type inference currently can't handle the
  // pattern fragments this ends up generating.
  def int_aarch64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
  def int_aarch64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;

  // Folding Maximum
  def int_aarch64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic;

  // Folding Minimum
  def int_aarch64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic;

  // Reciprocal Estimate/Step
  def int_aarch64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic;
  def int_aarch64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic;

  // Reciprocal Exponent
  def int_aarch64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic;

  // Vector Saturating Shift Left
  def int_aarch64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic;

  // Vector Rounding Shift Left
  def int_aarch64_neon_srshl : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_urshl : AdvSIMD_2IntArg_Intrinsic;

  // Vector Saturating Rounding Shift Left
  def int_aarch64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic;

  // Vector Signed->Unsigned Shift Left by Constant
  def int_aarch64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic;

  // Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant
  def int_aarch64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;

  // Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const
  def int_aarch64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;

  // Vector Narrowing Shift Right by Constant
  def int_aarch64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
  def int_aarch64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;

  // Vector Rounding Narrowing Shift Right by Constant
  def int_aarch64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;

  // Vector Rounding Narrowing Saturating Shift Right by Constant
  def int_aarch64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
  def int_aarch64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;

  // Vector Shift Left
  def int_aarch64_neon_sshl : AdvSIMD_2IntArg_Intrinsic;
  def int_aarch64_neon_ushl : AdvSIMD_2IntArg_Intrinsic;

  // Vector Widening Shift Left by Constant
  def int_aarch64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic;
  def int_aarch64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
  def int_aarch64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;

  // Vector Shift Right by Constant and Insert
  def int_aarch64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic;

  // Vector Shift Left by Constant and Insert
  def int_aarch64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic;

  // Vector Saturating Narrow
  def int_aarch64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic;
  def int_aarch64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic;
  def int_aarch64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
  def int_aarch64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;

  // Vector Saturating Extract and Unsigned Narrow
  def int_aarch64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic;
  def int_aarch64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic;

  // Vector Absolute Value
  def int_aarch64_neon_abs : AdvSIMD_1Arg_Intrinsic;

  // Vector Saturating Absolute Value
  def int_aarch64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;

  // Vector Saturating Negation
  def int_aarch64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic;

  // Vector Count Leading Sign Bits
  def int_aarch64_neon_cls : AdvSIMD_1VectorArg_Intrinsic;

  // Vector Reciprocal Estimate
  def int_aarch64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic;
  def int_aarch64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic;

  // Vector Square Root Estimate
  def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
  def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic;

  // Vector Conversions Between Half-Precision and Single-Precision.
  def int_aarch64_neon_vcvtfp2hf
    : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
  def int_aarch64_neon_vcvthf2fp
    : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;

  // Vector Conversions Between Floating-point and Fixed-point.
  def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
  def int_aarch64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;
  def int_aarch64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic;
  def int_aarch64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic;

  // Vector FP->Int Conversions
  def int_aarch64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
  def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;

  // v8.5-A Vector FP Rounding
  def int_aarch64_neon_frint32x : AdvSIMD_1FloatArg_Intrinsic;
  def int_aarch64_neon_frint32z : AdvSIMD_1FloatArg_Intrinsic;
  def int_aarch64_neon_frint64x : AdvSIMD_1FloatArg_Intrinsic;
  def int_aarch64_neon_frint64z : AdvSIMD_1FloatArg_Intrinsic;

  // Scalar FP->Int conversions

  // Vector FP Inexact Narrowing
  def int_aarch64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic;

  // Scalar FP Inexact Narrowing
  def int_aarch64_sisd_fcvtxn : DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty],
                                                      [IntrNoMem]>;

  // v8.2-A Dot Product
  def int_aarch64_neon_udot : AdvSIMD_Dot_Intrinsic;
  def int_aarch64_neon_sdot : AdvSIMD_Dot_Intrinsic;

  // v8.6-A Matrix Multiply Intrinsics
  def int_aarch64_neon_ummla : AdvSIMD_MatMul_Intrinsic;
  def int_aarch64_neon_smmla : AdvSIMD_MatMul_Intrinsic;
  def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic;
  def int_aarch64_neon_usdot : AdvSIMD_Dot_Intrinsic;
  def int_aarch64_neon_bfdot : AdvSIMD_Dot_Intrinsic;
  def int_aarch64_neon_bfmmla
    : DefaultAttrsIntrinsic<[llvm_v4f32_ty],
                            [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty],
                            [IntrNoMem]>;
  def int_aarch64_neon_bfmlalb : AdvSIMD_BF16FML_Intrinsic;
  def int_aarch64_neon_bfmlalt : AdvSIMD_BF16FML_Intrinsic;

  // v8.2-A FP16 Fused Multiply-Add Long
  def int_aarch64_neon_fmlal : AdvSIMD_FP16FML_Intrinsic;
  def int_aarch64_neon_fmlsl : AdvSIMD_FP16FML_Intrinsic;
  def int_aarch64_neon_fmlal2 : AdvSIMD_FP16FML_Intrinsic;
  def int_aarch64_neon_fmlsl2 : AdvSIMD_FP16FML_Intrinsic;

  // v8.3-A Floating-point complex add
  def int_aarch64_neon_vcadd_rot90 : AdvSIMD_2VectorArg_Intrinsic;
  def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic;

  def int_aarch64_neon_vcmla_rot0 : AdvSIMD_3VectorArg_Intrinsic;
  def int_aarch64_neon_vcmla_rot90 : AdvSIMD_3VectorArg_Intrinsic;
  def int_aarch64_neon_vcmla_rot180 : AdvSIMD_3VectorArg_Intrinsic;
  def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic;

  // FP8 fscale
  def int_aarch64_neon_fp8_fscale : DefaultAttrsIntrinsic<
                                      [llvm_anyvector_ty],
                                      [LLVMMatchType<0>,
                                       LLVMVectorOfBitcastsToInt<0>],
                                      [IntrNoMem]>;
}
0563
// Lookup-table read intrinsics. The v8i8/v16i8 operand holds the indices and
// the trailing i32 is an immediate lane selector (ImmArg). The _x2 forms take
// a pair of table registers.
let TargetPrefix = "aarch64" in {
  def int_aarch64_neon_vluti2_lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                        [llvm_anyvector_ty, llvm_v8i8_ty,
                                         llvm_i32_ty],
                                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;

  def int_aarch64_neon_vluti2_laneq : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                        [llvm_anyvector_ty, llvm_v16i8_ty,
                                         llvm_i32_ty],
                                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;

  def int_aarch64_neon_vluti4q_lane: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                        [LLVMMatchType<0>, llvm_v8i8_ty,
                                         llvm_i32_ty],
                                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;

  def int_aarch64_neon_vluti4q_laneq: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                        [LLVMMatchType<0>, llvm_v16i8_ty,
                                         llvm_i32_ty],
                                        [IntrNoMem, ImmArg<ArgIndex<2>>]>;


  def int_aarch64_neon_vluti4q_lane_x2:
      DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>, LLVMMatchType<0>,
                             llvm_v8i8_ty, llvm_i32_ty],
                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;

  def int_aarch64_neon_vluti4q_laneq_x2:
      DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>, LLVMMatchType<0>,
                             llvm_v16i8_ty, llvm_i32_ty],
                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
}
0598
let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
  // (dst vector, dst index, src vector, src index) -> result vector.
  class AdvSIMD_2Vector2Index_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty],
                            [IntrNoMem]>;
}

// Vector element to element moves. Defined outside the let block; the
// TargetPrefix is inherited through the class above.
def int_aarch64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
0608
let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
  // Helper classes for the structured NEON load/store intrinsics (ld1xN/ldN/
  // ldNr/ldNlane and their store counterparts). Loads are IntrReadMem +
  // IntrArgMemOnly; stores are IntrArgMemOnly with NoCapture on the pointer
  // operand. Lane variants carry an i64 lane index before the pointer.
  class AdvSIMD_1Vec_Load_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty],
                            [IntrReadMem, IntrArgMemOnly]>;
  class AdvSIMD_1Vec_Store_Lane_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
                            [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;

  class AdvSIMD_2Vec_Load_Intrinsic
    : DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
                            [llvm_anyptr_ty],
                            [IntrReadMem, IntrArgMemOnly]>;
  class AdvSIMD_2Vec_Load_Lane_Intrinsic
    : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
                            [LLVMMatchType<0>, llvm_anyvector_ty,
                             llvm_i64_ty, llvm_anyptr_ty],
                            [IntrReadMem, IntrArgMemOnly]>;
  class AdvSIMD_2Vec_Store_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
                                 llvm_anyptr_ty],
                            [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
  class AdvSIMD_2Vec_Store_Lane_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
                                 llvm_i64_ty, llvm_anyptr_ty],
                            [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;

  class AdvSIMD_3Vec_Load_Intrinsic
    : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
                            [llvm_anyptr_ty],
                            [IntrReadMem, IntrArgMemOnly]>;
  class AdvSIMD_3Vec_Load_Lane_Intrinsic
    : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                            [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty,
                             llvm_i64_ty, llvm_anyptr_ty],
                            [IntrReadMem, IntrArgMemOnly]>;
  class AdvSIMD_3Vec_Store_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
                                 LLVMMatchType<0>, llvm_anyptr_ty],
                            [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
  class AdvSIMD_3Vec_Store_Lane_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty,
                                 LLVMMatchType<0>, LLVMMatchType<0>,
                                 llvm_i64_ty, llvm_anyptr_ty],
                            [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;

  class AdvSIMD_4Vec_Load_Intrinsic
    : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
                             LLVMMatchType<0>, llvm_anyvector_ty],
                            [llvm_anyptr_ty],
                            [IntrReadMem, IntrArgMemOnly]>;
  class AdvSIMD_4Vec_Load_Lane_Intrinsic
    : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
                             LLVMMatchType<0>, LLVMMatchType<0>],
                            [LLVMMatchType<0>, LLVMMatchType<0>,
                             LLVMMatchType<0>, llvm_anyvector_ty,
                             llvm_i64_ty, llvm_anyptr_ty],
                            [IntrReadMem, IntrArgMemOnly]>;
  class AdvSIMD_4Vec_Store_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
                                 LLVMMatchType<0>, LLVMMatchType<0>,
                                 llvm_anyptr_ty],
                            [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
  class AdvSIMD_4Vec_Store_Lane_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
                                 LLVMMatchType<0>, LLVMMatchType<0>,
                                 llvm_i64_ty, llvm_anyptr_ty],
                            [IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
}
0677
// Memory ops

// Multi-register contiguous loads/stores (LD1 with 2-4 registers).
def int_aarch64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic;
def int_aarch64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic;
def int_aarch64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic;

def int_aarch64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic;
def int_aarch64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic;
def int_aarch64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic;

// Structured (interleaving) loads.
def int_aarch64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic;
def int_aarch64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic;
def int_aarch64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic;

// Single-lane structured loads.
def int_aarch64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic;
def int_aarch64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic;
def int_aarch64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic;

// Load-and-replicate (LDnR) forms.
def int_aarch64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic;
def int_aarch64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic;
def int_aarch64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic;

// Structured (interleaving) stores.
def int_aarch64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic;
def int_aarch64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic;
def int_aarch64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic;

// Single-lane structured stores.
def int_aarch64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic;
def int_aarch64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic;
def int_aarch64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic;
0707
let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
  // Table lookup (TBL) classes: one to four v16i8 table registers followed by
  // the index vector, which also fixes the result type (LLVMMatchType<0>).
  class AdvSIMD_Tbl1_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
                            [IntrNoMem]>;
  class AdvSIMD_Tbl2_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
  class AdvSIMD_Tbl3_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
                             LLVMMatchType<0>],
                            [IntrNoMem]>;
  class AdvSIMD_Tbl4_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
                             LLVMMatchType<0>],
                            [IntrNoMem]>;

  // Table extension (TBX) classes: like TBL but with an extra leading operand
  // of the result type supplying the fallback values for out-of-range indices.
  class AdvSIMD_Tbx1_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
                            [IntrNoMem]>;
  class AdvSIMD_Tbx2_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
                             LLVMMatchType<0>],
                            [IntrNoMem]>;
  class AdvSIMD_Tbx3_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
                             llvm_v16i8_ty, LLVMMatchType<0>],
                            [IntrNoMem]>;
  class AdvSIMD_Tbx4_Intrinsic
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
                             llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
                            [IntrNoMem]>;
}
// TBL/TBX lookups over a table of 1-4 128-bit registers.
0746 def int_aarch64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic;
0747 def int_aarch64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic;
0748 def int_aarch64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic;
0749 def int_aarch64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic;
0750 
0751 def int_aarch64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic;
0752 def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
0753 def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
0754 def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
0755
0756 let TargetPrefix = "aarch64" in {
// Read a 64-bit FP environment register. [IntrNoMem, IntrHasSideEffects]
// keeps the call from being CSE'd/reordered while not being modelled as a
// memory access.
0757 class FPENV_Get_Intrinsic
0758 : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
// Write a 64-bit FP environment register.
0759 class FPENV_Set_Intrinsic
0760 : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>;
// Random-number read: returns the 64-bit value plus an i1 validity flag
// (NOTE(review): flag presumably reflects the instruction's NZCV success
// indication — confirm against the Armv8.5 RNDR spec).
0761 class RNDR_Intrinsic
0762 : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
// Write FPMR; modelled as touching inaccessible memory rather than as a
// plain side effect, unlike the FPCR/FPSR classes above.
0763 class FPMR_Set_Intrinsic
0764 : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleMemOnly]>;
0765 }
0766
0767 // FP environment registers.
// Readers/writers for FPCR (control) and FPSR (status), plus the FPMR writer.
0768 def int_aarch64_get_fpcr : FPENV_Get_Intrinsic;
0769 def int_aarch64_set_fpcr : FPENV_Set_Intrinsic;
0770 def int_aarch64_get_fpsr : FPENV_Get_Intrinsic;
0771 def int_aarch64_set_fpsr : FPENV_Set_Intrinsic;
0772 def int_aarch64_set_fpmr : FPMR_Set_Intrinsic;
0773 
0774 // Armv8.5-A Random number generation intrinsics
// rndr -> RNDR, rndrrs -> RNDRRS (the latter reseeds before returning).
0775 def int_aarch64_rndr : RNDR_Intrinsic;
0776 def int_aarch64_rndrrs : RNDR_Intrinsic;
0777
0778 let TargetPrefix = "aarch64" in {
// AES round helper: 128-bit data block and 128-bit round key, both v16i8.
0779 class Crypto_AES_DataKey_Intrinsic
0780 : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
0781 
// AES helper taking only the 128-bit data block (MixColumns / InvMixColumns).
0782 class Crypto_AES_Data_Intrinsic
0783 : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
0784 
0785 // SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
0786 // (v4i32).
0787 class Crypto_SHA_5Hash4Schedule_Intrinsic
0788 : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
0789 [IntrNoMem]>;
0790 
0791 // SHA intrinsic taking a single 32-bit hash word (i32 -> i32); used by the
0792 // SHA1H fixed-rotate operation.
0793 class Crypto_SHA_1Hash_Intrinsic
0794 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
0795 
0796 // SHA intrinsic taking 8 words of the schedule
0797 class Crypto_SHA_8Schedule_Intrinsic
0798 : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
0799 
0800 // SHA intrinsic taking 12 words of the schedule
0801 class Crypto_SHA_12Schedule_Intrinsic
0802 : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
0803 [IntrNoMem]>;
0804 
0805 // SHA intrinsic taking 8 words of the hash and 4 of the schedule.
0806 class Crypto_SHA_8Hash4Schedule_Intrinsic
0807 : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
0808 [IntrNoMem]>;
0809 
0810 // SHA512 intrinsic taking 2 arguments
0811 class Crypto_SHA512_2Arg_Intrinsic
0812 : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
0813 
0814 // SHA512 intrinsic taking 3 Arguments
0815 class Crypto_SHA512_3Arg_Intrinsic
0816 : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
0817 [IntrNoMem]>;
0818 
0819 // SHA3 Intrinsics taking 3 arguments
0820 class Crypto_SHA3_3Arg_Intrinsic
0821 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
0822 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
0823 [IntrNoMem]>;
0824 
0825 // SHA3 Intrinsic taking 2 arguments
0826 class Crypto_SHA3_2Arg_Intrinsic
0827 : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
0828 [IntrNoMem]>;
0829 
0830 // SHA3 Intrinsic taking 3 Arguments 1 immediate
0831 class Crypto_SHA3_2ArgImm_Intrinsic
0832 : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty],
0833 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
0834 
// NOTE(review): the SM3/SM4 classes below use plain Intrinsic rather than
// DefaultAttrsIntrinsic, unlike the rest of this block — confirm whether
// that is intentional before unifying.
0835 class Crypto_SM3_3Vector_Intrinsic
0836 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
0837 [IntrNoMem]>;
0838 
// SM3 three-vector form with a trailing immediate lane index.
0839 class Crypto_SM3_3VectorIndexed_Intrinsic
0840 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i64_ty],
0841 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
0842 
0843 class Crypto_SM4_2Vector_Intrinsic
0844 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
0845 }
0846
0847 // AES
0848 def int_aarch64_crypto_aese : Crypto_AES_DataKey_Intrinsic;
0849 def int_aarch64_crypto_aesd : Crypto_AES_DataKey_Intrinsic;
0850 def int_aarch64_crypto_aesmc : Crypto_AES_Data_Intrinsic;
0851 def int_aarch64_crypto_aesimc : Crypto_AES_Data_Intrinsic;
0852 
0853 // SHA1
// Hash-update choose/parity/majority rounds plus the SHA1H fixed rotate.
0854 def int_aarch64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic;
0855 def int_aarch64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic;
0856 def int_aarch64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic;
0857 def int_aarch64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic;
0858 
// Message-schedule update parts 0 and 1.
0859 def int_aarch64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic;
0860 def int_aarch64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic;
0861 
0862 // SHA256
0863 def int_aarch64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic;
0864 def int_aarch64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
0865 def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
0866 def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
0867 
0868 // SHA3
// eor3/bcax come in signed/unsigned element flavours; xar takes a rotate
// amount as an immediate.
0869 def int_aarch64_crypto_eor3s : Crypto_SHA3_3Arg_Intrinsic;
0870 def int_aarch64_crypto_eor3u : Crypto_SHA3_3Arg_Intrinsic;
0871 def int_aarch64_crypto_bcaxs : Crypto_SHA3_3Arg_Intrinsic;
0872 def int_aarch64_crypto_bcaxu : Crypto_SHA3_3Arg_Intrinsic;
0873 def int_aarch64_crypto_rax1 : Crypto_SHA3_2Arg_Intrinsic;
0874 def int_aarch64_crypto_xar : Crypto_SHA3_2ArgImm_Intrinsic;
0875 
0876 // SHA512
0877 def int_aarch64_crypto_sha512h : Crypto_SHA512_3Arg_Intrinsic;
0878 def int_aarch64_crypto_sha512h2 : Crypto_SHA512_3Arg_Intrinsic;
0879 def int_aarch64_crypto_sha512su0 : Crypto_SHA512_2Arg_Intrinsic;
0880 def int_aarch64_crypto_sha512su1 : Crypto_SHA512_3Arg_Intrinsic;
0881 
0882 // SM3 & SM4
0883 def int_aarch64_crypto_sm3partw1 : Crypto_SM3_3Vector_Intrinsic;
0884 def int_aarch64_crypto_sm3partw2 : Crypto_SM3_3Vector_Intrinsic;
0885 def int_aarch64_crypto_sm3ss1 : Crypto_SM3_3Vector_Intrinsic;
0886 def int_aarch64_crypto_sm3tt1a : Crypto_SM3_3VectorIndexed_Intrinsic;
0887 def int_aarch64_crypto_sm3tt1b : Crypto_SM3_3VectorIndexed_Intrinsic;
0888 def int_aarch64_crypto_sm3tt2a : Crypto_SM3_3VectorIndexed_Intrinsic;
0889 def int_aarch64_crypto_sm3tt2b : Crypto_SM3_3VectorIndexed_Intrinsic;
0890 def int_aarch64_crypto_sm4e : Crypto_SM4_2Vector_Intrinsic;
0891 def int_aarch64_crypto_sm4ekey : Crypto_SM4_2Vector_Intrinsic;
0892
0893 //===----------------------------------------------------------------------===//
0894 // CRC32
0895
0896 let TargetPrefix = "aarch64" in {
0897 
// CRC accumulation over 8/16/32/64-bit data (suffix b/h/w/x). The first i32
// operand is the running CRC; the 'c' variants use the CRC-32C (Castagnoli)
// polynomial rather than the ISO CRC-32 one.
0898 def int_aarch64_crc32b : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0899 [IntrNoMem]>;
0900 def int_aarch64_crc32cb : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0901 [IntrNoMem]>;
0902 def int_aarch64_crc32h : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0903 [IntrNoMem]>;
0904 def int_aarch64_crc32ch : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0905 [IntrNoMem]>;
0906 def int_aarch64_crc32w : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0907 [IntrNoMem]>;
0908 def int_aarch64_crc32cw : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0909 [IntrNoMem]>;
// 64-bit data variants still produce a 32-bit CRC.
0910 def int_aarch64_crc32x : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
0911 [IntrNoMem]>;
0912 def int_aarch64_crc32cx : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
0913 [IntrNoMem]>;
0914 }
0915
0916 //===----------------------------------------------------------------------===//
0917 // Memory Tagging Extensions (MTE) Intrinsics
0918 let TargetPrefix = "aarch64" in {
// IRG: insert a random tag into a pointer; HasSideEffects because the result
// is non-deterministic (the i64 operand is an exclusion mask —
// NOTE(review): confirm against the MTE spec).
0919 def int_aarch64_irg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
0920 [IntrNoMem, IntrHasSideEffects]>;
// ADDG: add to a pointer's address and tag (deterministic, so plain IntrNoMem).
0921 def int_aarch64_addg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty],
0922 [IntrNoMem]>;
// GMI: fold a pointer's tag into an exclusion mask.
0923 def int_aarch64_gmi : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty],
0924 [IntrNoMem]>;
// LDG: load the allocation tag for an address; STG: store one.
0925 def int_aarch64_ldg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty],
0926 [IntrReadMem]>;
0927 def int_aarch64_stg : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
0928 [IntrWriteMem]>;
// SUBP: pointer difference ignoring tag bits.
0929 def int_aarch64_subp : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
0930 [IntrNoMem]>;
0931 
0932 // The following are codegen-only intrinsics for stack instrumentation.
0933 
0934 // Generate a randomly tagged stack base pointer.
0935 def int_aarch64_irg_sp : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_i64_ty],
0936 [IntrNoMem, IntrHasSideEffects]>;
0937 
0938 // Transfer pointer tag with offset.
0939 // ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
0940 // * address is the address in ptr0
0941 // * tag is a function of (tag in baseptr, tag_offset).
0942 // ** Beware, this is not the same function as implemented by the ADDG instruction!
0943 // Backend optimizations may change tag_offset; the only guarantee is that calls
0944 // to tagp with the same pair of (baseptr, tag_offset) will produce pointers
0945 // with the same tag value, assuming the set of excluded tags has not changed.
0946 // Address bits in baseptr and tag bits in ptr0 are ignored.
0947 // When offset between ptr0 and baseptr is a compile time constant, this can be emitted as
0948 // ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
0949 // It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
0950 def int_aarch64_tagp : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
0951 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
0952 
0953 // Update allocation tags for the memory range to match the tag in the pointer argument.
0954 def int_aarch64_settag : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
0955 [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
0956 
0957 // Update allocation tags for the memory range to match the tag in the pointer argument,
0958 // and set memory contents to zero.
0959 def int_aarch64_settag_zero : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
0960 [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
0961 
0962 // Update allocation tags for 16-aligned, 16-sized memory region, and store a pair 8-byte values.
0963 def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
0964 [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
0965 }
0966
0967 //===----------------------------------------------------------------------===//
0968 // Memory Operations (MOPS) Intrinsics
0969 let TargetPrefix = "aarch64" in {
0970 // Sizes are chosen to correspond to the llvm.memset intrinsic: ptr, i8, i64
// Sets both memory contents and MTE allocation tags; returns the destination
// pointer (first operand is write-only and not captured).
0971 def int_aarch64_mops_memset_tag : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty, llvm_i64_ty],
0972 [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>;
0973 }
0974
0975 // Transactional Memory Extension (TME), Armv8.7-A 64-byte load/store, and Neon FP8 intrinsics
0976 let TargetPrefix = "aarch64" in {
// TSTART: begin a transaction; the i64 result is the instruction's status
// value.
0977 def int_aarch64_tstart : ClangBuiltin<"__builtin_arm_tstart">,
0978 Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>;
0979 
0980 def int_aarch64_tcommit : ClangBuiltin<"__builtin_arm_tcommit">, Intrinsic<[], [], [IntrWillReturn]>;
0981 
// TCANCEL: abort the current transaction; the i64 reason code must be an
// immediate (ImmArg).
0982 def int_aarch64_tcancel : ClangBuiltin<"__builtin_arm_tcancel">,
0983 Intrinsic<[], [llvm_i64_ty], [IntrWillReturn, ImmArg<ArgIndex<0>>]>;
0984 
// TTEST: query transactional state without memory effects; HasSideEffects
// prevents CSE across transaction boundaries.
0985 def int_aarch64_ttest : ClangBuiltin<"__builtin_arm_ttest">,
0986 Intrinsic<[llvm_i64_ty], [],
0987 [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>;
0988 
0989 // Armv8.7-A load/store 64-byte intrinsics
// A 64-byte block is modelled as eight i64 values.
0990 defvar data512 = !listsplat(llvm_i64_ty, 8);
0991 def int_aarch64_ld64b: Intrinsic<data512, [llvm_ptr_ty]>;
0992 def int_aarch64_st64b: Intrinsic<[], !listconcat([llvm_ptr_ty], data512)>;
// ST64BV/ST64BV0 additionally return an i64 status result.
0993 def int_aarch64_st64bv: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>;
0994 def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>;
0995 
0996 //
0997 // Neon FP8 intrinsics
0998 //
// NOTE(review): the FP8 classes use [IntrReadMem, IntrInaccessibleMemOnly],
// presumably to model their dependence on FP8 mode state (FPMR, written by
// int_aarch64_set_fpmr above) — confirm before relaxing these attributes.
0999 
1000 // Conversions
1001 class AdvSIMD_FP8_1VectorArg_Long_Intrinsic
1002 : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
1003 
1004 def int_aarch64_neon_fp8_cvtl1 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
1005 def int_aarch64_neon_fp8_cvtl2 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
1006 
1007 def int_aarch64_neon_fp8_fcvtn
1008 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1009 [llvm_anyvector_ty,
1010 LLVMMatchType<1>],
1011 [IntrReadMem, IntrInaccessibleMemOnly]>;
// fcvtn2 keeps the low half of its first (result-typed) operand.
1012 def int_aarch64_neon_fp8_fcvtn2
1013 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1014 [LLVMMatchType<0>,
1015 llvm_anyvector_ty,
1016 LLVMMatchType<1>],
1017 [IntrReadMem, IntrInaccessibleMemOnly]>;
1018 
1019 // Dot-product
// Accumulator (result type) plus two FP8 source vectors; the _LANE form
// takes a v16i8 table operand and an immediate lane index.
1020 class AdvSIMD_FP8_DOT_Intrinsic
1021 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1022 [LLVMMatchType<0>,
1023 llvm_anyvector_ty,
1024 LLVMMatchType<1>],
1025 [IntrReadMem, IntrInaccessibleMemOnly]>;
1026 class AdvSIMD_FP8_DOT_LANE_Intrinsic
1027 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1028 [LLVMMatchType<0>,
1029 llvm_anyvector_ty,
1030 llvm_v16i8_ty,
1031 llvm_i32_ty],
1032 [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
1033 
1034 def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic;
1035 def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic;
1036 
1037 def int_aarch64_neon_fp8_fdot4 : AdvSIMD_FP8_DOT_Intrinsic;
1038 def int_aarch64_neon_fp8_fdot4_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic;
1039 
1040 
1041 // Fused multiply-add
1042 class AdvSIMD_FP8_FMLA_Intrinsic
1043 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1044 [LLVMMatchType<0>,
1045 llvm_v16i8_ty,
1046 llvm_v16i8_ty],
1047 [IntrReadMem, IntrInaccessibleMemOnly]>;
1048 
1049 class AdvSIMD_FP8_FMLA_LANE_Intrinsic
1050 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1051 [LLVMMatchType<0>,
1052 llvm_v16i8_ty,
1053 llvm_v16i8_ty,
1054 llvm_i32_ty],
1055 [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
1056 
// Suffixes select source halves: b = bottom, t = top (e.g. fmlallbt reads
// bottom of the first source and top of the second).
1057 def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic;
1058 def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic;
1059 
1060 def int_aarch64_neon_fp8_fmlallbb : AdvSIMD_FP8_FMLA_Intrinsic;
1061 def int_aarch64_neon_fp8_fmlallbt : AdvSIMD_FP8_FMLA_Intrinsic;
1062 def int_aarch64_neon_fp8_fmlalltb : AdvSIMD_FP8_FMLA_Intrinsic;
1063 def int_aarch64_neon_fp8_fmlalltt : AdvSIMD_FP8_FMLA_Intrinsic;
1064 
1065 def int_aarch64_neon_fp8_fmlalb_lane : AdvSIMD_FP8_FMLA_LANE_Intrinsic;
1066 def int_aarch64_neon_fp8_fmlalt_lane : AdvSIMD_FP8_FMLA_LANE_Intrinsic;
1067 
1068 def int_aarch64_neon_fp8_fmlallbb_lane : AdvSIMD_FP8_FMLA_LANE_Intrinsic;
1069 def int_aarch64_neon_fp8_fmlallbt_lane : AdvSIMD_FP8_FMLA_LANE_Intrinsic;
1070 def int_aarch64_neon_fp8_fmlalltb_lane : AdvSIMD_FP8_FMLA_LANE_Intrinsic;
1071 def int_aarch64_neon_fp8_fmlalltt_lane : AdvSIMD_FP8_FMLA_LANE_Intrinsic;
1072 }
1073
// Scalable-vector (vscale x N) value types used by the SVE intrinsic
// definitions below: predicate types (nxv*i1), integer element types, and
// FP/BF16 element types.
1074 def llvm_nxv1i1_ty : LLVMType<nxv1i1>;
1075 def llvm_nxv2i1_ty : LLVMType<nxv2i1>;
1076 def llvm_nxv4i1_ty : LLVMType<nxv4i1>;
1077 def llvm_nxv8i1_ty : LLVMType<nxv8i1>;
1078 def llvm_nxv16i1_ty : LLVMType<nxv16i1>;
1079 def llvm_nxv16i8_ty : LLVMType<nxv16i8>;
1080 def llvm_nxv4i32_ty : LLVMType<nxv4i32>;
1081 def llvm_nxv2i64_ty : LLVMType<nxv2i64>;
1082 def llvm_nxv8f16_ty : LLVMType<nxv8f16>;
1083 def llvm_nxv8bf16_ty : LLVMType<nxv8bf16>;
1084 def llvm_nxv4f32_ty : LLVMType<nxv4f32>;
1085 def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
1086
1087 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
1088 
// SVE intrinsic class definitions. Throughout: type parameter <0> is the
// data vector type, and governing predicates are expressed as
// LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> (an i1 vector with the same
// element count as the data vector).
1089 class AdvSIMD_1Vec_PredLoad_Intrinsic
1090 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1091 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1092 [IntrReadMem, IntrArgMemOnly]>;
1093 
1094 class AdvSIMD_2Vec_PredLoad_Intrinsic
1095 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
1096 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1097 [IntrReadMem, IntrArgMemOnly]>;
1098 
1099 class AdvSIMD_3Vec_PredLoad_Intrinsic
1100 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
1101 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1102 [IntrReadMem, IntrArgMemOnly]>;
1103 
1104 class AdvSIMD_4Vec_PredLoad_Intrinsic
1105 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
1106 LLVMMatchType<0>],
1107 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1108 [IntrReadMem, IntrArgMemOnly]>;
1109 
// First-faulting load variant: also updates the FFR, which is modelled as
// inaccessible memory.
1110 class AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic
1111 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1112 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1113 [IntrInaccessibleMemOrArgMemOnly]>;
1114 
1115 class AdvSIMD_1Vec_PredStore_Intrinsic
1116 : DefaultAttrsIntrinsic<[],
1117 [llvm_anyvector_ty,
1118 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1119 [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
1120 
1121 class AdvSIMD_2Vec_PredStore_Intrinsic
1122 : DefaultAttrsIntrinsic<[],
1123 [llvm_anyvector_ty, LLVMMatchType<0>,
1124 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1125 [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
1126 
1127 class AdvSIMD_3Vec_PredStore_Intrinsic
1128 : DefaultAttrsIntrinsic<[],
1129 [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
1130 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1131 [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
1132 
1133 class AdvSIMD_4Vec_PredStore_Intrinsic
1134 : DefaultAttrsIntrinsic<[],
1135 [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
1136 LLVMMatchType<0>,
1137 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
1138 [IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
1139 
// INDEX: build a vector from a scalar (base, step) pair.
1140 class AdvSIMD_SVE_Index_Intrinsic
1141 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1142 [LLVMVectorElementType<0>,
1143 LLVMVectorElementType<0>],
1144 [IntrNoMem]>;
1145 
// Merging unary op: (passthru, predicate, operand).
1146 class AdvSIMD_Merged1VectorArg_Intrinsic
1147 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1148 [LLVMMatchType<0>,
1149 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1150 LLVMMatchType<0>],
1151 [IntrNoMem]>;
1152 
1153 class AdvSIMD_2VectorArgIndexed_Intrinsic
1154 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1155 [LLVMMatchType<0>,
1156 LLVMMatchType<0>,
1157 llvm_i32_ty],
1158 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
1159 
1160 class AdvSIMD_3VectorArgIndexed_Intrinsic
1161 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1162 [LLVMMatchType<0>,
1163 LLVMMatchType<0>,
1164 LLVMMatchType<0>,
1165 llvm_i32_ty],
1166 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
1167 
// Predicated ops with 1/2/3 data operands.
1168 class AdvSIMD_Pred1VectorArg_Intrinsic
1169 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1170 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1171 LLVMMatchType<0>],
1172 [IntrNoMem]>;
1173 
1174 class AdvSIMD_Pred2VectorArg_Intrinsic
1175 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1176 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1177 LLVMMatchType<0>,
1178 LLVMMatchType<0>],
1179 [IntrNoMem]>;
1180 
1181 class AdvSIMD_Pred3VectorArg_Intrinsic
1182 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1183 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1184 LLVMMatchType<0>,
1185 LLVMMatchType<0>,
1186 LLVMMatchType<0>],
1187 [IntrNoMem]>;
1188 
// Predicated compares producing a predicate result.
1189 class AdvSIMD_SVE_Compare_Intrinsic
1190 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1191 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1192 llvm_anyvector_ty,
1193 LLVMMatchType<0>],
1194 [IntrNoMem]>;
1195 
// "Wide" compare: second source is always a 64-bit element vector.
1196 class AdvSIMD_SVE_CompareWide_Intrinsic
1197 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1198 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1199 llvm_anyvector_ty,
1200 llvm_nxv2i64_ty],
1201 [IntrNoMem]>;
1202 
1203 class AdvSIMD_SVE_Saturating_Intrinsic
1204 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1205 [LLVMMatchType<0>,
1206 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1207 [IntrNoMem]>;
1208 
// Saturating inc/dec by a (pattern, multiplier) immediate pair.
1209 class AdvSIMD_SVE_SaturatingWithPattern_Intrinsic
1210 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1211 [LLVMMatchType<0>,
1212 llvm_i32_ty,
1213 llvm_i32_ty],
1214 [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
1215 
// _N variants operate on a scalar of explicit type T instead of a vector.
1216 class AdvSIMD_SVE_Saturating_N_Intrinsic<LLVMType T>
1217 : DefaultAttrsIntrinsic<[T],
1218 [T, llvm_anyvector_ty],
1219 [IntrNoMem]>;
1220 
1221 class AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<LLVMType T>
1222 : DefaultAttrsIntrinsic<[T],
1223 [T, llvm_i32_ty, llvm_i32_ty],
1224 [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
1225 
1226 class AdvSIMD_SVE_CNT_Intrinsic
1227 : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>],
1228 [LLVMVectorOfBitcastsToInt<0>,
1229 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1230 llvm_anyvector_ty],
1231 [IntrNoMem]>;
1232 
// Predicated reduction seeded with an initial scalar value.
1233 class AdvSIMD_SVE_ReduceWithInit_Intrinsic
1234 : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
1235 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1236 LLVMVectorElementType<0>,
1237 llvm_anyvector_ty],
1238 [IntrNoMem]>;
1239 
1240 class AdvSIMD_SVE_ShiftByImm_Intrinsic
1241 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1242 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1243 LLVMMatchType<0>,
1244 llvm_i32_ty],
1245 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
1246 
// Shift where the per-element shift amounts come from a 64-bit vector.
1247 class AdvSIMD_SVE_ShiftWide_Intrinsic
1248 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1249 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1250 LLVMMatchType<0>,
1251 llvm_nxv2i64_ty],
1252 [IntrNoMem]>;
1253 
// Unpack: widen half of a vector to double-width elements.
1254 class AdvSIMD_SVE_Unpack_Intrinsic
1255 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1256 [LLVMSubdivide2VectorType<0>],
1257 [IntrNoMem]>;
1258 
// Complex add/multiply classes: the trailing i32 immediate is the rotation.
1259 class AdvSIMD_SVE_CADD_Intrinsic
1260 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1261 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1262 LLVMMatchType<0>,
1263 LLVMMatchType<0>,
1264 llvm_i32_ty],
1265 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
1266 
1267 class AdvSIMD_SVE_CMLA_Intrinsic
1268 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1269 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1270 LLVMMatchType<0>,
1271 LLVMMatchType<0>,
1272 LLVMMatchType<0>,
1273 llvm_i32_ty],
1274 [IntrNoMem, ImmArg<ArgIndex<4>>]>;
1275 
1276 class AdvSIMD_SVE_CMLA_LANE_Intrinsic
1277 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1278 [LLVMMatchType<0>,
1279 LLVMMatchType<0>,
1280 LLVMMatchType<0>,
1281 llvm_i32_ty,
1282 llvm_i32_ty],
1283 [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
1284 
// Predicated broadcast of a scalar into selected lanes (merging).
1285 class AdvSIMD_SVE_DUP_Intrinsic
1286 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1287 [LLVMMatchType<0>,
1288 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1289 LLVMVectorElementType<0>],
1290 [IntrNoMem]>;
1291 
1292 class AdvSIMD_SVE_DUP_Unpred_Intrinsic
1293 : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>],
1294 [IntrNoMem]>;
1295 
// DUPQ: broadcast a selected 128-bit segment (i64 index).
1296 class AdvSIMD_SVE_DUPQ_Intrinsic
1297 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1298 [LLVMMatchType<0>,
1299 llvm_i64_ty],
1300 [IntrNoMem]>;
1301 
1302 class AdvSIMD_SVE_EXPA_Intrinsic
1303 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1304 [LLVMVectorOfBitcastsToInt<0>],
1305 [IntrNoMem]>;
1306 
// FP convert with independent result/source element types (two type params).
1307 class AdvSIMD_SVE_FCVT_Intrinsic
1308 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1309 [LLVMMatchType<0>,
1310 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1311 llvm_anyvector_ty],
1312 [IntrNoMem]>;
1313 
1314 class AdvSIMD_SVE_FCVTZS_Intrinsic
1315 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1316 [LLVMVectorOfBitcastsToInt<0>,
1317 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1318 llvm_anyvector_ty],
1319 [IntrNoMem]>;
1320 
// INSR: shift vector up one element and insert a scalar at the bottom.
1321 class AdvSIMD_SVE_INSR_Intrinsic
1322 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1323 [LLVMMatchType<0>,
1324 LLVMVectorElementType<0>],
1325 [IntrNoMem]>;
1326 
// PTRUE: predicate constant from an immediate pattern specifier.
1327 class AdvSIMD_SVE_PTRUE_Intrinsic
1328 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1329 [llvm_i32_ty],
1330 [IntrNoMem, ImmArg<ArgIndex<0>>]>;
1331 
1332 class AdvSIMD_SVE_PUNPKHI_Intrinsic
1333 : DefaultAttrsIntrinsic<[LLVMHalfElementsVectorType<0>],
1334 [llvm_anyvector_ty],
1335 [IntrNoMem]>;
1336 
1337 class AdvSIMD_SVE_SCALE_Intrinsic
1338 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1339 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1340 LLVMMatchType<0>,
1341 LLVMVectorOfBitcastsToInt<0>],
1342 [IntrNoMem]>;
1343 
1344 class AdvSIMD_SVE_SCVTF_Intrinsic
1345 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1346 [LLVMMatchType<0>,
1347 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1348 llvm_anyvector_ty],
1349 [IntrNoMem]>;
1350 
1351 class AdvSIMD_SVE_TSMUL_Intrinsic
1352 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1353 [LLVMMatchType<0>,
1354 LLVMVectorOfBitcastsToInt<0>],
1355 [IntrNoMem]>;
1356 
// CNTB/CNTH/CNTW/CNTD-style element counting: immediate pattern -> i64 count.
1357 class AdvSIMD_SVE_CNTB_Intrinsic
1358 : DefaultAttrsIntrinsic<[llvm_i64_ty],
1359 [llvm_i32_ty],
1360 [IntrNoMem, ImmArg<ArgIndex<0>>]>;
1361 
// CNTP: count active predicate elements under a governing predicate.
1362 class AdvSIMD_SVE_CNTP_Intrinsic
1363 : DefaultAttrsIntrinsic<[llvm_i64_ty],
1364 [llvm_anyvector_ty, LLVMMatchType<0>],
1365 [IntrNoMem]>;
1366 
// Dot products accumulate 4-element groups into quarter-width-source lanes.
1367 class AdvSIMD_SVE_DOT_Intrinsic
1368 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1369 [LLVMMatchType<0>,
1370 LLVMSubdivide4VectorType<0>,
1371 LLVMSubdivide4VectorType<0>],
1372 [IntrNoMem]>;
1373 
1374 class AdvSIMD_SVE_DOT_Indexed_Intrinsic
1375 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1376 [LLVMMatchType<0>,
1377 LLVMSubdivide4VectorType<0>,
1378 LLVMSubdivide4VectorType<0>,
1379 llvm_i32_ty],
1380 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
1381 
// PTEST: scalar i1 result of testing a predicate under a governing predicate.
1382 class AdvSIMD_SVE_PTEST_Intrinsic
1383 : DefaultAttrsIntrinsic<[llvm_i1_ty],
1384 [llvm_anyvector_ty,
1385 LLVMMatchType<0>],
1386 [IntrNoMem]>;
1387 
1388 class AdvSIMD_SVE_TBL_Intrinsic
1389 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1390 [LLVMMatchType<0>,
1391 LLVMVectorOfBitcastsToInt<0>],
1392 [IntrNoMem]>;
1393 
1394 class AdvSIMD_SVE2_TBX_Intrinsic
1395 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1396 [LLVMMatchType<0>,
1397 LLVMMatchType<0>,
1398 LLVMVectorOfBitcastsToInt<0>],
1399 [IntrNoMem]>;
1400 
// NOTE(review): class name misspells "Intrinsic". Kept as-is because defs
// elsewhere in this file reference it by this spelling; rename them together.
1401 class SVE2_LUTI_Inrinsic
1402 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1403 [LLVMMatchType<0>,
1404 llvm_nxv16i8_ty,
1405 llvm_i32_ty],
1406 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
1407 
// "Long" ops produce double-width elements from half-width sources.
1408 class SVE2_1VectorArg_Long_Intrinsic
1409 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1410 [LLVMSubdivide2VectorType<0>,
1411 llvm_i32_ty],
1412 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
1413 
1414 class SVE2_2VectorArg_Long_Intrinsic
1415 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1416 [LLVMSubdivide2VectorType<0>,
1417 LLVMSubdivide2VectorType<0>],
1418 [IntrNoMem]>;
1419 
1420 class SVE2_2VectorArgIndexed_Long_Intrinsic
1421 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1422 [LLVMSubdivide2VectorType<0>,
1423 LLVMSubdivide2VectorType<0>,
1424 llvm_i32_ty],
1425 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
1426 
// "Wide" ops combine a full-width first operand with a half-width second.
1427 class SVE2_2VectorArg_Wide_Intrinsic
1428 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1429 [LLVMMatchType<0>,
1430 LLVMSubdivide2VectorType<0>],
1431 [IntrNoMem]>;
1432 
1433 class SVE2_2VectorArg_Pred_Long_Intrinsic
1434 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1435 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1436 LLVMMatchType<0>,
1437 LLVMSubdivide2VectorType<0>],
1438 [IntrNoMem]>;
1439 
1440 class SVE2_3VectorArg_Long_Intrinsic
1441 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1442 [LLVMMatchType<0>,
1443 LLVMSubdivide2VectorType<0>,
1444 LLVMSubdivide2VectorType<0>],
1445 [IntrNoMem]>;
1446 
1447 class SVE2_3VectorArgIndexed_Long_Intrinsic
1448 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1449 [LLVMMatchType<0>,
1450 LLVMSubdivide2VectorType<0>,
1451 LLVMSubdivide2VectorType<0>,
1452 llvm_i32_ty],
1453 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
1454 
// "Narrowing" ops produce half-width elements from full-width sources;
// "Merged" forms also take the bottom-half result operand.
1455 class SVE2_1VectorArg_Narrowing_Intrinsic
1456 : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
1457 [llvm_anyvector_ty],
1458 [IntrNoMem]>;
1459 
1460 class SVE2_Merged1VectorArg_Narrowing_Intrinsic
1461 : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
1462 [LLVMSubdivide2VectorType<0>,
1463 llvm_anyvector_ty],
1464 [IntrNoMem]>;
1465 class SVE2_2VectorArg_Narrowing_Intrinsic
1466 : DefaultAttrsIntrinsic<
1467 [LLVMSubdivide2VectorType<0>],
1468 [llvm_anyvector_ty, LLVMMatchType<0>],
1469 [IntrNoMem]>;
1470 
1471 class SVE2_Merged2VectorArg_Narrowing_Intrinsic
1472 : DefaultAttrsIntrinsic<
1473 [LLVMSubdivide2VectorType<0>],
1474 [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
1475 [IntrNoMem]>;
1476 
1477 class SVE2_1VectorArg_Imm_Narrowing_Intrinsic
1478 : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
1479 [llvm_anyvector_ty, llvm_i32_ty],
1480 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
1481 
1482 class SVE2_2VectorArg_Imm_Narrowing_Intrinsic
1483 : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
1484 [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty,
1485 llvm_i32_ty],
1486 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
1487 
// WHILEWR/WHILERW-style conflict detection between two pointers.
1488 class SVE2_CONFLICT_DETECT_Intrinsic
1489 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1490 [llvm_anyptr_ty, LLVMMatchType<1>],
1491 [IntrNoMem]>;
1492 
1493 class SVE2_3VectorArg_Indexed_Intrinsic
1494 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1495 [LLVMMatchType<0>,
1496 LLVMSubdivide2VectorType<0>,
1497 LLVMSubdivide2VectorType<0>,
1498 llvm_i32_ty],
1499 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
1500 
1501 class SVE2_1VectorArgIndexed_Intrinsic
1502 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1503 [LLVMMatchType<0>,
1504 llvm_i32_ty],
1505 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
1506 
// Complex dot product with lane index and rotation immediates.
1507 class AdvSIMD_SVE_CDOT_LANE_Intrinsic
1508 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1509 [LLVMMatchType<0>,
1510 LLVMSubdivide4VectorType<0>,
1511 LLVMSubdivide4VectorType<0>,
1512 llvm_i32_ty,
1513 llvm_i32_ty],
1514 [IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
1515 
1516 class SVE2_1VectorArg_Pred_Intrinsic
1517 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1518 [llvm_anyvector_ty],
1519 [IntrNoMem]>;
1520 
1521 class SVE2_1VectorArgIndexed_Pred_Intrinsic
1522 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1523 [llvm_anyvector_ty, llvm_i32_ty],
1524 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
1525 
1526 class SVE2_Pred_1VectorArgIndexed_Intrinsic
1527 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1528 [LLVMMatchType<0>,
1529 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty],
1530 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
1531 
1532 class SVE2_Pred_1VectorArg_Intrinsic
1533 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1534 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
1535 [IntrNoMem]>;
1536 
1537 // NOTE: There is no relationship between these intrinsics beyond an attempt
1538 // to reuse currently identical class definitions.
1539 class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
1540 class AdvSIMD_SVE2_CADD_Intrinsic : AdvSIMD_2VectorArgIndexed_Intrinsic;
1541 class AdvSIMD_SVE2_CMLA_Intrinsic : AdvSIMD_3VectorArgIndexed_Intrinsic;
1542 
1543 // This class of intrinsics is not intended to be useful within LLVM IR but
1544 // is instead here to support some of the more rigid parts of the ACLE.
1545 class Builtin_SVCVT<LLVMType OUT, LLVMType PRED, LLVMType IN>
1546 : DefaultAttrsIntrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>;
1547 }
1548
1549 //===----------------------------------------------------------------------===//
1550 // SVE
1551
1552 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
1553
// nxv16i1 = f(nxv16i1): operates on full svbool_t predicates.
1554 class AdvSIMD_SVE_2SVBoolArg_Intrinsic
1555 : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
1556 [llvm_nxv16i1_ty],
1557 [IntrNoMem]>;
1558
// nxv16i1 = f(nxv16i1, nxv16i1): binary form of the above.
1559 class AdvSIMD_SVE_3SVBoolArg_Intrinsic
1560 : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
1561 [llvm_nxv16i1_ty, llvm_nxv16i1_ty],
1562 [IntrNoMem]>;
1563
// Predicated reduction: scalar element = f(pred, vec), where the result is
// the element type of the input vector.
1564 class AdvSIMD_SVE_Reduce_Intrinsic
1565 : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
1566 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1567 llvm_anyvector_ty],
1568 [IntrNoMem]>;
1569
// Quadword reduction: vector result = f(pred, vec).  The predicate's
// element count matches the *input* vector (overload slot 1), not the
// result vector.
1570 class AdvSIMD_SVE_V128_Reduce_Intrinsic
1571 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1572 [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
1573 llvm_anyvector_ty],
1574 [IntrNoMem]>;
1575
1576
// Add-across reduction: always returns i64 regardless of element type.
1577 class AdvSIMD_SVE_SADDV_Reduce_Intrinsic
1578 : DefaultAttrsIntrinsic<[llvm_i64_ty],
1579 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1580 llvm_anyvector_ty],
1581 [IntrNoMem]>;
1582
// While comparison: predicate-style vector computed from two scalar
// integers of the same type.
1583 class AdvSIMD_SVE_WHILE_Intrinsic
1584 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1585 [llvm_anyint_ty, LLVMMatchType<1>],
1586 [IntrNoMem]>;
// Gather-load signature classes.  Naming convention visible below:
//   SV = scalar base pointer + vector of offsets,
//   VS = vector of base addresses + scalar offset,
//   Q  = quadword forms governed by an nxv1i1 (per-128-bit) predicate,
//   WriteFFR = first-faulting forms that also update the FFR, which is why
//   they use IntrInaccessibleMemOrArgMemOnly instead of plain IntrReadMem.
1588 class AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic
1589 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1590 [
1591 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1592 llvm_ptr_ty,
1593 LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
1594 ],
1595 [IntrReadMem, IntrArgMemOnly]>;
1596
1597 class AdvSIMD_GatherLoad_SV_64b_Offsets_WriteFFR_Intrinsic
1598 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1599 [
1600 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1601 llvm_ptr_ty,
1602 LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
1603 ],
1604 [IntrInaccessibleMemOrArgMemOnly]>;
1605
1606 class AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic
1607 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1608 [
1609 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1610 llvm_ptr_ty,
1611 LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
1612 ],
1613 [IntrReadMem, IntrArgMemOnly]>;
1614
1615 class AdvSIMD_GatherLoad_SV_32b_Offsets_WriteFFR_Intrinsic
1616 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1617 [
1618 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1619 llvm_ptr_ty,
1620 LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
1621 ],
1622 [IntrInaccessibleMemOrArgMemOnly]>;
1623
// Vector-of-bases form: no IntrArgMemOnly because the accessed locations
// come from the base vector's values rather than a pointer argument.
1624 class AdvSIMD_GatherLoad_VS_Intrinsic
1625 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1626 [
1627 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1628 llvm_anyvector_ty,
1629 llvm_i64_ty
1630 ],
1631 [IntrReadMem]>;
1632
1633 class AdvSIMD_GatherLoadQ_VS_Intrinsic
1634 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1635 [
1636 llvm_nxv1i1_ty,
1637 llvm_anyvector_ty,
1638 llvm_i64_ty
1639 ],
1640 [IntrReadMem]>;
1641
1642 class AdvSIMD_GatherLoadQ_SV_Intrinsic
1643 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1644 [
1645 llvm_nxv1i1_ty,
1646 llvm_ptr_ty,
1647 llvm_nxv2i64_ty
1648 ],
1649 [IntrReadMem, IntrArgMemOnly]>;
1650
1651 class AdvSIMD_GatherLoad_VS_WriteFFR_Intrinsic
1652 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1653 [
1654 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1655 llvm_anyvector_ty,
1656 llvm_i64_ty
1657 ],
1658 [IntrInaccessibleMemOrArgMemOnly]>;
1659
// Scatter-store signature classes, mirroring the gather-load classes above:
// same SV/VS/Q naming, but the data vector comes first, there is no result,
// and the attribute is IntrWriteMem.
1660 class AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic
1661 : DefaultAttrsIntrinsic<[],
1662 [
1663 llvm_anyvector_ty,
1664 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1665 llvm_ptr_ty,
1666 LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
1667 ],
1668 [IntrWriteMem, IntrArgMemOnly]>;
1669
1670 class AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic
1671 : DefaultAttrsIntrinsic<[],
1672 [
1673 llvm_anyvector_ty,
1674 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1675 llvm_ptr_ty,
1676 LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
1677 ],
1678 [IntrWriteMem, IntrArgMemOnly]>;
1679
// Vector-of-bases form: no IntrArgMemOnly, as the stored-to locations come
// from the base vector's values.
1680 class AdvSIMD_ScatterStore_VS_Intrinsic
1681 : DefaultAttrsIntrinsic<[],
1682 [
1683 llvm_anyvector_ty,
1684 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
1685 llvm_anyvector_ty, llvm_i64_ty
1686 ],
1687 [IntrWriteMem]>;
1688
1689 class AdvSIMD_ScatterStoreQ_VS_Intrinsic
1690 : DefaultAttrsIntrinsic<[],
1691 [
1692 llvm_anyvector_ty,
1693 llvm_nxv1i1_ty,
1694 llvm_anyvector_ty,
1695 llvm_i64_ty
1696 ],
1697 [IntrWriteMem]>;
1698
1699 class AdvSIMD_ScatterStoreQ_SV_Intrinsic
1700 : DefaultAttrsIntrinsic<[],
1701 [
1702 llvm_anyvector_ty,
1703 llvm_nxv1i1_ty,
1704 llvm_ptr_ty,
1705 llvm_nxv2i64_ty
1706 ],
1707 [IntrWriteMem, IntrArgMemOnly]>;
1708
// Gather prefetch, scalar base + vector offsets.  The prefetch operation
// (Prfop, operand 3) must be an immediate, and the base pointer is not
// captured.  IntrInaccessibleMemOrArgMemOnly models the prefetch side
// effect without claiming a visible read or write of the data.
1709 class SVE_gather_prf_SV
1710 : DefaultAttrsIntrinsic<[],
1711 [
1712 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate
1713 llvm_ptr_ty, // Base address
1714 llvm_anyvector_ty, // Offsets
1715 llvm_i32_ty // Prfop
1716 ],
1717 [IntrInaccessibleMemOrArgMemOnly, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<3>>]>;
1718
// Gather prefetch, vector bases + scalar offset; Prfop again immediate.
1719 class SVE_gather_prf_VS
1720 : DefaultAttrsIntrinsic<[],
1721 [
1722 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate
1723 llvm_anyvector_ty, // Base addresses
1724 llvm_i64_ty, // Scalar offset
1725 llvm_i32_ty // Prfop
1726 ],
1727 [IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<3>>]>;
1728
// vec = f(acc vec, quarter-width vec, quarter-width vec): matrix-multiply
// accumulate shape.
1729 class SVE_MatMul_Intrinsic
1730 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1731 [LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>],
1732 [IntrNoMem]>;
1733
// nxv4f32 = f(nxv4f32 acc, nxv8bf16, nxv8bf16): BF16 accumulate into f32.
1734 class SVE_4Vec_BF16
1735 : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
1736 [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
1737 [IntrNoMem]>;
1738
// As above plus an immediate lane index (operand 3).
1739 class SVE_4Vec_BF16_Indexed
1740 : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
1741 [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
1742 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
1743
1744 //
1745 // Loads
1746 //
1747
1748 def int_aarch64_sve_ld1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
1749
// Structure loads: the _sret forms return their 2/3/4 vectors as multiple
// results rather than through a pointer argument.
1750 def int_aarch64_sve_ld2_sret : AdvSIMD_2Vec_PredLoad_Intrinsic;
1751 def int_aarch64_sve_ld3_sret : AdvSIMD_3Vec_PredLoad_Intrinsic;
1752 def int_aarch64_sve_ld4_sret : AdvSIMD_4Vec_PredLoad_Intrinsic;
1753
// ldnf1/ldff1 (non-faulting / first-faulting) use the WriteFFR class
// because they also update the first-fault register.
1754 def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
1755 def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;
1756 def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;
1757
1758 def int_aarch64_sve_ld1rq : AdvSIMD_1Vec_PredLoad_Intrinsic;
1759 def int_aarch64_sve_ld1ro : AdvSIMD_1Vec_PredLoad_Intrinsic;
1760
1761 //
1762 // Stores
1763 //
1764
// Predicated contiguous stores; st2/st3/st4 are the structure-store forms,
// stnt1 is the non-temporal variant.
1765 def int_aarch64_sve_st1 : AdvSIMD_1Vec_PredStore_Intrinsic;
1766 def int_aarch64_sve_st2 : AdvSIMD_2Vec_PredStore_Intrinsic;
1767 def int_aarch64_sve_st3 : AdvSIMD_3Vec_PredStore_Intrinsic;
1768 def int_aarch64_sve_st4 : AdvSIMD_4Vec_PredStore_Intrinsic;
1769
1770 def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic;
1771
1772 //
1773 // Prefetches
1774 //
1775
// Contiguous prefetch: (pred, base, imm prfop).
1776 def int_aarch64_sve_prf
1777 : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_ptr_ty, llvm_i32_ty],
1778 [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
1779
1780 // Scalar + 32-bit scaled offset vector, zero extend, packed and
1781 // unpacked.
1782 def int_aarch64_sve_prfb_gather_uxtw_index : SVE_gather_prf_SV;
1783 def int_aarch64_sve_prfh_gather_uxtw_index : SVE_gather_prf_SV;
1784 def int_aarch64_sve_prfw_gather_uxtw_index : SVE_gather_prf_SV;
1785 def int_aarch64_sve_prfd_gather_uxtw_index : SVE_gather_prf_SV;
1786
1787 // Scalar + 32-bit scaled offset vector, sign extend, packed and
1788 // unpacked.
1789 def int_aarch64_sve_prfb_gather_sxtw_index : SVE_gather_prf_SV;
1790 def int_aarch64_sve_prfw_gather_sxtw_index : SVE_gather_prf_SV;
1791 def int_aarch64_sve_prfh_gather_sxtw_index : SVE_gather_prf_SV;
1792 def int_aarch64_sve_prfd_gather_sxtw_index : SVE_gather_prf_SV;
1793
1794 // Scalar + 64-bit scaled offset vector.
1795 def int_aarch64_sve_prfb_gather_index : SVE_gather_prf_SV;
1796 def int_aarch64_sve_prfh_gather_index : SVE_gather_prf_SV;
1797 def int_aarch64_sve_prfw_gather_index : SVE_gather_prf_SV;
1798 def int_aarch64_sve_prfd_gather_index : SVE_gather_prf_SV;
1799
1800 // Vector + scalar.
1801 def int_aarch64_sve_prfb_gather_scalar_offset : SVE_gather_prf_VS;
1802 def int_aarch64_sve_prfh_gather_scalar_offset : SVE_gather_prf_VS;
1803 def int_aarch64_sve_prfw_gather_scalar_offset : SVE_gather_prf_VS;
1804 def int_aarch64_sve_prfd_gather_scalar_offset : SVE_gather_prf_VS;
1805
1806 //
1807 // Scalar to vector operations
1808 //
1809
// dup is the predicated (merging) broadcast; dup_x is the unpredicated one.
1810 def int_aarch64_sve_dup : AdvSIMD_SVE_DUP_Intrinsic;
1811 def int_aarch64_sve_dup_x : AdvSIMD_SVE_DUP_Unpred_Intrinsic;
1812
1813 def int_aarch64_sve_index : AdvSIMD_SVE_Index_Intrinsic;
1814
1815 //
1816 // Address calculation
1817 //
1818
// adr[bhwd]: vector address computation at byte/half/word/double scaling.
1819 def int_aarch64_sve_adrb : AdvSIMD_2VectorArg_Intrinsic;
1820 def int_aarch64_sve_adrh : AdvSIMD_2VectorArg_Intrinsic;
1821 def int_aarch64_sve_adrw : AdvSIMD_2VectorArg_Intrinsic;
1822 def int_aarch64_sve_adrd : AdvSIMD_2VectorArg_Intrinsic;
1823
1824 //
1825 // Integer arithmetic
1826 //
// Throughout this section: each `_u` def shares its base form's predicated
// signature (per ACLE naming, presumably the variant whose inactive lanes
// are undefined — confirm against the LLVM/ACLE docs), and `_lane` forms
// add an immediate index operand via the *Indexed classes.
1827
1828 def int_aarch64_sve_add : AdvSIMD_Pred2VectorArg_Intrinsic;
1829 def int_aarch64_sve_add_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1830 def int_aarch64_sve_sub : AdvSIMD_Pred2VectorArg_Intrinsic;
1831 def int_aarch64_sve_sub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1832 def int_aarch64_sve_subr : AdvSIMD_Pred2VectorArg_Intrinsic;
1833
1834 def int_aarch64_sve_pmul : AdvSIMD_2VectorArg_Intrinsic;
1835
1836 def int_aarch64_sve_mul : AdvSIMD_Pred2VectorArg_Intrinsic;
1837 def int_aarch64_sve_mul_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1838 def int_aarch64_sve_mul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
1839 def int_aarch64_sve_smulh : AdvSIMD_Pred2VectorArg_Intrinsic;
1840 def int_aarch64_sve_smulh_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1841 def int_aarch64_sve_umulh : AdvSIMD_Pred2VectorArg_Intrinsic;
1842 def int_aarch64_sve_umulh_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1843
1844 def int_aarch64_sve_sdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
1845 def int_aarch64_sve_sdiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1846 def int_aarch64_sve_udiv : AdvSIMD_Pred2VectorArg_Intrinsic;
1847 def int_aarch64_sve_udiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1848 def int_aarch64_sve_sdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
1849 def int_aarch64_sve_udivr : AdvSIMD_Pred2VectorArg_Intrinsic;
1850
1851 def int_aarch64_sve_smax : AdvSIMD_Pred2VectorArg_Intrinsic;
1852 def int_aarch64_sve_smax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1853 def int_aarch64_sve_umax : AdvSIMD_Pred2VectorArg_Intrinsic;
1854 def int_aarch64_sve_umax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1855 def int_aarch64_sve_smin : AdvSIMD_Pred2VectorArg_Intrinsic;
1856 def int_aarch64_sve_smin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1857 def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic;
1858 def int_aarch64_sve_umin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1859 def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic;
1860 def int_aarch64_sve_sabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1861 def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic;
1862 def int_aarch64_sve_uabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1863
1864 def int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic;
1865 def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic;
1866 def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic;
1867 def int_aarch64_sve_mla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
1868 def int_aarch64_sve_mla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
1869 def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic;
1870 def int_aarch64_sve_mls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
1871 def int_aarch64_sve_mls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
1872
// Horizontal reductions: saddv/uaddv widen to i64, the others return the
// element type.
1873 def int_aarch64_sve_saddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
1874 def int_aarch64_sve_uaddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
1875
1876 def int_aarch64_sve_smaxv : AdvSIMD_SVE_Reduce_Intrinsic;
1877 def int_aarch64_sve_umaxv : AdvSIMD_SVE_Reduce_Intrinsic;
1878 def int_aarch64_sve_sminv : AdvSIMD_SVE_Reduce_Intrinsic;
1879 def int_aarch64_sve_uminv : AdvSIMD_SVE_Reduce_Intrinsic;
1880
1881 def int_aarch64_sve_orv : AdvSIMD_SVE_Reduce_Intrinsic;
1882 def int_aarch64_sve_eorv : AdvSIMD_SVE_Reduce_Intrinsic;
1883 def int_aarch64_sve_andv : AdvSIMD_SVE_Reduce_Intrinsic;
1884
1885 def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
1886 def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
1887
1888 def int_aarch64_sve_sdot : AdvSIMD_SVE_DOT_Intrinsic;
1889 def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
1890
1891 def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
1892 def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
1893
// Unpredicated saturating arithmetic (`_x` forms take no predicate).
1894 def int_aarch64_sve_sqadd_x : AdvSIMD_2VectorArg_Intrinsic;
1895 def int_aarch64_sve_sqsub_x : AdvSIMD_2VectorArg_Intrinsic;
1896 def int_aarch64_sve_uqadd_x : AdvSIMD_2VectorArg_Intrinsic;
1897 def int_aarch64_sve_uqsub_x : AdvSIMD_2VectorArg_Intrinsic;
1898
// Quadword (128-bit segment) reductions.
1899 def int_aarch64_sve_orqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1900 def int_aarch64_sve_eorqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1901 def int_aarch64_sve_andqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1902 def int_aarch64_sve_addqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1903 def int_aarch64_sve_smaxqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1904 def int_aarch64_sve_umaxqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1905 def int_aarch64_sve_sminqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1906 def int_aarch64_sve_uminqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
1907
1908
1909 // Shifts
// The `_wide` forms use the ShiftWide class (shift amount vector is wider
// than the data elements); asrd takes an immediate shift via ShiftByImm.
1910
1911 def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic;
1912 def int_aarch64_sve_asr_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1913 def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
1914 def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic;
1915 def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic;
1916 def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic;
1917 def int_aarch64_sve_lsl_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1918 def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
1919 def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic;
1920 def int_aarch64_sve_lsr_u : AdvSIMD_Pred2VectorArg_Intrinsic;
1921 def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
1922
1923 //
1924 // Integer comparisons
1925 //
// All comparisons produce a predicate result (see the *Compare* classes).
1926
1927 def int_aarch64_sve_cmpeq : AdvSIMD_SVE_Compare_Intrinsic;
1928 def int_aarch64_sve_cmpge : AdvSIMD_SVE_Compare_Intrinsic;
1929 def int_aarch64_sve_cmpgt : AdvSIMD_SVE_Compare_Intrinsic;
1930 def int_aarch64_sve_cmphi : AdvSIMD_SVE_Compare_Intrinsic;
1931 def int_aarch64_sve_cmphs : AdvSIMD_SVE_Compare_Intrinsic;
1932 def int_aarch64_sve_cmpne : AdvSIMD_SVE_Compare_Intrinsic;
1933
1934 def int_aarch64_sve_cmpeq_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1935 def int_aarch64_sve_cmpge_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1936 def int_aarch64_sve_cmpgt_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1937 def int_aarch64_sve_cmphi_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1938 def int_aarch64_sve_cmphs_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1939 def int_aarch64_sve_cmple_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1940 def int_aarch64_sve_cmplo_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1941 def int_aarch64_sve_cmpls_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1942 def int_aarch64_sve_cmplt_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1943 def int_aarch64_sve_cmpne_wide : AdvSIMD_SVE_CompareWide_Intrinsic;
1944
1945 //
1946 // Counting bits
1947 //
1948
1949 def int_aarch64_sve_cls : AdvSIMD_Merged1VectorArg_Intrinsic;
1950 def int_aarch64_sve_clz : AdvSIMD_Merged1VectorArg_Intrinsic;
1951 def int_aarch64_sve_cnt : AdvSIMD_SVE_CNT_Intrinsic;
1952
1953 //
1954 // Counting elements
1955 //
1956
// cnt[bhwd]: element-count queries at byte/half/word/double granularity.
1957 def int_aarch64_sve_cntb : AdvSIMD_SVE_CNTB_Intrinsic;
1958 def int_aarch64_sve_cnth : AdvSIMD_SVE_CNTB_Intrinsic;
1959 def int_aarch64_sve_cntw : AdvSIMD_SVE_CNTB_Intrinsic;
1960 def int_aarch64_sve_cntd : AdvSIMD_SVE_CNTB_Intrinsic;
1961
1962 def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic;
1963
1964 //
1965 // FFR manipulation
1966 //
// The first-fault register is modeled as inaccessible memory
// (IntrInaccessibleMemOnly) so reads/writes of it order correctly against
// the first-faulting loads above.  Each def also maps to a Clang builtin.
1967
1968 def int_aarch64_sve_rdffr : ClangBuiltin<"__builtin_sve_svrdffr">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [], [IntrReadMem, IntrInaccessibleMemOnly]>;
1969 def int_aarch64_sve_rdffr_z : ClangBuiltin<"__builtin_sve_svrdffr_z">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
1970 def int_aarch64_sve_setffr : ClangBuiltin<"__builtin_sve_svsetffr">, DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>;
1971 def int_aarch64_sve_wrffr : ClangBuiltin<"__builtin_sve_svwrffr">, DefaultAttrsIntrinsic<[], [llvm_nxv16i1_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
1972
1973 //
1974 // Saturating scalar arithmetic
1975 //
// Pattern: for each of {sq,uq} x {dec,inc} x {b,h,w,d,p} there is a vector
// form (WithPattern / Saturating class) plus `_n32`/`_n64` scalar forms
// parameterized by the scalar's type.  The `p` forms count predicate
// elements (plain Saturating class, no pattern operand).
1976
1977 def int_aarch64_sve_sqdech : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
1978 def int_aarch64_sve_sqdecw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
1979 def int_aarch64_sve_sqdecd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
1980 def int_aarch64_sve_sqdecp : AdvSIMD_SVE_Saturating_Intrinsic;
1981
1982 def int_aarch64_sve_sqdecb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
1983 def int_aarch64_sve_sqdecb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
1984 def int_aarch64_sve_sqdech_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
1985 def int_aarch64_sve_sqdech_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
1986 def int_aarch64_sve_sqdecw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
1987 def int_aarch64_sve_sqdecw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
1988 def int_aarch64_sve_sqdecd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
1989 def int_aarch64_sve_sqdecd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
1990 def int_aarch64_sve_sqdecp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
1991 def int_aarch64_sve_sqdecp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
1992
1993 def int_aarch64_sve_sqinch : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
1994 def int_aarch64_sve_sqincw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
1995 def int_aarch64_sve_sqincd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
1996 def int_aarch64_sve_sqincp : AdvSIMD_SVE_Saturating_Intrinsic;
1997
1998 def int_aarch64_sve_sqincb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
1999 def int_aarch64_sve_sqincb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2000 def int_aarch64_sve_sqinch_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2001 def int_aarch64_sve_sqinch_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2002 def int_aarch64_sve_sqincw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2003 def int_aarch64_sve_sqincw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2004 def int_aarch64_sve_sqincd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2005 def int_aarch64_sve_sqincd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2006 def int_aarch64_sve_sqincp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
2007 def int_aarch64_sve_sqincp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
2008
2009 def int_aarch64_sve_uqdech : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
2010 def int_aarch64_sve_uqdecw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
2011 def int_aarch64_sve_uqdecd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
2012 def int_aarch64_sve_uqdecp : AdvSIMD_SVE_Saturating_Intrinsic;
2013
2014 def int_aarch64_sve_uqdecb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2015 def int_aarch64_sve_uqdecb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2016 def int_aarch64_sve_uqdech_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2017 def int_aarch64_sve_uqdech_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2018 def int_aarch64_sve_uqdecw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2019 def int_aarch64_sve_uqdecw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2020 def int_aarch64_sve_uqdecd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2021 def int_aarch64_sve_uqdecd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2022 def int_aarch64_sve_uqdecp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
2023 def int_aarch64_sve_uqdecp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
2024
2025 def int_aarch64_sve_uqinch : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
2026 def int_aarch64_sve_uqincw : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
2027 def int_aarch64_sve_uqincd : AdvSIMD_SVE_SaturatingWithPattern_Intrinsic;
2028 def int_aarch64_sve_uqincp : AdvSIMD_SVE_Saturating_Intrinsic;
2029
2030 def int_aarch64_sve_uqincb_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2031 def int_aarch64_sve_uqincb_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2032 def int_aarch64_sve_uqinch_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2033 def int_aarch64_sve_uqinch_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2034 def int_aarch64_sve_uqincw_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2035 def int_aarch64_sve_uqincw_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2036 def int_aarch64_sve_uqincd_n32 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i32_ty>;
2037 def int_aarch64_sve_uqincd_n64 : AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<llvm_i64_ty>;
2038 def int_aarch64_sve_uqincp_n32 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i32_ty>;
2039 def int_aarch64_sve_uqincp_n64 : AdvSIMD_SVE_Saturating_N_Intrinsic<llvm_i64_ty>;
2040
2041 //
2042 // Reversal
2043 //
2044
2045 def int_aarch64_sve_rbit : AdvSIMD_Merged1VectorArg_Intrinsic;
2046 def int_aarch64_sve_revb : AdvSIMD_Merged1VectorArg_Intrinsic;
2047 def int_aarch64_sve_revh : AdvSIMD_Merged1VectorArg_Intrinsic;
2048 def int_aarch64_sve_revw : AdvSIMD_Merged1VectorArg_Intrinsic;
2049
2050 //
2051 // Permutations and selection
2052 //
// The `_b16`/`_b32`/`_b64` variants are predicate-only forms that operate
// on whole svbool (nxv16i1) values via the *SVBoolArg classes; the `q`
// suffix (trn1q etc.) denotes the quadword-granule permutes.
2053
2054 def int_aarch64_sve_clasta : AdvSIMD_Pred2VectorArg_Intrinsic;
2055 def int_aarch64_sve_clasta_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
2056 def int_aarch64_sve_clastb : AdvSIMD_Pred2VectorArg_Intrinsic;
2057 def int_aarch64_sve_clastb_n : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
2058 def int_aarch64_sve_compact : AdvSIMD_Pred1VectorArg_Intrinsic;
2059 def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic;
2060 def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic;
2061 def int_aarch64_sve_ext : AdvSIMD_2VectorArgIndexed_Intrinsic;
2062 def int_aarch64_sve_sel : AdvSIMD_Pred2VectorArg_Intrinsic;
2063 def int_aarch64_sve_lasta : AdvSIMD_SVE_Reduce_Intrinsic;
2064 def int_aarch64_sve_lastb : AdvSIMD_SVE_Reduce_Intrinsic;
2065 def int_aarch64_sve_rev : AdvSIMD_1VectorArg_Intrinsic;
2066 def int_aarch64_sve_rev_b16 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
2067 def int_aarch64_sve_rev_b32 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
2068 def int_aarch64_sve_rev_b64 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
2069 def int_aarch64_sve_splice : AdvSIMD_Pred2VectorArg_Intrinsic;
2070 def int_aarch64_sve_sunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
2071 def int_aarch64_sve_sunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
2072 def int_aarch64_sve_tbl : AdvSIMD_SVE_TBL_Intrinsic;
2073 def int_aarch64_sve_trn1 : AdvSIMD_2VectorArg_Intrinsic;
2074 def int_aarch64_sve_trn1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2075 def int_aarch64_sve_trn1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2076 def int_aarch64_sve_trn1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2077 def int_aarch64_sve_trn2 : AdvSIMD_2VectorArg_Intrinsic;
2078 def int_aarch64_sve_trn2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2079 def int_aarch64_sve_trn2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2080 def int_aarch64_sve_trn2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2081 def int_aarch64_sve_trn1q : AdvSIMD_2VectorArg_Intrinsic;
2082 def int_aarch64_sve_trn2q : AdvSIMD_2VectorArg_Intrinsic;
2083 def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
2084 def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
2085 def int_aarch64_sve_uzp1 : AdvSIMD_2VectorArg_Intrinsic;
2086 def int_aarch64_sve_uzp1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2087 def int_aarch64_sve_uzp1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2088 def int_aarch64_sve_uzp1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2089 def int_aarch64_sve_uzp2 : AdvSIMD_2VectorArg_Intrinsic;
2090 def int_aarch64_sve_uzp2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2091 def int_aarch64_sve_uzp2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2092 def int_aarch64_sve_uzp2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2093 def int_aarch64_sve_uzp1q : AdvSIMD_2VectorArg_Intrinsic;
2094 def int_aarch64_sve_uzp2q : AdvSIMD_2VectorArg_Intrinsic;
2095 def int_aarch64_sve_zip1 : AdvSIMD_2VectorArg_Intrinsic;
2096 def int_aarch64_sve_zip1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2097 def int_aarch64_sve_zip1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2098 def int_aarch64_sve_zip1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2099 def int_aarch64_sve_zip2 : AdvSIMD_2VectorArg_Intrinsic;
2100 def int_aarch64_sve_zip2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2101 def int_aarch64_sve_zip2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2102 def int_aarch64_sve_zip2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
2103 def int_aarch64_sve_zip1q : AdvSIMD_2VectorArg_Intrinsic;
2104 def int_aarch64_sve_zip2q : AdvSIMD_2VectorArg_Intrinsic;
2105
2106 //
2107 // Logical operations
2108 //
2109
2110 def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic;
2111 def int_aarch64_sve_and_u: AdvSIMD_Pred2VectorArg_Intrinsic;
2112 def int_aarch64_sve_bic : AdvSIMD_Pred2VectorArg_Intrinsic;
2113 def int_aarch64_sve_bic_u: AdvSIMD_Pred2VectorArg_Intrinsic;
2114 def int_aarch64_sve_cnot : AdvSIMD_Merged1VectorArg_Intrinsic;
2115 def int_aarch64_sve_eor : AdvSIMD_Pred2VectorArg_Intrinsic;
2116 def int_aarch64_sve_eor_u: AdvSIMD_Pred2VectorArg_Intrinsic;
2117 def int_aarch64_sve_not : AdvSIMD_Merged1VectorArg_Intrinsic;
2118 def int_aarch64_sve_orr : AdvSIMD_Pred2VectorArg_Intrinsic;
2119 def int_aarch64_sve_orr_u: AdvSIMD_Pred2VectorArg_Intrinsic;
2120
2121 //
2122 // Conversion
2123 //
// Predicated in-register sign/zero extensions (merging forms).
2124
2125 def int_aarch64_sve_sxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
2126 def int_aarch64_sve_sxth : AdvSIMD_Merged1VectorArg_Intrinsic;
2127 def int_aarch64_sve_sxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
2128 def int_aarch64_sve_uxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
2129 def int_aarch64_sve_uxth : AdvSIMD_Merged1VectorArg_Intrinsic;
2130 def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
2131
2132 //
2133 // While comparisons
2134 //
// Each takes two scalar counters and produces a predicate-style vector
// (see AdvSIMD_SVE_WHILE_Intrinsic above).
2135
2136 def int_aarch64_sve_whilele : AdvSIMD_SVE_WHILE_Intrinsic;
2137 def int_aarch64_sve_whilelo : AdvSIMD_SVE_WHILE_Intrinsic;
2138 def int_aarch64_sve_whilels : AdvSIMD_SVE_WHILE_Intrinsic;
2139 def int_aarch64_sve_whilelt : AdvSIMD_SVE_WHILE_Intrinsic;
2140 def int_aarch64_sve_whilege : AdvSIMD_SVE_WHILE_Intrinsic;
2141 def int_aarch64_sve_whilegt : AdvSIMD_SVE_WHILE_Intrinsic;
2142 def int_aarch64_sve_whilehs : AdvSIMD_SVE_WHILE_Intrinsic;
2143 def int_aarch64_sve_whilehi : AdvSIMD_SVE_WHILE_Intrinsic;
2144
2145 //
2146 // Floating-point arithmetic
2147 //
// Same conventions as the integer section: `_u` defs share the predicated
// signature, `_lane` defs add an immediate index, and `_x` defs are the
// unpredicated forms.
2148
2149 def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
2150 def int_aarch64_sve_fabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2151 def int_aarch64_sve_fabs : AdvSIMD_Merged1VectorArg_Intrinsic;
2152 def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2153 def int_aarch64_sve_fadd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2154 def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic;
2155 def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic;
2156 def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
2157 def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
2158 def int_aarch64_sve_fdiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2159 def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
2160 def int_aarch64_sve_fexpa_x : AdvSIMD_SVE_EXPA_Intrinsic;
2161 def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic;
2162 def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
2163 def int_aarch64_sve_fmax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2164 def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
2165 def int_aarch64_sve_fmaxnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2166 def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
2167 def int_aarch64_sve_fmin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2168 def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
2169 def int_aarch64_sve_fminnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2170 def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic;
2171 def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
2172 def int_aarch64_sve_fmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
2173 def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic;
2174 def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
2175 def int_aarch64_sve_fmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
2176 def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
2177 def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
2178 def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
2179 def int_aarch64_sve_fmul_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2180 def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
2181 def int_aarch64_sve_fmulx_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2182 def int_aarch64_sve_fneg : AdvSIMD_Merged1VectorArg_Intrinsic;
2183 def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic;
2184 def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic;
2185 def int_aarch64_sve_fnmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
2186 def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic;
2187 def int_aarch64_sve_fnmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
2188 def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
2189 def int_aarch64_sve_frecpe_x : AdvSIMD_1VectorArg_Intrinsic;
2190 def int_aarch64_sve_frecps_x : AdvSIMD_2VectorArg_Intrinsic;
2191 def int_aarch64_sve_frecpx : AdvSIMD_Merged1VectorArg_Intrinsic;
2192 def int_aarch64_sve_frinta : AdvSIMD_Merged1VectorArg_Intrinsic;
2193 def int_aarch64_sve_frinti : AdvSIMD_Merged1VectorArg_Intrinsic;
2194 def int_aarch64_sve_frintm : AdvSIMD_Merged1VectorArg_Intrinsic;
2195 def int_aarch64_sve_frintn : AdvSIMD_Merged1VectorArg_Intrinsic;
2196 def int_aarch64_sve_frintp : AdvSIMD_Merged1VectorArg_Intrinsic;
2197 def int_aarch64_sve_frintx : AdvSIMD_Merged1VectorArg_Intrinsic;
2198 def int_aarch64_sve_frintz : AdvSIMD_Merged1VectorArg_Intrinsic;
2199 def int_aarch64_sve_frsqrte_x : AdvSIMD_1VectorArg_Intrinsic;
2200 def int_aarch64_sve_frsqrts_x : AdvSIMD_2VectorArg_Intrinsic;
2201 def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
2202 def int_aarch64_sve_fsqrt : AdvSIMD_Merged1VectorArg_Intrinsic;
2203 def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
2204 def int_aarch64_sve_fsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2205 def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
2206 def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
2207 def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
2208 def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic;
2209
2210 //
2211 // Floating-point reductions
2212 //
// fadda is the ordered reduction with an initial value; the others return
// a scalar element, and the *qv defs reduce per 128-bit quadword.
2213
2214 def int_aarch64_sve_fadda : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
2215 def int_aarch64_sve_faddv : AdvSIMD_SVE_Reduce_Intrinsic;
2216 def int_aarch64_sve_fmaxv : AdvSIMD_SVE_Reduce_Intrinsic;
2217 def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic;
2218 def int_aarch64_sve_fminv : AdvSIMD_SVE_Reduce_Intrinsic;
2219 def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic;
2220
2221 def int_aarch64_sve_faddqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
2222 def int_aarch64_sve_fmaxnmqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
2223 def int_aarch64_sve_fminnmqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
2224 def int_aarch64_sve_fmaxqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
2225 def int_aarch64_sve_fminqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
2226
2227 //
2228 // Floating-point conversions
2229 //
2230
2231 def int_aarch64_sve_fcvt : AdvSIMD_SVE_FCVT_Intrinsic;
2232 def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic;
2233 def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic;
2234 def int_aarch64_sve_scvtf : AdvSIMD_SVE_SCVTF_Intrinsic;
2235 def int_aarch64_sve_ucvtf : AdvSIMD_SVE_SCVTF_Intrinsic;
2236
2237 //
2238 // Floating-point comparisons
2239 //
2240
2241 def int_aarch64_sve_facge : AdvSIMD_SVE_Compare_Intrinsic;
2242 def int_aarch64_sve_facgt : AdvSIMD_SVE_Compare_Intrinsic;
2243
2244 def int_aarch64_sve_fcmpeq : AdvSIMD_SVE_Compare_Intrinsic;
2245 def int_aarch64_sve_fcmpge : AdvSIMD_SVE_Compare_Intrinsic;
2246 def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic;
2247 def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic;
2248 def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic;
2249
2250 def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
2251 def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2252 def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
2253 def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
2254
2255 def int_aarch64_sve_fcvt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2256 def int_aarch64_sve_fcvtnt_bf16f32_v2 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2257
2258 def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
2259 def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2260 def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
2261 def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
2262
2263 def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2264 def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2265 def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2266
2267 def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
2268 def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
2269 def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
2270
2271 def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
2272 def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
2273 def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
2274 def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2275
2276 def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2277 def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
2278
2279 def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
2280 def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
2281 def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
2282 def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
2283
2284 def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
2285 def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
2286 def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
2287 def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
2288
2289 //
2290 // Predicate creation
2291 //
2292
2293 def int_aarch64_sve_ptrue : AdvSIMD_SVE_PTRUE_Intrinsic;
2294
2295 //
2296 // Predicate operations
2297 //
2298
2299 def int_aarch64_sve_and_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2300 def int_aarch64_sve_bic_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2301 def int_aarch64_sve_brka : AdvSIMD_Merged1VectorArg_Intrinsic;
2302 def int_aarch64_sve_brka_z : AdvSIMD_Pred1VectorArg_Intrinsic;
2303 def int_aarch64_sve_brkb : AdvSIMD_Merged1VectorArg_Intrinsic;
2304 def int_aarch64_sve_brkb_z : AdvSIMD_Pred1VectorArg_Intrinsic;
2305 def int_aarch64_sve_brkn_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2306 def int_aarch64_sve_brkpa_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2307 def int_aarch64_sve_brkpb_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2308 def int_aarch64_sve_eor_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2309 def int_aarch64_sve_nand_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2310 def int_aarch64_sve_nor_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2311 def int_aarch64_sve_orn_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2312 def int_aarch64_sve_orr_z : AdvSIMD_Pred2VectorArg_Intrinsic;
2313 def int_aarch64_sve_pfirst : AdvSIMD_Pred1VectorArg_Intrinsic;
2314 def int_aarch64_sve_pnext : AdvSIMD_Pred1VectorArg_Intrinsic;
2315 def int_aarch64_sve_punpkhi : AdvSIMD_SVE_PUNPKHI_Intrinsic;
2316 def int_aarch64_sve_punpklo : AdvSIMD_SVE_PUNPKHI_Intrinsic;
2317
2318 //
2319 // Testing predicates
2320 //
2321
2322 def int_aarch64_sve_ptest_any : AdvSIMD_SVE_PTEST_Intrinsic;
2323 def int_aarch64_sve_ptest_first : AdvSIMD_SVE_PTEST_Intrinsic;
2324 def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;
2325
2326 //
2327 // Reinterpreting data
2328 //
2329
// Reinterpret a full-width svbool predicate (nxv16i1) as the overloaded
// result predicate type; no memory access.
2330 def int_aarch64_sve_convert_from_svbool : DefaultAttrsIntrinsic<[llvm_any_ty],
2331 [llvm_nxv16i1_ty],
2332 [IntrNoMem]>;
2333
// Inverse of the above: reinterpret an overloaded predicate type as the
// full-width svbool type (nxv16i1); no memory access.
2334 def int_aarch64_sve_convert_to_svbool : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
2335 [llvm_any_ty],
2336 [IntrNoMem]>;
2337
2338 //
2339 // Gather loads: scalar base + vector offsets
2340 //
2341
2342 // 64 bit unscaled offsets
2343 def int_aarch64_sve_ld1_gather : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;
2344
2345 // 64 bit scaled offsets
2346 def int_aarch64_sve_ld1_gather_index : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;
2347
2348 // 32 bit unscaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
2349 def int_aarch64_sve_ld1_gather_sxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;
2350 def int_aarch64_sve_ld1_gather_uxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;
2351
2352 // 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
2353 def int_aarch64_sve_ld1_gather_sxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;
2354 def int_aarch64_sve_ld1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;
2355
2356 // 128-bit loads, scaled offsets (indices)
2357 def int_aarch64_sve_ld1q_gather_index : AdvSIMD_GatherLoadQ_SV_Intrinsic;
2358
2359 // 128-bit loads, unscaled offsets
2360 def int_aarch64_sve_ld1q_gather_vector_offset : AdvSIMD_GatherLoadQ_SV_Intrinsic;
2361
2362 //
2363 // Gather loads: vector base + scalar offset
2364 //
2365
2366 def int_aarch64_sve_ld1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;
2367
2368 // 128-bit loads, unscaled offsets
2369 def int_aarch64_sve_ld1q_gather_scalar_offset : AdvSIMD_GatherLoadQ_VS_Intrinsic;
2370
2371 //
2372 // First-faulting gather loads: scalar base + vector offsets
2373 //
2374
2375 // 64 bit unscaled offsets
2376 def int_aarch64_sve_ldff1_gather : AdvSIMD_GatherLoad_SV_64b_Offsets_WriteFFR_Intrinsic;
2377
2378 // 64 bit scaled offsets
2379 def int_aarch64_sve_ldff1_gather_index : AdvSIMD_GatherLoad_SV_64b_Offsets_WriteFFR_Intrinsic;
2380
2381 // 32 bit unscaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
2382 def int_aarch64_sve_ldff1_gather_sxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_WriteFFR_Intrinsic;
2383 def int_aarch64_sve_ldff1_gather_uxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_WriteFFR_Intrinsic;
2384
2385 // 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
2386 def int_aarch64_sve_ldff1_gather_sxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_WriteFFR_Intrinsic;
2387 def int_aarch64_sve_ldff1_gather_uxtw_index : AdvSIMD_GatherLoad_SV_32b_Offsets_WriteFFR_Intrinsic;
2388
2389 //
2390 // First-faulting gather loads: vector base + scalar offset
2391 //
2392
2393 def int_aarch64_sve_ldff1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_WriteFFR_Intrinsic;
2394
2395
2396 //
2397 // Non-temporal gather loads: scalar base + vector offsets
2398 //
2399
2400 // 64 bit unscaled offsets
2401 def int_aarch64_sve_ldnt1_gather : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;
2402
2403 // 64 bit indices
2404 def int_aarch64_sve_ldnt1_gather_index : AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic;
2405
2406 // 32 bit unscaled offsets, zero (uxtw) extended to 64 bits
2407 def int_aarch64_sve_ldnt1_gather_uxtw : AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic;
2408
2409 //
2410 // Non-temporal gather loads: vector base + scalar offset
2411 //
2412
2413 def int_aarch64_sve_ldnt1_gather_scalar_offset : AdvSIMD_GatherLoad_VS_Intrinsic;
2414
2415 //
2416 // Scatter stores: scalar base + vector offsets
2417 //
2418
2419 // 64 bit unscaled offsets
2420 def int_aarch64_sve_st1_scatter : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;
2421
2422 // 64 bit scaled offsets
2423 def int_aarch64_sve_st1_scatter_index
2424 : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;
2425
2426 // 32 bit unscaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
2427 def int_aarch64_sve_st1_scatter_sxtw
2428 : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic;
2429
2430 def int_aarch64_sve_st1_scatter_uxtw
2431 : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic;
2432
2433 // 32 bit scaled offsets, sign (sxtw) or zero (uxtw) extended to 64 bits
2434 def int_aarch64_sve_st1_scatter_sxtw_index
2435 : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic;
2436
2437 def int_aarch64_sve_st1_scatter_uxtw_index
2438 : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic;
2439
2440 // 128-bit stores, scaled offsets (indices)
2441 def int_aarch64_sve_st1q_scatter_index : AdvSIMD_ScatterStoreQ_SV_Intrinsic;
2442
2443 // 128-bit stores, unscaled offsets
2444 def int_aarch64_sve_st1q_scatter_vector_offset : AdvSIMD_ScatterStoreQ_SV_Intrinsic;
2445
2446 //
2447 // Scatter stores: vector base + scalar offset
2448 //
2449
2450 def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic;
2451
2452 // 128-bit stores, unscaled offsets
2453 def int_aarch64_sve_st1q_scatter_scalar_offset : AdvSIMD_ScatterStoreQ_VS_Intrinsic;
2454
2455 //
2456 // Non-temporal scatter stores: scalar base + vector offsets
2457 //
2458
2459 // 64 bit unscaled offsets
2460 def int_aarch64_sve_stnt1_scatter : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;
2461
2462 // 64 bit indices
2463 def int_aarch64_sve_stnt1_scatter_index
2464 : AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic;
2465
2466 // 32 bit unscaled offsets, zero (uxtw) extended to 64 bits
2467 def int_aarch64_sve_stnt1_scatter_uxtw : AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic;
2468
2469 //
2470 // Non-temporal scatter stores: vector base + scalar offset
2471 //
2472
2473 def int_aarch64_sve_stnt1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intrinsic;
2474
2475 //
2476 // SVE2 - Uniform DSP operations
2477 //
2478
2479 def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic;
2480 def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2481 def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic;
2482 def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
2483 def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic;
2484 def int_aarch64_sve_sqabs : AdvSIMD_Merged1VectorArg_Intrinsic;
2485 def int_aarch64_sve_sqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2486 def int_aarch64_sve_sqdmulh : AdvSIMD_2VectorArg_Intrinsic;
2487 def int_aarch64_sve_sqdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
2488 def int_aarch64_sve_sqneg : AdvSIMD_Merged1VectorArg_Intrinsic;
2489 def int_aarch64_sve_sqrdmlah : AdvSIMD_3VectorArg_Intrinsic;
2490 def int_aarch64_sve_sqrdmlah_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
2491 def int_aarch64_sve_sqrdmlsh : AdvSIMD_3VectorArg_Intrinsic;
2492 def int_aarch64_sve_sqrdmlsh_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
2493 def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic;
2494 def int_aarch64_sve_sqrdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
2495 def int_aarch64_sve_sqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
2496 def int_aarch64_sve_sqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
2497 def int_aarch64_sve_sqshlu : AdvSIMD_SVE_ShiftByImm_Intrinsic;
2498 def int_aarch64_sve_sqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
2499 def int_aarch64_sve_sqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2500 def int_aarch64_sve_sqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
2501 def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2502 def int_aarch64_sve_sri : AdvSIMD_2VectorArgIndexed_Intrinsic;
2503 def int_aarch64_sve_srshl : AdvSIMD_Pred2VectorArg_Intrinsic;
2504 def int_aarch64_sve_srshr : AdvSIMD_SVE_ShiftByImm_Intrinsic;
2505 def int_aarch64_sve_srsra : AdvSIMD_2VectorArgIndexed_Intrinsic;
2506 def int_aarch64_sve_ssra : AdvSIMD_2VectorArgIndexed_Intrinsic;
2507 def int_aarch64_sve_suqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2508 def int_aarch64_sve_uaba : AdvSIMD_3VectorArg_Intrinsic;
2509 def int_aarch64_sve_uhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2510 def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic;
2511 def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
2512 def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2513 def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
2514 def int_aarch64_sve_uqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
2515 def int_aarch64_sve_uqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
2516 def int_aarch64_sve_uqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
2517 def int_aarch64_sve_uqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
2518 def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic;
2519 def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2520 def int_aarch64_sve_urshl : AdvSIMD_Pred2VectorArg_Intrinsic;
2521 def int_aarch64_sve_urshr : AdvSIMD_SVE_ShiftByImm_Intrinsic;
2522 def int_aarch64_sve_ursqrte : AdvSIMD_Merged1VectorArg_Intrinsic;
2523 def int_aarch64_sve_ursra : AdvSIMD_2VectorArgIndexed_Intrinsic;
2524 def int_aarch64_sve_usqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
2525 def int_aarch64_sve_usra : AdvSIMD_2VectorArgIndexed_Intrinsic;
2526
2527 //
2528 // SVE2 - Widening DSP operations
2529 //
2530
2531 def int_aarch64_sve_sabalb : SVE2_3VectorArg_Long_Intrinsic;
2532 def int_aarch64_sve_sabalt : SVE2_3VectorArg_Long_Intrinsic;
2533 def int_aarch64_sve_sabdlb : SVE2_2VectorArg_Long_Intrinsic;
2534 def int_aarch64_sve_sabdlt : SVE2_2VectorArg_Long_Intrinsic;
2535 def int_aarch64_sve_saddlb : SVE2_2VectorArg_Long_Intrinsic;
2536 def int_aarch64_sve_saddlt : SVE2_2VectorArg_Long_Intrinsic;
2537 def int_aarch64_sve_saddwb : SVE2_2VectorArg_Wide_Intrinsic;
2538 def int_aarch64_sve_saddwt : SVE2_2VectorArg_Wide_Intrinsic;
2539 def int_aarch64_sve_sshllb : SVE2_1VectorArg_Long_Intrinsic;
2540 def int_aarch64_sve_sshllt : SVE2_1VectorArg_Long_Intrinsic;
2541 def int_aarch64_sve_ssublb : SVE2_2VectorArg_Long_Intrinsic;
2542 def int_aarch64_sve_ssublt : SVE2_2VectorArg_Long_Intrinsic;
2543 def int_aarch64_sve_ssubwb : SVE2_2VectorArg_Wide_Intrinsic;
2544 def int_aarch64_sve_ssubwt : SVE2_2VectorArg_Wide_Intrinsic;
2545 def int_aarch64_sve_uabalb : SVE2_3VectorArg_Long_Intrinsic;
2546 def int_aarch64_sve_uabalt : SVE2_3VectorArg_Long_Intrinsic;
2547 def int_aarch64_sve_uabdlb : SVE2_2VectorArg_Long_Intrinsic;
2548 def int_aarch64_sve_uabdlt : SVE2_2VectorArg_Long_Intrinsic;
2549 def int_aarch64_sve_uaddlb : SVE2_2VectorArg_Long_Intrinsic;
2550 def int_aarch64_sve_uaddlt : SVE2_2VectorArg_Long_Intrinsic;
2551 def int_aarch64_sve_uaddwb : SVE2_2VectorArg_Wide_Intrinsic;
2552 def int_aarch64_sve_uaddwt : SVE2_2VectorArg_Wide_Intrinsic;
2553 def int_aarch64_sve_ushllb : SVE2_1VectorArg_Long_Intrinsic;
2554 def int_aarch64_sve_ushllt : SVE2_1VectorArg_Long_Intrinsic;
2555 def int_aarch64_sve_usublb : SVE2_2VectorArg_Long_Intrinsic;
2556 def int_aarch64_sve_usublt : SVE2_2VectorArg_Long_Intrinsic;
2557 def int_aarch64_sve_usubwb : SVE2_2VectorArg_Wide_Intrinsic;
2558 def int_aarch64_sve_usubwt : SVE2_2VectorArg_Wide_Intrinsic;
2559
2560 //
2561 // SVE2 - Non-widening pairwise arithmetic
2562 //
2563
2564 def int_aarch64_sve_addp : AdvSIMD_Pred2VectorArg_Intrinsic;
2565 def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
2566 def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
2567 def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
2568 def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;
2569 def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
2570 def int_aarch64_sve_smaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
2571 def int_aarch64_sve_sminp : AdvSIMD_Pred2VectorArg_Intrinsic;
2572 def int_aarch64_sve_umaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
2573 def int_aarch64_sve_uminp : AdvSIMD_Pred2VectorArg_Intrinsic;
2574
2575 //
2576 // SVE2 - Widening pairwise arithmetic
2577 //
2578
2579 def int_aarch64_sve_sadalp : SVE2_2VectorArg_Pred_Long_Intrinsic;
2580 def int_aarch64_sve_uadalp : SVE2_2VectorArg_Pred_Long_Intrinsic;
2581
2582 //
2583 // SVE2 - Uniform complex integer arithmetic
2584 //
2585
2586 def int_aarch64_sve_cadd_x : AdvSIMD_SVE2_CADD_Intrinsic;
2587 def int_aarch64_sve_sqcadd_x : AdvSIMD_SVE2_CADD_Intrinsic;
2588 def int_aarch64_sve_cmla_x : AdvSIMD_SVE2_CMLA_Intrinsic;
2589 def int_aarch64_sve_cmla_lane_x : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
2590 def int_aarch64_sve_sqrdcmlah_x : AdvSIMD_SVE2_CMLA_Intrinsic;
2591 def int_aarch64_sve_sqrdcmlah_lane_x : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
2592
2593 //
2594 // SVE2 - Widening complex integer arithmetic
2595 //
2596
2597 def int_aarch64_sve_saddlbt : SVE2_2VectorArg_Long_Intrinsic;
2598 def int_aarch64_sve_ssublbt : SVE2_2VectorArg_Long_Intrinsic;
2599 def int_aarch64_sve_ssubltb : SVE2_2VectorArg_Long_Intrinsic;
2600
2601 //
2602 // SVE2 - Widening complex integer dot product
2603 //
2604
2605 def int_aarch64_sve_cdot : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
2606 def int_aarch64_sve_cdot_lane : AdvSIMD_SVE_CDOT_LANE_Intrinsic;
2607
2608 //
2609 // SVE2 - Floating-point widening multiply-accumulate
2610 //
2611
2612 def int_aarch64_sve_fmlalb : SVE2_3VectorArg_Long_Intrinsic;
2613 def int_aarch64_sve_fmlalb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
2614 def int_aarch64_sve_fmlalt : SVE2_3VectorArg_Long_Intrinsic;
2615 def int_aarch64_sve_fmlalt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
2616 def int_aarch64_sve_fmlslb : SVE2_3VectorArg_Long_Intrinsic;
2617 def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
2618 def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic;
2619 def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
2620
2621 //
2622 // SVE2 - Floating-point integer binary logarithm
2623 //
2624
2625 def int_aarch64_sve_flogb : AdvSIMD_SVE_LOGB_Intrinsic;
2626
2627 //
2628 // SVE2 - Vector histogram count
2629 //
2630
2631 def int_aarch64_sve_histcnt : AdvSIMD_Pred2VectorArg_Intrinsic;
2632 def int_aarch64_sve_histseg : AdvSIMD_2VectorArg_Intrinsic;
2633
2634 //
2635 // SVE2 - Character match
2636 //
2637
2638 def int_aarch64_sve_match : AdvSIMD_SVE_Compare_Intrinsic;
2639 def int_aarch64_sve_nmatch : AdvSIMD_SVE_Compare_Intrinsic;
2640
2641 //
2642 // SVE2 - Unary narrowing operations
2643 //
2644
2645 def int_aarch64_sve_sqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;
2646 def int_aarch64_sve_sqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
2647 def int_aarch64_sve_sqxtunb : SVE2_1VectorArg_Narrowing_Intrinsic;
2648 def int_aarch64_sve_sqxtunt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
2649 def int_aarch64_sve_uqxtnb : SVE2_1VectorArg_Narrowing_Intrinsic;
2650 def int_aarch64_sve_uqxtnt : SVE2_Merged1VectorArg_Narrowing_Intrinsic;
2651
2652 //
2653 // SVE2 - Binary narrowing DSP operations
2654 //
2655 def int_aarch64_sve_addhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
2656 def int_aarch64_sve_addhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
2657
2658 def int_aarch64_sve_raddhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
2659 def int_aarch64_sve_raddhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
2660
2661 def int_aarch64_sve_subhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
2662 def int_aarch64_sve_subhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
2663
2664 def int_aarch64_sve_rsubhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
2665 def int_aarch64_sve_rsubhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
2666
2667 // Narrowing shift right
2668 def int_aarch64_sve_shrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2669 def int_aarch64_sve_shrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2670
2671 def int_aarch64_sve_rshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2672 def int_aarch64_sve_rshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2673
2674 // Saturating shift right - signed input/output
2675 def int_aarch64_sve_sqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2676 def int_aarch64_sve_sqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2677
2678 def int_aarch64_sve_sqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2679 def int_aarch64_sve_sqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2680
2681 // Saturating shift right - unsigned input/output
2682 def int_aarch64_sve_uqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2683 def int_aarch64_sve_uqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2684
2685 def int_aarch64_sve_uqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2686 def int_aarch64_sve_uqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2687
2688 // Saturating shift right - signed input, unsigned output
2689 def int_aarch64_sve_sqshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2690 def int_aarch64_sve_sqshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2691
2692 def int_aarch64_sve_sqrshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
2693 def int_aarch64_sve_sqrshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
2694
2695 // SVE2 MLA LANE.
2696 def int_aarch64_sve_smlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2697 def int_aarch64_sve_smlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2698 def int_aarch64_sve_umlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2699 def int_aarch64_sve_umlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2700 def int_aarch64_sve_smlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2701 def int_aarch64_sve_smlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2702 def int_aarch64_sve_umlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2703 def int_aarch64_sve_umlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2704 def int_aarch64_sve_smullb_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
2705 def int_aarch64_sve_smullt_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
2706 def int_aarch64_sve_umullb_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
2707 def int_aarch64_sve_umullt_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
2708 def int_aarch64_sve_sqdmlalb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2709 def int_aarch64_sve_sqdmlalt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2710 def int_aarch64_sve_sqdmlslb_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2711 def int_aarch64_sve_sqdmlslt_lane : SVE2_3VectorArg_Indexed_Intrinsic;
2712 def int_aarch64_sve_sqdmullb_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
2713 def int_aarch64_sve_sqdmullt_lane : SVE2_2VectorArgIndexed_Long_Intrinsic;
2714
2715 // SVE2 MLA Unpredicated.
2716 def int_aarch64_sve_smlalb : SVE2_3VectorArg_Long_Intrinsic;
2717 def int_aarch64_sve_smlalt : SVE2_3VectorArg_Long_Intrinsic;
2718 def int_aarch64_sve_umlalb : SVE2_3VectorArg_Long_Intrinsic;
2719 def int_aarch64_sve_umlalt : SVE2_3VectorArg_Long_Intrinsic;
2720 def int_aarch64_sve_smlslb : SVE2_3VectorArg_Long_Intrinsic;
2721 def int_aarch64_sve_smlslt : SVE2_3VectorArg_Long_Intrinsic;
2722 def int_aarch64_sve_umlslb : SVE2_3VectorArg_Long_Intrinsic;
2723 def int_aarch64_sve_umlslt : SVE2_3VectorArg_Long_Intrinsic;
2724 def int_aarch64_sve_smullb : SVE2_2VectorArg_Long_Intrinsic;
2725 def int_aarch64_sve_smullt : SVE2_2VectorArg_Long_Intrinsic;
2726 def int_aarch64_sve_umullb : SVE2_2VectorArg_Long_Intrinsic;
2727 def int_aarch64_sve_umullt : SVE2_2VectorArg_Long_Intrinsic;
2728
2729 def int_aarch64_sve_sqdmlalb : SVE2_3VectorArg_Long_Intrinsic;
2730 def int_aarch64_sve_sqdmlalt : SVE2_3VectorArg_Long_Intrinsic;
2731 def int_aarch64_sve_sqdmlslb : SVE2_3VectorArg_Long_Intrinsic;
2732 def int_aarch64_sve_sqdmlslt : SVE2_3VectorArg_Long_Intrinsic;
2733 def int_aarch64_sve_sqdmullb : SVE2_2VectorArg_Long_Intrinsic;
2734 def int_aarch64_sve_sqdmullt : SVE2_2VectorArg_Long_Intrinsic;
2735 def int_aarch64_sve_sqdmlalbt : SVE2_3VectorArg_Long_Intrinsic;
2736 def int_aarch64_sve_sqdmlslbt : SVE2_3VectorArg_Long_Intrinsic;
2737
2738 // SVE2 ADDSUB Long Unpredicated.
2739 def int_aarch64_sve_adclb : AdvSIMD_3VectorArg_Intrinsic;
2740 def int_aarch64_sve_adclt : AdvSIMD_3VectorArg_Intrinsic;
2741 def int_aarch64_sve_sbclb : AdvSIMD_3VectorArg_Intrinsic;
2742 def int_aarch64_sve_sbclt : AdvSIMD_3VectorArg_Intrinsic;
2743
2744 //
2745 // SVE2 - Polynomial arithmetic
2746 //
2747 def int_aarch64_sve_eorbt : AdvSIMD_3VectorArg_Intrinsic;
2748 def int_aarch64_sve_eortb : AdvSIMD_3VectorArg_Intrinsic;
2749 def int_aarch64_sve_pmullb_pair : AdvSIMD_2VectorArg_Intrinsic;
2750 def int_aarch64_sve_pmullt_pair : AdvSIMD_2VectorArg_Intrinsic;
2751
2752 //
2753 // SVE2 bitwise ternary operations.
2754 //
2755 def int_aarch64_sve_eor3 : AdvSIMD_3VectorArg_Intrinsic;
2756 def int_aarch64_sve_bcax : AdvSIMD_3VectorArg_Intrinsic;
2757 def int_aarch64_sve_bsl : AdvSIMD_3VectorArg_Intrinsic;
2758 def int_aarch64_sve_bsl1n : AdvSIMD_3VectorArg_Intrinsic;
2759 def int_aarch64_sve_bsl2n : AdvSIMD_3VectorArg_Intrinsic;
2760 def int_aarch64_sve_nbsl : AdvSIMD_3VectorArg_Intrinsic;
2761 def int_aarch64_sve_xar : AdvSIMD_2VectorArgIndexed_Intrinsic;
2762
2763 //
2764 // SVE2 - Optional AES, SHA-3 and SM4
2765 //
2766
2767 def int_aarch64_sve_aesd : ClangBuiltin<"__builtin_sve_svaesd_u8">,
2768 DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
2769 [llvm_nxv16i8_ty, llvm_nxv16i8_ty],
2770 [IntrNoMem]>;
2771 def int_aarch64_sve_aesimc : ClangBuiltin<"__builtin_sve_svaesimc_u8">,
2772 DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
2773 [llvm_nxv16i8_ty],
2774 [IntrNoMem]>;
2775 def int_aarch64_sve_aese : ClangBuiltin<"__builtin_sve_svaese_u8">,
2776 DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
2777 [llvm_nxv16i8_ty, llvm_nxv16i8_ty],
2778 [IntrNoMem]>;
2779 def int_aarch64_sve_aesmc : ClangBuiltin<"__builtin_sve_svaesmc_u8">,
2780 DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
2781 [llvm_nxv16i8_ty],
2782 [IntrNoMem]>;
2783 def int_aarch64_sve_rax1 : ClangBuiltin<"__builtin_sve_svrax1_u64">,
2784 DefaultAttrsIntrinsic<[llvm_nxv2i64_ty],
2785 [llvm_nxv2i64_ty, llvm_nxv2i64_ty],
2786 [IntrNoMem]>;
2787 def int_aarch64_sve_sm4e : ClangBuiltin<"__builtin_sve_svsm4e_u32">,
2788 DefaultAttrsIntrinsic<[llvm_nxv4i32_ty],
2789 [llvm_nxv4i32_ty, llvm_nxv4i32_ty],
2790 [IntrNoMem]>;
2791 def int_aarch64_sve_sm4ekey : ClangBuiltin<"__builtin_sve_svsm4ekey_u32">,
2792 DefaultAttrsIntrinsic<[llvm_nxv4i32_ty],
2793 [llvm_nxv4i32_ty, llvm_nxv4i32_ty],
2794 [IntrNoMem]>;
2795 //
2796 // SVE2 - Extended table lookup/permute
2797 //
2798
2799 def int_aarch64_sve_tbl2 : AdvSIMD_SVE2_TBX_Intrinsic;
2800 def int_aarch64_sve_tbx : AdvSIMD_SVE2_TBX_Intrinsic;
2801
2802 //
2803 // SVE2 - Lookup Table
2804 //
2805
// NOTE(review): the referenced class name "SVE2_LUTI_Inrinsic" is misspelled
// (missing the 't' in "Intrinsic"). It must match the class definition
// elsewhere in this file, so the spelling is intentionally kept here; fixing
// it would require renaming the class at its definition site as well.
2806 def int_aarch64_sve_luti2_lane : SVE2_LUTI_Inrinsic;
2807 def int_aarch64_sve_luti4_lane : SVE2_LUTI_Inrinsic;
// Two-vector LUTI4 variant: two data vectors of the overloaded type, an
// nxv16i8 vector (presumably the lookup indices — verify against the ACLE
// spec) and an i32 that must be an immediate (ImmArg on ArgIndex<3>).
2808 def int_aarch64_sve_luti4_lane_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
2809 [LLVMMatchType<0>,
2810 LLVMMatchType<0>,
2811 llvm_nxv16i8_ty,
2812 llvm_i32_ty],
2813 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
2814
2815 //
2816 // SVE2 - Optional bit permutation
2817 //
2818
2819 def int_aarch64_sve_bdep_x : AdvSIMD_2VectorArg_Intrinsic;
2820 def int_aarch64_sve_bext_x : AdvSIMD_2VectorArg_Intrinsic;
2821 def int_aarch64_sve_bgrp_x : AdvSIMD_2VectorArg_Intrinsic;
2822
2823
2824 //
2825 // SVE ACLE: 7.3. INT8 matrix multiply extensions
2826 //
2827 def int_aarch64_sve_ummla : SVE_MatMul_Intrinsic;
2828 def int_aarch64_sve_smmla : SVE_MatMul_Intrinsic;
2829 def int_aarch64_sve_usmmla : SVE_MatMul_Intrinsic;
2830
2831 def int_aarch64_sve_usdot : AdvSIMD_SVE_DOT_Intrinsic;
2832 def int_aarch64_sve_usdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
2833 def int_aarch64_sve_sudot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
2834
2835 //
2836 // SVE ACLE: 7.4/5. FP64/FP32 matrix multiply extensions
2837 //
2838 def int_aarch64_sve_fmmla : AdvSIMD_3VectorArg_Intrinsic;
2839
2840 //
2841 // SVE ACLE: 7.2. BFloat16 extensions
2842 //
2843
2844 def int_aarch64_sve_bfdot : SVE_4Vec_BF16;
2845 def int_aarch64_sve_bfmlalb : SVE_4Vec_BF16;
2846 def int_aarch64_sve_bfmlalt : SVE_4Vec_BF16;
2847
2848 def int_aarch64_sve_bfmmla : SVE_4Vec_BF16;
2849
2850 def int_aarch64_sve_bfdot_lane_v2 : SVE_4Vec_BF16_Indexed;
2851 def int_aarch64_sve_bfmlalb_lane_v2 : SVE_4Vec_BF16_Indexed;
2852 def int_aarch64_sve_bfmlalt_lane_v2 : SVE_4Vec_BF16_Indexed;
2853
2854 //
2855 // SVE2.1 - Contiguous loads to multiple consecutive vectors
2856 //
2857
// Contiguous load of two consecutive vectors of the same element type from a
// scalar base pointer, taking a predicate-as-counter (svcount) operand.
// Reads memory only through its pointer argument (IntrArgMemOnly).
2858 class SVE2p1_Load_PN_X2_Intrinsic
2859 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
2860 [llvm_aarch64_svcount_ty, llvm_ptr_ty],
2861 [IntrReadMem, IntrArgMemOnly]>;
2862
// Same as the X2 variant above, but returns four vectors of the same
// element type.
2863 class SVE2p1_Load_PN_X4_Intrinsic
2864 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
2865 LLVMMatchType<0>, LLVMMatchType<0>],
2866 [llvm_aarch64_svcount_ty, llvm_ptr_ty],
2867 [IntrReadMem, IntrArgMemOnly]>;
2868
2869 def int_aarch64_sve_ld1_pn_x2 : SVE2p1_Load_PN_X2_Intrinsic;
2870 def int_aarch64_sve_ld1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic;
2871 def int_aarch64_sve_ldnt1_pn_x2 : SVE2p1_Load_PN_X2_Intrinsic;
2872 def int_aarch64_sve_ldnt1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic;
2873
2874 //
2875 // SVE2.1 - Contiguous loads to quadword (single vector)
2876 //
2877
// Contiguous quadword load of a single vector: governed by an nxv1i1
// predicate (one predicate element per 128-bit quadword granule) and a
// scalar base pointer. Reads memory only through its pointer argument.
2878 class SVE2p1_Single_Load_Quadword
2879 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
2880 [llvm_nxv1i1_ty, llvm_ptr_ty],
2881 [IntrReadMem, IntrArgMemOnly]>;
2882 def int_aarch64_sve_ld1uwq : SVE2p1_Single_Load_Quadword;
2883 def int_aarch64_sve_ld1udq : SVE2p1_Single_Load_Quadword;
2884
2885 //
2886 // SVE2.1 - Contiguous store from quadword (single vector)
2887 //
2888
// Contiguous quadword store of a single vector: the data vector, an nxv1i1
// predicate (one predicate element per 128-bit quadword granule) and a
// scalar base pointer. Writes memory only through its pointer argument.
2889 class SVE2p1_Single_Store_Quadword
2890 : DefaultAttrsIntrinsic<[],
2891 [llvm_anyvector_ty, llvm_nxv1i1_ty, llvm_ptr_ty],
2892 [IntrWriteMem, IntrArgMemOnly]>;
2893 def int_aarch64_sve_st1wq : SVE2p1_Single_Store_Quadword;
2894 def int_aarch64_sve_st1dq : SVE2p1_Single_Store_Quadword;
2895
2896
2897 def int_aarch64_sve_ld2q_sret : AdvSIMD_2Vec_PredLoad_Intrinsic;
2898 def int_aarch64_sve_ld3q_sret : AdvSIMD_3Vec_PredLoad_Intrinsic;
2899 def int_aarch64_sve_ld4q_sret : AdvSIMD_4Vec_PredLoad_Intrinsic;
2900
2901 def int_aarch64_sve_st2q : AdvSIMD_2Vec_PredStore_Intrinsic;
2902 def int_aarch64_sve_st3q : AdvSIMD_3Vec_PredStore_Intrinsic;
2903 def int_aarch64_sve_st4q : AdvSIMD_4Vec_PredStore_Intrinsic;
2904
2905 //
2906 // SVE2.1 - Contiguous stores to multiple consecutive vectors
2907 //
2908
// Contiguous store of two consecutive vectors of the same element type to a
// scalar base pointer, taking a predicate-as-counter (svcount) operand.
// Writes memory only through its pointer argument (IntrArgMemOnly).
2909 class SVE2p1_Store_PN_X2_Intrinsic
2910 : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>,
2911 llvm_aarch64_svcount_ty, llvm_ptr_ty ],
2912 [IntrWriteMem, IntrArgMemOnly]>;
2913
// Same as the X2 variant above, but stores four vectors of the same
// element type.
2914 class SVE2p1_Store_PN_X4_Intrinsic
2915 : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>,
2916 LLVMMatchType<0>, LLVMMatchType<0>,
2917 llvm_aarch64_svcount_ty, llvm_ptr_ty],
2918 [IntrWriteMem, IntrArgMemOnly]>;
2919
2920 def int_aarch64_sve_st1_pn_x2 : SVE2p1_Store_PN_X2_Intrinsic;
2921 def int_aarch64_sve_st1_pn_x4 : SVE2p1_Store_PN_X4_Intrinsic;
2922 def int_aarch64_sve_stnt1_pn_x2 : SVE2p1_Store_PN_X2_Intrinsic;
2923 def int_aarch64_sve_stnt1_pn_x4 : SVE2p1_Store_PN_X4_Intrinsic;
2924 }
2925
2926 //
2927 // SVE2 - Contiguous conflict detection
2928 //
2929
// WHILERW/WHILEWR: produce a governing predicate from two pointers for
// read-after-write / write-after-read hazard detection when vectorising.
// The _b/_h/_s/_d suffix selects the element size the predicate covers.
// (SVE2_CONFLICT_DETECT_Intrinsic is defined earlier in this file.)
2930 def int_aarch64_sve_whilerw_b : SVE2_CONFLICT_DETECT_Intrinsic;
2931 def int_aarch64_sve_whilerw_h : SVE2_CONFLICT_DETECT_Intrinsic;
2932 def int_aarch64_sve_whilerw_s : SVE2_CONFLICT_DETECT_Intrinsic;
2933 def int_aarch64_sve_whilerw_d : SVE2_CONFLICT_DETECT_Intrinsic;
2934 def int_aarch64_sve_whilewr_b : SVE2_CONFLICT_DETECT_Intrinsic;
2935 def int_aarch64_sve_whilewr_h : SVE2_CONFLICT_DETECT_Intrinsic;
2936 def int_aarch64_sve_whilewr_s : SVE2_CONFLICT_DETECT_Intrinsic;
2937 def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic;
2938
2939 // Scalable Matrix Extension (SME) Intrinsics
2940 let TargetPrefix = "aarch64" in {
// ZA tile load/store, parameterised by the governing-predicate type (which
// fixes the element size). Operands: predicate, base pointer, and two i32s;
// ImmArg<ArgIndex<2>> forces the first i32 (presumably the ZA tile number —
// confirm against the SME ACLE) to be a compile-time immediate, while the
// trailing i32 slice index may be a runtime value.
2941 class SME_Load_Store_Intrinsic<LLVMType pred_ty>
2942 : DefaultAttrsIntrinsic<[],
2943 [pred_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
2944
2945 // Loads
// One def per element size (b/h/w/d/q) and slice orientation
// (horizontal/vertical); the predicate type encodes the element count.
2946 def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
2947 def int_aarch64_sme_ld1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>;
2948 def int_aarch64_sme_ld1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>;
2949 def int_aarch64_sme_ld1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>;
2950 def int_aarch64_sme_ld1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>;
2951 def int_aarch64_sme_ld1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
2952 def int_aarch64_sme_ld1h_vert : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>;
2953 def int_aarch64_sme_ld1w_vert : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>;
2954 def int_aarch64_sme_ld1d_vert : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>;
2955 def int_aarch64_sme_ld1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>;
2956
2957 // Stores
// Same parameterisation as the loads above.
2958 def int_aarch64_sme_st1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
2959 def int_aarch64_sme_st1h_horiz : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>;
2960 def int_aarch64_sme_st1w_horiz : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>;
2961 def int_aarch64_sme_st1d_horiz : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>;
2962 def int_aarch64_sme_st1q_horiz : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>;
2963 def int_aarch64_sme_st1b_vert : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
2964 def int_aarch64_sme_st1h_vert : SME_Load_Store_Intrinsic<llvm_nxv8i1_ty>;
2965 def int_aarch64_sme_st1w_vert : SME_Load_Store_Intrinsic<llvm_nxv4i1_ty>;
2966 def int_aarch64_sme_st1d_vert : SME_Load_Store_Intrinsic<llvm_nxv2i1_ty>;
2967 def int_aarch64_sme_st1q_vert : SME_Load_Store_Intrinsic<llvm_nxv1i1_ty>;
2968
2969 // Spill + fill
// LDR/STR ZA array spill and fill: operands are an i32, a pointer and an
// i32; no ImmArg, so both integer operands may be runtime values.
2970 class SME_LDR_STR_ZA_Intrinsic
2971 : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty]>;
2972 def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic;
2973 def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic;
2974
// Move a ZA tile slice to a vector: result is the overloaded vector type;
// operands are the merge/passthru vector, a predicate of matching width and
// two i32s, the first of which (ArgIndex<2>) must be an immediate.
2975 class SME_TileToVector_Intrinsic
2976 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
2977 [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
2978 llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
// Inverse direction: write a vector into a ZA tile slice. Here the first
// i32 (ArgIndex<0>) is the immediate operand.
2979 class SME_VectorToTile_Intrinsic
2980 : DefaultAttrsIntrinsic<[],
2981 [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
2982 llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
2983
// MOVA tile<->vector moves, horizontal and vertical slice forms.
2984 def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic;
2985 def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic;
2986 def int_aarch64_sme_write_horiz : SME_VectorToTile_Intrinsic;
2987 def int_aarch64_sme_write_vert : SME_VectorToTile_Intrinsic;
2988
// Quadword-granule variants of the moves above.
2989 def int_aarch64_sme_readq_horiz : SME_TileToVector_Intrinsic;
2990 def int_aarch64_sme_readq_vert : SME_TileToVector_Intrinsic;
2991 def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
2992 def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
2993
// MOVAZ: read two vectors from ZA and zero the source slices. IntrNoMem +
// IntrHasSideEffects models "touches no ordinary memory, but mutates ZA
// state"; the first i32 (tile) must be an immediate.
2994 class SME_MOVAZ_TileToVector_X2_Intrinsic
2995 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
2996 [llvm_i32_ty, llvm_i32_ty],
2997 [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
2998
// Four-vector MOVAZ variant.
2999 class SME_MOVAZ_TileToVector_X4_Intrinsic
3000 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3001 LLVMMatchType<0>,LLVMMatchType<0>],
3002 [llvm_i32_ty, llvm_i32_ty],
3003 [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
3004
3005 def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
3006 def int_aarch64_sme_readz_vert_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
3007
3008 def int_aarch64_sme_readz_horiz_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
3009 def int_aarch64_sme_readz_vert_x4 : SME_MOVAZ_TileToVector_X4_Intrinsic;
3010
// Single-vector MOVAZ: one result vector, immediate tile + slice index.
3011 class SME_MOVAZ_TileToVector_Intrinsic
3012 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
3013 [llvm_i32_ty, llvm_i32_ty],
3014 [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
3015
3016 def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic;
3017 def int_aarch64_sme_readz_vert : SME_MOVAZ_TileToVector_Intrinsic;
3018
3019 def int_aarch64_sme_readz_q_horiz : SME_MOVAZ_TileToVector_Intrinsic;
3020 def int_aarch64_sme_readz_q_vert : SME_MOVAZ_TileToVector_Intrinsic;
3021
// ZA-array (vector-group) MOVAZ: single i32 slice operand, no ImmArg, so
// the slice index may be a runtime value.
3022 def int_aarch64_sme_readz_x2
3023 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3024 [llvm_i32_ty],
3025 [IntrNoMem, IntrHasSideEffects]>;
3026
3027 def int_aarch64_sme_readz_x4
3028 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
3029 [llvm_i32_ty],
3030 [IntrNoMem, IntrHasSideEffects]>;
3031
// Write to the ZT0 lookup-table register; both i32 operands (table id and
// lane) must be immediates for the lane form.
3032 def int_aarch64_sme_write_lane_zt
3033 : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty],
3034 [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrNoMem, IntrHasSideEffects]>;
3035
3036 def int_aarch64_sme_write_zt
3037 : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty],
3038 [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
3039
3040
// ZERO: clear ZA tiles selected by an immediate mask.
3041 def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
// Query PSTATE.SM; exposed to C via __builtin_arm_in_streaming_mode.
3042 def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">;
3043
// Outer product accumulate/subtract into a ZA tile: operands are the
// immediate tile number, two governing predicates, and the two source
// vectors (second one carries the overload).
3044 class SME_OuterProduct_Intrinsic
3045 : DefaultAttrsIntrinsic<[],
3046 [llvm_i32_ty,
3047 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
3048 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
3049 LLVMMatchType<0>,
3050 llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
3051
// FMOPA/FMOPS and the widening integer/float forms; s/u/su/us prefixes
// give the signedness of each operand.
3052 def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
3053 def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
3054
3055 def int_aarch64_sme_mopa_wide : SME_OuterProduct_Intrinsic;
3056 def int_aarch64_sme_mops_wide : SME_OuterProduct_Intrinsic;
3057
3058 def int_aarch64_sme_smopa_wide : SME_OuterProduct_Intrinsic;
3059 def int_aarch64_sme_smops_wide : SME_OuterProduct_Intrinsic;
3060 def int_aarch64_sme_umopa_wide : SME_OuterProduct_Intrinsic;
3061 def int_aarch64_sme_umops_wide : SME_OuterProduct_Intrinsic;
3062 def int_aarch64_sme_sumopa_wide : SME_OuterProduct_Intrinsic;
3063 def int_aarch64_sme_sumops_wide : SME_OuterProduct_Intrinsic;
3064 def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic;
3065 def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
3066
// ADDHA/ADDVA: accumulate a vector into each horizontal/vertical slice of
// a ZA tile. Same shape as the outer-product class minus one source vector.
3067 class SME_AddVectorToTile_Intrinsic
3068 : DefaultAttrsIntrinsic<[],
3069 [llvm_i32_ty,
3070 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
3071 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
3072 llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
3073
3074 def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
3075 def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
3076
3077 //
3078 // Counting elements
3079 //
3080
// CNTSB/CNTSH/CNTSW/CNTSD: return the streaming vector length in units of
// bytes/halfwords/words/doublewords as an i64; pure (IntrNoMem).
3081 class AdvSIMD_SME_CNTSB_Intrinsic
3082 : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
3083
3084 def int_aarch64_sme_cntsb : AdvSIMD_SME_CNTSB_Intrinsic;
3085 def int_aarch64_sme_cntsh : AdvSIMD_SME_CNTSB_Intrinsic;
3086 def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic;
3087 def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic;
3088
3089 //
3090 // PSTATE Functions
3091 //
3092
// TPIDR2_EL0 accessors. IntrNoMem + IntrHasSideEffects: no ordinary memory
// is touched, but the system-register state changes, so these must not be
// reordered or removed.
3093 def int_aarch64_sme_get_tpidr2
3094 : DefaultAttrsIntrinsic<[llvm_i64_ty], [],
3095 [IntrNoMem, IntrHasSideEffects]>;
3096 def int_aarch64_sme_set_tpidr2
3097 : DefaultAttrsIntrinsic<[], [llvm_i64_ty],
3098 [IntrNoMem, IntrHasSideEffects]>;
3099
// Toggle PSTATE.ZA (SMSTART/SMSTOP ZA equivalents).
3100 def int_aarch64_sme_za_enable
3101 : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
3102 def int_aarch64_sme_za_disable
3103 : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
3104
//
3105 // Clamp
3106 //
3107
// SCLAMP/UCLAMP/FCLAMP: clamp each element of the third operand between the
// first two (three-vector-arg class defined earlier in this file).
3108 def int_aarch64_sve_sclamp : AdvSIMD_3VectorArg_Intrinsic;
3109 def int_aarch64_sve_uclamp : AdvSIMD_3VectorArg_Intrinsic;
3110 def int_aarch64_sve_fclamp : AdvSIMD_3VectorArg_Intrinsic;
3111
3112
3113 //
3114 // Reversal
3115 //
3116
// REVD: reverse 64-bit doublewords within each 128-bit granule (merging
// predicated form).
3117 def int_aarch64_sve_revd : AdvSIMD_Merged1VectorArg_Intrinsic;
3118
3119 //
3120 // Predicate selection
3121 //
3122
// PSEL: select between predicate inputs using an index; both predicate
// operands and the result are the full nxv16i1 register view, with the
// overloaded vector type giving the element granularity.
3123 def int_aarch64_sve_psel
3124 : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
3125 [llvm_nxv16i1_ty,
3126 llvm_anyvector_ty, llvm_i32_ty],
3127 [IntrNoMem]>;
3128
3129 //
3130 // Predicate-pair intrinsics
3131 //
// WHILE{GE,GT,HI,HS,LE,LO,LS,LT} producing a pair of predicate vectors from
// two i64 bounds; one intrinsic generated per condition code.
3132 foreach cmp = ["ge", "gt", "hi", "hs", "le", "lo", "ls", "lt"] in {
3133 def int_aarch64_sve_while # cmp # _x2
3134 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3135 [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
3136 }
3137
3138 //
3139 // Predicate-as-counter intrinsics
3140 //
3141
// PEXT: expand one (or two) conventional predicate vectors out of a
// predicate-as-counter; the i32 part-index must be an immediate.
3142 def int_aarch64_sve_pext
3143 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
3144 [llvm_aarch64_svcount_ty, llvm_i32_ty],
3145 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
3146
3147 def int_aarch64_sve_pext_x2
3148 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3149 [llvm_aarch64_svcount_ty, llvm_i32_ty],
3150 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
3151
// PTRUE in predicate-as-counter form, one def per element width.
3152 def int_aarch64_sve_ptrue_c8
3153 : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
3154 def int_aarch64_sve_ptrue_c16
3155 : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
3156 def int_aarch64_sve_ptrue_c32
3157 : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
3158 def int_aarch64_sve_ptrue_c64
3159 : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
3160
// CNTP on a predicate-as-counter: i64 element count; the i32 vector-group
// modifier must be an immediate. One def per element width.
3161 def int_aarch64_sve_cntp_c8
3162 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
3163 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
3164 def int_aarch64_sve_cntp_c16
3165 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
3166 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
3167 def int_aarch64_sve_cntp_c32
3168 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
3169 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
3170 def int_aarch64_sve_cntp_c64
3171 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
3172 [IntrNoMem, ImmArg<ArgIndex<1>>]>;
3173
3174 // While (predicate-as-counter) intrinsics
// 8 condition codes x 4 element widths; result is a predicate-as-counter
// and the immediate i32 selects the vector-group multiplier.
3175 foreach cmp = ["ge", "gt", "hi", "hs", "le", "lo", "ls", "lt"] in {
3176 foreach ty = ["c8", "c16", "c32", "c64"] in {
3177 def int_aarch64_sve_while # cmp # _ # ty
3178 : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty],
3179 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
3180 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
3181 }
3182 }
3183
3184 //
3185 // SME2 Intrinsics
3186 //
3187
// Naming convention for the helper classes below:
//   Matrix_ArrayVector_*  - accumulate into the ZA array; first operand is
//                           the i32 vector-select/slice index.
//   VGn / Xn              - n-vector group of the same overloaded type.
//   Single vs Multi       - one shared operand vs one operand per vector.
//   Index                 - trailing immediate lane index (see each ImmArg).
// Classes with an empty attribute list deliberately keep default attributes
// only, since they read/update ZA state.

// ZA accumulate: slice index + one vector pair (single source each).
3188 class SME2_Matrix_ArrayVector_Single_Single_Intrinsic
3189 : DefaultAttrsIntrinsic<[],
3190 [llvm_i32_ty,
3191 llvm_anyvector_ty, LLVMMatchType<0>],
3192 []>;
3193
// ZA accumulate: two-vector group times one shared single vector.
3194 class SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic
3195 : DefaultAttrsIntrinsic<[],
3196 [llvm_i32_ty,
3197 llvm_anyvector_ty, LLVMMatchType<0>,
3198 LLVMMatchType<0>],
3199 []>;
3200
// ZA accumulate: four-vector group times one shared single vector.
3201 class SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic
3202 : DefaultAttrsIntrinsic<[],
3203 [llvm_i32_ty,
3204 llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
3205 LLVMMatchType<0>],
3206 []>;
3207
// ZA accumulate: two-vector group times two-vector group.
3208 class SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic
3209 : DefaultAttrsIntrinsic<[],
3210 [llvm_i32_ty,
3211 llvm_anyvector_ty, LLVMMatchType<0>,
3212 LLVMMatchType<0>, LLVMMatchType<0>],
3213 []>;
3214
// ZA accumulate: four-vector group times four-vector group.
3215 class SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic
3216 : DefaultAttrsIntrinsic<[],
3217 [llvm_i32_ty,
3218 llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
3219 LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
3220 []>;
3221
// Indexed (lane) variants: the trailing i32 lane must be immediate; the
// ImmArg index shifts with the number of vector operands.
3222 class SME2_Matrix_ArrayVector_Single_Index_Intrinsic
3223 : DefaultAttrsIntrinsic<[],
3224 [llvm_i32_ty,
3225 llvm_anyvector_ty,
3226 LLVMMatchType<0>, llvm_i32_ty],
3227 [ImmArg<ArgIndex<3>>]>;
3228
3229 class SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic
3230 : DefaultAttrsIntrinsic<[],
3231 [llvm_i32_ty,
3232 llvm_anyvector_ty, LLVMMatchType<0>,
3233 LLVMMatchType<0>, llvm_i32_ty],
3234 [ImmArg<ArgIndex<4>>]>;
3235
3236 class SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic
3237 : DefaultAttrsIntrinsic<[],
3238 [llvm_i32_ty,
3239 llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
3240 LLVMMatchType<0>, llvm_i32_ty],
3241 [ImmArg<ArgIndex<6>>]>;
3242
// Multi-vector shift-by-immediate, narrowing the element type by 2x/4x
// (LLVMSubdivide{2,4}VectorType result); shift amount is an immediate.
3243 class SME2_VG2_Multi_Imm_Intrinsic
3244 : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
3245 [llvm_anyvector_ty, LLVMMatchType<0>,
3246 llvm_i32_ty],
3247 [IntrNoMem, ImmArg<ArgIndex<2>>]>;
3248
3249 class SME2_VG4_Multi_Imm_Intrinsic
3250 : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>],
3251 [llvm_anyvector_ty, LLVMMatchType<0>,
3252 LLVMMatchType<0>, LLVMMatchType<0>,
3253 llvm_i32_ty],
3254 [IntrNoMem, ImmArg<ArgIndex<4>>]>;
3255
// Plain ZA-array writes of a 2- or 4-vector group at an i32 slice index.
3256 class SME2_ZA_Write_VG2_Intrinsic
3257 : DefaultAttrsIntrinsic<[],
3258 [llvm_i32_ty,
3259 llvm_anyvector_ty, LLVMMatchType<0>],
3260 []>;
3261
3262 class SME2_ZA_Write_VG4_Intrinsic
3263 : DefaultAttrsIntrinsic<[],
3264 [llvm_i32_ty,
3265 llvm_anyvector_ty, LLVMMatchType<0>,
3266 LLVMMatchType<0>, LLVMMatchType<0>],
3267 []>;
3268
// Pure multi-vector ops (IntrNoMem): n-vector group op'd with one shared
// single vector, returning an n-vector group.
3269 class SME2_VG2_Multi_Single_Intrinsic
3270 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3271 [LLVMMatchType<0>, LLVMMatchType<0>,
3272 LLVMMatchType<0>],
3273 [IntrNoMem]>;
3274
3275 class SME2_VG4_Multi_Single_Intrinsic
3276 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3277 LLVMMatchType<0>, LLVMMatchType<0>],
3278 [LLVMMatchType<0>, LLVMMatchType<0>,
3279 LLVMMatchType<0>, LLVMMatchType<0>,
3280 LLVMMatchType<0>],
3281 [IntrNoMem]>;
3282
// Pure multi-vector ops: n-vector group op'd element-wise with another
// n-vector group.
3283 class SME2_VG2_Multi_Multi_Intrinsic
3284 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3285 [LLVMMatchType<0>, LLVMMatchType<0>,
3286 LLVMMatchType<0>, LLVMMatchType<0>],
3287 [IntrNoMem]>;
3288
3289 class SME2_VG4_Multi_Multi_Intrinsic
3290 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3291 LLVMMatchType<0>, LLVMMatchType<0>],
3292 [LLVMMatchType<0>, LLVMMatchType<0>,
3293 LLVMMatchType<0>, LLVMMatchType<0>,
3294 LLVMMatchType<0>, LLVMMatchType<0>,
3295 LLVMMatchType<0>, LLVMMatchType<0>],
3296 [IntrNoMem]>;
3297
// SEL between two n-vector groups under a predicate-as-counter.
3298 class SVE2_VG2_Sel_Intrinsic
3299 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3300 [llvm_aarch64_svcount_ty, LLVMMatchType<0>,
3301 LLVMMatchType<0>, LLVMMatchType<0>,
3302 LLVMMatchType<0>], [IntrNoMem]>;
3303
3304 class SVE2_VG4_Sel_Intrinsic
3305 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3306 LLVMMatchType<0>, LLVMMatchType<0>],
3307 [llvm_aarch64_svcount_ty, LLVMMatchType<0>,
3308 LLVMMatchType<0>, LLVMMatchType<0>,
3309 LLVMMatchType<0>, LLVMMatchType<0>,
3310 LLVMMatchType<0>, LLVMMatchType<0>,
3311 LLVMMatchType<0>], [IntrNoMem]>;
3312
// Narrowing convert: two wide vectors in, one vector of half-width elements
// out (LLVMSubdivide2VectorType of the overload).
3313 class SME2_CVT_VG2_SINGLE_Intrinsic
3314 : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
3315 [llvm_anyvector_ty, LLVMMatchType<0>],
3316 [IntrNoMem]>;
3317
// f32x2 -> bf16 narrowing convert; types are fixed, not overloaded.
3318 class SME2_CVT_VG2_SINGLE_BF16_Intrinsic
3319 : DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty],
3320 [llvm_nxv4f32_ty, llvm_nxv4f32_ty],
3321 [IntrNoMem]>;
3322
// Widening convert: one vector of half-width elements in, two wide
// vectors out.
3323 class SME2_CVT_WIDENING_VG2_Intrinsic
3324 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3325 [LLVMSubdivide2VectorType<0>], [IntrNoMem]>;
3326
3327
// Narrowing convert: four wide vectors in, one quarter-width-element
// vector out.
3328 class SME2_CVT_VG4_SINGLE_Intrinsic
3329 : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>],
3330 [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
3331 [IntrNoMem]>;
3332
// Element-wise convert with independently overloaded source and result
// vector types (two overload parameters), x2 and x4 group forms.
3333 class SME2_CVT_X2_Intrinsic
3334 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3335 [llvm_anyvector_ty, LLVMMatchType<1>],
3336 [IntrNoMem]>;
3337
3338 class SME2_CVT_X4_Intrinsic
3339 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
3340 [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>],
3341 [IntrNoMem]>;
3342
// BFMLSL: bf16 multiply-subtract-long accumulating into f32; the _Lane
// variant takes an immediate lane index (ArgIndex<3>).
3343 class SME2_BFMLS_Intrinsic
3344 : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
3345 [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
3346 [IntrNoMem]>;
3347
3348 class SME2_BFMLS_Lane_Intrinsic
3349 : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
3350 [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
3351 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
3352
// Read a 2- or 4-vector group from the ZA array at an i32 slice index.
3353 class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
3354 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3355 [llvm_i32_ty],
3356 []>;
3357
3358 class SME2_ZA_ArrayVector_Read_VG4_Intrinsic
3359 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3360 LLVMMatchType<0>, LLVMMatchType<0>],
3361 [llvm_i32_ty],
3362 []>;
3363
// Read a 2- or 4-vector group from a ZA tile (tile + slice i32 operands).
3364 class SME2_Matrix_TileVector_Read_VG2_Intrinsic
3365 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3366 [llvm_i32_ty, llvm_i32_ty],
3367 []>;
3368
3369 class SME2_Matrix_TileVector_Read_VG4_Intrinsic
3370 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3371 LLVMMatchType<0>, LLVMMatchType<0>],
3372 [llvm_i32_ty, llvm_i32_ty],
3373 []>;
3374
// Write a 2- or 4-vector group into the ZA array at an i32 slice index.
3375 class SME2_ZA_ArrayVector_Write_VG2_Intrinsic
3376 : DefaultAttrsIntrinsic<[],
3377 [llvm_i32_ty,
3378 llvm_anyvector_ty, LLVMMatchType<0>],
3379 []>;
3380
3381 class SME2_ZA_ArrayVector_Write_VG4_Intrinsic
3382 : DefaultAttrsIntrinsic<[],
3383 [llvm_i32_ty,
3384 llvm_anyvector_ty, LLVMMatchType<0>,
3385 LLVMMatchType<0>, LLVMMatchType<0>],
3386 []>;
3387
// Write a 2- or 4-vector group into a ZA tile; the first i32 (tile number)
// must be an immediate, the slice index may be a runtime value.
3388 class SME2_Matrix_TileVector_Write_VG2_Intrinsic
3389 : DefaultAttrsIntrinsic<[],
3390 [llvm_i32_ty, llvm_i32_ty,
3391 llvm_anyvector_ty, LLVMMatchType<0>],
3392 [ImmArg<ArgIndex<0>>]>;
3393
3394 class SME2_Matrix_TileVector_Write_VG4_Intrinsic
3395 : DefaultAttrsIntrinsic<[],
3396 [llvm_i32_ty, llvm_i32_ty,
3397 llvm_anyvector_ty, LLVMMatchType<0>,
3398 LLVMMatchType<0>, LLVMMatchType<0>],
3399 [ImmArg<ArgIndex<0>>]>;
3400
// n-vector group combined with two shared single vectors (used by the
// multi-vector clamp defs below): group + lower bound + upper bound.
3401 class SME2_VG2_Multi_Single_Single_Intrinsic
3402 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3403 [LLVMMatchType<0>, LLVMMatchType<0>,
3404 LLVMMatchType<0>, LLVMMatchType<0>],
3405 [IntrNoMem]>;
3406
3407 class SME2_VG4_Multi_Single_Single_Intrinsic
3408 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3409 LLVMMatchType<0>, LLVMMatchType<0>],
3410 [LLVMMatchType<0>, LLVMMatchType<0>,
3411 LLVMMatchType<0>, LLVMMatchType<0>,
3412 LLVMMatchType<0>, LLVMMatchType<0>],
3413 [IntrNoMem]>;
3414
// ZIP/UZP over 2- and 4-vector groups: same-type in/out permutations.
3415 class SVE2_VG2_ZipUzp_Intrinsic
3416 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3417 [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
3418
3419 class SVE2_VG4_ZipUzp_Intrinsic
3420 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3421 LLVMMatchType<0>, LLVMMatchType<0>],
3422 [LLVMMatchType<0>, LLVMMatchType<0>,
3423 LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
3424
// UNPK: widen half-width-element source vectors into an n-vector group of
// the overloaded (wider) type.
3425 class SME2_VG2_Unpk_Intrinsic
3426 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
3427 [LLVMSubdivide2VectorType<0>], [IntrNoMem]>;
3428
3429 class SME2_VG4_Unpk_Intrinsic
3430 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
3431 LLVMMatchType<0>, LLVMMatchType<0>],
3432 [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
3433 [IntrNoMem]>;
3434
3435 //
3436 // Multi-vector fused multiply-add/subtract
3437 //
3438
// FMLA/FMLS into ZA: _single_ = shared multiplicand, plain = per-vector
// multiplicands, _lane_ = immediate-indexed lane; x2/x4 vector groups.
3439 def int_aarch64_sme_fmla_single_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3440 def int_aarch64_sme_fmls_single_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3441 def int_aarch64_sme_fmla_single_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3442 def int_aarch64_sme_fmls_single_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3443
3444 def int_aarch64_sme_fmla_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3445 def int_aarch64_sme_fmls_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3446 def int_aarch64_sme_fmla_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3447 def int_aarch64_sme_fmls_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3448
3449 def int_aarch64_sme_fmla_lane_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3450 def int_aarch64_sme_fmls_lane_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3451 def int_aarch64_sme_fmla_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3452 def int_aarch64_sme_fmls_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3453
3454 //
3455 // Outer product and accumulate/subtract intrinsics
3456 //
3457
// SME2 integer and binary outer products targeting 32-bit ZA tiles.
3458 def int_aarch64_sme_smopa_za32 : SME_OuterProduct_Intrinsic;
3459 def int_aarch64_sme_umopa_za32 : SME_OuterProduct_Intrinsic;
3460 def int_aarch64_sme_smops_za32 : SME_OuterProduct_Intrinsic;
3461 def int_aarch64_sme_umops_za32 : SME_OuterProduct_Intrinsic;
3462
3463 def int_aarch64_sme_bmopa_za32 : SME_OuterProduct_Intrinsic;
3464 def int_aarch64_sme_bmops_za32 : SME_OuterProduct_Intrinsic;
3465
3466 //
3467 // Multi-vector rounding shift left intrinsics
3468 //
3469
// SRSHL/URSHL over vector groups; _single_ shares one shift operand.
3470 def int_aarch64_sve_srshl_single_x2 : SME2_VG2_Multi_Single_Intrinsic;
3471 def int_aarch64_sve_urshl_single_x2 : SME2_VG2_Multi_Single_Intrinsic;
3472 def int_aarch64_sve_srshl_single_x4 : SME2_VG4_Multi_Single_Intrinsic;
3473 def int_aarch64_sve_urshl_single_x4 : SME2_VG4_Multi_Single_Intrinsic;
3474
3475 def int_aarch64_sve_srshl_x2 : SME2_VG2_Multi_Multi_Intrinsic;
3476 def int_aarch64_sve_urshl_x2 : SME2_VG2_Multi_Multi_Intrinsic;
3477 def int_aarch64_sve_srshl_x4 : SME2_VG4_Multi_Multi_Intrinsic;
3478 def int_aarch64_sve_urshl_x4 : SME2_VG4_Multi_Multi_Intrinsic;
3479
3480 // Multi-vector saturating rounding shift right intrinsics
// Narrowing right shifts by immediate: plain, interleaving-narrow (n) and
// unsigned-result (u/un) variants.
3481
3482 def int_aarch64_sve_sqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic;
3483 def int_aarch64_sve_uqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic;
3484 def int_aarch64_sve_sqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic;
3485 def int_aarch64_sve_uqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic;
3486
3487 def int_aarch64_sve_sqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic;
3488 def int_aarch64_sve_uqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic;
3489 def int_aarch64_sve_sqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic;
3490 def int_aarch64_sve_uqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic;
3491
3492 def int_aarch64_sve_sqrshru_x2 : SME2_VG2_Multi_Imm_Intrinsic;
3493 def int_aarch64_sve_sqrshru_x4 : SME2_VG4_Multi_Imm_Intrinsic;
3494
3495 def int_aarch64_sve_sqrshrun_x2 : SME2_VG2_Multi_Imm_Intrinsic;
3496 def int_aarch64_sve_sqrshrun_x4 : SME2_VG4_Multi_Imm_Intrinsic;
3497
3498 //
3499 // Multi-vector multiply-add/subtract long
3500 //
3501
// Expands to f/s/u x mlal/mlsl x {single,multi,lane} x {vg2x1,vg2x2,vg2x4},
// e.g. int_aarch64_sme_fmlal_single_vg2x2.
3502 foreach ty = ["f", "s", "u"] in {
3503 foreach instr = ["mlal", "mlsl"] in {
3504 def int_aarch64_sme_ # ty # instr # _single_vg2x1 : SME2_Matrix_ArrayVector_Single_Single_Intrinsic;
3505 def int_aarch64_sme_ # ty # instr # _single_vg2x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3506 def int_aarch64_sme_ # ty # instr # _single_vg2x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3507
3508 def int_aarch64_sme_ # ty # instr # _vg2x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3509 def int_aarch64_sme_ # ty # instr # _vg2x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3510
3511 def int_aarch64_sme_ # ty # instr # _lane_vg2x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic;
3512 def int_aarch64_sme_ # ty # instr # _lane_vg2x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3513 def int_aarch64_sme_ # ty # instr # _lane_vg2x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3514 }
3515 }
3516
3517 //
3518 // Multi-vector multiply-add long long
3519 //
3520
// Expands to s/u x mla/mls x za32/za64 x {single,multi,lane} x vg4 forms,
// e.g. int_aarch64_sme_smla_za32_single_vg4x2.
3521 foreach ty = ["s", "u"] in {
3522 foreach instr = ["mla", "mls"] in {
3523 foreach za = ["za32", "za64"] in {
3524 def int_aarch64_sme_ # ty # instr # _ # za # _single_vg4x1 : SME2_Matrix_ArrayVector_Single_Single_Intrinsic;
3525 def int_aarch64_sme_ # ty # instr # _ # za # _single_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3526 def int_aarch64_sme_ # ty # instr # _ # za # _single_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3527
3528 def int_aarch64_sme_ # ty # instr # _ # za # _vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3529 def int_aarch64_sme_ # ty # instr # _ # za # _vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3530
3531 def int_aarch64_sme_ # ty # instr # _ # za # _lane_vg4x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic;
3532 def int_aarch64_sme_ # ty # instr # _ # za # _lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3533 def int_aarch64_sme_ # ty # instr # _ # za # _lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3534 }
3535 }
3536 }
3537
// Mixed-signedness (signed-by-unsigned / unsigned-by-signed) multiply-add
// variants, written out individually because only some combinations exist.
3538 def int_aarch64_sme_sumla_za32_single_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3539 def int_aarch64_sme_sumla_za32_single_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3540
3541 def int_aarch64_sme_sumla_za32_lane_vg4x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic;
3542 def int_aarch64_sme_sumla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3543 def int_aarch64_sme_sumla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3544
3545 def int_aarch64_sme_usmla_za32_single_vg4x1 : SME2_Matrix_ArrayVector_Single_Single_Intrinsic;
3546 def int_aarch64_sme_usmla_za32_single_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
3547 def int_aarch64_sme_usmla_za32_single_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
3548
3549 def int_aarch64_sme_usmla_za32_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
3550 def int_aarch64_sme_usmla_za32_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
3551
3552 def int_aarch64_sme_usmla_za32_lane_vg4x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic;
3553 def int_aarch64_sme_usmla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3554 def int_aarch64_sme_usmla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3555
// BFMLSLB/BFMLSLT: bf16 multiply-subtract-long into f32, bottom/top
// element halves, plus immediate-lane variants.
3556 def int_aarch64_sve_bfmlslb : SME2_BFMLS_Intrinsic;
3557 def int_aarch64_sve_bfmlslb_lane : SME2_BFMLS_Lane_Intrinsic;
3558
3559 def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
3560 def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;
3561
3562 // Multi-vector zeroing
3563
// ZERO of ZA vector groups: one def per group shape; i32 slice operand,
// modelled as IntrNoMem + IntrHasSideEffects (ZA state change only).
3564 foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in {
3565 def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects]>;
3566 }
3567
3568 // Multi-vector signed saturating doubling multiply high
3569
3570 def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;
3571 def int_aarch64_sve_sqdmulh_single_vgx4 : SME2_VG4_Multi_Single_Intrinsic;
3572
3573 def int_aarch64_sve_sqdmulh_vgx2 : SME2_VG2_Multi_Multi_Intrinsic;
3574 def int_aarch64_sve_sqdmulh_vgx4 : SME2_VG4_Multi_Multi_Intrinsic;
3575
3576 // Multi-vector floating-point round to integral value
3577
// FRINTA/FRINTM/FRINTN/FRINTP over 2- and 4-vector groups (the ZipUzp
// classes are reused here purely for their same-type-in/out shape).
3578 foreach inst = ["a", "m", "n", "p"] in {
3579 def int_aarch64_sve_frint # inst # _x2 : SVE2_VG2_ZipUzp_Intrinsic;
3580 def int_aarch64_sve_frint # inst # _x4 : SVE2_VG4_ZipUzp_Intrinsic;
3581 }
3582
3583 //
3584 // Multi-vector min/max
3585 //
3586
// f/s/u MAX and MIN over vector groups, single-operand and multi-operand.
3587 foreach ty = ["f", "s", "u"] in {
3588 foreach instr = ["max", "min"] in {
3589 def int_aarch64_sve_ # ty # instr # _single_x2 : SME2_VG2_Multi_Single_Intrinsic;
3590 def int_aarch64_sve_ # ty # instr # _single_x4 : SME2_VG4_Multi_Single_Intrinsic;
3591
3592 def int_aarch64_sve_ # ty # instr # _x2 : SME2_VG2_Multi_Multi_Intrinsic;
3593 def int_aarch64_sve_ # ty # instr # _x4 : SME2_VG4_Multi_Multi_Intrinsic;
3594 }
3595 }
3596
3597 //
3598 // Multi-vector floating point min/max number
3599 //
3600
3601 foreach instr = ["fmaxnm", "fminnm"] in {
3602 def int_aarch64_sve_ # instr # _single_x2 : SME2_VG2_Multi_Single_Intrinsic;
3603 def int_aarch64_sve_ # instr # _single_x4 : SME2_VG4_Multi_Single_Intrinsic;
3604
3605 def int_aarch64_sve_ # instr # _x2 : SME2_VG2_Multi_Multi_Intrinsic;
3606 def int_aarch64_sve_ # instr # _x4 : SME2_VG4_Multi_Multi_Intrinsic;
3607 }
3608
3609 //
3610 // Multi-vector floating point absolute min/max number
3611 //
3612
// Note these are int_aarch64_sme_-prefixed, unlike the sve-prefixed
// min/max defs above.
3613 foreach instr = ["famax", "famin"] in {
3614 def int_aarch64_sme_ # instr # _x2 : SME2_VG2_Multi_Multi_Intrinsic;
3615 def int_aarch64_sme_ # instr # _x4 : SME2_VG4_Multi_Multi_Intrinsic;
3616 }
3617
3618 //
3619 // Multi-vector vertical dot-products
3620 //
3621
3622 def int_aarch64_sme_fvdot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3623
// svdot/uvdot lane forms; only za64 has no vg1x2 variant.
3624 foreach ty = ["s", "u"] in {
3625 def int_aarch64_sme_ #ty # vdot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
3626 def int_aarch64_sme_ #ty # vdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3627 def int_aarch64_sme_ #ty # vdot_lane_za64_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3628 }
3629
3630 def int_aarch64_sme_suvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3631 def int_aarch64_sme_usvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3632
3633
3634 //
3635 //Multi-vector floating-point convert from half-precision to deinterleaved single-precision.
3636 //
3637
3638 def int_aarch64_sve_fcvtl_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic;
3639
3640 //
3641 // Multi-vector floating-point CVT from single-precision to interleaved half-precision/BFloat16
3642 //
3643 def int_aarch64_sve_fcvtn_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3644 def int_aarch64_sve_bfcvtn_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic;
3645
3646 //
3647 // Multi-vector convert to/from floating-point.
3648 //
3649 def int_aarch64_sve_fcvt_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3650 def int_aarch64_sve_bfcvt_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic;
3651 def int_aarch64_sve_fcvtzs_x2 : SME2_CVT_X2_Intrinsic;
3652 def int_aarch64_sve_fcvtzu_x2 : SME2_CVT_X2_Intrinsic;
3653 def int_aarch64_sve_scvtf_x2 : SME2_CVT_X2_Intrinsic;
3654 def int_aarch64_sve_ucvtf_x2 : SME2_CVT_X2_Intrinsic;
3655 def int_aarch64_sve_fcvtzs_x4 : SME2_CVT_X4_Intrinsic;
3656 def int_aarch64_sve_fcvtzu_x4 : SME2_CVT_X4_Intrinsic;
3657 def int_aarch64_sve_scvtf_x4 : SME2_CVT_X4_Intrinsic;
3658 def int_aarch64_sve_ucvtf_x4 : SME2_CVT_X4_Intrinsic;
3659 def int_aarch64_sve_fcvt_widen_x2 : SME2_CVT_WIDENING_VG2_Intrinsic;
3660 //
3661 // Multi-vector saturating extract narrow
3662 //
3663 def int_aarch64_sve_sqcvt_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3664 def int_aarch64_sve_uqcvt_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3665 def int_aarch64_sve_sqcvtu_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3666 def int_aarch64_sve_sqcvt_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
3667 def int_aarch64_sve_uqcvt_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
3668 def int_aarch64_sve_sqcvtu_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
3669
3670 //
3671 // Multi-vector saturating extract narrow and interleave
3672 //
3673 def int_aarch64_sve_sqcvtn_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3674 def int_aarch64_sve_uqcvtn_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3675 def int_aarch64_sve_sqcvtun_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
3676 def int_aarch64_sve_sqcvtn_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
3677 def int_aarch64_sve_uqcvtn_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
3678 def int_aarch64_sve_sqcvtun_x4 : SME2_CVT_VG4_SINGLE_Intrinsic;
3679
//
// Multi-Single add/sub
//
// Add/sub of a multi-vector group against a single (broadcast) vector,
// written into ZA array vector groups of 2 or 4.
def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_add_write_single_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
def int_aarch64_sme_sub_write_single_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;

//
// Multi-Multi add/sub
//
// Add/sub of two multi-vector groups, written into ZA array vector groups.
def int_aarch64_sme_add_write_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_add_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
def int_aarch64_sme_sub_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;

// Multi-vector clamps: signed/unsigned integer, FP and BFloat16 variants
// over 2- and 4-vector groups.
def int_aarch64_sve_sclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic;
def int_aarch64_sve_uclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic;
def int_aarch64_sve_fclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic;
def int_aarch64_sve_bfclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic;

def int_aarch64_sve_sclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic;
def int_aarch64_sve_uclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic;
def int_aarch64_sve_fclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic;
def int_aarch64_sve_bfclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic;
3706
//
// Multi-vector add/sub and accumulate into ZA
//
// Expanded cross product: {add, sub} x {za16, za32, za64} x {vg1x2, vg1x4}.
def int_aarch64_sme_add_za16_vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
def int_aarch64_sme_add_za16_vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
def int_aarch64_sme_add_za32_vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
def int_aarch64_sme_add_za32_vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
def int_aarch64_sme_add_za64_vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
def int_aarch64_sme_add_za64_vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
def int_aarch64_sme_sub_za16_vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
def int_aarch64_sme_sub_za16_vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
def int_aarch64_sme_sub_za32_vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
def int_aarch64_sme_sub_za32_vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
def int_aarch64_sme_sub_za64_vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
def int_aarch64_sme_sub_za64_vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
3716
//
// Move multi-vectors to/from ZA
//

// Read groups of 2/4 horizontal tile slices.
def int_aarch64_sme_read_hor_vg2 : SME2_Matrix_TileVector_Read_VG2_Intrinsic;
def int_aarch64_sme_read_hor_vg4 : SME2_Matrix_TileVector_Read_VG4_Intrinsic;

// Read groups of 2/4 vertical tile slices.
def int_aarch64_sme_read_ver_vg2 : SME2_Matrix_TileVector_Read_VG2_Intrinsic;
def int_aarch64_sme_read_ver_vg4 : SME2_Matrix_TileVector_Read_VG4_Intrinsic;

// Read ZA array vector groups (vg1).
def int_aarch64_sme_read_vg1x2 : SME2_ZA_ArrayVector_Read_VG2_Intrinsic;
def int_aarch64_sme_read_vg1x4 : SME2_ZA_ArrayVector_Read_VG4_Intrinsic;

// Write groups of 2/4 horizontal tile slices.
def int_aarch64_sme_write_hor_vg2 : SME2_Matrix_TileVector_Write_VG2_Intrinsic;
def int_aarch64_sme_write_hor_vg4 : SME2_Matrix_TileVector_Write_VG4_Intrinsic;

// Write groups of 2/4 vertical tile slices.
def int_aarch64_sme_write_ver_vg2 : SME2_Matrix_TileVector_Write_VG2_Intrinsic;
def int_aarch64_sme_write_ver_vg4 : SME2_Matrix_TileVector_Write_VG4_Intrinsic;

// Write ZA array vector groups (vg1).
def int_aarch64_sme_write_vg1x2 : SME2_ZA_ArrayVector_Write_VG2_Intrinsic;
def int_aarch64_sme_write_vg1x4 : SME2_ZA_ArrayVector_Write_VG4_Intrinsic;

//
// Multi-Single Vector add
//
def int_aarch64_sve_add_single_x2 : SME2_VG2_Multi_Single_Intrinsic;
def int_aarch64_sve_add_single_x4 : SME2_VG4_Multi_Single_Intrinsic;
3744
// 2-way and 4-way multi-vector signed/unsigned integer dot-product.
// Expanded cross product: {s, u} x {za32, za64}, each with single/multi/lane
// forms over 2- and 4-vector groups.
def int_aarch64_sme_sdot_single_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_sdot_single_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
def int_aarch64_sme_sdot_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_sdot_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
def int_aarch64_sme_sdot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
def int_aarch64_sme_sdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
def int_aarch64_sme_sdot_single_za64_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_sdot_single_za64_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
def int_aarch64_sme_sdot_za64_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_sdot_za64_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
def int_aarch64_sme_sdot_lane_za64_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
def int_aarch64_sme_sdot_lane_za64_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
def int_aarch64_sme_udot_single_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_udot_single_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
def int_aarch64_sme_udot_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_udot_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
def int_aarch64_sme_udot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
def int_aarch64_sme_udot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
def int_aarch64_sme_udot_single_za64_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_udot_single_za64_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
def int_aarch64_sme_udot_za64_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_udot_za64_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
def int_aarch64_sme_udot_lane_za64_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
def int_aarch64_sme_udot_lane_za64_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3758
// Mixed-sign (su/us) multi-vector dot-products into 32-bit ZA.
// Expanded cross product over the {su, us} prefixes.
def int_aarch64_sme_sudot_single_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_sudot_single_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;

def int_aarch64_sme_sudot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
def int_aarch64_sme_sudot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;

def int_aarch64_sme_usdot_single_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_usdot_single_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;

def int_aarch64_sme_usdot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
def int_aarch64_sme_usdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3766
// Unsigned-by-signed multi x multi dot-products into 32-bit ZA.
def int_aarch64_sme_usdot_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_usdot_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;

// Multi-vector half-precision or bfloat floating-point dot-product
def int_aarch64_sme_fdot_single_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
def int_aarch64_sme_fdot_single_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;

def int_aarch64_sme_fdot_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_fdot_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;

def int_aarch64_sme_fdot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
def int_aarch64_sme_fdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
3779
// Multi-vector zip and unzips; the "q" variants operate within each
// quadword (128-bit) segment.
def int_aarch64_sve_zip_x2 : SVE2_VG2_ZipUzp_Intrinsic;
def int_aarch64_sve_zipq_x2 : SVE2_VG2_ZipUzp_Intrinsic;
def int_aarch64_sve_zip_x4 : SVE2_VG4_ZipUzp_Intrinsic;
def int_aarch64_sve_zipq_x4 : SVE2_VG4_ZipUzp_Intrinsic;
def int_aarch64_sve_uzp_x2 : SVE2_VG2_ZipUzp_Intrinsic;
def int_aarch64_sve_uzpq_x2 : SVE2_VG2_ZipUzp_Intrinsic;
def int_aarch64_sve_uzp_x4 : SVE2_VG4_ZipUzp_Intrinsic;
def int_aarch64_sve_uzpq_x4 : SVE2_VG4_ZipUzp_Intrinsic;

// Vector dot-products (2-way)
def int_aarch64_sve_sdot_x2 : SVE2_3VectorArg_Long_Intrinsic;
def int_aarch64_sve_udot_x2 : SVE2_3VectorArg_Long_Intrinsic;
def int_aarch64_sve_fdot_x2 : SVE2_3VectorArg_Long_Intrinsic;
// Indexed (lane) forms of the 2-way dot products.
def int_aarch64_sve_sdot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
def int_aarch64_sve_udot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
def int_aarch64_sve_fdot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;

//
// Signed/unsigned multi-vector unpacks
//
def int_aarch64_sve_sunpk_x2 : SME2_VG2_Unpk_Intrinsic;
def int_aarch64_sve_uunpk_x2 : SME2_VG2_Unpk_Intrinsic;
def int_aarch64_sve_sunpk_x4 : SME2_VG4_Unpk_Intrinsic;
def int_aarch64_sve_uunpk_x4 : SME2_VG4_Unpk_Intrinsic;

// 2-way and 4-way vector selects
def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic;
def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic;
3809
// Spill/fill of the ZT0 lookup-table register.
// Operands: (i32 ZT register selector, memory address).  NOTE(review): the
// i32 is presumably always 0 (only ZT0 exists) -- confirm against codegen.
class SME_LDR_STR_ZT_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic;
def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic;

//
// Zero ZT0
//
// The single operand is the immediate ZT register selector.
def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrWriteMem]>;

//
// Lookup table expand one register
//
// Operands: (imm ZT selector, nxv16i8 index vector, imm lane index).
def int_aarch64_sme_luti2_lane_zt
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
def int_aarch64_sme_luti4_lane_zt
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
3829
//
// Lookup table expand two registers
//
// As the one-register forms, but producing a 2-vector result group.
def int_aarch64_sme_luti2_lane_zt_x2
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
def int_aarch64_sme_luti4_lane_zt_x2
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;

//
// Lookup table expand four registers
//
def int_aarch64_sme_luti2_lane_zt_x4
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
def int_aarch64_sme_luti4_lane_zt_x4
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;

// Four-register expansion taking two full index vectors and no lane
// selector (only the ZT register number is immediate).
def int_aarch64_sme_luti4_zt_x4
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                          [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
3855

//
// Register scaling
//
// FP8 scaling: each FP result vector is the corresponding FP operand vector
// scaled by an integer vector of matching shape (LLVMVectorOfBitcastsToInt).
// "single" forms share one scale vector across the group.
def int_aarch64_sme_fp8_scale_single_x2
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                          [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>],
                          [IntrNoMem]>;

def int_aarch64_sme_fp8_scale_single_x4
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>],
                          [IntrNoMem]>;

// Multi forms take one scale vector per operand vector.
def int_aarch64_sme_fp8_scale_x2
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                          [LLVMMatchType<0>, LLVMMatchType<0>,
                           LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
                          [IntrNoMem]>;

def int_aarch64_sme_fp8_scale_x4
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                           LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
                          [IntrNoMem]>;
3881
3882 }
3883
//
// SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
//
def int_aarch64_sve_zipq1 : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_zipq2 : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_uzpq1 : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_uzpq2 : AdvSIMD_2VectorArg_Intrinsic;

// SVE2.1 - Programmable table lookup within each quadword vector segment
// (zeroing)/(merging)
//
def int_aarch64_sve_tblq : AdvSIMD_SVE_TBL_Intrinsic;
def int_aarch64_sve_tbxq : AdvSIMD_SVE2_TBX_Intrinsic;

// SVE2.1 - Extract vector segment from each pair of quadword segments.
//
def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic;

//
// SVE2.1 - Move predicate to/from vector
//
def int_aarch64_sve_pmov_to_pred_lane : SVE2_1VectorArgIndexed_Pred_Intrinsic;

def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic;

def int_aarch64_sve_pmov_to_vector_lane_merging : SVE2_Pred_1VectorArgIndexed_Intrinsic;
def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic;

// Non-widening SME outer products (accumulating / subtracting).
def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic;
def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic;

// SVE2/SME2 - Floating point absolute maximum and minimum

// Predicated forms; the "_u" variants have undefined results in inactive
// lanes.  NOTE(review): "_u" semantics inferred from the SVE intrinsic
// naming convention used elsewhere -- confirm.
def int_aarch64_sve_famax : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic;

def int_aarch64_sve_famin : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
// Neon absolute maximum and minimum
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;
3924
3925 //
3926 // FP8 Intrinsics
3927 //
3928 let TargetPrefix = "aarch64" in {
3929
// SVE Widening Conversions
// FP8 (nxv16i8) source -> wider FP vector result.  Modeled with
// IntrInaccessibleMemOnly: the conversion reads processor state outside the
// IR (NOTE(review): presumably the FPMR format register -- confirm).
class SVE2_FP8_Cvt
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [llvm_nxv16i8_ty],
                          [IntrReadMem, IntrInaccessibleMemOnly]>;

def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt;
def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt;
def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt;
def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt;

// SVE Narrowing Conversions
// Two wide FP source vectors -> one FP8 (nxv16i8) result.
class SVE2_FP8_Narrow_Cvt
  : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                          [llvm_anyvector_ty, LLVMMatchType<0>],
                          [IntrReadMem, IntrInaccessibleMemOnly]>;

def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt;
def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt;

// Narrowing convert (top).  NOTE(review): the leading nxv16i8 operand is
// presumably the merged destination whose other half is preserved -- confirm.
def int_aarch64_sve_fp8_cvtnt
  : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                          [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>],
                          [IntrReadMem, IntrInaccessibleMemOnly]>;

// Dot product
// (accumulator, fp8 zn, fp8 zm) -> accumulator type.
class SVE2_FP8_FMLA_FDOT
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>,
                           llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                          [IntrReadMem, IntrInaccessibleMemOnly]>;

// Indexed form; the trailing i32 is an immediate lane index.
class SVE2_FP8_FMLA_FDOT_Lane
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                          [LLVMMatchType<0>,
                           llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                          [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;

def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane;

// Fused multiply-add
def int_aarch64_sve_fp8_fmlalb : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fmlalb_lane : SVE2_FP8_FMLA_FDOT_Lane;

def int_aarch64_sve_fp8_fmlalt : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fmlalt_lane : SVE2_FP8_FMLA_FDOT_Lane;

def int_aarch64_sve_fp8_fmlallbb : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fmlallbb_lane : SVE2_FP8_FMLA_FDOT_Lane;

def int_aarch64_sve_fp8_fmlallbt : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fmlallbt_lane : SVE2_FP8_FMLA_FDOT_Lane;

def int_aarch64_sve_fp8_fmlalltb : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fmlalltb_lane : SVE2_FP8_FMLA_FDOT_Lane;

def int_aarch64_sve_fp8_fmlalltt : SVE2_FP8_FMLA_FDOT;
def int_aarch64_sve_fp8_fmlalltt_lane : SVE2_FP8_FMLA_FDOT_Lane;

// One FP8 source vector -> a 2-vector group of wider FP results.
class SME2_FP8_CVT_X2_Single_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                          [llvm_nxv16i8_ty],
                          [IntrReadMem, IntrInaccessibleMemOnly]>;

// Four single-precision source vectors -> one FP8 result vector.
class SME2_FP8_CVT_Single_X4_Intrinsic
  : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                          [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty],
                          [IntrReadMem, IntrInaccessibleMemOnly]>;
3999
// FP8 outer product into ZA.
// Operands: (imm tile index, 2 governing predicates, 2 fp8 source vectors).
class SME_FP8_OuterProduct_Intrinsic
  : DefaultAttrsIntrinsic<[],
                          [llvm_i32_ty,
                           llvm_nxv16i1_ty, llvm_nxv16i1_ty,
                           llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                          [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;

// FP8 ZA-accumulating ops, indexed (lane) forms.
// Operand layout: (ZA slice index, N zn vectors, zm vector, imm lane index).
class SME_FP8_ZA_LANE_VGx1_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty,
                               llvm_i32_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<3>>]>;

class SME_FP8_ZA_LANE_VGx2_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty,
                               llvm_i32_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<4>>]>;

class SME_FP8_ZA_LANE_VGx4_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty,
                               llvm_i32_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects, ImmArg<ArgIndex<6>>]>;
// "Single" forms: N zn vectors against one shared zm vector.
// Operand layout: (ZA slice index, N zn vectors, zm vector).
class SME_FP8_ZA_SINGLE_VGx1_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects]>;

class SME_FP8_ZA_SINGLE_VGx2_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects]>;

class SME_FP8_ZA_SINGLE_VGx4_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects]>;

// "Multi" forms: N zn vectors against N zm vectors.
// Operand layout: (ZA slice index, N zn vectors, N zm vectors).
class SME_FP8_ZA_MULTI_VGx2_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects]>;

class SME_FP8_ZA_MULTI_VGx4_Intrinsic
  : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                               llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                          [IntrInaccessibleMemOnly, IntrHasSideEffects]>;
//
// CVT from FP8 to half-precision/BFloat16 multi-vector
//
def int_aarch64_sve_fp8_cvt1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;
def int_aarch64_sve_fp8_cvt2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;

//
// CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector
//
def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;
def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;

//
// CVT to FP8 from half-precision/BFloat16/single-precision multi-vector
//
def int_aarch64_sve_fp8_cvt_x2
  : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                          [llvm_anyvector_ty, LLVMMatchType<0>],
                          [IntrReadMem, IntrInaccessibleMemOnly]>;

def int_aarch64_sve_fp8_cvt_x4 : SME2_FP8_CVT_Single_X4_Intrinsic;
def int_aarch64_sve_fp8_cvtn_x4 : SME2_FP8_CVT_Single_X4_Intrinsic;

// FP8 outer product
def int_aarch64_sme_fp8_fmopa_za16 : SME_FP8_OuterProduct_Intrinsic;
def int_aarch64_sme_fp8_fmopa_za32 : SME_FP8_OuterProduct_Intrinsic;

//
// ZA multiply-add
//
// Double-vector groups (F8F16)
def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x1 : SME_FP8_ZA_LANE_VGx1_Intrinsic;
def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlal_lane_za16_vg2x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic;
// Single
def int_aarch64_sme_fp8_fmlal_single_za16_vg2x1 : SME_FP8_ZA_SINGLE_VGx1_Intrinsic;
def int_aarch64_sme_fp8_fmlal_single_za16_vg2x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlal_single_za16_vg2x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic;
// Multi
def int_aarch64_sme_fp8_fmlal_multi_za16_vg2x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlal_multi_za16_vg2x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic;

// Quad-vector groups (F8F32)
def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x1 : SME_FP8_ZA_LANE_VGx1_Intrinsic;
def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlall_lane_za32_vg4x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic;
// Single
def int_aarch64_sme_fp8_fmlall_single_za32_vg4x1 : SME_FP8_ZA_SINGLE_VGx1_Intrinsic;
def int_aarch64_sme_fp8_fmlall_single_za32_vg4x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlall_single_za32_vg4x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic;
// Multi
def int_aarch64_sme_fp8_fmlall_multi_za32_vg4x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fmlall_multi_za32_vg4x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic;

//
// FP8 FDOT intrinsics
//
// (indexed)
def int_aarch64_sme_fp8_fdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fdot_lane_za32_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;

def int_aarch64_sme_fp8_fdot_lane_za16_vg1x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic;
def int_aarch64_sme_fp8_fdot_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx4_Intrinsic;
// Single
def int_aarch64_sme_fp8_fdot_single_za16_vg1x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fdot_single_za32_vg1x2 : SME_FP8_ZA_SINGLE_VGx2_Intrinsic;

def int_aarch64_sme_fp8_fdot_single_za16_vg1x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic;
def int_aarch64_sme_fp8_fdot_single_za32_vg1x4 : SME_FP8_ZA_SINGLE_VGx4_Intrinsic;
// Multi
def int_aarch64_sme_fp8_fdot_multi_za16_vg1x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fdot_multi_za32_vg1x2 : SME_FP8_ZA_MULTI_VGx2_Intrinsic;

def int_aarch64_sme_fp8_fdot_multi_za16_vg1x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic;
def int_aarch64_sme_fp8_fdot_multi_za32_vg1x4 : SME_FP8_ZA_MULTI_VGx4_Intrinsic;

// FVDOT
// NOTE(review): the za32_vg1x4 FVDOTB/FVDOTT entries deliberately use the
// VGx2 class -- the instructions take a pair of zn vectors even though they
// target a vg1x4 ZA group.  Confirm against the instruction definitions
// before "fixing" this apparent mismatch.
def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME_FP8_ZA_LANE_VGx2_Intrinsic;
4136 }