Warning, /include/llvm/IR/IntrinsicsNVVM.td is written in an unsupported language. File is not indexed.
0001 //===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file defines all of the NVVM-specific intrinsics for use with NVPTX.
0010 //
0011 //===----------------------------------------------------------------------===//
0012
0013 // The following intrinsics were once defined here, but are now auto-upgraded
0014 // to target-generic LLVM intrinsics.
0015 //
0016 // * llvm.nvvm.brev32 --> llvm.bitreverse.i32
0017 // * llvm.nvvm.brev64 --> llvm.bitreverse.i64
0018 // * llvm.nvvm.clz.i --> llvm.ctlz.i32
0019 // * llvm.nvvm.clz.ll --> trunc i64 llvm.ctlz.i64(x) to i32
0020 // * llvm.nvvm.popc.i --> llvm.ctpop.i32
0021 // * llvm.nvvm.popc.ll --> trunc i64 llvm.ctpop.i64 to i32
0022 // * llvm.nvvm.abs.i --> select(x >= -x, x, -x)
0023 // * llvm.nvvm.abs.ll --> ibid.
0024 // * llvm.nvvm.max.i --> select(x sge y, x, y)
0025 // * llvm.nvvm.max.ll --> ibid.
0026 // * llvm.nvvm.max.ui --> select(x uge y, x, y)
0027 // * llvm.nvvm.max.ull --> ibid.
0028 // * llvm.nvvm.min.i --> select(x sle y, x, y)
0029 // * llvm.nvvm.min.ll --> ibid.
0030 // * llvm.nvvm.min.ui --> select(x ule y, x, y)
0031 // * llvm.nvvm.min.ull --> ibid.
0032 // * llvm.nvvm.h2f --> llvm.convert.to.fp16.f32
0033 // * llvm.nvvm.bitcast.f2i --> bitcast
0034 // * llvm.nvvm.bitcast.i2f --> ibid.
0035 // * llvm.nvvm.bitcast.d2ll --> ibid.
0036 // * llvm.nvvm.bitcast.ll2d --> ibid.
0037 // * llvm.nvvm.ptr.gen.to.global --> addrspacecast
0038 // * llvm.nvvm.ptr.gen.to.shared --> ibid.
0039 // * llvm.nvvm.ptr.gen.to.constant --> ibid.
0040 // * llvm.nvvm.ptr.gen.to.local --> ibid.
0041 // * llvm.nvvm.ptr.global.to.gen --> ibid.
0042 // * llvm.nvvm.ptr.shared.to.gen --> ibid.
0043 // * llvm.nvvm.ptr.constant.to.gen --> ibid.
0044 // * llvm.nvvm.ptr.local.to.gen --> ibid.
0045 // * llvm.nvvm.ldg.global.i --> load addrspace(1) !load.invariant
0046 // * llvm.nvvm.ldg.global.f --> ibid.
0047 // * llvm.nvvm.ldg.global.p --> ibid.
0048
// Pointer types qualified with a fixed address-space number, used in the
// intrinsic signatures below.
0049 def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr, address space 1
0050 def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr, address space 3
0051
0052 //
0053 // MISC
0054 //
0055
0056 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
0057 // Geom: m<M>n<N>k<K>. E.g. m8n32k16
0058 // Frag: [a|b|c|d] ([x1|x2|x4] for ldmatrix)
0059 // PtxEltType: PTX type for the element.
//
// Fields:
//   geom, frag, ptx_elt_type: copies of the template arguments.
//   gft:  "<geom>:<frag>:<type>" key, used for geometry-specific lookups.
//   ft:   "<frag>:<type>" key, used for geometry-independent lookups.
//   regs: list of LLVM types, one per register making up the fragment.
//
// 'regs' is a single !cond, so the first matching key wins. The
// geometry-specific (gft) f16/f32 entries are deliberately listed before the
// generic (ft) ones so that they take priority. A combination that matches no
// entry has no 'regs' value.
0060 class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
0061 string geom = Geom;
0062 string frag = Frag;
0063 string ptx_elt_type = PtxEltType;
0064 string gft = Geom#":"#Frag#":"#ptx_elt_type;
0065 string ft = frag#":"#ptx_elt_type;
0066 list<LLVMType> regs = !cond(
0067 // mma fp ops use smaller fragments than wmma fp ops
0068 !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
0069 !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),
0070 !eq(gft,"m16n8k8:a:f16") : !listsplat(llvm_v2f16_ty, 2),
0071 !eq(gft,"m16n8k8:b:f16") : [llvm_v2f16_ty],
0072 !eq(gft,"m16n8k8:c:f16") : !listsplat(llvm_v2f16_ty, 2),
0073 !eq(gft,"m16n8k8:d:f16") : !listsplat(llvm_v2f16_ty, 2),
0074 !eq(gft,"m16n8k8:c:f32") : !listsplat(llvm_float_ty, 4),
0075 !eq(gft,"m16n8k8:d:f32") : !listsplat(llvm_float_ty, 4),
0076 !eq(gft,"m16n8k16:a:f16") : !listsplat(llvm_v2f16_ty, 4),
0077 !eq(gft,"m16n8k16:b:f16") : !listsplat(llvm_v2f16_ty, 2),
0078 !eq(gft,"m16n8k16:c:f16") : !listsplat(llvm_v2f16_ty, 2),
0079 !eq(gft,"m16n8k16:d:f16") : !listsplat(llvm_v2f16_ty, 2),
0080 !eq(gft,"m16n8k16:c:f32") : !listsplat(llvm_float_ty, 4),
0081 !eq(gft,"m16n8k16:d:f32") : !listsplat(llvm_float_ty, 4),
0082 !eq(gft,"m16n8k4:c:f32") : !listsplat(llvm_float_ty, 4),
0083 !eq(gft,"m16n8k4:d:f32") : !listsplat(llvm_float_ty, 4),
0084
0085 // wmma fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16
0086 // All other supported geometries use the same fragment format for f32 and
0087 // f16, so we only need to consider {fragment, type}.
// These generic keys also serve any f16/f32 fragment that has no
// geometry-specific entry above.
0088 !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
0089 !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
0090 !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
0091 !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
0092 !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
0093 !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),
0094
0095 // wmma tf32 -> f32 @ m16n16k8
0096 !eq(gft,"m16n16k8:a:tf32") : !listsplat(llvm_i32_ty, 4),
0097 !eq(gft,"m16n16k8:b:tf32") : !listsplat(llvm_i32_ty, 4),
0098
0099 // mma tf32 -> f32 @ m16n8k4/m16n8k8
0100 !eq(gft,"m16n8k4:a:tf32") : !listsplat(llvm_i32_ty, 2),
0101 !eq(gft,"m16n8k4:b:tf32") : [llvm_i32_ty],
0102 !eq(gft,"m16n8k8:a:tf32") : !listsplat(llvm_i32_ty, 4),
0103 !eq(gft,"m16n8k8:b:tf32") : !listsplat(llvm_i32_ty, 2),
0104
// wmma/mma f64 -> f64 @ m8n8k4
0105 !eq(gft,"m8n8k4:a:f64") : [llvm_double_ty],
0106 !eq(gft,"m8n8k4:b:f64") : [llvm_double_ty],
0107 !eq(gft,"m8n8k4:c:f64") : !listsplat(llvm_double_ty, 2),
0108 !eq(gft,"m8n8k4:d:f64") : !listsplat(llvm_double_ty, 2),
0109
0110 // wmma bf16 -> f32 @ m16n16k16/m8n32k16/m32n8k16
0111 !eq(gft,"m16n16k16:a:bf16") : !listsplat(llvm_i32_ty, 4),
0112 !eq(gft,"m16n16k16:b:bf16") : !listsplat(llvm_i32_ty, 4),
0113 !eq(gft,"m8n32k16:a:bf16") : !listsplat(llvm_i32_ty, 2),
0114 !eq(gft,"m8n32k16:b:bf16") : !listsplat(llvm_i32_ty, 8),
0115 !eq(gft,"m32n8k16:a:bf16") : !listsplat(llvm_i32_ty, 8),
0116 !eq(gft,"m32n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2),
0117
0118 // mma bf16 -> f32 @ m16n8k16/m16n8k8
0119 !eq(gft,"m16n8k16:a:bf16") : !listsplat(llvm_i32_ty, 4),
0120 !eq(gft,"m16n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2),
0121 !eq(gft,"m16n8k8:a:bf16") : !listsplat(llvm_i32_ty, 2),
0122 !eq(gft,"m16n8k8:b:bf16") : [llvm_i32_ty],
0123
0124 // wmma u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
0125 !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
0126 !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
0127 !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
0128 !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
0129 !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
0130 !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),
0131
0132 !eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
0133 !eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
0134 !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
0135 !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
0136 !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
0137 !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),
0138
0139 !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
0140 !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
0141 !eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
0142 !eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
0143 !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
0144 !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),
0145
0146 // mma u8/s8 -> s32 @ m8n8k16/m16n8k16/m16n8k32
0147 !eq(gft,"m8n8k16:a:u8") : [llvm_i32_ty],
0148 !eq(gft,"m8n8k16:a:s8") : [llvm_i32_ty],
0149 !eq(gft,"m8n8k16:b:u8") : [llvm_i32_ty],
0150 !eq(gft,"m8n8k16:b:s8") : [llvm_i32_ty],
0151 !eq(gft,"m8n8k16:c:s32") : !listsplat(llvm_i32_ty, 2),
0152 !eq(gft,"m8n8k16:d:s32") : !listsplat(llvm_i32_ty, 2),
0153
0154 !eq(gft,"m16n8k16:a:u8") : !listsplat(llvm_i32_ty, 2),
0155 !eq(gft,"m16n8k16:a:s8") : !listsplat(llvm_i32_ty, 2),
0156 !eq(gft,"m16n8k16:b:u8") : [llvm_i32_ty],
0157 !eq(gft,"m16n8k16:b:s8") : [llvm_i32_ty],
0158 !eq(gft,"m16n8k16:c:s32") : !listsplat(llvm_i32_ty, 4),
0159 !eq(gft,"m16n8k16:d:s32") : !listsplat(llvm_i32_ty, 4),
0160
0161 !eq(gft,"m16n8k32:a:u8") : !listsplat(llvm_i32_ty, 4),
0162 !eq(gft,"m16n8k32:a:s8") : !listsplat(llvm_i32_ty, 4),
0163 !eq(gft,"m16n8k32:b:u8") : !listsplat(llvm_i32_ty, 2),
0164 !eq(gft,"m16n8k32:b:s8") : !listsplat(llvm_i32_ty, 2),
0165 !eq(gft,"m16n8k32:c:s32") : !listsplat(llvm_i32_ty, 4),
0166 !eq(gft,"m16n8k32:d:s32") : !listsplat(llvm_i32_ty, 4),
0167
0168 // wmma/mma u4/s4 -> s32 @ m8n8k32 (u4/s4)
0169 !eq(gft,"m8n8k32:a:u4") : [llvm_i32_ty],
0170 !eq(gft,"m8n8k32:a:s4") : [llvm_i32_ty],
0171 !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
0172 !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
0173 !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
0174 !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),
0175
0176 !eq(gft,"m16n8k32:a:u4") : !listsplat(llvm_i32_ty, 2),
0177 !eq(gft,"m16n8k32:a:s4") : !listsplat(llvm_i32_ty, 2),
0178 !eq(gft,"m16n8k32:b:u4") : [llvm_i32_ty],
0179 !eq(gft,"m16n8k32:b:s4") : [llvm_i32_ty],
// NOTE: the two m16n8k32 s32 keys below repeat the u8/s8 entries above with
// the same value, so these two lines are never the first match.
0180 !eq(gft,"m16n8k32:c:s32") : !listsplat(llvm_i32_ty, 4),
0181 !eq(gft,"m16n8k32:d:s32") : !listsplat(llvm_i32_ty, 4),
0182
0183 !eq(gft,"m16n8k64:a:u4") : !listsplat(llvm_i32_ty, 4),
0184 !eq(gft,"m16n8k64:a:s4") : !listsplat(llvm_i32_ty, 4),
0185 !eq(gft,"m16n8k64:b:u4") : !listsplat(llvm_i32_ty, 2),
0186 !eq(gft,"m16n8k64:b:s4") : !listsplat(llvm_i32_ty, 2),
0187 !eq(gft,"m16n8k64:c:s32") : !listsplat(llvm_i32_ty, 4),
0188 !eq(gft,"m16n8k64:d:s32") : !listsplat(llvm_i32_ty, 4),
0189
0190 // wmma/mma b1 -> s32 @ m8n8k128(b1)
0191 !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
0192 !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
0193 !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
0194 !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),
0195
0196 !eq(gft,"m16n8k128:a:b1") : !listsplat(llvm_i32_ty, 2),
0197 !eq(gft,"m16n8k128:b:b1") : [llvm_i32_ty],
0198 !eq(gft,"m16n8k128:c:s32") : !listsplat(llvm_i32_ty, 4),
0199 !eq(gft,"m16n8k128:d:s32") : !listsplat(llvm_i32_ty, 4),
0200
0201 !eq(gft,"m16n8k256:a:b1") : !listsplat(llvm_i32_ty, 4),
0202 !eq(gft,"m16n8k256:b:b1") : !listsplat(llvm_i32_ty, 2),
0203 !eq(gft,"m16n8k256:c:s32") : !listsplat(llvm_i32_ty, 4),
0204 !eq(gft,"m16n8k256:d:s32") : !listsplat(llvm_i32_ty, 4),
0205
0206 // ldmatrix b16 -> s32 @ m8n8
0207 !eq(gft,"m8n8:x1:b16") : !listsplat(llvm_i32_ty, 1),
0208 !eq(gft,"m8n8:x2:b16") : !listsplat(llvm_i32_ty, 2),
0209 !eq(gft,"m8n8:x4:b16") : !listsplat(llvm_i32_ty, 4),
0210 );
0211 }
0212
// Builds the intrinsic name ('intr') and the TableGen record name ('record')
// for a WMMA load/store of fragment Frag.
//   Op:         operation component pasted into both names.
//   Layout:     layout component pasted into both names.
//   WithStride: when non-zero, a "stride" component is added to both names.
class WMMA_NAME_LDST<string Op, WMMA_REGS Frag, string Layout, int WithStride> {
  // llvm.nvvm.wmma.<geom>.<Op>.<frag>.<Layout>[.stride].<type>
  string intr = !strconcat("llvm.nvvm.wmma.", Frag.geom,
                           ".", Op,
                           ".", Frag.frag,
                           ".", Layout,
                           !if(WithStride, ".stride", ""),
                           ".", Frag.ptx_elt_type);
  // TODO(tra): record name should ideally use the same field order as the intrinsic.
  // E.g. string record = !subst("llvm", "int",
  //                             !subst(".", "_", llvm));
  // int_nvvm_wmma_<geom>_<Op>_<frag>_<type>_<Layout>[_stride]
  // Note that <type> precedes <Layout> here, unlike in 'intr'.
  string record = !strconcat("int_nvvm_wmma_", Frag.geom,
                             "_", Op,
                             "_", Frag.frag,
                             "_", Frag.ptx_elt_type,
                             "_", Layout,
                             !if(WithStride, "_stride", ""));
}
0233
// Computes the type-signature suffix that distinguishes MMA variants.
// 'id_frags' selects which fragments identify the op; 'ret' is the
// concatenation of ".<ptx_elt_type>" for each of them, in order.
class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
  list<WMMA_REGS> id_frags =
    // FP16 ops are identified by result & accumulator type.
    !if(!eq(A.ptx_elt_type, "f16"),
        [D, C],
        // Other ops are identified by their input types: both A and B when
        // they differ, otherwise A alone.
        !if(!ne(A.ptx_elt_type, B.ptx_elt_type),
            [A, B],
            [A]));
  string ret = !foldl("", id_frags, sig, f, sig # "." # f.ptx_elt_type);
}
0244
// Builds the intrinsic name ('llvm') and record name ('record') of a WMMA
// multiply-accumulate op:
//   llvm.nvvm.wmma.<geom>.mma<b1op>.<ALayout>.<BLayout>[.<Rnd>]<signature>[.satfinite]
// The geometry comes from fragment A; <signature> comes from MMA_SIGNATURE.
class WMMA_NAME<string ALayout, string BLayout, int Satfinite, string Rnd, string b1op,
                WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
  string signature = MMA_SIGNATURE<A, B, C, D>.ret;
  string llvm = !strconcat("llvm.nvvm.wmma.", A.geom,
                           ".mma", b1op,
                           ".", ALayout,
                           ".", BLayout,
                           // The rounding component is omitted when Rnd is empty.
                           !if(!eq(Rnd, ""), "", !strconcat(".", Rnd)),
                           signature,
                           !if(Satfinite, ".satfinite", ""));

  // Same name with the "llvm." prefix replaced by "int_" and dots by
  // underscores.
  string record = !subst(".", "_", !subst("llvm.", "int_", llvm));
}
0261
// Builds the intrinsic name ('llvm') and record name ('record') of an MMA op:
//   llvm.nvvm.mma<b1op>.<geom>.<ALayout>.<BLayout>[.satfinite]<signature>
// The geometry comes from fragment A; <signature> comes from MMA_SIGNATURE.
class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op,
               WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
  string signature = MMA_SIGNATURE<A, B, C, D>.ret;
  string llvm = !strconcat("llvm.nvvm.mma", b1op,
                           ".", A.geom,
                           ".", ALayout,
                           ".", BLayout,
                           !if(Satfinite, ".satfinite", ""),
                           signature);
  // Same name with the "llvm." prefix replaced by "int_" and dots by
  // underscores.
  string record = !subst(".", "_", !subst("llvm.", "int_", llvm));
}
0275
// Builds the intrinsic name ('intr') and record name ('record') of an
// ldmatrix op:
//   llvm.nvvm.ldmatrix.sync.aligned.<geom>.<frag>[.trans].<type>
class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
  string intr = !strconcat("llvm.nvvm.ldmatrix.sync.aligned",
                           ".", Frag.geom,
                           ".", Frag.frag,
                           !if(Trans, ".trans", ""),
                           ".", Frag.ptx_elt_type);
  // Same name with the "llvm." prefix replaced by "int_" and dots by
  // underscores.
  string record = !subst(".", "_", !subst("llvm.", "int_", intr));
}
0286
0287 // Generates a list of 4-tuples of WMMA_REGS, each representing one MMA op.
0288 // Geom: list of supported geometries.
0289 // TypeA/TypeB/TypeC/TypeD: lists of PTX element types for the a/b/c/d fragments.
0290 // TypeB (TypeD) may be empty, in which case B (D) uses the same type as A (C).
//
// 'ret' is the cross product Geom x TypeA x TypeB x TypeC x TypeD, built with
// nested !foldl/!listconcat; each element is the list [A, B, C, D].
0291 class MMA_OPS<list<string> Geom, list<string> TypeA, list<string> TypeB,
0292 list<string> TypeC, list<string> TypeD> {
0293 list<list<WMMA_REGS>> ret =
0294 !foldl([]<list<WMMA_REGS>>, Geom, t1, geom, !listconcat(t1,
0295 !foldl([]<list<WMMA_REGS>>, TypeA, t2, type_a, !listconcat(t2,
// A non-empty TypeB is iterated as given; an empty one is replaced by [type_a].
0296 !foldl([]<list<WMMA_REGS>>, !if(!size(TypeB), TypeB, [type_a]), t3, type_b, !listconcat(t3,
0297 !foldl([]<list<WMMA_REGS>>, TypeC, t4, type_c, !listconcat(t4,
// Likewise, an empty TypeD is replaced by [type_c].
0298 !foldl([]<list<WMMA_REGS>>, !if(!size(TypeD), TypeD, [type_c]), t5, type_d, !listconcat(t5,
0299 [[WMMA_REGS<geom, "a", type_a>,
0300 WMMA_REGS<geom, "b", type_b>,
0301 WMMA_REGS<geom, "c", type_c>,
0302 WMMA_REGS<geom, "d", type_d>]]))))))))));
0303 // Debugging aid for readable representation of the list above.
0304 list<list<string>> ops = !foreach(x, ret, [x[0].gft, x[1].gft, x[2].gft, x[3].gft]);
0305 }
0306
// Generates the list of WMMA_REGS fragments for load/store ops: one entry per
// element of the cross product Geom x Frags x Types, with geometry varying
// slowest and type varying fastest.
class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
  list<WMMA_REGS> ret =
    !foldl([]<WMMA_REGS>, Geom, by_geom, g,
      !listconcat(by_geom,
        !foldl([]<WMMA_REGS>, Frags, by_frag, f,
          !listconcat(by_frag,
            !foldl([]<WMMA_REGS>, Types, by_type, ty,
              !listconcat(by_type, [WMMA_REGS<g, f, ty>]))))));
  // Human-readable "<geom>:<frag>:<type>" keys of the list above, for debugging.
  list<string> ops = !foreach(r, ret, r.gft);
}
0316
// Generates the list of WMMA_REGS fragments for ldmatrix ops: one entry per
// element of the cross product Geom x Frags x Types, with geometry varying
// slowest and type varying fastest.
class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
  list<WMMA_REGS> ret =
    !foldl([]<WMMA_REGS>, Geom, by_geom, g,
      !listconcat(by_geom,
        !foldl([]<WMMA_REGS>, Frags, by_frag, f,
          !listconcat(by_frag,
            !foldl([]<WMMA_REGS>, Types, by_type, ty,
              !listconcat(by_type, [WMMA_REGS<g, f, ty>]))))));
  // Human-readable "<geom>:<frag>:<type>" keys of the list above, for debugging.
  list<string> ops = !foreach(r, ret, r.gft);
}
0326
0327 // Creates list of valid combinations of fragments. This is the main list that
0328 // drives generation of corresponding intrinsics and instructions.
//
// Each *_wmma_ops / *_mma_ops field is a list of [A, B, C, D] fragment tuples
// produced by MMA_OPS; the ldst_* and ldmatrix_* fields are flat fragment
// lists produced by MMA_LDST_OPS / LDMATRIX_OPS. An empty type list passed to
// MMA_OPS for B (or D) means "same type as A (or C)".
0329 class NVVM_MMA_OPS {
// --- WMMA multiply-accumulate ops ---
0330 list<list<WMMA_REGS>> tf32_wmma_ops = MMA_OPS<
0331 ["m16n16k8"],
0332 ["tf32"], [], ["f32"], []>.ret;
0333 list<list<WMMA_REGS>> bf16_wmma_ops = MMA_OPS<
0334 ["m16n16k16", "m32n8k16", "m8n32k16"],
0335 ["bf16"], [], ["f32"], []>.ret;
0336 list<list<WMMA_REGS>> f64_wmma_ops = MMA_OPS<
0337 ["m8n8k4"],
0338 ["f64"], [], ["f64"], []>.ret;
// C and D are listed independently here, so all four f16/f32 C/D pairings are
// generated.
0339 list<list<WMMA_REGS>> fp_wmma_ops = MMA_OPS<
0340 ["m16n16k16", "m32n8k16", "m8n32k16"],
0341 ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
0342 list<list<WMMA_REGS>> int_wmma_ops = MMA_OPS<
0343 ["m16n16k16", "m32n8k16", "m8n32k16"],
0344 ["s8", "u8"], [], ["s32"], []>.ret;
0345 list<list<WMMA_REGS>> subint_wmma_ops = MMA_OPS<
0346 ["m8n8k32"],
0347 ["s4", "u4"], [], ["s32"], []>.ret;
0348 list<list<WMMA_REGS>> bit_wmma_ops = MMA_OPS<
0349 ["m8n8k128"],
0350 ["b1"], [], ["s32"], []>.ret;
0351 list<list<WMMA_REGS>> all_wmma_ops = !listconcat(
0352 tf32_wmma_ops, bf16_wmma_ops, f64_wmma_ops,
0353 fp_wmma_ops, int_wmma_ops, subint_wmma_ops, bit_wmma_ops);
0354
// --- MMA multiply-accumulate ops ---
0355 list<list<WMMA_REGS>> tf32_mma_ops = MMA_OPS<
0356 ["m16n8k4", "m16n8k8"],
0357 ["tf32"], [], ["f32"], []>.ret;
0358 list<list<WMMA_REGS>> bf16_mma_ops = MMA_OPS<
0359 ["m16n8k16", "m16n8k8"],
0360 ["bf16"], [], ["f32"], []>.ret;
0361 list<list<WMMA_REGS>> f64_mma_ops = MMA_OPS<
0362 ["m8n8k4"],
0363 ["f64"], [], ["f64"], []>.ret;
0364 list<list<WMMA_REGS>> fp_mma_ops = MMA_OPS<
0365 ["m8n8k4", "m16n8k8", "m16n8k16"],
0366 ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
// Unlike the WMMA integer ops above, B's types are listed explicitly, so mixed
// A/B signedness combinations are generated as well.
0367 list<list<WMMA_REGS>> int_mma_ops = MMA_OPS<
0368 ["m8n8k16", "m16n8k16", "m16n8k32"],
0369 ["s8", "u8"], ["s8", "u8"], ["s32"], []>.ret;
0370 list<list<WMMA_REGS>> subint_mma_ops = MMA_OPS<
0371 ["m8n8k32", "m16n8k32", "m16n8k64"],
0372 ["s4", "u4"], ["s4", "u4"], ["s32"], []>.ret;
0373 list<list<WMMA_REGS>> bit_mma_ops = MMA_OPS<
0374 ["m8n8k128", "m16n8k128", "m16n8k256"],
0375 ["b1"], [], ["s32"], []>.ret;
0376 list<list<WMMA_REGS>> all_mma_ops = !listconcat(
0377 tf32_mma_ops, bf16_mma_ops, f64_mma_ops,
0378 fp_mma_ops, int_mma_ops, subint_mma_ops, bit_mma_ops);
0379
// --- WMMA load/store fragments ---
0380 list<WMMA_REGS> ldst_ab_ops = MMA_LDST_OPS<
0381 ["m16n16k16", "m32n8k16", "m8n32k16"],
0382 ["a", "b"], ["f16", "u8", "s8", "bf16"]>.ret;
0383 list<WMMA_REGS> ldst_cd_ops = MMA_LDST_OPS<
0384 ["m16n16k16", "m32n8k16", "m8n32k16"],
0385 ["c", "d"], ["f16", "f32", "s32"]>.ret;
0386 list<WMMA_REGS> ldst_tf32_ab_ops = MMA_LDST_OPS<
0387 ["m16n16k8"],
0388 ["a", "b"], ["tf32"]>.ret;
0389 list<WMMA_REGS> ldst_tf32_cd_ops = MMA_LDST_OPS<
0390 ["m16n16k8"],
0391 ["c", "d"], ["f32"]>.ret;
0392 list<WMMA_REGS> ldst_f64_abcd_ops = MMA_LDST_OPS<
0393 ["m8n8k4"],
0394 ["a", "b", "c", "d"], ["f64"]>.ret;
0395 list<WMMA_REGS> ldst_subint_ab_ops = MMA_LDST_OPS<
0396 ["m8n8k32"], ["a", "b"], ["s4","u4"]>.ret;
0397 list<WMMA_REGS> ldst_bit_ab_ops = MMA_LDST_OPS<
0398 ["m8n8k128"], ["a", "b"], ["b1"]>.ret;
// Covers the c/d fragments of both the sub-int (m8n8k32) and bit (m8n8k128)
// geometries.
0399 list<WMMA_REGS> ldst_subint_cd_ops = MMA_LDST_OPS<
0400 ["m8n8k32", "m8n8k128"], ["c", "d"], ["s32"]>.ret;
0401 list<WMMA_REGS> all_ldst_ops = !listconcat(ldst_ab_ops, ldst_cd_ops,
0402 ldst_tf32_ab_ops,
0403 ldst_tf32_cd_ops,
0404 ldst_f64_abcd_ops,
0405 ldst_subint_ab_ops,
0406 ldst_bit_ab_ops,
0407 ldst_subint_cd_ops);
0408 // Separate A/B/C fragments (loads) from D (stores).
0409 list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
0410 list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
0411
// --- ldmatrix fragments ---
0412 list<WMMA_REGS> ldmatrix_b16_ops = LDMATRIX_OPS<
0413 ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
0414 list<WMMA_REGS> all_ldmatrix_ops = ldmatrix_b16_ops;
0415 }
0416
// Record instantiating the class above, so its lists can be referenced as
// NVVM_MMA_OPS.<field>.
0417 def NVVM_MMA_OPS : NVVM_MMA_OPS;
0418
// 'ret' is true when the given fragment/layout pair is a supported WMMA
// load/store, and false otherwise. Typical use:
//   if NVVM_WMMA_LDST_SUPPORTED<...>.ret then
//     def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_WMMA_LDST_SUPPORTED<WMMA_REGS frag, string layout> {
  string f = frag.frag;
  string t = frag.ptx_elt_type;

  // The only rejected case: a sub-int (b1/u4/s4) fragment whose layout is not
  // the required one, i.e. an A fragment that is not "row" or a B fragment
  // that is not "col". Everything else is supported.
  bit ret = !if(
    !and(!or(!eq(t, "b1"), !eq(t, "u4"), !eq(t, "s4")),
         !or(!and(!eq(f, "a"), !ne(layout, "row")),
             !and(!eq(f, "b"), !ne(layout, "col")))),
    false,
    true);
}
0442
// 'ret' is true when the given layout/satf/rnd combination is supported for
// the WMMA op described by 'frags', and false otherwise. Typical use:
//   if NVVM_WMMA_SUPPORTED<...>.ret then
//     def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_WMMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, int satf, string rnd> {
  // WMMA ops check both layouts.
  string layout = layout_a # ":" # layout_b;
  // Element type of the first fragment in 'frags'.
  string t = frags[0].ptx_elt_type;

  // The op is rejected if any one of the following holds; otherwise it is
  // supported.
  bit ret = !if(
    !or(
      // Only f64 supports rounding options: a non-empty rnd with any other
      // type is invalid.
      !and(!ne(t, "f64"), !ne(rnd, "")),

      // satf is only valid for s8, u8, s4, u4 and f16.
      !and(!eq(satf, 1),
           !ne(t, "s8"),
           !ne(t, "u8"),
           !ne(t, "s4"),
           !ne(t, "u4"),
           !ne(t, "f16")),

      // Sub-int types (s4/u4/b1) require the row:col layout.
      !and(!or(!eq(t, "s4"), !eq(t, "u4"), !eq(t, "b1")),
           !ne(layout, "row:col"))),
    false,
    true);
}
0475
// 'ret' lists the b1 operation suffixes to generate for an op: the two popc
// variants when the first fragment's element type is b1, otherwise a single
// empty suffix.
class NVVM_MMA_B1OPS<list<WMMA_REGS> frags> {
  list<string> ret = !if(!eq(frags[0].ptx_elt_type, "b1"),
                         [".xor.popc", ".and.popc"],
                         [""]);
}
0482
// Returns true if this combination of layout/satf for MMA ops is supported;
// false otherwise.
// E.g.
// if NVVM_MMA_SUPPORTED<...>.ret then
//   def : FOO<>; // The record will only be defined for supported ops.
//
// frags is indexed as [A, B, C, D]. The rules below are evaluated in order
// and the first one that matches decides the result.
class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, int satf> {
  // MMA ops check both layouts.
  string layout = layout_a # ":" # layout_b;
  string a_type = frags[0].ptx_elt_type;
  string b_type = frags[1].ptx_elt_type;
  string c_type = frags[2].ptx_elt_type;
  string d_type = frags[3].ptx_elt_type;
  string geom = frags[0].geom;

  // gcd is a shortcut used to identify instructions that depend on
  // geom+frag_c+frag_d.
  string gcd = geom # ":" # c_type # d_type;
  bit ret = !cond(

    // Limit satf to valid types
    !and(!eq(satf, 1),
         !ne(a_type, "s8"),
         !ne(a_type, "u8"),
         !ne(a_type, "s4"),
         !ne(a_type, "u4")): false,

    // m8n8k4 has no C=f32 D=f16 variant.
    !eq(gcd, "m8n8k4:f32f16"): false,

    // only m8n8k4 for f16 does not require row:col layout
    !and(!ne(layout, "row:col"),
         !or(!ne(geom, "m8n8k4"),
             !ne(a_type, "f16"))) : false,

    // m16n8k8 requires A and B to be the same type and C and D to be the same
    // type.
    !and(!eq(geom, "m16n8k8"),
         !or(!ne(a_type, b_type),
             !ne(c_type, d_type))): false,

    // m16n8k16 requires C and D to be the same type.
    // (This must test m16n8k16: the m16n8k8 C/D case is already rejected by
    // the rule above.)
    !and(!eq(geom, "m16n8k16"),
         !ne(c_type, d_type)): false,

    // All other are OK.
    true: true
  );
}
0532
// 'ret' is true when the given fragment is supported for ldmatrix ops, and
// false otherwise. Typical use:
//   if NVVM_LDMATRIX_SUPPORTED<...>.ret then
//     def : FOO<>; // The record will only be defined for supported ops.
//
class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag> {
  string g = frag.geom;
  string t = frag.ptx_elt_type;

  // Only geometry m8n8 with element type b16 is currently supported.
  bit ret = !if(!and(!eq(g, "m8n8"), !eq(t, "b16")), true, false);
}
0549
// Bundles the names and types describing one shfl intrinsic variant.
//   sync:        selects the "sync" flavour; adds a "sync_" name component and
//                a leading i32 argument.
//   mode:        mode component of the name.
//   type:        "i32" or "f32"; selects the operand type. Any other value
//                matches no case in OpType.
//   return_pred: when set, the name gets a "p" suffix and an i1 is returned in
//                addition to the value.
class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
  // [sync_]<mode>_<type>[p]
  string Suffix = !strconcat(!if(sync, "sync_", ""),
                             mode, "_",
                             type,
                             !if(return_pred, "p", ""));

  string Name = !strconcat("int_nvvm_shfl_", Suffix);
  string Builtin = !strconcat("__nvvm_shfl_", Suffix);
  // Intrinsic name uses dots where the suffix uses underscores.
  string IntrName = !strconcat("llvm.nvvm.shfl.", !subst("_", ".", Suffix));
  // Exactly one of these two flags is set; they are complements of each other.
  bit withGccBuiltin = !not(return_pred);
  bit withoutGccBuiltin = return_pred;
  LLVMType OpType = !cond(
    !eq(type, "f32"): llvm_float_ty,
    !eq(type, "i32"): llvm_i32_ty);
  list<LLVMType> RetTy = !if(return_pred, [OpType, llvm_i1_ty], [OpType]);
  // The sync flavour takes one extra leading i32 argument.
  list<LLVMType> ArgsTy = !if(sync,
    [llvm_i32_ty, OpType, llvm_i32_ty, llvm_i32_ty],
    [OpType, llvm_i32_ty, llvm_i32_ty]);
}
0569
// Describes the record name, argument types and properties of the
// int_nvvm_cp_async_bulk_tensor_g2s_<mode>_<dim>d intrinsic.
//   dim:  number of i32 tensor-dimension arguments.
//   mode: name component; the value "im2col" additionally enables (dim - 2)
//         i16 im2col-offset arguments.
0570 class CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, string mode> {
0571 string Name = "int_nvvm_cp_async_bulk_tensor_g2s_" # mode # "_" # dim # "d";
0572
0573 bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0);
0574 int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0);
0575 list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
0576 list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0577 list<LLVMType> ArgsTy = !listconcat(
0578 [llvm_shared_ptr_ty, // dst_smem_ptr
0579 llvm_shared_ptr_ty, // mbarrier_smem_ptr
0580 llvm_ptr_ty], // tensormap_ptr
0581 TensorDimsTy, // actual tensor dims
0582 Im2ColOffsetsTy, // im2col offsets
0583 [llvm_i16_ty, // cta_mask
0584 llvm_i64_ty, // cache_hint
0585 llvm_i1_ty, // Flag for cta_mask
0586 llvm_i1_ty] // Flag for cache_hint
0587 );
0588
// Index of the first i1 flag in ArgsTy: 3 pointers + dim tensor dims +
// NumIm2ColOffsets offsets + cta_mask + cache_hint precede it.
0589 int TempFlagsStartIdx = !add(dim, 5);
0590 int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);
// Both trailing i1 flags are marked ImmArg.
0591 list<IntrinsicProperty> IntrProp = [IntrConvergent,
0592 WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
0593 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoCapture<ArgIndex<2>>,
0594 ImmArg<ArgIndex<FlagsStartIdx>>,
0595 ImmArg<ArgIndex<!add(FlagsStartIdx, 1)>>];
0596 }
0597
// Describes the record name, argument types and properties of the
// int_nvvm_cp_async_bulk_tensor_s2g_<mode>_<dim>d intrinsic.
//   dim:  number of i32 tensor-dimension arguments.
//   mode: name component only; it does not affect the argument list here.
0598 class CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, string mode> {
0599 string Name = "int_nvvm_cp_async_bulk_tensor_s2g_" # mode # "_" # dim # "d";
0600
0601 list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0602 list<LLVMType> ArgsTy = !listconcat(
0603 [llvm_shared_ptr_ty, // src_smem_ptr
0604 llvm_ptr_ty], // tensormap_ptr
0605 TensorDimsTy, // actual tensor dims
0606 [llvm_i64_ty, // cache_hint
0607 llvm_i1_ty] // Flag for cache_hint
0608 );
// Index of the i1 flag in ArgsTy: 2 pointers + dim tensor dims + cache_hint
// precede it.
0609 int FlagsStartIdx = !add(dim, 3);
0610 list<IntrinsicProperty> IntrProp = [IntrConvergent,
0611 ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
0612 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
0613 ImmArg<ArgIndex<FlagsStartIdx>>];
0614 }
0615
// Describes the record name, argument types and properties of the
// int_nvvm_cp_async_bulk_tensor_prefetch_<mode>_<dim>d intrinsic.
//   dim:  number of i32 tensor-dimension arguments.
//   mode: name component; the value "im2col" additionally enables (dim - 2)
//         i16 im2col-offset arguments.
0616 class CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
0617 string Name = "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # "d";
0618
0619 bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0);
0620 int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0);
0621 list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
0622 list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0623 list<LLVMType> ArgsTy = !listconcat(
0624 [llvm_ptr_ty], // tensormap_ptr
0625 TensorDimsTy, // actual tensor dims
0626 Im2ColOffsetsTy, // im2col offsets
0627 [llvm_i64_ty, // cache_hint
0628 llvm_i1_ty] // Flag for cache_hint
0629 );
0630
// Index of the i1 flag in ArgsTy: 1 pointer + dim tensor dims +
// NumIm2ColOffsets offsets + cache_hint precede it.
0631 int TempFlagsStartIdx = !add(dim, 2);
0632 int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);
0633 list<IntrinsicProperty> IntrProp = [IntrConvergent,
0634 ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
0635 ImmArg<ArgIndex<FlagsStartIdx>>];
0636 }
0637
// Describes the record name, argument types and properties of the
// int_nvvm_cp_async_bulk_tensor_reduce_<op>_<mode>_<dim>d intrinsic.
//   dim:  number of i32 tensor-dimension arguments.
//   mode: name component only; it does not affect the argument list here.
//   op:   name component only.
0638 class CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, string mode, string op> {
0639 string Suffix = op # "_" # mode # "_" # dim # "d";
0640 string Name = "int_nvvm_cp_async_bulk_tensor_reduce_" # Suffix;
0641
0642 list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0643 list<LLVMType> ArgsTy = !listconcat(
0644 [llvm_shared_ptr_ty, // src_smem_ptr
0645 llvm_ptr_ty], // tensormap_ptr
0646 TensorDimsTy, // actual tensor dims
0647 [llvm_i64_ty, // cache_hint
0648 llvm_i1_ty] // Flag for cache_hint
0649 );
// Index of the i1 flag in ArgsTy: 2 pointers + dim tensor dims + cache_hint
// precede it.
0650 int FlagsStartIdx = !add(dim, 3);
0651 list<IntrinsicProperty> IntrProp = [IntrConvergent,
0652 ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
0653 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
0654 ImmArg<ArgIndex<FlagsStartIdx>>];
0655 }
0656
0657 let TargetPrefix = "nvvm" in {
// (i32, i32, i32) -> i32; accesses no memory and is speculatable.
// Exposed to Clang as __nvvm_prmt.
0658 def int_nvvm_prmt : ClangBuiltin<"__nvvm_prmt">,
0659 DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
0660 [IntrNoMem, IntrSpeculatable]>;
0661
// (i32) -> void; convergent, and marked as having side effects even though it
// accesses no memory. Exposed to Clang as __nvvm_nanosleep.
0662 def int_nvvm_nanosleep : ClangBuiltin<"__nvvm_nanosleep">,
0663 DefaultAttrsIntrinsic<[], [llvm_i32_ty],
0664 [IntrConvergent, IntrNoMem, IntrHasSideEffects]>;
0665
0666 //
0667 // Min Max
0668 //
0669
// Defines int_nvvm_fmin<variant> and int_nvvm_fmax<variant> for every listed
// variant suffix. All of them take two operands of the result type and are
// IntrNoMem, IntrSpeculatable and Commutative. The f16 and f16x2 variants are
// defined without a ClangBuiltin; all others get the builtin
// __nvvm_f<operation><variant>.
0670 foreach operation = ["min", "max"] in {
// double
0671 def int_nvvm_f # operation # _d :
0672 ClangBuiltin<!strconcat("__nvvm_f", operation, "_d")>,
0673 DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0674 [IntrNoMem, IntrSpeculatable, Commutative]>;
0675
// float
0676 foreach variant = ["_f", "_ftz_f", "_nan_f", "_ftz_nan_f",
0677 "_xorsign_abs_f", "_ftz_xorsign_abs_f", "_nan_xorsign_abs_f",
0678 "_ftz_nan_xorsign_abs_f"] in {
0679 def int_nvvm_f # operation # variant :
0680 ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
0681 DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0682 [IntrNoMem, IntrSpeculatable, Commutative]>;
0683 }
0684
// half (no ClangBuiltin)
0685 foreach variant = ["_f16", "_ftz_f16", "_nan_f16", "_ftz_nan_f16",
0686 "_xorsign_abs_f16", "_ftz_xorsign_abs_f16", "_nan_xorsign_abs_f16",
0687 "_ftz_nan_xorsign_abs_f16"] in {
0688 def int_nvvm_f # operation # variant :
0689 DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty],
0690 [IntrNoMem, IntrSpeculatable, Commutative]>;
0691 }
0692
// <2 x half> (no ClangBuiltin)
0693 foreach variant = ["_f16x2", "_ftz_f16x2", "_nan_f16x2",
0694 "_ftz_nan_f16x2", "_xorsign_abs_f16x2", "_ftz_xorsign_abs_f16x2",
0695 "_nan_xorsign_abs_f16x2", "_ftz_nan_xorsign_abs_f16x2"] in {
0696 def int_nvvm_f # operation # variant :
0697 DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty],
0698 [IntrNoMem, IntrSpeculatable, Commutative]>;
0699 }
0700
// bfloat
0701 foreach variant = ["_bf16", "_ftz_bf16", "_nan_bf16", "_ftz_nan_bf16",
0702 "_xorsign_abs_bf16", "_ftz_xorsign_abs_bf16", "_nan_xorsign_abs_bf16",
0703 "_ftz_nan_xorsign_abs_bf16"] in {
0704 def int_nvvm_f # operation # variant :
0705 ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
0706 DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty],
0707 [IntrNoMem, IntrSpeculatable, Commutative]>;
0708 }
0709
// <2 x bfloat>
0710 foreach variant = ["_bf16x2", "_ftz_bf16x2", "_nan_bf16x2",
0711 "_ftz_nan_bf16x2", "_xorsign_abs_bf16x2", "_ftz_xorsign_abs_bf16x2",
0712 "_nan_xorsign_abs_bf16x2", "_ftz_nan_xorsign_abs_bf16x2"] in {
0713 def int_nvvm_f # operation # variant :
0714 ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
0715 DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty],
0716 [IntrNoMem, IntrSpeculatable, Commutative]>;
0717 }
0718 }
0719
0720 //
0721 // Multiplication
0722 //
0723
// mulhi intrinsics: two operands and a result of the same integer width
// (i16, i32 or i64). Each width has two records whose names differ by a "u"
// in the suffix (s/us, i/ui, ll/ull) and whose signatures are identical. All
// are IntrNoMem, IntrSpeculatable and Commutative, and each is exposed to
// Clang under the matching __nvvm_mulhi_* builtin.
// i16
0724 def int_nvvm_mulhi_s : ClangBuiltin<"__nvvm_mulhi_s">,
0725 DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
0726 [IntrNoMem, IntrSpeculatable, Commutative]>;
0727 def int_nvvm_mulhi_us : ClangBuiltin<"__nvvm_mulhi_us">,
0728 DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
0729 [IntrNoMem, IntrSpeculatable, Commutative]>;
0730
// i32
0731 def int_nvvm_mulhi_i : ClangBuiltin<"__nvvm_mulhi_i">,
0732 DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0733 [IntrNoMem, IntrSpeculatable, Commutative]>;
0734 def int_nvvm_mulhi_ui : ClangBuiltin<"__nvvm_mulhi_ui">,
0735 DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0736 [IntrNoMem, IntrSpeculatable, Commutative]>;
0737
// i64
0738 def int_nvvm_mulhi_ll : ClangBuiltin<"__nvvm_mulhi_ll">,
0739 DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
0740 [IntrNoMem, IntrSpeculatable, Commutative]>;
0741 def int_nvvm_mulhi_ull : ClangBuiltin<"__nvvm_mulhi_ull">,
0742 DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
0743 [IntrNoMem, IntrSpeculatable, Commutative]>;
0744
// float multiply intrinsics int_nvvm_mul_<rnd>[_ftz]_f, one per rnd
// component (rn, rz, rm, rp), each with and without the "_ftz" component.
// All take two floats, return a float, and are IntrNoMem, IntrSpeculatable
// and Commutative. Each is exposed to Clang as __nvvm_mul_<rnd>[_ftz]_f.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_mul_ # rnd # ftz # _f :
      ClangBuiltin<!strconcat("__nvvm_mul_", rnd, ftz, "_f")>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
                            [IntrNoMem, IntrSpeculatable, Commutative]>;
  }
}
0769
// double x double -> double multiply, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_mul_ # rnd # _d :
      ClangBuiltin<!strconcat("__nvvm_mul_", rnd, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable, Commutative]>;
}
0782
// mul24: "i" and "ui" variants, both i32 x i32 -> i32.
foreach sfx = ["i", "ui"] in {
  def int_nvvm_mul24_ # sfx :
      ClangBuiltin<!strconcat("__nvvm_mul24_", sfx)>,
      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                            [IntrNoMem, IntrSpeculatable, Commutative]>;
}
0789
0790 //
0791 // Div
0792 //
0793
// float / float -> float division. One intrinsic per mode tag
// (approx, rn, rz, rm, rp), each with and without the "_ftz" tag.
// These carry IntrNoMem only.
foreach mode = ["approx", "rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_div_ # mode # ftz # _f :
        ClangBuiltin<!strconcat("__nvvm_div_", mode, ftz, "_f")>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
                              [IntrNoMem]>;
  }
}
0828
// double / double -> double division, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_div_ # rnd # _d :
      ClangBuiltin<!strconcat("__nvvm_div_", rnd, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
                            [IntrNoMem]>;
}
0841
// div.full: float / float -> float, plain and "_ftz" flavours.
foreach ftz = ["", "_ftz"] in {
  def int_nvvm_div_full # ftz :
      ClangBuiltin<!strconcat("__nvvm_div_full", ftz)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
                            [IntrNoMem]>;
}
0848
0849 //
0850 // Sad
0851 //
0852
// sad: three operands and a result of one common integer type.
// Grouped by width: i16 ("s", "us"), i32 ("i", "ui"), i64 ("ll", "ull").
foreach sfx = ["s", "us"] in {
  def int_nvvm_sad_ # sfx :
      ClangBuiltin<!strconcat("__nvvm_sad_", sfx)>,
      DefaultAttrsIntrinsic<[llvm_i16_ty],
                            [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
                            [IntrNoMem, Commutative, IntrSpeculatable]>;
}

foreach sfx = ["i", "ui"] in {
  def int_nvvm_sad_ # sfx :
      ClangBuiltin<!strconcat("__nvvm_sad_", sfx)>,
      DefaultAttrsIntrinsic<[llvm_i32_ty],
                            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                            [IntrNoMem, Commutative, IntrSpeculatable]>;
}

foreach sfx = ["ll", "ull"] in {
  def int_nvvm_sad_ # sfx :
      ClangBuiltin<!strconcat("__nvvm_sad_", sfx)>,
      DefaultAttrsIntrinsic<[llvm_i64_ty],
                            [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
                            [IntrNoMem, Commutative, IntrSpeculatable]>;
}
0873
0874
0875 //
0876 // Floor Ceil
0877 //
0878
// floor / ceil: for each op, two float variants ("_ftz_f", "_f") followed by
// one double variant ("_d"). Operand and result types match.
foreach op = ["floor", "ceil"] in {
  foreach sfx = ["_ftz_f", "_f"] in {
    def int_nvvm_ # op # sfx :
        ClangBuiltin<!strconcat("__nvvm_", op, sfx)>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
  def int_nvvm_ # op # _d :
      ClangBuiltin<!strconcat("__nvvm_", op, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
0892
0893 //
0894 // Abs
0895 //
0896
// fabs: two float variants ("_ftz_f", "_f") and one double variant ("_d").
foreach sfx = ["_ftz_f", "_f"] in {
  def int_nvvm_fabs # sfx :
      ClangBuiltin<!strconcat("__nvvm_fabs", sfx)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
def int_nvvm_fabs_d : ClangBuiltin<"__nvvm_fabs_d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty],
                          [IntrNoMem, IntrSpeculatable]>;
0903
0904 //
0905 // Abs, Neg bf16, bf16x2
0906 //
0907
// abs / neg on bfloat (scalar) and v2bf16 (packed pair); IntrNoMem only.
foreach op = ["abs", "neg"] in {
  def int_nvvm_ # op # _bf16 :
      ClangBuiltin<"__nvvm_" # op # "_bf16">,
      DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty],
                            [IntrNoMem]>;
  def int_nvvm_ # op # _bf16x2 :
      ClangBuiltin<"__nvvm_" # op # "_bf16x2">,
      DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty],
                            [IntrNoMem]>;
}
0916
0917 //
0918 // Round
0919 //
0920
// round: two float variants ("_ftz_f", "_f") and one double variant ("_d").
foreach sfx = ["_ftz_f", "_f"] in {
  def int_nvvm_round # sfx :
      ClangBuiltin<!strconcat("__nvvm_round", sfx)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}

def int_nvvm_round_d : ClangBuiltin<"__nvvm_round_d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty],
                          [IntrNoMem, IntrSpeculatable]>;
0928
0929 //
0930 // Trunc
0931 //
0932
// trunc: two float variants ("_ftz_f", "_f") and one double variant ("_d").
foreach sfx = ["_ftz_f", "_f"] in {
  def int_nvvm_trunc # sfx :
      ClangBuiltin<!strconcat("__nvvm_trunc", sfx)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}

def int_nvvm_trunc_d : ClangBuiltin<"__nvvm_trunc_d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty],
                          [IntrNoMem, IntrSpeculatable]>;
0940
0941 //
0942 // Saturate
0943 //
0944
// saturate: two float variants ("_ftz_f", "_f") and one double variant ("_d").
foreach sfx = ["_ftz_f", "_f"] in {
  def int_nvvm_saturate # sfx :
      ClangBuiltin<!strconcat("__nvvm_saturate", sfx)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}

def int_nvvm_saturate_d : ClangBuiltin<"__nvvm_saturate_d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty],
                          [IntrNoMem, IntrSpeculatable]>;
0952
0953 //
0954 // Exp2 Log2
0955 //
0956
// ex2.approx: float ("_ftz_f", "_f") and double ("_d") variants are bound to
// Clang builtins; the half and v2f16 variants are declared without one.
foreach sfx = ["_ftz_f", "_f"] in {
  def int_nvvm_ex2_approx # sfx :
      ClangBuiltin<!strconcat("__nvvm_ex2_approx", sfx)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
}
def int_nvvm_ex2_approx_d : ClangBuiltin<"__nvvm_ex2_approx_d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_f16 :
    DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_f16x2 :
    DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
0967
// lg2.approx: two float variants ("_ftz_f", "_f") and one double ("_d").
foreach sfx = ["_ftz_f", "_f"] in {
  def int_nvvm_lg2_approx # sfx :
      ClangBuiltin<!strconcat("__nvvm_lg2_approx", sfx)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
}
def int_nvvm_lg2_approx_d : ClangBuiltin<"__nvvm_lg2_approx_d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
0974
0975 //
0976 // Sin Cos
0977 //
0978
// sin.approx / cos.approx: float -> float, with and without the "_ftz" tag.
foreach fn = ["sin", "cos"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_ # fn # _approx # ftz # _f :
        ClangBuiltin<!strconcat("__nvvm_", fn, "_approx", ftz, "_f")>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  }
}
0988
0989 //
0990 // Fma
0991 //
0992
// fma.rn on half: the six variants are the product of an optional modifier
// ("", "_sat", "_relu") and an optional "_ftz" tag. No Clang builtin binding.
foreach mod = ["", "_sat", "_relu"] in {
  foreach ftz = ["", "_ftz"] in {
    def int_nvvm_fma_rn # ftz # mod # _f16 :
        DefaultAttrsIntrinsic<[llvm_half_ty],
                              [llvm_half_ty, llvm_half_ty, llvm_half_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
0999
// fma.rn on v2f16: the six variants are the product of an optional modifier
// ("", "_sat", "_relu") and an optional "_ftz" tag. No Clang builtin binding.
foreach mod = ["", "_sat", "_relu"] in {
  foreach ftz = ["", "_ftz"] in {
    def int_nvvm_fma_rn # ftz # mod # _f16x2 :
        DefaultAttrsIntrinsic<[llvm_v2f16_ty],
                              [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1006
// fma.rn on bfloat: optional modifier ("", "_sat", "_relu") crossed with an
// optional "_ftz" tag; each variant is bound to the matching __nvvm_fma_*
// Clang builtin.
foreach mod = ["", "_sat", "_relu"] in {
  foreach ftz = ["", "_ftz"] in {
    def int_nvvm_fma_rn # ftz # mod # _bf16 :
        ClangBuiltin<!strconcat("__nvvm_fma_rn", ftz, mod, "_bf16")>,
        DefaultAttrsIntrinsic<[llvm_bfloat_ty],
                              [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1014
// fma.rn on v2bf16: optional modifier ("", "_sat", "_relu") crossed with an
// optional "_ftz" tag; each variant is bound to the matching __nvvm_fma_*
// Clang builtin.
foreach mod = ["", "_sat", "_relu"] in {
  foreach ftz = ["", "_ftz"] in {
    def int_nvvm_fma_rn # ftz # mod # _bf16x2 :
        ClangBuiltin<!strconcat("__nvvm_fma_rn", ftz, mod, "_bf16x2")>,
        DefaultAttrsIntrinsic<[llvm_v2bf16_ty],
                              [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1022
// fma on float: one intrinsic per rounding tag (rn, rz, rm, rp), each with and
// without the "_ftz" tag. Three float operands, float result.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_fma_ # rnd # ftz # _f :
        ClangBuiltin<!strconcat("__nvvm_fma_", rnd, ftz, "_f")>,
        DefaultAttrsIntrinsic<[llvm_float_ty],
                              [llvm_float_ty, llvm_float_ty, llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1030
// fma on double: one intrinsic per rounding tag. Three double operands,
// double result.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_fma_ # rnd # _d :
      ClangBuiltin<!strconcat("__nvvm_fma_", rnd, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty],
                            [llvm_double_ty, llvm_double_ty, llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1037
1038 //
1039 // Rcp
1040 //
1041
// rcp on float: one intrinsic per rounding tag (rn, rz, rm, rp), each with and
// without the "_ftz" tag. IntrNoMem only.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_rcp_ # rnd # ftz # _f :
        ClangBuiltin<!strconcat("__nvvm_rcp_", rnd, ftz, "_f")>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  }
}
1058
// rcp on double: one intrinsic per rounding tag. IntrNoMem only.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_rcp_ # rnd # _d :
      ClangBuiltin<!strconcat("__nvvm_rcp_", rnd, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
}
1067
// rcp.approx.ftz: one float variant and one double variant.
def int_nvvm_rcp_approx_ftz_f :
    ClangBuiltin<"__nvvm_rcp_approx_ftz_f">,
    DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty],
                          [IntrNoMem]>;
def int_nvvm_rcp_approx_ftz_d :
    ClangBuiltin<"__nvvm_rcp_approx_ftz_d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty],
                          [IntrNoMem]>;
1072
1073 //
1074 // Sqrt
1075 //
1076
// sqrt on float. The untagged "sqrt_f" comes first, followed by one intrinsic
// per mode tag (rn, rz, rm, rp, approx), each with and without "_ftz".
def int_nvvm_sqrt_f : ClangBuiltin<"__nvvm_sqrt_f">,
    DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;

foreach mode = ["rn", "rz", "rm", "rp", "approx"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_sqrt_ # mode # ftz # _f :
        ClangBuiltin<!strconcat("__nvvm_sqrt_", mode, ftz, "_f")>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  }
}
1099
// sqrt on double: one intrinsic per rounding tag. IntrNoMem only.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_sqrt_ # rnd # _d :
      ClangBuiltin<!strconcat("__nvvm_sqrt_", rnd, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
}
1108
1109 //
1110 // Rsqrt
1111 //
1112
// rsqrt.approx: for each of the "_ftz" and plain flavours, a float ("_f") and
// a double ("_d") variant.
foreach ftz = ["_ftz", ""] in {
  def int_nvvm_rsqrt_approx # ftz # _f :
      ClangBuiltin<!strconcat("__nvvm_rsqrt_approx", ftz, "_f")>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
  def int_nvvm_rsqrt_approx # ftz # _d :
      ClangBuiltin<!strconcat("__nvvm_rsqrt_approx", ftz, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
}
1121
1122 //
1123 // Add
1124 //
1125
// float + float -> float add: one intrinsic per rounding tag
// (rn, rz, rm, rp), each with and without the "_ftz" tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_add_ # rnd # ftz # _f :
        ClangBuiltin<!strconcat("__nvvm_add_", rnd, ftz, "_f")>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable, Commutative]>;
  }
}
1150
// double + double -> double add, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_add_ # rnd # _d :
      ClangBuiltin<!strconcat("__nvvm_add_", rnd, "_d")>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable, Commutative]>;
}
1163
1164 //
1165 // Dot Product
1166 //
// idp4a / idp2a: one pair of intrinsics per "<a>_<b>" tag combination, where
// each tag is "s" or "u". idp2a takes an extra i1 operand at index 2 that
// must be an immediate.
foreach signs = ["s_s", "s_u", "u_s", "u_u"] in {
  def int_nvvm_idp4a_ # signs :
      DefaultAttrsIntrinsic<[llvm_i32_ty],
                            [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
                            [IntrNoMem, IntrSpeculatable]>;
  def int_nvvm_idp2a_ # signs :
      DefaultAttrsIntrinsic<[llvm_i32_ty],
                            [llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
                            [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>;
}
1179
1180 //
1181 // Funnel-shift
1182 //
// fshl.clamp / fshr.clamp: overloaded on one integer type shared by the
// result and all three operands.
def int_nvvm_fshl_clamp :
    DefaultAttrsIntrinsic<[llvm_anyint_ty],
                          [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
def int_nvvm_fshr_clamp :
    DefaultAttrsIntrinsic<[llvm_anyint_ty],
                          [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                          [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
1188
1189 //
1190 // FLO - Find Leading One
1191 //
// flo.s / flo.u: i32 result from an overloaded integer operand plus an i1
// operand at index 1 that must be an immediate.
def int_nvvm_flo_s :
    DefaultAttrsIntrinsic<[llvm_i32_ty],
                          [llvm_anyint_ty, llvm_i1_ty],
                          [IntrNoMem, IntrSpeculatable, IntrWillReturn,
                           ImmArg<ArgIndex<1>>]>;
def int_nvvm_flo_u :
    DefaultAttrsIntrinsic<[llvm_i32_ty],
                          [llvm_anyint_ty, llvm_i1_ty],
                          [IntrNoMem, IntrSpeculatable, IntrWillReturn,
                           ImmArg<ArgIndex<1>>]>;
1197
1198 //
1199 // Convert
1200 //
1201
// d2f: double -> float, one intrinsic per rounding tag, each with and without
// the "_ftz" tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_d2f_ # rnd # ftz :
        ClangBuiltin<!strconcat("__nvvm_d2f_", rnd, ftz)>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1218
// d2i: double -> i32, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_d2i_ # rnd :
      ClangBuiltin<!strconcat("__nvvm_d2i_", rnd)>,
      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1227
// d2ui: double -> i32, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_d2ui_ # rnd :
      ClangBuiltin<!strconcat("__nvvm_d2ui_", rnd)>,
      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1236
// i2d: i32 -> double, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_i2d_ # rnd :
      ClangBuiltin<!strconcat("__nvvm_i2d_", rnd)>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1245
// ui2d: i32 -> double, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_ui2d_ # rnd :
      ClangBuiltin<!strconcat("__nvvm_ui2d_", rnd)>,
      DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1254
// f2i: float -> i32, one intrinsic per rounding tag, each with and without
// the "_ftz" tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_f2i_ # rnd # ftz :
        ClangBuiltin<!strconcat("__nvvm_f2i_", rnd, ftz)>,
        DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1271
// f2ui: float -> i32, one intrinsic per rounding tag, each with and without
// the "_ftz" tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_f2ui_ # rnd # ftz :
        ClangBuiltin<!strconcat("__nvvm_f2ui_", rnd, ftz)>,
        DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1288
// i2f: i32 -> float, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_i2f_ # rnd :
      ClangBuiltin<!strconcat("__nvvm_i2f_", rnd)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1297
// ui2f: i32 -> float, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_ui2f_ # rnd :
      ClangBuiltin<!strconcat("__nvvm_ui2f_", rnd)>,
      DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1306
// lohi_i2d: builds a double from two i32 operands, passed as (low word,
// high word). The operand order is significant: exchanging the two words
// yields a different bit pattern. The intrinsic must therefore NOT be
// marked Commutative, otherwise the optimizer is free to swap or CSE the
// operands and produce a wrong value.
def int_nvvm_lohi_i2d : ClangBuiltin<"__nvvm_lohi_i2d">,
    DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
                          [IntrNoMem, IntrSpeculatable]>;
1310
// d2i_lo / d2i_hi: double -> i32.
foreach half = ["lo", "hi"] in {
  def int_nvvm_d2i_ # half :
      ClangBuiltin<!strconcat("__nvvm_d2i_", half)>,
      DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1315
// f2ll: float -> i64, one intrinsic per rounding tag, each with and without
// the "_ftz" tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_f2ll_ # rnd # ftz :
        ClangBuiltin<!strconcat("__nvvm_f2ll_", rnd, ftz)>,
        DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1332
// f2ull: float -> i64, one intrinsic per rounding tag, each with and without
// the "_ftz" tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  foreach ftz = ["_ftz", ""] in {
    def int_nvvm_f2ull_ # rnd # ftz :
        ClangBuiltin<!strconcat("__nvvm_f2ull_", rnd, ftz)>,
        DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable]>;
  }
}
1349
// d2ll: double -> i64, one intrinsic per rounding tag.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  def int_nvvm_d2ll_ # rnd :
      ClangBuiltin<!strconcat("__nvvm_d2ll_", rnd)>,
      DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty],
                            [IntrNoMem, IntrSpeculatable]>;
}
1358
// double -> i64 intrinsics behind the __nvvm_d2ull_* builtins, one record per
// rounding suffix: int_nvvm_d2ull_{rn,rz,rm,rp}.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def int_nvvm_d2ull_ # rnd
      : ClangBuiltin<"__nvvm_d2ull_" # rnd>,
        DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty],
                              [IntrNoMem, IntrSpeculatable]>;
1367
// i64 -> float intrinsics.  First the __nvvm_ll2f_* family, then the
// __nvvm_ull2f_* family, each with the rn/rz/rm/rp suffixes in that order.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def int_nvvm_ll2f_ # rnd
      : ClangBuiltin<"__nvvm_ll2f_" # rnd>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty],
                              [IntrNoMem, IntrSpeculatable]>;
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def int_nvvm_ull2f_ # rnd
      : ClangBuiltin<"__nvvm_ull2f_" # rnd>,
        DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty],
                              [IntrNoMem, IntrSpeculatable]>;
1384
// i64 -> double intrinsics.  First the __nvvm_ll2d_* family, then the
// __nvvm_ull2d_* family, each with the rn/rz/rm/rp suffixes in that order.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def int_nvvm_ll2d_ # rnd
      : ClangBuiltin<"__nvvm_ll2d_" # rnd>,
        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty],
                              [IntrNoMem, IntrSpeculatable]>;
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def int_nvvm_ull2d_ # rnd
      : ClangBuiltin<"__nvvm_ull2d_" # rnd>,
        DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty],
                              [IntrNoMem, IntrSpeculatable]>;
1401
// float operand, i16 result.  Expands to int_nvvm_f2h_rn_ftz followed by
// int_nvvm_f2h_rn.
foreach ftz = ["_ftz", ""] in
  def int_nvvm_f2h_rn # ftz
      : ClangBuiltin<"__nvvm_f2h_rn" # ftz>,
        DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty],
                              [IntrNoMem, IntrSpeculatable]>;
1406
// bfloat operand, i16 result.  Expands to int_nvvm_bf2h_rn_ftz followed by
// int_nvvm_bf2h_rn.
foreach ftz = ["_ftz", ""] in
  def int_nvvm_bf2h_rn # ftz
      : ClangBuiltin<"__nvvm_bf2h_rn" # ftz>,
        DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty],
                              [IntrNoMem, IntrSpeculatable]>;
1411
// Two float operands in, one <2 x bfloat> result out; one record per builtin
// suffix (rn, rn_relu, rz, rz_relu).
//
// Every variant carries the same attribute list.  int_nvvm_ff2bf16x2_rz_relu
// was previously declared with [IntrNoMem] alone, which left it as the only
// record in this family without IntrNoCallback; generating all four from a
// single template keeps them in agreement.
foreach variant = ["rn", "rn_relu", "rz", "rz_relu"] in
  def int_nvvm_ff2bf16x2_ # variant
      : ClangBuiltin<"__nvvm_ff2bf16x2_" # variant>,
        Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty],
                  [IntrNoMem, IntrNoCallback]>;
1420
// Two float operands in, one <2 x half> result out; one record per builtin
// suffix (rn, rn_relu, rz, rz_relu).
foreach variant = ["rn", "rn_relu", "rz", "rz_relu"] in
  def int_nvvm_ff2f16x2_ # variant
      : ClangBuiltin<"__nvvm_ff2f16x2_" # variant>,
        Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
                  [IntrNoMem, IntrNoCallback]>;
1429
// One float operand in, one bfloat result out; one record per builtin suffix
// (rn, rn_relu, rz, rz_relu).
foreach variant = ["rn", "rn_relu", "rz", "rz_relu"] in
  def int_nvvm_f2bf16_ # variant
      : ClangBuiltin<"__nvvm_f2bf16_" # variant>,
        Intrinsic<[llvm_bfloat_ty], [llvm_float_ty],
                  [IntrNoMem, IntrNoCallback]>;
1438
// One float operand in, one i32 result out.  The list below spells out every
// builtin suffix in the order the records are defined.
foreach variant = ["rna", "rna_satfinite",
                   "rn", "rn_relu", "rn_satfinite", "rn_relu_satfinite",
                   "rz", "rz_relu", "rz_satfinite", "rz_relu_satfinite"] in
  def int_nvvm_f2tf32_ # variant
      : ClangBuiltin<"__nvvm_f2tf32_" # variant>,
        Intrinsic<[llvm_i32_ty], [llvm_float_ty],
                  [IntrNoMem, IntrNoCallback]>;
1459
// Two float operands in, one i16 result out, for the e4m3x2 and e5m2x2
// builtins (each with and without the "_relu" suffix).
foreach variant = ["e4m3x2_rn", "e4m3x2_rn_relu",
                   "e5m2x2_rn", "e5m2x2_rn_relu"] in
  def int_nvvm_ff_to_ # variant
      : ClangBuiltin<"__nvvm_ff_to_" # variant>,
        Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty],
                  [IntrNoMem, IntrNoCallback]>;
1468
// One <2 x half> operand in, one i16 result out, for the e4m3x2 and e5m2x2
// builtins (each with and without the "_relu" suffix).
foreach variant = ["e4m3x2_rn", "e4m3x2_rn_relu",
                   "e5m2x2_rn", "e5m2x2_rn_relu"] in
  def int_nvvm_f16x2_to_ # variant
      : ClangBuiltin<"__nvvm_f16x2_to_" # variant>,
        Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty],
                  [IntrNoMem, IntrNoCallback]>;
1477
// One i16 operand in, one <2 x half> result out, for the e4m3x2 and e5m2x2
// builtins (each with and without the "_relu" suffix).
foreach variant = ["e4m3x2_to_f16x2_rn", "e4m3x2_to_f16x2_rn_relu",
                   "e5m2x2_to_f16x2_rn", "e5m2x2_to_f16x2_rn_relu"] in
  def int_nvvm_ # variant
      : ClangBuiltin<"__nvvm_" # variant>,
        Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty],
                  [IntrNoMem, IntrNoCallback]>;
1486
// FNS

// Three i32 operands, one i32 result; exposed to Clang as __nvvm_fns.
def int_nvvm_fns
    : ClangBuiltin<"__nvvm_fns">,
      DefaultAttrsIntrinsic<
          [llvm_i32_ty],
          [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
          [IntrNoMem]>;
1492
// Atomics not available as llvm intrinsics.
// Expands to int_nvvm_atomic_load_inc_32 and int_nvvm_atomic_load_dec_32.
// Both take (pointer of any address space, i32) and return i32; the pointer
// operand is marked NoCapture.
foreach op = ["inc_32", "dec_32"] in
  def int_nvvm_atomic_load_ # op
      : Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i32_ty],
                  [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1500
// Signature template for a scoped atomic taking (pointer, value).  The value
// operand is constrained to the result type `elty` through LLVMMatchType<0>.
class SCOPED_ATOMIC2_impl<LLVMType elty>
    : Intrinsic<
          [elty],
          [llvm_anyptr_ty, LLVMMatchType<0>],
          [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
// Signature template for a scoped atomic taking (pointer, value, value).  Both
// value operands are constrained to the result type `elty` through
// LLVMMatchType<0>.
class SCOPED_ATOMIC3_impl<LLVMType elty>
    : Intrinsic<
          [elty],
          [llvm_anyptr_ty, LLVMMatchType<0>, LLVMMatchType<0>],
          [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1510
// Instantiates the two-operand scoped atomic twice, appending "_cta" and
// "_sys" to the defm name.
multiclass PTXAtomicWithScope2<LLVMType elty> {
  def _cta
      : SCOPED_ATOMIC2_impl<elty>;
  def _sys
      : SCOPED_ATOMIC2_impl<elty>;
}
// Instantiates the three-operand scoped atomic twice, appending "_cta" and
// "_sys" to the defm name.
multiclass PTXAtomicWithScope3<LLVMType elty> {
  def _cta
      : SCOPED_ATOMIC3_impl<elty>;
  def _sys
      : SCOPED_ATOMIC3_impl<elty>;
}
// Instantiates PTXAtomicWithScope2 for both an overloaded floating-point
// element type ("_f") and an overloaded integer element type ("_i").
multiclass PTXAtomicWithScope2_fi {
  defm _f
      : PTXAtomicWithScope2<llvm_anyfloat_ty>;
  defm _i
      : PTXAtomicWithScope2<llvm_anyint_ty>;
}
// "add" is instantiated for both float and integer element types.
defm int_nvvm_atomic_add_gen : PTXAtomicWithScope2_fi;

// Integer-only two-operand atomics: int_nvvm_atomic_<op>_gen_i_{cta,sys}.
foreach op = ["inc", "dec", "exch", "xor", "max", "min", "or", "and"] in
  defm int_nvvm_atomic_ # op # "_gen_i"
      : PTXAtomicWithScope2<llvm_anyint_ty>;

// "cas" is the only three-operand form.
defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3<llvm_anyint_ty>;
1533
// Bar.Sync

// The builtin for "bar.sync 0" is called __syncthreads.  Unlike most of the
// intrinsics in this file, this one is a user-facing API.
def int_nvvm_barrier0
    : ClangBuiltin<"__syncthreads">,
      Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;

// Synchronize all threads in the CTA at barrier 'n'.
def int_nvvm_barrier_n
    : ClangBuiltin<"__nvvm_bar_n">,
      Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;

// Synchronize 'm' threads (arg 2; a multiple of the warp size) in the CTA at
// barrier 'n' (arg 1).
def int_nvvm_barrier
    : ClangBuiltin<"__nvvm_bar">,
      Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>;

// int_nvvm_barrier0_{popc,and,or}: each takes one i32 and returns one i32.
foreach op = ["popc", "and", "or"] in
  def int_nvvm_barrier0_ # op
      : ClangBuiltin<"__nvvm_bar0_" # op>,
        Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
                  [IntrConvergent, IntrNoCallback]>;
1553
// int_nvvm_bar_sync and int_nvvm_bar_warp_sync: one i32 operand, no result.
foreach kind = ["bar_sync", "bar_warp_sync"] in
  def int_nvvm_ # kind
      : Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
        ClangBuiltin<"__nvvm_" # kind>;
1560
// barrier.sync id[, cnt]
// The first form takes only the id operand, the "_cnt" form takes id and cnt.
def int_nvvm_barrier_sync
    : Intrinsic<[], [llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>,
      ClangBuiltin<"__nvvm_barrier_sync">;
def int_nvvm_barrier_sync_cnt
    : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>,
      ClangBuiltin<"__nvvm_barrier_sync_cnt">;
1568
// barrier.cluster.[wait, arrive, arrive.relaxed]
// No operands, no result, no Clang builtin attached.
foreach op = ["arrive", "arrive_relaxed", "wait"] in
  def int_nvvm_barrier_cluster_ # op
      : Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1576
// 'aligned' versions of the above barrier.cluster.* intrinsics
foreach op = ["arrive_aligned", "arrive_relaxed_aligned", "wait_aligned"] in
  def int_nvvm_barrier_cluster_ # op
      : Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1584
// Membar
// int_nvvm_membar_{cta,gl,sys}: no operands, no result, each bound to the
// matching __nvvm_membar_* builtin.
foreach scope = ["cta", "gl", "sys"] in
  def int_nvvm_membar_ # scope
      : ClangBuiltin<"__nvvm_membar_" # scope>,
        Intrinsic<[], [], [IntrNoCallback]>;

// Same signature as the membar records, but without a Clang builtin.
def int_nvvm_fence_sc_cluster
    : Intrinsic<[], [], [IntrNoCallback]>;
1594
// Proxy fence (uni-directional)
// For every scope a "release" record (no operands) and an "acquire" record
// (pointer plus an immediate i32) are defined, with explicitly spelled
// intrinsic names.
foreach scp = ["cta", "cluster", "gpu", "sys"] in {
  def int_nvvm_fence_proxy_tensormap_generic_release_ # scp
      : Intrinsic<[], [],
                  [IntrNoCallback],
                  "llvm.nvvm.fence.proxy.tensormap_generic.release." # scp>;

  def int_nvvm_fence_proxy_tensormap_generic_acquire_ # scp
      : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
                  [IntrNoCallback, IntrArgMemOnly, ImmArg<ArgIndex<1>>],
                  "llvm.nvvm.fence.proxy.tensormap_generic.acquire." # scp>;
}
1608
// Async Copy
// Each record takes a single pointer operand and has no result.  The
// "_shared" forms take llvm_shared_ptr_ty instead of llvm_ptr_ty.
def int_nvvm_cp_async_mbarrier_arrive
    : ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive">,
      Intrinsic<[], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_shared
    : ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">,
      Intrinsic<[], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_noinc
    : ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">,
      Intrinsic<[], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_noinc_shared
    : ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">,
      Intrinsic<[], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>;
1622
// Defines two records per instantiation: one under the defm name itself and
// one with an "_s" suffix that takes an additional i32 operand.  `n` and `cc`
// are only used to build the intrinsic name
// "llvm.nvvm.cp.async.<cc>.shared.global.<n>[.s]".  Operand 0 (shared pointer)
// is WriteOnly, operand 1 (global pointer) is ReadOnly, and both are NoAlias.
multiclass CP_ASYNC_SHARED_GLOBAL<string n, string cc> {
  def NAME
      : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty],
                  [IntrArgMemOnly, IntrNoCallback,
                   NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
                   WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
                  "llvm.nvvm.cp.async." # cc # ".shared.global." # n>;
  def _s
      : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty, llvm_i32_ty],
                  [IntrArgMemOnly, IntrNoCallback,
                   NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
                   WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
                  "llvm.nvvm.cp.async." # cc # ".shared.global." # n # ".s">;
}
1633
// "ca" copies exist for 4, 8 and 16; "cg" only for 16.
foreach sz = ["4", "8", "16"] in
  defm int_nvvm_cp_async_ca_shared_global_ # sz
      : CP_ASYNC_SHARED_GLOBAL<sz, "ca">;
defm int_nvvm_cp_async_cg_shared_global_16
    : CP_ASYNC_SHARED_GLOBAL<"16", "cg">;
1638
// Commit / wait for cp.async.  None of these carries intrinsic properties
// beyond the ImmArg on the wait_group operand.
def int_nvvm_cp_async_commit_group
    : ClangBuiltin<"__nvvm_cp_async_commit_group">,
      Intrinsic<[], [], []>;

// The single i32 operand must be an immediate.
def int_nvvm_cp_async_wait_group
    : ClangBuiltin<"__nvvm_cp_async_wait_group">,
      Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;

def int_nvvm_cp_async_wait_all
    : ClangBuiltin<"__nvvm_cp_async_wait_all">,
      Intrinsic<[], [], []>;
1650
// cp.async.bulk variants of the commit/wait group
// No Clang builtins are attached to these records.
def int_nvvm_cp_async_bulk_commit_group
    : Intrinsic<[], [], []>;

// The single i32 operand must be an immediate.
def int_nvvm_cp_async_bulk_wait_group
    : Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;

// The single i32 operand must be an immediate.
def int_nvvm_cp_async_bulk_wait_group_read
    : Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
1660
// mbarrier
// init: (pointer, i32) -> void.  The "_shared" form takes llvm_shared_ptr_ty.
def int_nvvm_mbarrier_init
    : ClangBuiltin<"__nvvm_mbarrier_init">,
      Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_init_shared
    : ClangBuiltin<"__nvvm_mbarrier_init_shared">,
      Intrinsic<[], [llvm_shared_ptr_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>;
1667
// inval: (pointer) -> void.  Declared as writing only through its pointer
// operand, which is also marked WriteOnly and NoCapture.
def int_nvvm_mbarrier_inval
    : ClangBuiltin<"__nvvm_mbarrier_inval">,
      Intrinsic<[], [llvm_ptr_ty],
                [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
                 WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_mbarrier_inval_shared
    : ClangBuiltin<"__nvvm_mbarrier_inval_shared">,
      Intrinsic<[], [llvm_shared_ptr_ty],
                [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
                 WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
1677
// arrive: returns i64.  The "noComplete" forms take an extra i32 operand; the
// "_shared" forms take llvm_shared_ptr_ty instead of llvm_ptr_ty.
def int_nvvm_mbarrier_arrive
    : ClangBuiltin<"__nvvm_mbarrier_arrive">,
      Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_shared
    : ClangBuiltin<"__nvvm_mbarrier_arrive_shared">,
      Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_noComplete
    : ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
      Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_noComplete_shared
    : ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
      Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>;
1690
// arrive_drop: same four signatures as the arrive family (i64 result, extra
// i32 operand on the "noComplete" forms, shared-pointer "_shared" forms).
def int_nvvm_mbarrier_arrive_drop
    : ClangBuiltin<"__nvvm_mbarrier_arrive_drop">,
      Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_shared
    : ClangBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
      Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_noComplete
    : ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
      Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_noComplete_shared
    : ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
      Intrinsic<[llvm_i64_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
                [IntrConvergent, IntrNoCallback]>;
1704
// test_wait: (pointer, i64) -> i1.  The "_shared" form takes
// llvm_shared_ptr_ty.
def int_nvvm_mbarrier_test_wait
    : ClangBuiltin<"__nvvm_mbarrier_test_wait">,
      Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i64_ty],
                [IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_test_wait_shared
    : ClangBuiltin<"__nvvm_mbarrier_test_wait_shared">,
      Intrinsic<[llvm_i1_ty], [llvm_shared_ptr_ty, llvm_i64_ty],
                [IntrConvergent, IntrNoCallback]>;
1711
// pending_count: i64 -> i32, takes no pointer and is declared IntrNoMem.
def int_nvvm_mbarrier_pending_count
    : ClangBuiltin<"__nvvm_mbarrier_pending_count">,
      Intrinsic<[llvm_i32_ty], [llvm_i64_ty],
                [IntrNoMem, IntrConvergent, IntrNoCallback]>;
1715
// Generated within nvvm. Use for ldu on sm_20 or later.  Second arg is the
// pointer's alignment.
// The three records differ only in the overloaded result type: integer (.i),
// floating point (.f) and pointer (.p).
def int_nvvm_ldu_global_i
    : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i32_ty],
                [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn,
                 NoCapture<ArgIndex<0>>],
                "llvm.nvvm.ldu.global.i">;
def int_nvvm_ldu_global_f
    : Intrinsic<[llvm_anyfloat_ty], [llvm_anyptr_ty, llvm_i32_ty],
                [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn,
                 NoCapture<ArgIndex<0>>],
                "llvm.nvvm.ldu.global.f">;
def int_nvvm_ldu_global_p
    : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty],
                [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn,
                 NoCapture<ArgIndex<0>>],
                "llvm.nvvm.ldu.global.p">;
1730
// Used in nvvm internally to help address space opt and ptx code generation
// This is for params that are passed to kernel functions by pointer by-val.
// Both the operand and the result are overloaded pointer types.
def int_nvvm_ptr_gen_to_param
    : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
                [IntrNoMem, IntrSpeculatable, IntrNoCallback],
                "llvm.nvvm.ptr.gen.to.param">;
1737
// sm70+, PTX7.7+
// Both the operand and the result are overloaded pointer types.
def int_nvvm_ptr_param_to_gen
    : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
                            [IntrNoMem, IntrSpeculatable, IntrNoCallback],
                            "llvm.nvvm.ptr.param.to.gen">;
1743
// Move intrinsics, used in nvvm internally
// Every record returns the same type it takes; only the pointer form adds
// NoCapture on its operand.

def int_nvvm_move_i16
    : Intrinsic<[llvm_i16_ty], [llvm_i16_ty],
                [IntrNoMem], "llvm.nvvm.move.i16">;
def int_nvvm_move_i32
    : Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
                [IntrNoMem], "llvm.nvvm.move.i32">;
def int_nvvm_move_i64
    : Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
                [IntrNoMem], "llvm.nvvm.move.i64">;
def int_nvvm_move_float
    : Intrinsic<[llvm_float_ty], [llvm_float_ty],
                [IntrNoMem], "llvm.nvvm.move.float">;
def int_nvvm_move_double
    : Intrinsic<[llvm_double_ty], [llvm_double_ty],
                [IntrNoMem], "llvm.nvvm.move.double">;
def int_nvvm_move_ptr
    : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
                [IntrNoMem, NoCapture<ArgIndex<0>>], "llvm.nvvm.move.ptr">;
1758
1759
// For getting the handle from a texture or surface variable
// The first form takes (metadata, pointer); the ".internal" form takes only
// the pointer.  Both return i64.
def int_nvvm_texsurf_handle : Intrinsic<
    [llvm_i64_ty],
    [llvm_metadata_ty, llvm_anyptr_ty],
    [IntrNoMem],
    "llvm.nvvm.texsurf.handle">;
def int_nvvm_texsurf_handle_internal : Intrinsic<
    [llvm_i64_ty],
    [llvm_anyptr_ty],
    [IntrNoMem],
    "llvm.nvvm.texsurf.handle.internal">;
1767
/// Error / Warn
// Each takes a single overloaded pointer operand and declares no intrinsic
// properties.
def int_nvvm_compiler_error
    : Intrinsic<[], [llvm_anyptr_ty], [],
                "llvm.nvvm.compiler.error">;
def int_nvvm_compiler_warn
    : Intrinsic<[], [llvm_anyptr_ty], [],
                "llvm.nvvm.compiler.warn">;
1773
// Pointer operand in, i32 result out; exposed to Clang as __nvvm_reflect.
def int_nvvm_reflect
    : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
                [IntrNoMem],
                "llvm.nvvm.reflect">,
      ClangBuiltin<"__nvvm_reflect">;
1777
// isspacep.{const, global, local, shared}
// Pointer operand in, i1 result out.  These four are bound to the matching
// __nvvm_isspacep_* builtin.
foreach space = ["const", "global", "local", "shared"] in
  def int_nvvm_isspacep_ # space
      : DefaultAttrsIntrinsic<
            [llvm_i1_ty], [llvm_ptr_ty],
            [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
            "llvm.nvvm.isspacep." # space>,
        ClangBuiltin<"__nvvm_isspacep_" # space>;

// Same signature and attributes, but no Clang builtin is attached.
def int_nvvm_isspacep_shared_cluster
    : DefaultAttrsIntrinsic<
          [llvm_i1_ty], [llvm_ptr_ty],
          [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
          "llvm.nvvm.isspacep.shared.cluster">;
1803
// Environment register read
// Expands to int_nvvm_read_ptx_sreg_envreg0 .. int_nvvm_read_ptx_sreg_envreg31.
// Every record takes no operands, returns an i32 marked NoUndef, and is bound
// to the __nvvm_read_ptx_sreg_envreg<i> builtin.
foreach i = 0...31 in
  def int_nvvm_read_ptx_sreg_envreg # i
      : DefaultAttrsIntrinsic<
            [llvm_i32_ty], [],
            [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
            "llvm.nvvm.read.ptx.sreg.envreg" # i>,
        ClangBuiltin<"__nvvm_read_ptx_sreg_envreg" # i>;
1933
1934
// Texture Fetch
// texmode_independent
// Every record below returns four scalars and takes two i64 operands followed
// by one to three i32/float operands.  None declares intrinsic properties.

// Four float results.
def int_nvvm_tex_1d_v4f32_s32 : Intrinsic<
    [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
    [], "llvm.nvvm.tex.1d.v4f32.s32">;
def int_nvvm_tex_1d_v4f32_f32 : Intrinsic<
    [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_float_ty],
    [], "llvm.nvvm.tex.1d.v4f32.f32">;
def int_nvvm_tex_1d_level_v4f32_f32 : Intrinsic<
    [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty],
    [], "llvm.nvvm.tex.1d.level.v4f32.f32">;
def int_nvvm_tex_1d_grad_v4f32_f32 : Intrinsic<
    [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [], "llvm.nvvm.tex.1d.grad.v4f32.f32">;

// Four i32 results, "v4s32" names.
def int_nvvm_tex_1d_v4s32_s32 : Intrinsic<
    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
    [], "llvm.nvvm.tex.1d.v4s32.s32">;
def int_nvvm_tex_1d_v4s32_f32 : Intrinsic<
    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_float_ty],
    [], "llvm.nvvm.tex.1d.v4s32.f32">;
def int_nvvm_tex_1d_level_v4s32_f32 : Intrinsic<
    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty],
    [], "llvm.nvvm.tex.1d.level.v4s32.f32">;
def int_nvvm_tex_1d_grad_v4s32_f32 : Intrinsic<
    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
    [], "llvm.nvvm.tex.1d.grad.v4s32.f32">;

// Four i32 results, "v4u32" names.
def int_nvvm_tex_1d_v4u32_s32 : Intrinsic<
    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
    [], "llvm.nvvm.tex.1d.v4u32.s32">;
def int_nvvm_tex_1d_v4u32_f32 : Intrinsic<
    [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
    [llvm_i64_ty, llvm_i64_ty, llvm_float_ty],
    [], "llvm.nvvm.tex.1d.v4u32.f32">;
1979 def int_nvvm_tex_1d_level_v4u32_f32
1980 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1981 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
1982 "llvm.nvvm.tex.1d.level.v4u32.f32">;
1983 def int_nvvm_tex_1d_grad_v4u32_f32
1984 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1985 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
1986 llvm_float_ty], [],
1987 "llvm.nvvm.tex.1d.grad.v4u32.f32">;
1988
// llvm.nvvm.tex.1d.array.* -- 1D array texture fetches.
// Operand layout: two i64 values (presumably texture and sampler handles --
// confirm), then one i32 (presumably the array layer index -- confirm), then:
//   plain : one coordinate (i32 for .s32, float for .f32)
//   level : float coordinate + one more float (presumably the LOD)
//   grad  : float coordinate + two more floats (presumably the two gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
1989 def int_nvvm_tex_1d_array_v4f32_s32
1990 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1991 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
1992 "llvm.nvvm.tex.1d.array.v4f32.s32">;
1993 def int_nvvm_tex_1d_array_v4f32_f32
1994 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1995 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
1996 "llvm.nvvm.tex.1d.array.v4f32.f32">;
1997 def int_nvvm_tex_1d_array_level_v4f32_f32
1998 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1999 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2000 llvm_float_ty], [],
2001 "llvm.nvvm.tex.1d.array.level.v4f32.f32">;
2002 def int_nvvm_tex_1d_array_grad_v4f32_f32
2003 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2004 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2005 llvm_float_ty, llvm_float_ty], [],
2006 "llvm.nvvm.tex.1d.array.grad.v4f32.f32">;
// v4s32: four i32 results.
2007 def int_nvvm_tex_1d_array_v4s32_s32
2008 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2009 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2010 "llvm.nvvm.tex.1d.array.v4s32.s32">;
2011 def int_nvvm_tex_1d_array_v4s32_f32
2012 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2013 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2014 "llvm.nvvm.tex.1d.array.v4s32.f32">;
2015 def int_nvvm_tex_1d_array_level_v4s32_f32
2016 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2017 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2018 llvm_float_ty], [],
2019 "llvm.nvvm.tex.1d.array.level.v4s32.f32">;
2020 def int_nvvm_tex_1d_array_grad_v4s32_f32
2021 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2022 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2023 llvm_float_ty, llvm_float_ty], [],
2024 "llvm.nvvm.tex.1d.array.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2025 def int_nvvm_tex_1d_array_v4u32_s32
2026 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2027 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2028 "llvm.nvvm.tex.1d.array.v4u32.s32">;
2029 def int_nvvm_tex_1d_array_v4u32_f32
2030 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2031 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2032 "llvm.nvvm.tex.1d.array.v4u32.f32">;
2033 def int_nvvm_tex_1d_array_level_v4u32_f32
2034 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2035 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2036 llvm_float_ty], [],
2037 "llvm.nvvm.tex.1d.array.level.v4u32.f32">;
2038 def int_nvvm_tex_1d_array_grad_v4u32_f32
2039 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2040 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2041 llvm_float_ty, llvm_float_ty], [],
2042 "llvm.nvvm.tex.1d.array.grad.v4u32.f32">;
2043
// llvm.nvvm.tex.2d.* -- 2D texture fetches.
// Operand layout: two i64 values (presumably texture and sampler handles --
// confirm), then:
//   plain : two coordinates (both i32 for .s32, both float for .f32)
//   level : two float coordinates + one more float (presumably the LOD)
//   grad  : two float coordinates + four more floats (presumably two
//           2-component gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2044 def int_nvvm_tex_2d_v4f32_s32
2045 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2046 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2047 "llvm.nvvm.tex.2d.v4f32.s32">;
2048 def int_nvvm_tex_2d_v4f32_f32
2049 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2050 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2051 "llvm.nvvm.tex.2d.v4f32.f32">;
2052 def int_nvvm_tex_2d_level_v4f32_f32
2053 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2054 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2055 llvm_float_ty], [],
2056 "llvm.nvvm.tex.2d.level.v4f32.f32">;
2057 def int_nvvm_tex_2d_grad_v4f32_f32
2058 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2059 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2060 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2061 "llvm.nvvm.tex.2d.grad.v4f32.f32">;
// v4s32: four i32 results.
2062 def int_nvvm_tex_2d_v4s32_s32
2063 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2064 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2065 "llvm.nvvm.tex.2d.v4s32.s32">;
2066 def int_nvvm_tex_2d_v4s32_f32
2067 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2068 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2069 "llvm.nvvm.tex.2d.v4s32.f32">;
2070 def int_nvvm_tex_2d_level_v4s32_f32
2071 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2072 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2073 llvm_float_ty], [],
2074 "llvm.nvvm.tex.2d.level.v4s32.f32">;
2075 def int_nvvm_tex_2d_grad_v4s32_f32
2076 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2077 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2078 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2079 "llvm.nvvm.tex.2d.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2080 def int_nvvm_tex_2d_v4u32_s32
2081 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2082 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2083 "llvm.nvvm.tex.2d.v4u32.s32">;
2084 def int_nvvm_tex_2d_v4u32_f32
2085 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2086 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2087 "llvm.nvvm.tex.2d.v4u32.f32">;
2088 def int_nvvm_tex_2d_level_v4u32_f32
2089 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2090 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2091 llvm_float_ty], [],
2092 "llvm.nvvm.tex.2d.level.v4u32.f32">;
2093 def int_nvvm_tex_2d_grad_v4u32_f32
2094 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2095 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2096 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2097 "llvm.nvvm.tex.2d.grad.v4u32.f32">;
2098
// llvm.nvvm.tex.2d.array.* -- 2D array texture fetches.
// Operand layout: two i64 values (presumably texture and sampler handles --
// confirm), then one i32 (presumably the array layer index -- confirm), then:
//   plain : two coordinates (both i32 for .s32, both float for .f32)
//   level : two float coordinates + one more float (presumably the LOD)
//   grad  : two float coordinates + four more floats (presumably two
//           2-component gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2099 def int_nvvm_tex_2d_array_v4f32_s32
2100 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2101 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2102 llvm_i32_ty], [],
2103 "llvm.nvvm.tex.2d.array.v4f32.s32">;
2104 def int_nvvm_tex_2d_array_v4f32_f32
2105 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2106 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2107 llvm_float_ty], [],
2108 "llvm.nvvm.tex.2d.array.v4f32.f32">;
2109 def int_nvvm_tex_2d_array_level_v4f32_f32
2110 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2111 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2112 llvm_float_ty, llvm_float_ty], [],
2113 "llvm.nvvm.tex.2d.array.level.v4f32.f32">;
2114 def int_nvvm_tex_2d_array_grad_v4f32_f32
2115 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2116 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2117 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2118 llvm_float_ty], [],
2119 "llvm.nvvm.tex.2d.array.grad.v4f32.f32">;
// v4s32: four i32 results.
2120 def int_nvvm_tex_2d_array_v4s32_s32
2121 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2122 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2123 llvm_i32_ty], [],
2124 "llvm.nvvm.tex.2d.array.v4s32.s32">;
2125 def int_nvvm_tex_2d_array_v4s32_f32
2126 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2127 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2128 llvm_float_ty], [],
2129 "llvm.nvvm.tex.2d.array.v4s32.f32">;
2130 def int_nvvm_tex_2d_array_level_v4s32_f32
2131 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2132 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2133 llvm_float_ty, llvm_float_ty], [],
2134 "llvm.nvvm.tex.2d.array.level.v4s32.f32">;
2135 def int_nvvm_tex_2d_array_grad_v4s32_f32
2136 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2137 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2138 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2139 llvm_float_ty], [],
2140 "llvm.nvvm.tex.2d.array.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2141 def int_nvvm_tex_2d_array_v4u32_s32
2142 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2143 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2144 llvm_i32_ty], [],
2145 "llvm.nvvm.tex.2d.array.v4u32.s32">;
2146 def int_nvvm_tex_2d_array_v4u32_f32
2147 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2148 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2149 llvm_float_ty], [],
2150 "llvm.nvvm.tex.2d.array.v4u32.f32">;
2151 def int_nvvm_tex_2d_array_level_v4u32_f32
2152 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2153 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2154 llvm_float_ty, llvm_float_ty], [],
2155 "llvm.nvvm.tex.2d.array.level.v4u32.f32">;
2156 def int_nvvm_tex_2d_array_grad_v4u32_f32
2157 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2158 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2159 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2160 llvm_float_ty], [],
2161 "llvm.nvvm.tex.2d.array.grad.v4u32.f32">;
2162
// llvm.nvvm.tex.3d.* -- 3D texture fetches.
// Operand layout: two i64 values (presumably texture and sampler handles --
// confirm), then:
//   plain : three coordinates (all i32 for .s32, all float for .f32)
//   level : three float coordinates + one more float (presumably the LOD)
//   grad  : three float coordinates + six more floats (presumably two
//           3-component gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2163 def int_nvvm_tex_3d_v4f32_s32
2164 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2165 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2166 [], "llvm.nvvm.tex.3d.v4f32.s32">;
2167 def int_nvvm_tex_3d_v4f32_f32
2168 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2169 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2170 llvm_float_ty], [],
2171 "llvm.nvvm.tex.3d.v4f32.f32">;
2172 def int_nvvm_tex_3d_level_v4f32_f32
2173 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2174 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2175 llvm_float_ty, llvm_float_ty], [],
2176 "llvm.nvvm.tex.3d.level.v4f32.f32">;
2177 def int_nvvm_tex_3d_grad_v4f32_f32
2178 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2179 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2180 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2181 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2182 "llvm.nvvm.tex.3d.grad.v4f32.f32">;
// v4s32: four i32 results.
2183 def int_nvvm_tex_3d_v4s32_s32
2184 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2185 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2186 [], "llvm.nvvm.tex.3d.v4s32.s32">;
2187 def int_nvvm_tex_3d_v4s32_f32
2188 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2189 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2190 llvm_float_ty], [],
2191 "llvm.nvvm.tex.3d.v4s32.f32">;
2192 def int_nvvm_tex_3d_level_v4s32_f32
2193 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2194 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2195 llvm_float_ty, llvm_float_ty], [],
2196 "llvm.nvvm.tex.3d.level.v4s32.f32">;
2197 def int_nvvm_tex_3d_grad_v4s32_f32
2198 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2199 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2200 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2201 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2202 "llvm.nvvm.tex.3d.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2203 def int_nvvm_tex_3d_v4u32_s32
2204 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2205 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2206 [], "llvm.nvvm.tex.3d.v4u32.s32">;
2207 def int_nvvm_tex_3d_v4u32_f32
2208 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2209 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2210 llvm_float_ty], [],
2211 "llvm.nvvm.tex.3d.v4u32.f32">;
2212 def int_nvvm_tex_3d_level_v4u32_f32
2213 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2214 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2215 llvm_float_ty, llvm_float_ty], [],
2216 "llvm.nvvm.tex.3d.level.v4u32.f32">;
2217 def int_nvvm_tex_3d_grad_v4u32_f32
2218 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2219 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2220 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2221 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2222 "llvm.nvvm.tex.3d.grad.v4u32.f32">;
2223
// llvm.nvvm.tex.cube.* -- cube texture fetches.
// Only float-coordinate (.f32) forms are defined in this group, in two
// variants per result type:
//   plain : two i64 values (presumably texture and sampler handles --
//           confirm) + three float coordinates
//   level : the same + one more float (presumably the LOD)
// Every record returns four scalars (float for v4f32, i32 for v4s32 and
// v4u32) and has an empty property list ([]).
2224 def int_nvvm_tex_cube_v4f32_f32
2225 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2226 [llvm_i64_ty, llvm_i64_ty,
2227 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2228 "llvm.nvvm.tex.cube.v4f32.f32">;
2229 def int_nvvm_tex_cube_level_v4f32_f32
2230 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2231 [llvm_i64_ty, llvm_i64_ty,
2232 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2233 "llvm.nvvm.tex.cube.level.v4f32.f32">;
2234 def int_nvvm_tex_cube_v4s32_f32
2235 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2236 [llvm_i64_ty, llvm_i64_ty,
2237 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2238 "llvm.nvvm.tex.cube.v4s32.f32">;
2239 def int_nvvm_tex_cube_level_v4s32_f32
2240 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2241 [llvm_i64_ty, llvm_i64_ty,
2242 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2243 "llvm.nvvm.tex.cube.level.v4s32.f32">;
2244 def int_nvvm_tex_cube_v4u32_f32
2245 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2246 [llvm_i64_ty, llvm_i64_ty,
2247 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2248 "llvm.nvvm.tex.cube.v4u32.f32">;
2249 def int_nvvm_tex_cube_level_v4u32_f32
2250 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2251 [llvm_i64_ty, llvm_i64_ty,
2252 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2253 "llvm.nvvm.tex.cube.level.v4u32.f32">;
2254
// llvm.nvvm.tex.cube.array.* -- cube array texture fetches.
// Only float-coordinate (.f32) forms are defined in this group, in two
// variants per result type:
//   plain : two i64 values (presumably texture and sampler handles --
//           confirm) + one i32 (presumably the array layer index -- confirm)
//           + three float coordinates
//   level : the same + one more float (presumably the LOD)
// Every record returns four scalars (float for v4f32, i32 for v4s32 and
// v4u32) and has an empty property list ([]).
2255 def int_nvvm_tex_cube_array_v4f32_f32
2256 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2257 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2258 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2259 "llvm.nvvm.tex.cube.array.v4f32.f32">;
2260 def int_nvvm_tex_cube_array_level_v4f32_f32
2261 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2262 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2263 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2264 "llvm.nvvm.tex.cube.array.level.v4f32.f32">;
2265 def int_nvvm_tex_cube_array_v4s32_f32
2266 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2267 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2268 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2269 "llvm.nvvm.tex.cube.array.v4s32.f32">;
2270 def int_nvvm_tex_cube_array_level_v4s32_f32
2271 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2272 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2273 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2274 "llvm.nvvm.tex.cube.array.level.v4s32.f32">;
2275 def int_nvvm_tex_cube_array_v4u32_f32
2276 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2277 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2278 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2279 "llvm.nvvm.tex.cube.array.v4u32.f32">;
2280 def int_nvvm_tex_cube_array_level_v4u32_f32
2281 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2282 [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2283 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2284 "llvm.nvvm.tex.cube.array.level.v4u32.f32">;
2285
// llvm.nvvm.tld4.{r,g,b,a}.2d.* -- one record per component letter (r, g, b,
// a) and per result type (v4f32, v4s32, v4u32).
// All twelve records share one operand list: two i64 values (presumably
// texture and sampler handles -- confirm) followed by two float coordinates.
// Each returns four scalars and has an empty property list ([]).
// NOTE(review): tld4 presumably maps to the PTX tld4 gather instruction, with
// the letter selecting the gathered component -- confirm against the PTX ISA.
//
// v4f32: four float results.
2286 def int_nvvm_tld4_r_2d_v4f32_f32
2287 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2288 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2289 "llvm.nvvm.tld4.r.2d.v4f32.f32">;
2290 def int_nvvm_tld4_g_2d_v4f32_f32
2291 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2292 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2293 "llvm.nvvm.tld4.g.2d.v4f32.f32">;
2294 def int_nvvm_tld4_b_2d_v4f32_f32
2295 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2296 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2297 "llvm.nvvm.tld4.b.2d.v4f32.f32">;
2298 def int_nvvm_tld4_a_2d_v4f32_f32
2299 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2300 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2301 "llvm.nvvm.tld4.a.2d.v4f32.f32">;
// v4s32: four i32 results.
2302 def int_nvvm_tld4_r_2d_v4s32_f32
2303 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2304 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2305 "llvm.nvvm.tld4.r.2d.v4s32.f32">;
2306 def int_nvvm_tld4_g_2d_v4s32_f32
2307 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2308 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2309 "llvm.nvvm.tld4.g.2d.v4s32.f32">;
2310 def int_nvvm_tld4_b_2d_v4s32_f32
2311 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2312 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2313 "llvm.nvvm.tld4.b.2d.v4s32.f32">;
2314 def int_nvvm_tld4_a_2d_v4s32_f32
2315 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2316 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2317 "llvm.nvvm.tld4.a.2d.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2318 def int_nvvm_tld4_r_2d_v4u32_f32
2319 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2320 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2321 "llvm.nvvm.tld4.r.2d.v4u32.f32">;
2322 def int_nvvm_tld4_g_2d_v4u32_f32
2323 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2324 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2325 "llvm.nvvm.tld4.g.2d.v4u32.f32">;
2326 def int_nvvm_tld4_b_2d_v4u32_f32
2327 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2328 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2329 "llvm.nvvm.tld4.b.2d.v4u32.f32">;
2330 def int_nvvm_tld4_a_2d_v4u32_f32
2331 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2332 [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2333 "llvm.nvvm.tld4.a.2d.v4u32.f32">;
2334
2335
2336 // texmode_unified
// llvm.nvvm.tex.unified.1d.* -- 1D texture fetches, unified mode.
// Operand lists start with a single i64 (presumably one combined texture
// handle -- confirm against the NVPTX lowering), followed by:
//   plain : one coordinate (i32 for the .s32 form, float for the .f32 form)
//   level : float coordinate + one more float (presumably the LOD)
//   grad  : float coordinate + two more floats (presumably the two gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2337 def int_nvvm_tex_unified_1d_v4f32_s32
2338 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2339 [llvm_i64_ty, llvm_i32_ty], [],
2340 "llvm.nvvm.tex.unified.1d.v4f32.s32">;
2341 def int_nvvm_tex_unified_1d_v4f32_f32
2342 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2343 [llvm_i64_ty, llvm_float_ty], [],
2344 "llvm.nvvm.tex.unified.1d.v4f32.f32">;
2345 def int_nvvm_tex_unified_1d_level_v4f32_f32
2346 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2347 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2348 "llvm.nvvm.tex.unified.1d.level.v4f32.f32">;
2349 def int_nvvm_tex_unified_1d_grad_v4f32_f32
2350 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2351 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2352 llvm_float_ty], [],
2353 "llvm.nvvm.tex.unified.1d.grad.v4f32.f32">;
// v4s32: four i32 results.
2354 def int_nvvm_tex_unified_1d_v4s32_s32
2355 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2356 [llvm_i64_ty, llvm_i32_ty], [],
2357 "llvm.nvvm.tex.unified.1d.v4s32.s32">;
2358 def int_nvvm_tex_unified_1d_v4s32_f32
2359 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2360 [llvm_i64_ty, llvm_float_ty], [],
2361 "llvm.nvvm.tex.unified.1d.v4s32.f32">;
2362 def int_nvvm_tex_unified_1d_level_v4s32_f32
2363 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2364 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2365 "llvm.nvvm.tex.unified.1d.level.v4s32.f32">;
2366 def int_nvvm_tex_unified_1d_grad_v4s32_f32
2367 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2368 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2369 llvm_float_ty], [],
2370 "llvm.nvvm.tex.unified.1d.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2371 def int_nvvm_tex_unified_1d_v4u32_s32
2372 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2373 [llvm_i64_ty, llvm_i32_ty], [],
2374 "llvm.nvvm.tex.unified.1d.v4u32.s32">;
2375 def int_nvvm_tex_unified_1d_v4u32_f32
2376 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2377 [llvm_i64_ty, llvm_float_ty], [],
2378 "llvm.nvvm.tex.unified.1d.v4u32.f32">;
2379 def int_nvvm_tex_unified_1d_level_v4u32_f32
2380 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2381 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2382 "llvm.nvvm.tex.unified.1d.level.v4u32.f32">;
2383 def int_nvvm_tex_unified_1d_grad_v4u32_f32
2384 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2385 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2386 llvm_float_ty], [],
2387 "llvm.nvvm.tex.unified.1d.grad.v4u32.f32">;
2388
// llvm.nvvm.tex.unified.1d.array.* -- 1D array texture fetches, unified mode.
// Operand layout: a single i64 (presumably one combined texture handle --
// confirm), then one i32 (presumably the array layer index -- confirm), then:
//   plain : one coordinate (i32 for .s32, float for .f32)
//   level : float coordinate + one more float (presumably the LOD)
//   grad  : float coordinate + two more floats (presumably the two gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2389 def int_nvvm_tex_unified_1d_array_v4f32_s32
2390 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2391 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2392 "llvm.nvvm.tex.unified.1d.array.v4f32.s32">;
2393 def int_nvvm_tex_unified_1d_array_v4f32_f32
2394 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2395 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2396 "llvm.nvvm.tex.unified.1d.array.v4f32.f32">;
2397 def int_nvvm_tex_unified_1d_array_level_v4f32_f32
2398 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2399 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2400 llvm_float_ty], [],
2401 "llvm.nvvm.tex.unified.1d.array.level.v4f32.f32">;
2402 def int_nvvm_tex_unified_1d_array_grad_v4f32_f32
2403 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2404 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2405 llvm_float_ty, llvm_float_ty], [],
2406 "llvm.nvvm.tex.unified.1d.array.grad.v4f32.f32">;
// v4s32: four i32 results.
2407 def int_nvvm_tex_unified_1d_array_v4s32_s32
2408 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2409 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2410 "llvm.nvvm.tex.unified.1d.array.v4s32.s32">;
2411 def int_nvvm_tex_unified_1d_array_v4s32_f32
2412 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2413 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2414 "llvm.nvvm.tex.unified.1d.array.v4s32.f32">;
2415 def int_nvvm_tex_unified_1d_array_level_v4s32_f32
2416 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2417 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2418 llvm_float_ty], [],
2419 "llvm.nvvm.tex.unified.1d.array.level.v4s32.f32">;
2420 def int_nvvm_tex_unified_1d_array_grad_v4s32_f32
2421 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2422 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2423 llvm_float_ty, llvm_float_ty], [],
2424 "llvm.nvvm.tex.unified.1d.array.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2425 def int_nvvm_tex_unified_1d_array_v4u32_s32
2426 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2427 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2428 "llvm.nvvm.tex.unified.1d.array.v4u32.s32">;
2429 def int_nvvm_tex_unified_1d_array_v4u32_f32
2430 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2431 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2432 "llvm.nvvm.tex.unified.1d.array.v4u32.f32">;
2433 def int_nvvm_tex_unified_1d_array_level_v4u32_f32
2434 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2435 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2436 llvm_float_ty], [],
2437 "llvm.nvvm.tex.unified.1d.array.level.v4u32.f32">;
2438 def int_nvvm_tex_unified_1d_array_grad_v4u32_f32
2439 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2440 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2441 llvm_float_ty, llvm_float_ty], [],
2442 "llvm.nvvm.tex.unified.1d.array.grad.v4u32.f32">;
2443
// llvm.nvvm.tex.unified.2d.* -- 2D texture fetches, unified mode.
// Operand layout: a single i64 (presumably one combined texture handle --
// confirm), then:
//   plain : two coordinates (both i32 for .s32, both float for .f32)
//   level : two float coordinates + one more float (presumably the LOD)
//   grad  : two float coordinates + four more floats (presumably two
//           2-component gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2444 def int_nvvm_tex_unified_2d_v4f32_s32
2445 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2446 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2447 "llvm.nvvm.tex.unified.2d.v4f32.s32">;
2448 def int_nvvm_tex_unified_2d_v4f32_f32
2449 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2450 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2451 "llvm.nvvm.tex.unified.2d.v4f32.f32">;
2452 def int_nvvm_tex_unified_2d_level_v4f32_f32
2453 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2454 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2455 llvm_float_ty], [],
2456 "llvm.nvvm.tex.unified.2d.level.v4f32.f32">;
2457 def int_nvvm_tex_unified_2d_grad_v4f32_f32
2458 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2459 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2460 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2461 "llvm.nvvm.tex.unified.2d.grad.v4f32.f32">;
// v4s32: four i32 results.
2462 def int_nvvm_tex_unified_2d_v4s32_s32
2463 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2464 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2465 "llvm.nvvm.tex.unified.2d.v4s32.s32">;
2466 def int_nvvm_tex_unified_2d_v4s32_f32
2467 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2468 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2469 "llvm.nvvm.tex.unified.2d.v4s32.f32">;
2470 def int_nvvm_tex_unified_2d_level_v4s32_f32
2471 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2472 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2473 llvm_float_ty], [],
2474 "llvm.nvvm.tex.unified.2d.level.v4s32.f32">;
2475 def int_nvvm_tex_unified_2d_grad_v4s32_f32
2476 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2477 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2478 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2479 "llvm.nvvm.tex.unified.2d.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2480 def int_nvvm_tex_unified_2d_v4u32_s32
2481 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2482 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2483 "llvm.nvvm.tex.unified.2d.v4u32.s32">;
2484 def int_nvvm_tex_unified_2d_v4u32_f32
2485 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2486 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2487 "llvm.nvvm.tex.unified.2d.v4u32.f32">;
2488 def int_nvvm_tex_unified_2d_level_v4u32_f32
2489 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2490 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2491 llvm_float_ty], [],
2492 "llvm.nvvm.tex.unified.2d.level.v4u32.f32">;
2493 def int_nvvm_tex_unified_2d_grad_v4u32_f32
2494 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2495 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2496 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2497 "llvm.nvvm.tex.unified.2d.grad.v4u32.f32">;
2498
// llvm.nvvm.tex.unified.2d.array.* -- 2D array texture fetches, unified mode.
// Operand layout: a single i64 (presumably one combined texture handle --
// confirm), then one i32 (presumably the array layer index -- confirm), then:
//   plain : two coordinates (both i32 for .s32, both float for .f32)
//   level : two float coordinates + one more float (presumably the LOD)
//   grad  : two float coordinates + four more floats (presumably two
//           2-component gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2499 def int_nvvm_tex_unified_2d_array_v4f32_s32
2500 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2501 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2502 llvm_i32_ty], [],
2503 "llvm.nvvm.tex.unified.2d.array.v4f32.s32">;
2504 def int_nvvm_tex_unified_2d_array_v4f32_f32
2505 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2506 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2507 llvm_float_ty], [],
2508 "llvm.nvvm.tex.unified.2d.array.v4f32.f32">;
2509 def int_nvvm_tex_unified_2d_array_level_v4f32_f32
2510 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2511 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2512 llvm_float_ty, llvm_float_ty], [],
2513 "llvm.nvvm.tex.unified.2d.array.level.v4f32.f32">;
2514 def int_nvvm_tex_unified_2d_array_grad_v4f32_f32
2515 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2516 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2517 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2518 llvm_float_ty], [],
2519 "llvm.nvvm.tex.unified.2d.array.grad.v4f32.f32">;
// v4s32: four i32 results.
2520 def int_nvvm_tex_unified_2d_array_v4s32_s32
2521 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2522 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2523 llvm_i32_ty], [],
2524 "llvm.nvvm.tex.unified.2d.array.v4s32.s32">;
2525 def int_nvvm_tex_unified_2d_array_v4s32_f32
2526 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2527 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2528 llvm_float_ty], [],
2529 "llvm.nvvm.tex.unified.2d.array.v4s32.f32">;
2530 def int_nvvm_tex_unified_2d_array_level_v4s32_f32
2531 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2532 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2533 llvm_float_ty, llvm_float_ty], [],
2534 "llvm.nvvm.tex.unified.2d.array.level.v4s32.f32">;
2535 def int_nvvm_tex_unified_2d_array_grad_v4s32_f32
2536 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2537 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2538 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2539 llvm_float_ty], [],
2540 "llvm.nvvm.tex.unified.2d.array.grad.v4s32.f32">;
// v4u32: four i32 results (same LLVM types as v4s32; only the name differs).
2541 def int_nvvm_tex_unified_2d_array_v4u32_s32
2542 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2543 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2544 llvm_i32_ty], [],
2545 "llvm.nvvm.tex.unified.2d.array.v4u32.s32">;
2546 def int_nvvm_tex_unified_2d_array_v4u32_f32
2547 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2548 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2549 llvm_float_ty], [],
2550 "llvm.nvvm.tex.unified.2d.array.v4u32.f32">;
2551 def int_nvvm_tex_unified_2d_array_level_v4u32_f32
2552 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2553 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2554 llvm_float_ty, llvm_float_ty], [],
2555 "llvm.nvvm.tex.unified.2d.array.level.v4u32.f32">;
2556 def int_nvvm_tex_unified_2d_array_grad_v4u32_f32
2557 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2558 [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2559 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2560 llvm_float_ty], [],
2561 "llvm.nvvm.tex.unified.2d.array.grad.v4u32.f32">;
2562
// llvm.nvvm.tex.unified.3d.* -- 3D texture fetches, unified mode.
// Operand layout: a single i64 (presumably one combined texture handle --
// confirm), then:
//   plain : three coordinates (all i32 for .s32, all float for .f32)
//   level : three float coordinates + one more float (presumably the LOD)
//   grad  : three float coordinates + six more floats (presumably two
//           3-component gradients)
// Every record returns four scalars and has an empty property list ([]).
//
// v4f32: four float results.
2563 def int_nvvm_tex_unified_3d_v4f32_s32
2564 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2565 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2566 [], "llvm.nvvm.tex.unified.3d.v4f32.s32">;
2567 def int_nvvm_tex_unified_3d_v4f32_f32
2568 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2569 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2570 llvm_float_ty], [],
2571 "llvm.nvvm.tex.unified.3d.v4f32.f32">;
2572 def int_nvvm_tex_unified_3d_level_v4f32_f32
2573 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2574 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2575 llvm_float_ty, llvm_float_ty], [],
2576 "llvm.nvvm.tex.unified.3d.level.v4f32.f32">;
2577 def int_nvvm_tex_unified_3d_grad_v4f32_f32
2578 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2579 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2580 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2581 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2582 "llvm.nvvm.tex.unified.3d.grad.v4f32.f32">;
// v4s32: four i32 results.
2583 def int_nvvm_tex_unified_3d_v4s32_s32
2584 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2585 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2586 [], "llvm.nvvm.tex.unified.3d.v4s32.s32">;
2587 def int_nvvm_tex_unified_3d_v4s32_f32
2588 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2589 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2590 llvm_float_ty], [],
2591 "llvm.nvvm.tex.unified.3d.v4s32.f32">;
2592 def int_nvvm_tex_unified_3d_level_v4s32_f32
2593 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2594 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2595 llvm_float_ty, llvm_float_ty], [],
2596 "llvm.nvvm.tex.unified.3d.level.v4s32.f32">;
2597 def int_nvvm_tex_unified_3d_grad_v4s32_f32
2598 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2599 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2600 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2601 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2602 "llvm.nvvm.tex.unified.3d.grad.v4s32.f32">;
2603 def int_nvvm_tex_unified_3d_v4u32_s32
2604 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2605 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2606 [], "llvm.nvvm.tex.unified.3d.v4u32.s32">;
2607 def int_nvvm_tex_unified_3d_v4u32_f32
2608 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2609 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2610 llvm_float_ty], [],
2611 "llvm.nvvm.tex.unified.3d.v4u32.f32">;
2612 def int_nvvm_tex_unified_3d_level_v4u32_f32
2613 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2614 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2615 llvm_float_ty, llvm_float_ty], [],
2616 "llvm.nvvm.tex.unified.3d.level.v4u32.f32">;
2617 def int_nvvm_tex_unified_3d_grad_v4u32_f32
2618 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2619 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2620 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2621 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2622 "llvm.nvvm.tex.unified.3d.grad.v4u32.f32">;
2623
// Unified-mode cubemap texture intrinsics. Results are four floats (v4f32)
// or four i32 values (v4s32 / v4u32). Operands are an i64 followed by three
// floats; the level records take one additional float. Only f32-coordinate
// variants are declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the texture handle -- confirm against
// the NVPTX backend.
2624 def int_nvvm_tex_unified_cube_v4f32_f32
2625 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2626 [llvm_i64_ty,
2627 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2628 "llvm.nvvm.tex.unified.cube.v4f32.f32">;
2629 def int_nvvm_tex_unified_cube_level_v4f32_f32
2630 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2631 [llvm_i64_ty,
2632 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2633 "llvm.nvvm.tex.unified.cube.level.v4f32.f32">;
2634 def int_nvvm_tex_unified_cube_v4s32_f32
2635 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2636 [llvm_i64_ty,
2637 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2638 "llvm.nvvm.tex.unified.cube.v4s32.f32">;
2639 def int_nvvm_tex_unified_cube_level_v4s32_f32
2640 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2641 [llvm_i64_ty,
2642 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2643 "llvm.nvvm.tex.unified.cube.level.v4s32.f32">;
2644 def int_nvvm_tex_unified_cube_v4u32_f32
2645 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2646 [llvm_i64_ty,
2647 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2648 "llvm.nvvm.tex.unified.cube.v4u32.f32">;
2649 def int_nvvm_tex_unified_cube_level_v4u32_f32
2650 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2651 [llvm_i64_ty,
2652 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2653 "llvm.nvvm.tex.unified.cube.level.v4u32.f32">;
2654
// Unified-mode cubemap-array texture intrinsics. Results are four floats
// (v4f32) or four i32 values (v4s32 / v4u32). Operands are an i64, an i32,
// and three floats; the level records take one additional float. The
// property list is empty.
// NOTE(review): presumably the i64 is the texture handle and the i32 the
// array layer index -- confirm against the NVPTX backend.
2655 def int_nvvm_tex_unified_cube_array_v4f32_f32
2656 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2657 [llvm_i64_ty, llvm_i32_ty,
2658 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2659 "llvm.nvvm.tex.unified.cube.array.v4f32.f32">;
2660 def int_nvvm_tex_unified_cube_array_level_v4f32_f32
2661 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2662 [llvm_i64_ty, llvm_i32_ty,
2663 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2664 "llvm.nvvm.tex.unified.cube.array.level.v4f32.f32">;
2665 def int_nvvm_tex_unified_cube_array_v4s32_f32
2666 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2667 [llvm_i64_ty, llvm_i32_ty,
2668 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2669 "llvm.nvvm.tex.unified.cube.array.v4s32.f32">;
2670 def int_nvvm_tex_unified_cube_array_level_v4s32_f32
2671 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2672 [llvm_i64_ty, llvm_i32_ty,
2673 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2674 "llvm.nvvm.tex.unified.cube.array.level.v4s32.f32">;
2675 def int_nvvm_tex_unified_cube_array_v4u32_f32
2676 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2677 [llvm_i64_ty, llvm_i32_ty,
2678 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2679 "llvm.nvvm.tex.unified.cube.array.v4u32.f32">;
2680 def int_nvvm_tex_unified_cube_array_level_v4u32_f32
2681 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2682 [llvm_i64_ty, llvm_i32_ty,
2683 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2684 "llvm.nvvm.tex.unified.cube.array.level.v4u32.f32">;
2685
// Unified-mode cubemap "grad" texture intrinsics. Results are four floats
// (v4f32) or four i32 values (v4s32 / v4u32). Each record takes an i64
// followed by nine float operands. The property list is empty.
// NOTE(review): presumably the nine floats are three coordinates followed by
// two three-component gradient vectors -- confirm against the PTX ISA.
2686 def int_nvvm_tex_unified_cube_grad_v4f32_f32
2687 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2688 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2689 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2690 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2691 "llvm.nvvm.tex.unified.cube.grad.v4f32.f32">;
2692 def int_nvvm_tex_unified_cube_grad_v4s32_f32
2693 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2694 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2695 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2696 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2697 "llvm.nvvm.tex.unified.cube.grad.v4s32.f32">;
2698 def int_nvvm_tex_unified_cube_grad_v4u32_f32
2699 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2700 [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2701 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2702 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2703 "llvm.nvvm.tex.unified.cube.grad.v4u32.f32">;
2704
// Unified-mode cubemap-array "grad" texture intrinsics. Results are four
// floats (v4f32) or four i32 values (v4s32 / v4u32). Each record takes an
// i64, an i32, and nine float operands. The property list is empty.
// NOTE(review): presumably the i32 is the array layer index and the nine
// floats are three coordinates plus two three-component gradient vectors --
// confirm against the PTX ISA.
2705 def int_nvvm_tex_unified_cube_array_grad_v4f32_f32
2706 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2707 [llvm_i64_ty, llvm_i32_ty,
2708 llvm_float_ty, llvm_float_ty,
2709 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2710 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2711 "llvm.nvvm.tex.unified.cube.array.grad.v4f32.f32">;
2712 def int_nvvm_tex_unified_cube_array_grad_v4s32_f32
2713 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2714 [llvm_i64_ty, llvm_i32_ty,
2715 llvm_float_ty, llvm_float_ty,
2716 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2717 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2718 "llvm.nvvm.tex.unified.cube.array.grad.v4s32.f32">;
2719 def int_nvvm_tex_unified_cube_array_grad_v4u32_f32
2720 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2721 [llvm_i64_ty, llvm_i32_ty,
2722 llvm_float_ty, llvm_float_ty,
2723 llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2724 llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2725 "llvm.nvvm.tex.unified.cube.array.grad.v4u32.f32">;
2726
// Unified-mode 2D tld4 intrinsics. One record exists per component selector
// in the name (r, g, b, a) and per result flavour (v4f32, v4s32, v4u32).
// Every record takes an i64 followed by two floats and produces four
// results: four floats for v4f32, four i32 values for v4s32 / v4u32. The
// property list is empty.
// NOTE(review): presumably the i64 is the texture handle and the floats are
// the 2D coordinates -- confirm against the NVPTX backend.
2727 def int_nvvm_tld4_unified_r_2d_v4f32_f32
2728 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2729 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2730 "llvm.nvvm.tld4.unified.r.2d.v4f32.f32">;
2731 def int_nvvm_tld4_unified_g_2d_v4f32_f32
2732 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2733 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2734 "llvm.nvvm.tld4.unified.g.2d.v4f32.f32">;
2735 def int_nvvm_tld4_unified_b_2d_v4f32_f32
2736 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2737 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2738 "llvm.nvvm.tld4.unified.b.2d.v4f32.f32">;
2739 def int_nvvm_tld4_unified_a_2d_v4f32_f32
2740 : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2741 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2742 "llvm.nvvm.tld4.unified.a.2d.v4f32.f32">;
// v4s32 flavours: four i32 results.
2743 def int_nvvm_tld4_unified_r_2d_v4s32_f32
2744 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2745 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2746 "llvm.nvvm.tld4.unified.r.2d.v4s32.f32">;
2747 def int_nvvm_tld4_unified_g_2d_v4s32_f32
2748 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2749 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2750 "llvm.nvvm.tld4.unified.g.2d.v4s32.f32">;
2751 def int_nvvm_tld4_unified_b_2d_v4s32_f32
2752 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2753 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2754 "llvm.nvvm.tld4.unified.b.2d.v4s32.f32">;
2755 def int_nvvm_tld4_unified_a_2d_v4s32_f32
2756 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2757 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2758 "llvm.nvvm.tld4.unified.a.2d.v4s32.f32">;
// v4u32 flavours: same types as the v4s32 records, different names.
2759 def int_nvvm_tld4_unified_r_2d_v4u32_f32
2760 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2761 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2762 "llvm.nvvm.tld4.unified.r.2d.v4u32.f32">;
2763 def int_nvvm_tld4_unified_g_2d_v4u32_f32
2764 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2765 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2766 "llvm.nvvm.tld4.unified.g.2d.v4u32.f32">;
2767 def int_nvvm_tld4_unified_b_2d_v4u32_f32
2768 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2769 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2770 "llvm.nvvm.tld4.unified.b.2d.v4u32.f32">;
2771 def int_nvvm_tld4_unified_a_2d_v4u32_f32
2772 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2773 [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2774 "llvm.nvvm.tld4.unified.a.2d.v4u32.f32">;
2775
2776
2777 //=== Surface Load
2778 // .clamp variants
// 1D surface loads, .clamp variants. Every record takes an i64 followed by
// one i32. Result lists follow the element type in the name:
//   * i8 and i16 elements are both declared with i16 results
//   * i32 / i64 elements use i32 / i64 results
//   * v2 / v4 records return two / four results of the element's result type
// No v4i64 record is declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32 the x
// coordinate -- confirm against the NVPTX backend.
2779 def int_nvvm_suld_1d_i8_clamp
2780 : Intrinsic<[llvm_i16_ty],
2781 [llvm_i64_ty, llvm_i32_ty], [],
2782 "llvm.nvvm.suld.1d.i8.clamp">;
2783 def int_nvvm_suld_1d_i16_clamp
2784 : Intrinsic<[llvm_i16_ty],
2785 [llvm_i64_ty, llvm_i32_ty], [],
2786 "llvm.nvvm.suld.1d.i16.clamp">;
2787 def int_nvvm_suld_1d_i32_clamp
2788 : Intrinsic<[llvm_i32_ty],
2789 [llvm_i64_ty, llvm_i32_ty], [],
2790 "llvm.nvvm.suld.1d.i32.clamp">;
2791 def int_nvvm_suld_1d_i64_clamp
2792 : Intrinsic<[llvm_i64_ty],
2793 [llvm_i64_ty, llvm_i32_ty], [],
2794 "llvm.nvvm.suld.1d.i64.clamp">;
2795 def int_nvvm_suld_1d_v2i8_clamp
2796 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2797 [llvm_i64_ty, llvm_i32_ty], [],
2798 "llvm.nvvm.suld.1d.v2i8.clamp">;
2799 def int_nvvm_suld_1d_v2i16_clamp
2800 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2801 [llvm_i64_ty, llvm_i32_ty], [],
2802 "llvm.nvvm.suld.1d.v2i16.clamp">;
2803 def int_nvvm_suld_1d_v2i32_clamp
2804 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2805 [llvm_i64_ty, llvm_i32_ty], [],
2806 "llvm.nvvm.suld.1d.v2i32.clamp">;
2807 def int_nvvm_suld_1d_v2i64_clamp
2808 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2809 [llvm_i64_ty, llvm_i32_ty], [],
2810 "llvm.nvvm.suld.1d.v2i64.clamp">;
2811 def int_nvvm_suld_1d_v4i8_clamp
2812 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2813 [llvm_i64_ty, llvm_i32_ty], [],
2814 "llvm.nvvm.suld.1d.v4i8.clamp">;
2815 def int_nvvm_suld_1d_v4i16_clamp
2816 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2817 [llvm_i64_ty, llvm_i32_ty], [],
2818 "llvm.nvvm.suld.1d.v4i16.clamp">;
2819 def int_nvvm_suld_1d_v4i32_clamp
2820 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2821 [llvm_i64_ty, llvm_i32_ty], [],
2822 "llvm.nvvm.suld.1d.v4i32.clamp">;
2823
// 1D-array surface loads, .clamp variants. Every record takes an i64
// followed by two i32 operands. Result lists follow the element type in the
// name: i8 and i16 elements are both declared with i16 results, i32 / i64
// use i32 / i64, and v2 / v4 records return two / four results. No v4i64
// record is declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the layer index and x coordinate -- confirm order against the backend.
2824 def int_nvvm_suld_1d_array_i8_clamp
2825 : Intrinsic<[llvm_i16_ty],
2826 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2827 "llvm.nvvm.suld.1d.array.i8.clamp">;
2828 def int_nvvm_suld_1d_array_i16_clamp
2829 : Intrinsic<[llvm_i16_ty],
2830 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2831 "llvm.nvvm.suld.1d.array.i16.clamp">;
2832 def int_nvvm_suld_1d_array_i32_clamp
2833 : Intrinsic<[llvm_i32_ty],
2834 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2835 "llvm.nvvm.suld.1d.array.i32.clamp">;
2836 def int_nvvm_suld_1d_array_i64_clamp
2837 : Intrinsic<[llvm_i64_ty],
2838 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2839 "llvm.nvvm.suld.1d.array.i64.clamp">;
2840 def int_nvvm_suld_1d_array_v2i8_clamp
2841 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2842 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2843 "llvm.nvvm.suld.1d.array.v2i8.clamp">;
2844 def int_nvvm_suld_1d_array_v2i16_clamp
2845 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2846 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2847 "llvm.nvvm.suld.1d.array.v2i16.clamp">;
2848 def int_nvvm_suld_1d_array_v2i32_clamp
2849 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2850 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2851 "llvm.nvvm.suld.1d.array.v2i32.clamp">;
2852 def int_nvvm_suld_1d_array_v2i64_clamp
2853 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2854 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2855 "llvm.nvvm.suld.1d.array.v2i64.clamp">;
2856 def int_nvvm_suld_1d_array_v4i8_clamp
2857 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2858 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2859 "llvm.nvvm.suld.1d.array.v4i8.clamp">;
2860 def int_nvvm_suld_1d_array_v4i16_clamp
2861 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2862 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2863 "llvm.nvvm.suld.1d.array.v4i16.clamp">;
2864 def int_nvvm_suld_1d_array_v4i32_clamp
2865 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2866 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2867 "llvm.nvvm.suld.1d.array.v4i32.clamp">;
2868
// 2D surface loads, .clamp variants. Every record takes an i64 followed by
// two i32 operands. Result lists follow the element type in the name: i8 and
// i16 elements are both declared with i16 results, i32 / i64 use i32 / i64,
// and v2 / v4 records return two / four results. No v4i64 record is declared
// in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the x and y coordinates -- confirm against the NVPTX backend.
2869 def int_nvvm_suld_2d_i8_clamp
2870 : Intrinsic<[llvm_i16_ty],
2871 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2872 "llvm.nvvm.suld.2d.i8.clamp">;
2873 def int_nvvm_suld_2d_i16_clamp
2874 : Intrinsic<[llvm_i16_ty],
2875 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2876 "llvm.nvvm.suld.2d.i16.clamp">;
2877 def int_nvvm_suld_2d_i32_clamp
2878 : Intrinsic<[llvm_i32_ty],
2879 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2880 "llvm.nvvm.suld.2d.i32.clamp">;
2881 def int_nvvm_suld_2d_i64_clamp
2882 : Intrinsic<[llvm_i64_ty],
2883 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2884 "llvm.nvvm.suld.2d.i64.clamp">;
2885 def int_nvvm_suld_2d_v2i8_clamp
2886 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2887 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2888 "llvm.nvvm.suld.2d.v2i8.clamp">;
2889 def int_nvvm_suld_2d_v2i16_clamp
2890 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2891 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2892 "llvm.nvvm.suld.2d.v2i16.clamp">;
2893 def int_nvvm_suld_2d_v2i32_clamp
2894 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2895 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2896 "llvm.nvvm.suld.2d.v2i32.clamp">;
2897 def int_nvvm_suld_2d_v2i64_clamp
2898 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2899 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2900 "llvm.nvvm.suld.2d.v2i64.clamp">;
2901 def int_nvvm_suld_2d_v4i8_clamp
2902 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2903 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2904 "llvm.nvvm.suld.2d.v4i8.clamp">;
2905 def int_nvvm_suld_2d_v4i16_clamp
2906 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2907 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2908 "llvm.nvvm.suld.2d.v4i16.clamp">;
2909 def int_nvvm_suld_2d_v4i32_clamp
2910 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2911 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2912 "llvm.nvvm.suld.2d.v4i32.clamp">;
2913
// 2D-array surface loads, .clamp variants. Every record takes an i64
// followed by three i32 operands. Result lists follow the element type in
// the name: i8 and i16 elements are both declared with i16 results,
// i32 / i64 use i32 / i64, and v2 / v4 records return two / four results.
// No v4i64 record is declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the layer index plus x and y -- confirm order against the backend.
2914 def int_nvvm_suld_2d_array_i8_clamp
2915 : Intrinsic<[llvm_i16_ty],
2916 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2917 "llvm.nvvm.suld.2d.array.i8.clamp">;
2918 def int_nvvm_suld_2d_array_i16_clamp
2919 : Intrinsic<[llvm_i16_ty],
2920 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2921 "llvm.nvvm.suld.2d.array.i16.clamp">;
2922 def int_nvvm_suld_2d_array_i32_clamp
2923 : Intrinsic<[llvm_i32_ty],
2924 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2925 "llvm.nvvm.suld.2d.array.i32.clamp">;
2926 def int_nvvm_suld_2d_array_i64_clamp
2927 : Intrinsic<[llvm_i64_ty],
2928 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2929 "llvm.nvvm.suld.2d.array.i64.clamp">;
2930 def int_nvvm_suld_2d_array_v2i8_clamp
2931 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2932 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2933 "llvm.nvvm.suld.2d.array.v2i8.clamp">;
2934 def int_nvvm_suld_2d_array_v2i16_clamp
2935 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2936 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2937 "llvm.nvvm.suld.2d.array.v2i16.clamp">;
2938 def int_nvvm_suld_2d_array_v2i32_clamp
2939 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2940 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2941 "llvm.nvvm.suld.2d.array.v2i32.clamp">;
2942 def int_nvvm_suld_2d_array_v2i64_clamp
2943 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2944 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2945 "llvm.nvvm.suld.2d.array.v2i64.clamp">;
2946 def int_nvvm_suld_2d_array_v4i8_clamp
2947 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2948 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2949 "llvm.nvvm.suld.2d.array.v4i8.clamp">;
2950 def int_nvvm_suld_2d_array_v4i16_clamp
2951 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2952 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2953 "llvm.nvvm.suld.2d.array.v4i16.clamp">;
2954 def int_nvvm_suld_2d_array_v4i32_clamp
2955 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2956 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2957 "llvm.nvvm.suld.2d.array.v4i32.clamp">;
2958
// 3D surface loads, .clamp variants. Every record takes an i64 followed by
// three i32 operands. Result lists follow the element type in the name: i8
// and i16 elements are both declared with i16 results, i32 / i64 use
// i32 / i64, and v2 / v4 records return two / four results. No v4i64 record
// is declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the x, y and z coordinates -- confirm against the NVPTX backend.
2959 def int_nvvm_suld_3d_i8_clamp
2960 : Intrinsic<[llvm_i16_ty],
2961 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2962 "llvm.nvvm.suld.3d.i8.clamp">;
2963 def int_nvvm_suld_3d_i16_clamp
2964 : Intrinsic<[llvm_i16_ty],
2965 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2966 "llvm.nvvm.suld.3d.i16.clamp">;
2967 def int_nvvm_suld_3d_i32_clamp
2968 : Intrinsic<[llvm_i32_ty],
2969 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2970 "llvm.nvvm.suld.3d.i32.clamp">;
2971 def int_nvvm_suld_3d_i64_clamp
2972 : Intrinsic<[llvm_i64_ty],
2973 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2974 "llvm.nvvm.suld.3d.i64.clamp">;
2975 def int_nvvm_suld_3d_v2i8_clamp
2976 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2977 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2978 "llvm.nvvm.suld.3d.v2i8.clamp">;
2979 def int_nvvm_suld_3d_v2i16_clamp
2980 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2981 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2982 "llvm.nvvm.suld.3d.v2i16.clamp">;
2983 def int_nvvm_suld_3d_v2i32_clamp
2984 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2985 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2986 "llvm.nvvm.suld.3d.v2i32.clamp">;
2987 def int_nvvm_suld_3d_v2i64_clamp
2988 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2989 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2990 "llvm.nvvm.suld.3d.v2i64.clamp">;
2991 def int_nvvm_suld_3d_v4i8_clamp
2992 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2993 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2994 "llvm.nvvm.suld.3d.v4i8.clamp">;
2995 def int_nvvm_suld_3d_v4i16_clamp
2996 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2997 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2998 "llvm.nvvm.suld.3d.v4i16.clamp">;
2999 def int_nvvm_suld_3d_v4i32_clamp
3000 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3001 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3002 "llvm.nvvm.suld.3d.v4i32.clamp">;
3003
3004 // .trap variants
// 1D surface loads, .trap variants. Every record takes an i64 followed by
// one i32. Result lists follow the element type in the name: i8 and i16
// elements are both declared with i16 results, i32 / i64 use i32 / i64, and
// v2 / v4 records return two / four results. No v4i64 record is declared in
// this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32 the x
// coordinate -- confirm against the NVPTX backend.
3005 def int_nvvm_suld_1d_i8_trap
3006 : Intrinsic<[llvm_i16_ty],
3007 [llvm_i64_ty, llvm_i32_ty], [],
3008 "llvm.nvvm.suld.1d.i8.trap">;
3009 def int_nvvm_suld_1d_i16_trap
3010 : Intrinsic<[llvm_i16_ty],
3011 [llvm_i64_ty, llvm_i32_ty], [],
3012 "llvm.nvvm.suld.1d.i16.trap">;
3013 def int_nvvm_suld_1d_i32_trap
3014 : Intrinsic<[llvm_i32_ty],
3015 [llvm_i64_ty, llvm_i32_ty], [],
3016 "llvm.nvvm.suld.1d.i32.trap">;
3017 def int_nvvm_suld_1d_i64_trap
3018 : Intrinsic<[llvm_i64_ty],
3019 [llvm_i64_ty, llvm_i32_ty], [],
3020 "llvm.nvvm.suld.1d.i64.trap">;
3021 def int_nvvm_suld_1d_v2i8_trap
3022 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3023 [llvm_i64_ty, llvm_i32_ty], [],
3024 "llvm.nvvm.suld.1d.v2i8.trap">;
3025 def int_nvvm_suld_1d_v2i16_trap
3026 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3027 [llvm_i64_ty, llvm_i32_ty], [],
3028 "llvm.nvvm.suld.1d.v2i16.trap">;
3029 def int_nvvm_suld_1d_v2i32_trap
3030 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3031 [llvm_i64_ty, llvm_i32_ty], [],
3032 "llvm.nvvm.suld.1d.v2i32.trap">;
3033 def int_nvvm_suld_1d_v2i64_trap
3034 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3035 [llvm_i64_ty, llvm_i32_ty], [],
3036 "llvm.nvvm.suld.1d.v2i64.trap">;
3037 def int_nvvm_suld_1d_v4i8_trap
3038 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3039 [llvm_i64_ty, llvm_i32_ty], [],
3040 "llvm.nvvm.suld.1d.v4i8.trap">;
3041 def int_nvvm_suld_1d_v4i16_trap
3042 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3043 [llvm_i64_ty, llvm_i32_ty], [],
3044 "llvm.nvvm.suld.1d.v4i16.trap">;
3045 def int_nvvm_suld_1d_v4i32_trap
3046 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3047 [llvm_i64_ty, llvm_i32_ty], [],
3048 "llvm.nvvm.suld.1d.v4i32.trap">;
3049
// 1D-array surface loads, .trap variants. Every record takes an i64
// followed by two i32 operands. Result lists follow the element type in the
// name: i8 and i16 elements are both declared with i16 results, i32 / i64
// use i32 / i64, and v2 / v4 records return two / four results. No v4i64
// record is declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the layer index and x coordinate -- confirm order against the backend.
3050 def int_nvvm_suld_1d_array_i8_trap
3051 : Intrinsic<[llvm_i16_ty],
3052 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3053 "llvm.nvvm.suld.1d.array.i8.trap">;
3054 def int_nvvm_suld_1d_array_i16_trap
3055 : Intrinsic<[llvm_i16_ty],
3056 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3057 "llvm.nvvm.suld.1d.array.i16.trap">;
3058 def int_nvvm_suld_1d_array_i32_trap
3059 : Intrinsic<[llvm_i32_ty],
3060 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3061 "llvm.nvvm.suld.1d.array.i32.trap">;
3062 def int_nvvm_suld_1d_array_i64_trap
3063 : Intrinsic<[llvm_i64_ty],
3064 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3065 "llvm.nvvm.suld.1d.array.i64.trap">;
3066 def int_nvvm_suld_1d_array_v2i8_trap
3067 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3068 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3069 "llvm.nvvm.suld.1d.array.v2i8.trap">;
3070 def int_nvvm_suld_1d_array_v2i16_trap
3071 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3072 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3073 "llvm.nvvm.suld.1d.array.v2i16.trap">;
3074 def int_nvvm_suld_1d_array_v2i32_trap
3075 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3076 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3077 "llvm.nvvm.suld.1d.array.v2i32.trap">;
3078 def int_nvvm_suld_1d_array_v2i64_trap
3079 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3080 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3081 "llvm.nvvm.suld.1d.array.v2i64.trap">;
3082 def int_nvvm_suld_1d_array_v4i8_trap
3083 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3084 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3085 "llvm.nvvm.suld.1d.array.v4i8.trap">;
3086 def int_nvvm_suld_1d_array_v4i16_trap
3087 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3088 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3089 "llvm.nvvm.suld.1d.array.v4i16.trap">;
3090 def int_nvvm_suld_1d_array_v4i32_trap
3091 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3092 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3093 "llvm.nvvm.suld.1d.array.v4i32.trap">;
3094
// 2D surface loads, .trap variants. Every record takes an i64 followed by
// two i32 operands. Result lists follow the element type in the name: i8 and
// i16 elements are both declared with i16 results, i32 / i64 use i32 / i64,
// and v2 / v4 records return two / four results. No v4i64 record is declared
// in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the x and y coordinates -- confirm against the NVPTX backend.
3095 def int_nvvm_suld_2d_i8_trap
3096 : Intrinsic<[llvm_i16_ty],
3097 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3098 "llvm.nvvm.suld.2d.i8.trap">;
3099 def int_nvvm_suld_2d_i16_trap
3100 : Intrinsic<[llvm_i16_ty],
3101 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3102 "llvm.nvvm.suld.2d.i16.trap">;
3103 def int_nvvm_suld_2d_i32_trap
3104 : Intrinsic<[llvm_i32_ty],
3105 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3106 "llvm.nvvm.suld.2d.i32.trap">;
3107 def int_nvvm_suld_2d_i64_trap
3108 : Intrinsic<[llvm_i64_ty],
3109 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3110 "llvm.nvvm.suld.2d.i64.trap">;
3111 def int_nvvm_suld_2d_v2i8_trap
3112 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3113 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3114 "llvm.nvvm.suld.2d.v2i8.trap">;
3115 def int_nvvm_suld_2d_v2i16_trap
3116 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3117 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3118 "llvm.nvvm.suld.2d.v2i16.trap">;
3119 def int_nvvm_suld_2d_v2i32_trap
3120 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3121 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3122 "llvm.nvvm.suld.2d.v2i32.trap">;
3123 def int_nvvm_suld_2d_v2i64_trap
3124 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3125 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3126 "llvm.nvvm.suld.2d.v2i64.trap">;
3127 def int_nvvm_suld_2d_v4i8_trap
3128 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3129 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3130 "llvm.nvvm.suld.2d.v4i8.trap">;
3131 def int_nvvm_suld_2d_v4i16_trap
3132 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3133 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3134 "llvm.nvvm.suld.2d.v4i16.trap">;
3135 def int_nvvm_suld_2d_v4i32_trap
3136 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3137 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3138 "llvm.nvvm.suld.2d.v4i32.trap">;
3139
// 2D-array surface loads, .trap variants. Every record takes an i64
// followed by three i32 operands. Result lists follow the element type in
// the name: i8 and i16 elements are both declared with i16 results,
// i32 / i64 use i32 / i64, and v2 / v4 records return two / four results.
// No v4i64 record is declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the layer index plus x and y -- confirm order against the backend.
3140 def int_nvvm_suld_2d_array_i8_trap
3141 : Intrinsic<[llvm_i16_ty],
3142 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3143 "llvm.nvvm.suld.2d.array.i8.trap">;
3144 def int_nvvm_suld_2d_array_i16_trap
3145 : Intrinsic<[llvm_i16_ty],
3146 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3147 "llvm.nvvm.suld.2d.array.i16.trap">;
3148 def int_nvvm_suld_2d_array_i32_trap
3149 : Intrinsic<[llvm_i32_ty],
3150 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3151 "llvm.nvvm.suld.2d.array.i32.trap">;
3152 def int_nvvm_suld_2d_array_i64_trap
3153 : Intrinsic<[llvm_i64_ty],
3154 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3155 "llvm.nvvm.suld.2d.array.i64.trap">;
3156 def int_nvvm_suld_2d_array_v2i8_trap
3157 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3158 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3159 "llvm.nvvm.suld.2d.array.v2i8.trap">;
3160 def int_nvvm_suld_2d_array_v2i16_trap
3161 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3162 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3163 "llvm.nvvm.suld.2d.array.v2i16.trap">;
3164 def int_nvvm_suld_2d_array_v2i32_trap
3165 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3166 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3167 "llvm.nvvm.suld.2d.array.v2i32.trap">;
3168 def int_nvvm_suld_2d_array_v2i64_trap
3169 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3170 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3171 "llvm.nvvm.suld.2d.array.v2i64.trap">;
3172 def int_nvvm_suld_2d_array_v4i8_trap
3173 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3174 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3175 "llvm.nvvm.suld.2d.array.v4i8.trap">;
3176 def int_nvvm_suld_2d_array_v4i16_trap
3177 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3178 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3179 "llvm.nvvm.suld.2d.array.v4i16.trap">;
3180 def int_nvvm_suld_2d_array_v4i32_trap
3181 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3182 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3183 "llvm.nvvm.suld.2d.array.v4i32.trap">;
3184
// 3D surface loads, .trap variants. Every record takes an i64 followed by
// three i32 operands. Result lists follow the element type in the name: i8
// and i16 elements are both declared with i16 results, i32 / i64 use
// i32 / i64, and v2 / v4 records return two / four results. No v4i64 record
// is declared in this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32s are
// the x, y and z coordinates -- confirm against the NVPTX backend.
3185 def int_nvvm_suld_3d_i8_trap
3186 : Intrinsic<[llvm_i16_ty],
3187 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3188 "llvm.nvvm.suld.3d.i8.trap">;
3189 def int_nvvm_suld_3d_i16_trap
3190 : Intrinsic<[llvm_i16_ty],
3191 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3192 "llvm.nvvm.suld.3d.i16.trap">;
3193 def int_nvvm_suld_3d_i32_trap
3194 : Intrinsic<[llvm_i32_ty],
3195 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3196 "llvm.nvvm.suld.3d.i32.trap">;
3197 def int_nvvm_suld_3d_i64_trap
3198 : Intrinsic<[llvm_i64_ty],
3199 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3200 "llvm.nvvm.suld.3d.i64.trap">;
3201 def int_nvvm_suld_3d_v2i8_trap
3202 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3203 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3204 "llvm.nvvm.suld.3d.v2i8.trap">;
3205 def int_nvvm_suld_3d_v2i16_trap
3206 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3207 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3208 "llvm.nvvm.suld.3d.v2i16.trap">;
3209 def int_nvvm_suld_3d_v2i32_trap
3210 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3211 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3212 "llvm.nvvm.suld.3d.v2i32.trap">;
3213 def int_nvvm_suld_3d_v2i64_trap
3214 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3215 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3216 "llvm.nvvm.suld.3d.v2i64.trap">;
3217 def int_nvvm_suld_3d_v4i8_trap
3218 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3219 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3220 "llvm.nvvm.suld.3d.v4i8.trap">;
3221 def int_nvvm_suld_3d_v4i16_trap
3222 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3223 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3224 "llvm.nvvm.suld.3d.v4i16.trap">;
3225 def int_nvvm_suld_3d_v4i32_trap
3226 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3227 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3228 "llvm.nvvm.suld.3d.v4i32.trap">;
3229
3230 // .zero variants
// 1D surface loads, .zero variants. Every record takes an i64 followed by
// one i32. Result lists follow the element type in the name: i8 and i16
// elements are both declared with i16 results, i32 / i64 use i32 / i64, and
// v2 / v4 records return two / four results. No v4i64 record is declared in
// this group. The property list is empty.
// NOTE(review): presumably the i64 is the surface handle and the i32 the x
// coordinate -- confirm against the NVPTX backend.
3231 def int_nvvm_suld_1d_i8_zero
3232 : Intrinsic<[llvm_i16_ty],
3233 [llvm_i64_ty, llvm_i32_ty], [],
3234 "llvm.nvvm.suld.1d.i8.zero">;
3235 def int_nvvm_suld_1d_i16_zero
3236 : Intrinsic<[llvm_i16_ty],
3237 [llvm_i64_ty, llvm_i32_ty], [],
3238 "llvm.nvvm.suld.1d.i16.zero">;
3239 def int_nvvm_suld_1d_i32_zero
3240 : Intrinsic<[llvm_i32_ty],
3241 [llvm_i64_ty, llvm_i32_ty], [],
3242 "llvm.nvvm.suld.1d.i32.zero">;
3243 def int_nvvm_suld_1d_i64_zero
3244 : Intrinsic<[llvm_i64_ty],
3245 [llvm_i64_ty, llvm_i32_ty], [],
3246 "llvm.nvvm.suld.1d.i64.zero">;
3247 def int_nvvm_suld_1d_v2i8_zero
3248 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3249 [llvm_i64_ty, llvm_i32_ty], [],
3250 "llvm.nvvm.suld.1d.v2i8.zero">;
3251 def int_nvvm_suld_1d_v2i16_zero
3252 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3253 [llvm_i64_ty, llvm_i32_ty], [],
3254 "llvm.nvvm.suld.1d.v2i16.zero">;
3255 def int_nvvm_suld_1d_v2i32_zero
3256 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3257 [llvm_i64_ty, llvm_i32_ty], [],
3258 "llvm.nvvm.suld.1d.v2i32.zero">;
3259 def int_nvvm_suld_1d_v2i64_zero
3260 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3261 [llvm_i64_ty, llvm_i32_ty], [],
3262 "llvm.nvvm.suld.1d.v2i64.zero">;
3263 def int_nvvm_suld_1d_v4i8_zero
3264 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3265 [llvm_i64_ty, llvm_i32_ty], [],
3266 "llvm.nvvm.suld.1d.v4i8.zero">;
3267 def int_nvvm_suld_1d_v4i16_zero
3268 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3269 [llvm_i64_ty, llvm_i32_ty], [],
3270 "llvm.nvvm.suld.1d.v4i16.zero">;
3271 def int_nvvm_suld_1d_v4i32_zero
3272 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3273 [llvm_i64_ty, llvm_i32_ty], [],
3274 "llvm.nvvm.suld.1d.v4i32.zero">;
3275
// 1d-array surface loads, `.zero` variant.
// Operands: an i64 followed by two i32 values (presumably the surface handle,
// then the array index and x coordinate -- confirm order against the NVPTX
// lowering).
// Results: one integer per element (1, 2 or 4 of them); i8 elements are
// declared as i16.
// The property list is empty and no ClangBuiltin is attached.
3276 def int_nvvm_suld_1d_array_i8_zero
3277 : Intrinsic<[llvm_i16_ty],
3278 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3279 "llvm.nvvm.suld.1d.array.i8.zero">;
3280 def int_nvvm_suld_1d_array_i16_zero
3281 : Intrinsic<[llvm_i16_ty],
3282 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3283 "llvm.nvvm.suld.1d.array.i16.zero">;
3284 def int_nvvm_suld_1d_array_i32_zero
3285 : Intrinsic<[llvm_i32_ty],
3286 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3287 "llvm.nvvm.suld.1d.array.i32.zero">;
3288 def int_nvvm_suld_1d_array_i64_zero
3289 : Intrinsic<[llvm_i64_ty],
3290 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3291 "llvm.nvvm.suld.1d.array.i64.zero">;
3292 def int_nvvm_suld_1d_array_v2i8_zero
3293 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3294 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3295 "llvm.nvvm.suld.1d.array.v2i8.zero">;
3296 def int_nvvm_suld_1d_array_v2i16_zero
3297 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3298 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3299 "llvm.nvvm.suld.1d.array.v2i16.zero">;
3300 def int_nvvm_suld_1d_array_v2i32_zero
3301 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3302 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3303 "llvm.nvvm.suld.1d.array.v2i32.zero">;
3304 def int_nvvm_suld_1d_array_v2i64_zero
3305 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3306 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3307 "llvm.nvvm.suld.1d.array.v2i64.zero">;
3308 def int_nvvm_suld_1d_array_v4i8_zero
3309 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3310 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3311 "llvm.nvvm.suld.1d.array.v4i8.zero">;
3312 def int_nvvm_suld_1d_array_v4i16_zero
3313 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3314 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3315 "llvm.nvvm.suld.1d.array.v4i16.zero">;
3316 def int_nvvm_suld_1d_array_v4i32_zero
3317 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3318 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3319 "llvm.nvvm.suld.1d.array.v4i32.zero">;
3320
// 2d surface loads, `.zero` variant.
// Operands: an i64 followed by two i32 values (presumably the surface handle
// and the x/y coordinates -- confirm against the NVPTX lowering).
// Results: one integer per element (1, 2 or 4 of them); i8 elements are
// declared as i16.
// The property list is empty and no ClangBuiltin is attached.
3321 def int_nvvm_suld_2d_i8_zero
3322 : Intrinsic<[llvm_i16_ty],
3323 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3324 "llvm.nvvm.suld.2d.i8.zero">;
3325 def int_nvvm_suld_2d_i16_zero
3326 : Intrinsic<[llvm_i16_ty],
3327 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3328 "llvm.nvvm.suld.2d.i16.zero">;
3329 def int_nvvm_suld_2d_i32_zero
3330 : Intrinsic<[llvm_i32_ty],
3331 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3332 "llvm.nvvm.suld.2d.i32.zero">;
3333 def int_nvvm_suld_2d_i64_zero
3334 : Intrinsic<[llvm_i64_ty],
3335 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3336 "llvm.nvvm.suld.2d.i64.zero">;
3337 def int_nvvm_suld_2d_v2i8_zero
3338 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3339 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3340 "llvm.nvvm.suld.2d.v2i8.zero">;
3341 def int_nvvm_suld_2d_v2i16_zero
3342 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3343 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3344 "llvm.nvvm.suld.2d.v2i16.zero">;
3345 def int_nvvm_suld_2d_v2i32_zero
3346 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3347 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3348 "llvm.nvvm.suld.2d.v2i32.zero">;
3349 def int_nvvm_suld_2d_v2i64_zero
3350 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3351 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3352 "llvm.nvvm.suld.2d.v2i64.zero">;
3353 def int_nvvm_suld_2d_v4i8_zero
3354 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3355 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3356 "llvm.nvvm.suld.2d.v4i8.zero">;
3357 def int_nvvm_suld_2d_v4i16_zero
3358 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3359 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3360 "llvm.nvvm.suld.2d.v4i16.zero">;
3361 def int_nvvm_suld_2d_v4i32_zero
3362 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3363 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3364 "llvm.nvvm.suld.2d.v4i32.zero">;
3365
// 2d-array surface loads, `.zero` variant.
// Operands: an i64 followed by three i32 values (presumably the surface
// handle, then the array index and x/y coordinates -- confirm order against
// the NVPTX lowering).
// Results: one integer per element (1, 2 or 4 of them); i8 elements are
// declared as i16.
// The property list is empty and no ClangBuiltin is attached.
3366 def int_nvvm_suld_2d_array_i8_zero
3367 : Intrinsic<[llvm_i16_ty],
3368 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3369 "llvm.nvvm.suld.2d.array.i8.zero">;
3370 def int_nvvm_suld_2d_array_i16_zero
3371 : Intrinsic<[llvm_i16_ty],
3372 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3373 "llvm.nvvm.suld.2d.array.i16.zero">;
3374 def int_nvvm_suld_2d_array_i32_zero
3375 : Intrinsic<[llvm_i32_ty],
3376 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3377 "llvm.nvvm.suld.2d.array.i32.zero">;
3378 def int_nvvm_suld_2d_array_i64_zero
3379 : Intrinsic<[llvm_i64_ty],
3380 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3381 "llvm.nvvm.suld.2d.array.i64.zero">;
3382 def int_nvvm_suld_2d_array_v2i8_zero
3383 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3384 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3385 "llvm.nvvm.suld.2d.array.v2i8.zero">;
3386 def int_nvvm_suld_2d_array_v2i16_zero
3387 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3388 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3389 "llvm.nvvm.suld.2d.array.v2i16.zero">;
3390 def int_nvvm_suld_2d_array_v2i32_zero
3391 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3392 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3393 "llvm.nvvm.suld.2d.array.v2i32.zero">;
3394 def int_nvvm_suld_2d_array_v2i64_zero
3395 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3396 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3397 "llvm.nvvm.suld.2d.array.v2i64.zero">;
3398 def int_nvvm_suld_2d_array_v4i8_zero
3399 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3400 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3401 "llvm.nvvm.suld.2d.array.v4i8.zero">;
3402 def int_nvvm_suld_2d_array_v4i16_zero
3403 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3404 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3405 "llvm.nvvm.suld.2d.array.v4i16.zero">;
3406 def int_nvvm_suld_2d_array_v4i32_zero
3407 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3408 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3409 "llvm.nvvm.suld.2d.array.v4i32.zero">;
3410
// 3d surface loads, `.zero` variant.
// Operands: an i64 followed by three i32 values (presumably the surface handle
// and the x/y/z coordinates -- confirm against the NVPTX lowering).
// Results: one integer per element (1, 2 or 4 of them); i8 elements are
// declared as i16.
// The property list is empty and no ClangBuiltin is attached.
3411 def int_nvvm_suld_3d_i8_zero
3412 : Intrinsic<[llvm_i16_ty],
3413 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3414 "llvm.nvvm.suld.3d.i8.zero">;
3415 def int_nvvm_suld_3d_i16_zero
3416 : Intrinsic<[llvm_i16_ty],
3417 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3418 "llvm.nvvm.suld.3d.i16.zero">;
3419 def int_nvvm_suld_3d_i32_zero
3420 : Intrinsic<[llvm_i32_ty],
3421 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3422 "llvm.nvvm.suld.3d.i32.zero">;
3423 def int_nvvm_suld_3d_i64_zero
3424 : Intrinsic<[llvm_i64_ty],
3425 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3426 "llvm.nvvm.suld.3d.i64.zero">;
3427 def int_nvvm_suld_3d_v2i8_zero
3428 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3429 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3430 "llvm.nvvm.suld.3d.v2i8.zero">;
3431 def int_nvvm_suld_3d_v2i16_zero
3432 : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3433 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3434 "llvm.nvvm.suld.3d.v2i16.zero">;
3435 def int_nvvm_suld_3d_v2i32_zero
3436 : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3437 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3438 "llvm.nvvm.suld.3d.v2i32.zero">;
3439 def int_nvvm_suld_3d_v2i64_zero
3440 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3441 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3442 "llvm.nvvm.suld.3d.v2i64.zero">;
3443 def int_nvvm_suld_3d_v4i8_zero
3444 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3445 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3446 "llvm.nvvm.suld.3d.v4i8.zero">;
3447 def int_nvvm_suld_3d_v4i16_zero
3448 : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3449 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3450 "llvm.nvvm.suld.3d.v4i16.zero">;
3451 def int_nvvm_suld_3d_v4i32_zero
3452 : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3453 [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3454 "llvm.nvvm.suld.3d.v4i32.zero">;
3455
3456 //===- Texture Query ------------------------------------------------------===//
3457
// Texture attribute queries (llvm.nvvm.txq.*).
// Every intrinsic here takes a single i64 (presumably the texture handle --
// confirm against the NVPTX lowering) and returns an i32.
// All are marked IntrNoMem and are bound to the Clang builtin of the same
// name with a `__nvvm_txq_` prefix.
3458 def int_nvvm_txq_channel_order
3459 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3460 "llvm.nvvm.txq.channel.order">,
3461 ClangBuiltin<"__nvvm_txq_channel_order">;
3462 def int_nvvm_txq_channel_data_type
3463 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3464 "llvm.nvvm.txq.channel.data.type">,
3465 ClangBuiltin<"__nvvm_txq_channel_data_type">;
3466 def int_nvvm_txq_width
3467 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3468 "llvm.nvvm.txq.width">,
3469 ClangBuiltin<"__nvvm_txq_width">;
3470 def int_nvvm_txq_height
3471 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3472 "llvm.nvvm.txq.height">,
3473 ClangBuiltin<"__nvvm_txq_height">;
3474 def int_nvvm_txq_depth
3475 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3476 "llvm.nvvm.txq.depth">,
3477 ClangBuiltin<"__nvvm_txq_depth">;
3478 def int_nvvm_txq_array_size
3479 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3480 "llvm.nvvm.txq.array.size">,
3481 ClangBuiltin<"__nvvm_txq_array_size">;
3482 def int_nvvm_txq_num_samples
3483 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3484 "llvm.nvvm.txq.num.samples">,
3485 ClangBuiltin<"__nvvm_txq_num_samples">;
3486 def int_nvvm_txq_num_mipmap_levels
3487 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3488 "llvm.nvvm.txq.num.mipmap.levels">,
3489 ClangBuiltin<"__nvvm_txq_num_mipmap_levels">;
3490
3491 //===- Surface Query ------------------------------------------------------===//
3492
// Surface attribute queries (llvm.nvvm.suq.*).
// Every intrinsic here takes a single i64 (presumably the surface handle --
// confirm against the NVPTX lowering) and returns an i32.
// All are marked IntrNoMem and are bound to the Clang builtin of the same
// name with a `__nvvm_suq_` prefix.
3493 def int_nvvm_suq_channel_order
3494 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3495 "llvm.nvvm.suq.channel.order">,
3496 ClangBuiltin<"__nvvm_suq_channel_order">;
3497 def int_nvvm_suq_channel_data_type
3498 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3499 "llvm.nvvm.suq.channel.data.type">,
3500 ClangBuiltin<"__nvvm_suq_channel_data_type">;
3501 def int_nvvm_suq_width
3502 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3503 "llvm.nvvm.suq.width">,
3504 ClangBuiltin<"__nvvm_suq_width">;
3505 def int_nvvm_suq_height
3506 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3507 "llvm.nvvm.suq.height">,
3508 ClangBuiltin<"__nvvm_suq_height">;
3509 def int_nvvm_suq_depth
3510 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3511 "llvm.nvvm.suq.depth">,
3512 ClangBuiltin<"__nvvm_suq_depth">;
3513 def int_nvvm_suq_array_size
3514 : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3515 "llvm.nvvm.suq.array.size">,
3516 ClangBuiltin<"__nvvm_suq_array_size">;
3517
3518
3519 //===- Handle Query -------------------------------------------------------===//
3520
// Handle type predicates (llvm.nvvm.istypep.*).
// Each takes a single i64 (presumably an opaque handle value -- confirm
// against the NVPTX lowering) and returns an i1; judging by the names, the
// result says whether the handle is a sampler, surface or texture.
// All are marked IntrNoMem and are bound to the matching `__nvvm_istypep_*`
// Clang builtin.
3521 def int_nvvm_istypep_sampler
3522 : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3523 "llvm.nvvm.istypep.sampler">,
3524 ClangBuiltin<"__nvvm_istypep_sampler">;
3525 def int_nvvm_istypep_surface
3526 : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3527 "llvm.nvvm.istypep.surface">,
3528 ClangBuiltin<"__nvvm_istypep_surface">;
3529 def int_nvvm_istypep_texture
3530 : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3531 "llvm.nvvm.istypep.texture">,
3532 ClangBuiltin<"__nvvm_istypep_texture">;
3533
3534
3535
3536 //===- Surface Stores -----------------------------------------------------===//
3537
3538 // Unformatted
3539 // .clamp variant
// 1d `sust.b` surface stores, `.clamp` variant.
// No results. Operands: an i64 and one i32 (presumably the surface handle and
// the x coordinate -- confirm against the NVPTX lowering), followed by one
// operand per stored element (1, 2 or 4); i8 elements are passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_1d_<type>_clamp`.
3540 def int_nvvm_sust_b_1d_i8_clamp
3541 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3542 "llvm.nvvm.sust.b.1d.i8.clamp">,
3543 ClangBuiltin<"__nvvm_sust_b_1d_i8_clamp">;
3544 def int_nvvm_sust_b_1d_i16_clamp
3545 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3546 "llvm.nvvm.sust.b.1d.i16.clamp">,
3547 ClangBuiltin<"__nvvm_sust_b_1d_i16_clamp">;
3548 def int_nvvm_sust_b_1d_i32_clamp
3549 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3550 "llvm.nvvm.sust.b.1d.i32.clamp">,
3551 ClangBuiltin<"__nvvm_sust_b_1d_i32_clamp">;
3552 def int_nvvm_sust_b_1d_i64_clamp
3553 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
3554 "llvm.nvvm.sust.b.1d.i64.clamp">,
3555 ClangBuiltin<"__nvvm_sust_b_1d_i64_clamp">;
3556 def int_nvvm_sust_b_1d_v2i8_clamp
3557 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3558 "llvm.nvvm.sust.b.1d.v2i8.clamp">,
3559 ClangBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">;
3560 def int_nvvm_sust_b_1d_v2i16_clamp
3561 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3562 "llvm.nvvm.sust.b.1d.v2i16.clamp">,
3563 ClangBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">;
3564 def int_nvvm_sust_b_1d_v2i32_clamp
3565 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3566 "llvm.nvvm.sust.b.1d.v2i32.clamp">,
3567 ClangBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">;
3568 def int_nvvm_sust_b_1d_v2i64_clamp
3569 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
3570 "llvm.nvvm.sust.b.1d.v2i64.clamp">,
3571 ClangBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">;
3572 def int_nvvm_sust_b_1d_v4i8_clamp
3573 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3574 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3575 "llvm.nvvm.sust.b.1d.v4i8.clamp">,
3576 ClangBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">;
3577 def int_nvvm_sust_b_1d_v4i16_clamp
3578 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3579 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3580 "llvm.nvvm.sust.b.1d.v4i16.clamp">,
3581 ClangBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">;
3582 def int_nvvm_sust_b_1d_v4i32_clamp
3583 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3584 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3585 "llvm.nvvm.sust.b.1d.v4i32.clamp">,
3586 ClangBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">;
3587
3588
// 1d-array `sust.b` surface stores, `.clamp` variant.
// No results. Operands: an i64 and two i32 values (presumably the surface
// handle, then the array index and x coordinate -- confirm order against the
// NVPTX lowering), followed by one operand per stored element (1, 2 or 4);
// i8 elements are passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_1d_array_<type>_clamp`.
3589 def int_nvvm_sust_b_1d_array_i8_clamp
3590 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3591 "llvm.nvvm.sust.b.1d.array.i8.clamp">,
3592 ClangBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">;
3593 def int_nvvm_sust_b_1d_array_i16_clamp
3594 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3595 "llvm.nvvm.sust.b.1d.array.i16.clamp">,
3596 ClangBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">;
3597 def int_nvvm_sust_b_1d_array_i32_clamp
3598 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3599 "llvm.nvvm.sust.b.1d.array.i32.clamp">,
3600 ClangBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">;
3601 def int_nvvm_sust_b_1d_array_i64_clamp
3602 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3603 "llvm.nvvm.sust.b.1d.array.i64.clamp">,
3604 ClangBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">;
3605 def int_nvvm_sust_b_1d_array_v2i8_clamp
3606 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3607 llvm_i16_ty, llvm_i16_ty], [],
3608 "llvm.nvvm.sust.b.1d.array.v2i8.clamp">,
3609 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">;
3610 def int_nvvm_sust_b_1d_array_v2i16_clamp
3611 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3612 llvm_i16_ty, llvm_i16_ty], [],
3613 "llvm.nvvm.sust.b.1d.array.v2i16.clamp">,
3614 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">;
3615 def int_nvvm_sust_b_1d_array_v2i32_clamp
3616 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3617 llvm_i32_ty, llvm_i32_ty], [],
3618 "llvm.nvvm.sust.b.1d.array.v2i32.clamp">,
3619 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">;
3620 def int_nvvm_sust_b_1d_array_v2i64_clamp
3621 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3622 llvm_i64_ty, llvm_i64_ty], [],
3623 "llvm.nvvm.sust.b.1d.array.v2i64.clamp">,
3624 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">;
3625 def int_nvvm_sust_b_1d_array_v4i8_clamp
3626 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3627 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3628 "llvm.nvvm.sust.b.1d.array.v4i8.clamp">,
3629 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">;
3630 def int_nvvm_sust_b_1d_array_v4i16_clamp
3631 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3632 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3633 "llvm.nvvm.sust.b.1d.array.v4i16.clamp">,
3634 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">;
3635 def int_nvvm_sust_b_1d_array_v4i32_clamp
3636 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3637 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3638 "llvm.nvvm.sust.b.1d.array.v4i32.clamp">,
3639 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">;
3640
3641
// 2d `sust.b` surface stores, `.clamp` variant.
// No results. Operands: an i64 and two i32 values (presumably the surface
// handle and the x/y coordinates -- confirm against the NVPTX lowering),
// followed by one operand per stored element (1, 2 or 4); i8 elements are
// passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_2d_<type>_clamp`.
3642 def int_nvvm_sust_b_2d_i8_clamp
3643 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3644 "llvm.nvvm.sust.b.2d.i8.clamp">,
3645 ClangBuiltin<"__nvvm_sust_b_2d_i8_clamp">;
3646 def int_nvvm_sust_b_2d_i16_clamp
3647 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3648 "llvm.nvvm.sust.b.2d.i16.clamp">,
3649 ClangBuiltin<"__nvvm_sust_b_2d_i16_clamp">;
3650 def int_nvvm_sust_b_2d_i32_clamp
3651 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3652 "llvm.nvvm.sust.b.2d.i32.clamp">,
3653 ClangBuiltin<"__nvvm_sust_b_2d_i32_clamp">;
3654 def int_nvvm_sust_b_2d_i64_clamp
3655 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3656 "llvm.nvvm.sust.b.2d.i64.clamp">,
3657 ClangBuiltin<"__nvvm_sust_b_2d_i64_clamp">;
3658 def int_nvvm_sust_b_2d_v2i8_clamp
3659 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3660 llvm_i16_ty, llvm_i16_ty], [],
3661 "llvm.nvvm.sust.b.2d.v2i8.clamp">,
3662 ClangBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">;
3663 def int_nvvm_sust_b_2d_v2i16_clamp
3664 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3665 llvm_i16_ty, llvm_i16_ty], [],
3666 "llvm.nvvm.sust.b.2d.v2i16.clamp">,
3667 ClangBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">;
3668 def int_nvvm_sust_b_2d_v2i32_clamp
3669 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3670 llvm_i32_ty, llvm_i32_ty], [],
3671 "llvm.nvvm.sust.b.2d.v2i32.clamp">,
3672 ClangBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">;
3673 def int_nvvm_sust_b_2d_v2i64_clamp
3674 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3675 llvm_i64_ty, llvm_i64_ty], [],
3676 "llvm.nvvm.sust.b.2d.v2i64.clamp">,
3677 ClangBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">;
3678 def int_nvvm_sust_b_2d_v4i8_clamp
3679 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3680 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3681 "llvm.nvvm.sust.b.2d.v4i8.clamp">,
3682 ClangBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">;
3683 def int_nvvm_sust_b_2d_v4i16_clamp
3684 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3685 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3686 "llvm.nvvm.sust.b.2d.v4i16.clamp">,
3687 ClangBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">;
3688 def int_nvvm_sust_b_2d_v4i32_clamp
3689 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3690 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3691 "llvm.nvvm.sust.b.2d.v4i32.clamp">,
3692 ClangBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">;
3693
3694
// 2d-array `sust.b` surface stores, `.clamp` variant.
// No results. Operands: an i64 and three i32 values (presumably the surface
// handle, then the array index and x/y coordinates -- confirm order against
// the NVPTX lowering), followed by one operand per stored element (1, 2 or
// 4); i8 elements are passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_2d_array_<type>_clamp`.
3695 def int_nvvm_sust_b_2d_array_i8_clamp
3696 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3697 llvm_i32_ty, llvm_i16_ty], [],
3698 "llvm.nvvm.sust.b.2d.array.i8.clamp">,
3699 ClangBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">;
3700 def int_nvvm_sust_b_2d_array_i16_clamp
3701 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3702 llvm_i32_ty, llvm_i16_ty], [],
3703 "llvm.nvvm.sust.b.2d.array.i16.clamp">,
3704 ClangBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">;
3705 def int_nvvm_sust_b_2d_array_i32_clamp
3706 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3707 llvm_i32_ty, llvm_i32_ty], [],
3708 "llvm.nvvm.sust.b.2d.array.i32.clamp">,
3709 ClangBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">;
3710 def int_nvvm_sust_b_2d_array_i64_clamp
3711 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3712 llvm_i32_ty, llvm_i64_ty], [],
3713 "llvm.nvvm.sust.b.2d.array.i64.clamp">,
3714 ClangBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">;
3715 def int_nvvm_sust_b_2d_array_v2i8_clamp
3716 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3717 llvm_i16_ty, llvm_i16_ty], [],
3718 "llvm.nvvm.sust.b.2d.array.v2i8.clamp">,
3719 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">;
3720 def int_nvvm_sust_b_2d_array_v2i16_clamp
3721 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3722 llvm_i16_ty, llvm_i16_ty], [],
3723 "llvm.nvvm.sust.b.2d.array.v2i16.clamp">,
3724 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">;
3725 def int_nvvm_sust_b_2d_array_v2i32_clamp
3726 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3727 llvm_i32_ty, llvm_i32_ty], [],
3728 "llvm.nvvm.sust.b.2d.array.v2i32.clamp">,
3729 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">;
3730 def int_nvvm_sust_b_2d_array_v2i64_clamp
3731 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3732 llvm_i64_ty, llvm_i64_ty], [],
3733 "llvm.nvvm.sust.b.2d.array.v2i64.clamp">,
3734 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">;
3735 def int_nvvm_sust_b_2d_array_v4i8_clamp
3736 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3737 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3738 "llvm.nvvm.sust.b.2d.array.v4i8.clamp">,
3739 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">;
3740 def int_nvvm_sust_b_2d_array_v4i16_clamp
3741 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3742 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3743 "llvm.nvvm.sust.b.2d.array.v4i16.clamp">,
3744 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">;
3745 def int_nvvm_sust_b_2d_array_v4i32_clamp
3746 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3747 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3748 "llvm.nvvm.sust.b.2d.array.v4i32.clamp">,
3749 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">;
3750
3751
// 3d `sust.b` surface stores, `.clamp` variant.
// No results. Operands: an i64 and three i32 values (presumably the surface
// handle and the x/y/z coordinates -- confirm against the NVPTX lowering),
// followed by one operand per stored element (1, 2 or 4); i8 elements are
// passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_3d_<type>_clamp`.
3752 def int_nvvm_sust_b_3d_i8_clamp
3753 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3754 llvm_i32_ty, llvm_i16_ty], [],
3755 "llvm.nvvm.sust.b.3d.i8.clamp">,
3756 ClangBuiltin<"__nvvm_sust_b_3d_i8_clamp">;
3757 def int_nvvm_sust_b_3d_i16_clamp
3758 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3759 llvm_i32_ty, llvm_i16_ty], [],
3760 "llvm.nvvm.sust.b.3d.i16.clamp">,
3761 ClangBuiltin<"__nvvm_sust_b_3d_i16_clamp">;
3762 def int_nvvm_sust_b_3d_i32_clamp
3763 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3764 llvm_i32_ty, llvm_i32_ty], [],
3765 "llvm.nvvm.sust.b.3d.i32.clamp">,
3766 ClangBuiltin<"__nvvm_sust_b_3d_i32_clamp">;
3767 def int_nvvm_sust_b_3d_i64_clamp
3768 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3769 llvm_i32_ty, llvm_i64_ty], [],
3770 "llvm.nvvm.sust.b.3d.i64.clamp">,
3771 ClangBuiltin<"__nvvm_sust_b_3d_i64_clamp">;
3772 def int_nvvm_sust_b_3d_v2i8_clamp
3773 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3774 llvm_i16_ty, llvm_i16_ty], [],
3775 "llvm.nvvm.sust.b.3d.v2i8.clamp">,
3776 ClangBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">;
3777 def int_nvvm_sust_b_3d_v2i16_clamp
3778 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3779 llvm_i16_ty, llvm_i16_ty], [],
3780 "llvm.nvvm.sust.b.3d.v2i16.clamp">,
3781 ClangBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">;
3782 def int_nvvm_sust_b_3d_v2i32_clamp
3783 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3784 llvm_i32_ty, llvm_i32_ty], [],
3785 "llvm.nvvm.sust.b.3d.v2i32.clamp">,
3786 ClangBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">;
3787 def int_nvvm_sust_b_3d_v2i64_clamp
3788 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3789 llvm_i64_ty, llvm_i64_ty], [],
3790 "llvm.nvvm.sust.b.3d.v2i64.clamp">,
3791 ClangBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">;
3792 def int_nvvm_sust_b_3d_v4i8_clamp
3793 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3794 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3795 "llvm.nvvm.sust.b.3d.v4i8.clamp">,
3796 ClangBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">;
3797 def int_nvvm_sust_b_3d_v4i16_clamp
3798 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3799 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3800 "llvm.nvvm.sust.b.3d.v4i16.clamp">,
3801 ClangBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">;
3802 def int_nvvm_sust_b_3d_v4i32_clamp
3803 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3804 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3805 "llvm.nvvm.sust.b.3d.v4i32.clamp">,
3806 ClangBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">;
3807
3808
3809 // .trap variant
// 1d `sust.b` surface stores, `.trap` variant.
// No results. Operands: an i64 and one i32 (presumably the surface handle and
// the x coordinate -- confirm against the NVPTX lowering), followed by one
// operand per stored element (1, 2 or 4); i8 elements are passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_1d_<type>_trap`.
3810 def int_nvvm_sust_b_1d_i8_trap
3811 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3812 "llvm.nvvm.sust.b.1d.i8.trap">,
3813 ClangBuiltin<"__nvvm_sust_b_1d_i8_trap">;
3814 def int_nvvm_sust_b_1d_i16_trap
3815 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3816 "llvm.nvvm.sust.b.1d.i16.trap">,
3817 ClangBuiltin<"__nvvm_sust_b_1d_i16_trap">;
3818 def int_nvvm_sust_b_1d_i32_trap
3819 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3820 "llvm.nvvm.sust.b.1d.i32.trap">,
3821 ClangBuiltin<"__nvvm_sust_b_1d_i32_trap">;
3822 def int_nvvm_sust_b_1d_i64_trap
3823 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
3824 "llvm.nvvm.sust.b.1d.i64.trap">,
3825 ClangBuiltin<"__nvvm_sust_b_1d_i64_trap">;
3826 def int_nvvm_sust_b_1d_v2i8_trap
3827 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3828 "llvm.nvvm.sust.b.1d.v2i8.trap">,
3829 ClangBuiltin<"__nvvm_sust_b_1d_v2i8_trap">;
3830 def int_nvvm_sust_b_1d_v2i16_trap
3831 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3832 "llvm.nvvm.sust.b.1d.v2i16.trap">,
3833 ClangBuiltin<"__nvvm_sust_b_1d_v2i16_trap">;
3834 def int_nvvm_sust_b_1d_v2i32_trap
3835 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3836 "llvm.nvvm.sust.b.1d.v2i32.trap">,
3837 ClangBuiltin<"__nvvm_sust_b_1d_v2i32_trap">;
3838 def int_nvvm_sust_b_1d_v2i64_trap
3839 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
3840 "llvm.nvvm.sust.b.1d.v2i64.trap">,
3841 ClangBuiltin<"__nvvm_sust_b_1d_v2i64_trap">;
3842 def int_nvvm_sust_b_1d_v4i8_trap
3843 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3844 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3845 "llvm.nvvm.sust.b.1d.v4i8.trap">,
3846 ClangBuiltin<"__nvvm_sust_b_1d_v4i8_trap">;
3847 def int_nvvm_sust_b_1d_v4i16_trap
3848 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3849 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3850 "llvm.nvvm.sust.b.1d.v4i16.trap">,
3851 ClangBuiltin<"__nvvm_sust_b_1d_v4i16_trap">;
3852 def int_nvvm_sust_b_1d_v4i32_trap
3853 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3854 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3855 "llvm.nvvm.sust.b.1d.v4i32.trap">,
3856 ClangBuiltin<"__nvvm_sust_b_1d_v4i32_trap">;
3857
3858
// 1d-array `sust.b` surface stores, `.trap` variant.
// No results. Operands: an i64 and two i32 values (presumably the surface
// handle, then the array index and x coordinate -- confirm order against the
// NVPTX lowering), followed by one operand per stored element (1, 2 or 4);
// i8 elements are passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_1d_array_<type>_trap`.
3859 def int_nvvm_sust_b_1d_array_i8_trap
3860 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3861 "llvm.nvvm.sust.b.1d.array.i8.trap">,
3862 ClangBuiltin<"__nvvm_sust_b_1d_array_i8_trap">;
3863 def int_nvvm_sust_b_1d_array_i16_trap
3864 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3865 "llvm.nvvm.sust.b.1d.array.i16.trap">,
3866 ClangBuiltin<"__nvvm_sust_b_1d_array_i16_trap">;
3867 def int_nvvm_sust_b_1d_array_i32_trap
3868 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3869 "llvm.nvvm.sust.b.1d.array.i32.trap">,
3870 ClangBuiltin<"__nvvm_sust_b_1d_array_i32_trap">;
3871 def int_nvvm_sust_b_1d_array_i64_trap
3872 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3873 "llvm.nvvm.sust.b.1d.array.i64.trap">,
3874 ClangBuiltin<"__nvvm_sust_b_1d_array_i64_trap">;
3875 def int_nvvm_sust_b_1d_array_v2i8_trap
3876 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3877 llvm_i16_ty, llvm_i16_ty], [],
3878 "llvm.nvvm.sust.b.1d.array.v2i8.trap">,
3879 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">;
3880 def int_nvvm_sust_b_1d_array_v2i16_trap
3881 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3882 llvm_i16_ty, llvm_i16_ty], [],
3883 "llvm.nvvm.sust.b.1d.array.v2i16.trap">,
3884 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">;
3885 def int_nvvm_sust_b_1d_array_v2i32_trap
3886 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3887 llvm_i32_ty, llvm_i32_ty], [],
3888 "llvm.nvvm.sust.b.1d.array.v2i32.trap">,
3889 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">;
3890 def int_nvvm_sust_b_1d_array_v2i64_trap
3891 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3892 llvm_i64_ty, llvm_i64_ty], [],
3893 "llvm.nvvm.sust.b.1d.array.v2i64.trap">,
3894 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">;
3895 def int_nvvm_sust_b_1d_array_v4i8_trap
3896 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3897 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3898 "llvm.nvvm.sust.b.1d.array.v4i8.trap">,
3899 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">;
3900 def int_nvvm_sust_b_1d_array_v4i16_trap
3901 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3902 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3903 "llvm.nvvm.sust.b.1d.array.v4i16.trap">,
3904 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">;
3905 def int_nvvm_sust_b_1d_array_v4i32_trap
3906 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3907 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3908 "llvm.nvvm.sust.b.1d.array.v4i32.trap">,
3909 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">;
3910
3911
// 2d `sust.b` surface stores, `.trap` variant.
// No results. Operands: an i64 and two i32 values (presumably the surface
// handle and the x/y coordinates -- confirm against the NVPTX lowering),
// followed by one operand per stored element (1, 2 or 4); i8 elements are
// passed as i16.
// The property list is empty; each def is bound to the Clang builtin
// `__nvvm_sust_b_2d_<type>_trap`.
3912 def int_nvvm_sust_b_2d_i8_trap
3913 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3914 "llvm.nvvm.sust.b.2d.i8.trap">,
3915 ClangBuiltin<"__nvvm_sust_b_2d_i8_trap">;
3916 def int_nvvm_sust_b_2d_i16_trap
3917 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3918 "llvm.nvvm.sust.b.2d.i16.trap">,
3919 ClangBuiltin<"__nvvm_sust_b_2d_i16_trap">;
3920 def int_nvvm_sust_b_2d_i32_trap
3921 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3922 "llvm.nvvm.sust.b.2d.i32.trap">,
3923 ClangBuiltin<"__nvvm_sust_b_2d_i32_trap">;
3924 def int_nvvm_sust_b_2d_i64_trap
3925 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3926 "llvm.nvvm.sust.b.2d.i64.trap">,
3927 ClangBuiltin<"__nvvm_sust_b_2d_i64_trap">;
3928 def int_nvvm_sust_b_2d_v2i8_trap
3929 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3930 llvm_i16_ty, llvm_i16_ty], [],
3931 "llvm.nvvm.sust.b.2d.v2i8.trap">,
3932 ClangBuiltin<"__nvvm_sust_b_2d_v2i8_trap">;
3933 def int_nvvm_sust_b_2d_v2i16_trap
3934 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3935 llvm_i16_ty, llvm_i16_ty], [],
3936 "llvm.nvvm.sust.b.2d.v2i16.trap">,
3937 ClangBuiltin<"__nvvm_sust_b_2d_v2i16_trap">;
3938 def int_nvvm_sust_b_2d_v2i32_trap
3939 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3940 llvm_i32_ty, llvm_i32_ty], [],
3941 "llvm.nvvm.sust.b.2d.v2i32.trap">,
3942 ClangBuiltin<"__nvvm_sust_b_2d_v2i32_trap">;
3943 def int_nvvm_sust_b_2d_v2i64_trap
3944 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3945 llvm_i64_ty, llvm_i64_ty], [],
3946 "llvm.nvvm.sust.b.2d.v2i64.trap">,
3947 ClangBuiltin<"__nvvm_sust_b_2d_v2i64_trap">;
3948 def int_nvvm_sust_b_2d_v4i8_trap
3949 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3950 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3951 "llvm.nvvm.sust.b.2d.v4i8.trap">,
3952 ClangBuiltin<"__nvvm_sust_b_2d_v4i8_trap">;
3953 def int_nvvm_sust_b_2d_v4i16_trap
3954 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3955 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3956 "llvm.nvvm.sust.b.2d.v4i16.trap">,
3957 ClangBuiltin<"__nvvm_sust_b_2d_v4i16_trap">;
3958 def int_nvvm_sust_b_2d_v4i32_trap
3959 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3960 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3961 "llvm.nvvm.sust.b.2d.v4i32.trap">,
3962 ClangBuiltin<"__nvvm_sust_b_2d_v4i32_trap">;
3963
3964
// llvm.nvvm.sust.b.2d.array.*.trap
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, three i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. The v4 forms exist for i8/i16/i32 only.
// NOTE(review): presumably the i64 is the surface handle and the three i32s
// are the array index plus x/y coordinates -- confirm operand order against
// the NVPTX lowering.
3965 def int_nvvm_sust_b_2d_array_i8_trap
3966 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3967 llvm_i32_ty, llvm_i16_ty], [],
3968 "llvm.nvvm.sust.b.2d.array.i8.trap">,
3969 ClangBuiltin<"__nvvm_sust_b_2d_array_i8_trap">;
3970 def int_nvvm_sust_b_2d_array_i16_trap
3971 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3972 llvm_i32_ty, llvm_i16_ty], [],
3973 "llvm.nvvm.sust.b.2d.array.i16.trap">,
3974 ClangBuiltin<"__nvvm_sust_b_2d_array_i16_trap">;
3975 def int_nvvm_sust_b_2d_array_i32_trap
3976 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3977 llvm_i32_ty, llvm_i32_ty], [],
3978 "llvm.nvvm.sust.b.2d.array.i32.trap">,
3979 ClangBuiltin<"__nvvm_sust_b_2d_array_i32_trap">;
3980 def int_nvvm_sust_b_2d_array_i64_trap
3981 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3982 llvm_i32_ty, llvm_i64_ty], [],
3983 "llvm.nvvm.sust.b.2d.array.i64.trap">,
3984 ClangBuiltin<"__nvvm_sust_b_2d_array_i64_trap">;
3985 def int_nvvm_sust_b_2d_array_v2i8_trap
3986 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3987 llvm_i16_ty, llvm_i16_ty], [],
3988 "llvm.nvvm.sust.b.2d.array.v2i8.trap">,
3989 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">;
3990 def int_nvvm_sust_b_2d_array_v2i16_trap
3991 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3992 llvm_i16_ty, llvm_i16_ty], [],
3993 "llvm.nvvm.sust.b.2d.array.v2i16.trap">,
3994 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">;
3995 def int_nvvm_sust_b_2d_array_v2i32_trap
3996 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3997 llvm_i32_ty, llvm_i32_ty], [],
3998 "llvm.nvvm.sust.b.2d.array.v2i32.trap">,
3999 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">;
4000 def int_nvvm_sust_b_2d_array_v2i64_trap
4001 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4002 llvm_i64_ty, llvm_i64_ty], [],
4003 "llvm.nvvm.sust.b.2d.array.v2i64.trap">,
4004 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">;
4005 def int_nvvm_sust_b_2d_array_v4i8_trap
4006 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4007 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4008 "llvm.nvvm.sust.b.2d.array.v4i8.trap">,
4009 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">;
4010 def int_nvvm_sust_b_2d_array_v4i16_trap
4011 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4012 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4013 "llvm.nvvm.sust.b.2d.array.v4i16.trap">,
4014 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">;
4015 def int_nvvm_sust_b_2d_array_v4i32_trap
4016 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4017 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4018 "llvm.nvvm.sust.b.2d.array.v4i32.trap">,
4019 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">;
4020
4021
// llvm.nvvm.sust.b.3d.*.trap
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, three i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. The v4 forms exist for i8/i16/i32 only.
// NOTE(review): presumably the i64 is the surface handle and the three i32s
// are the x/y/z coordinates -- confirm against the NVPTX lowering.
4022 def int_nvvm_sust_b_3d_i8_trap
4023 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4024 llvm_i32_ty, llvm_i16_ty], [],
4025 "llvm.nvvm.sust.b.3d.i8.trap">,
4026 ClangBuiltin<"__nvvm_sust_b_3d_i8_trap">;
4027 def int_nvvm_sust_b_3d_i16_trap
4028 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4029 llvm_i32_ty, llvm_i16_ty], [],
4030 "llvm.nvvm.sust.b.3d.i16.trap">,
4031 ClangBuiltin<"__nvvm_sust_b_3d_i16_trap">;
4032 def int_nvvm_sust_b_3d_i32_trap
4033 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4034 llvm_i32_ty, llvm_i32_ty], [],
4035 "llvm.nvvm.sust.b.3d.i32.trap">,
4036 ClangBuiltin<"__nvvm_sust_b_3d_i32_trap">;
4037 def int_nvvm_sust_b_3d_i64_trap
4038 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4039 llvm_i32_ty, llvm_i64_ty], [],
4040 "llvm.nvvm.sust.b.3d.i64.trap">,
4041 ClangBuiltin<"__nvvm_sust_b_3d_i64_trap">;
4042 def int_nvvm_sust_b_3d_v2i8_trap
4043 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4044 llvm_i16_ty, llvm_i16_ty], [],
4045 "llvm.nvvm.sust.b.3d.v2i8.trap">,
4046 ClangBuiltin<"__nvvm_sust_b_3d_v2i8_trap">;
4047 def int_nvvm_sust_b_3d_v2i16_trap
4048 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4049 llvm_i16_ty, llvm_i16_ty], [],
4050 "llvm.nvvm.sust.b.3d.v2i16.trap">,
4051 ClangBuiltin<"__nvvm_sust_b_3d_v2i16_trap">;
4052 def int_nvvm_sust_b_3d_v2i32_trap
4053 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4054 llvm_i32_ty, llvm_i32_ty], [],
4055 "llvm.nvvm.sust.b.3d.v2i32.trap">,
4056 ClangBuiltin<"__nvvm_sust_b_3d_v2i32_trap">;
4057 def int_nvvm_sust_b_3d_v2i64_trap
4058 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4059 llvm_i64_ty, llvm_i64_ty], [],
4060 "llvm.nvvm.sust.b.3d.v2i64.trap">,
4061 ClangBuiltin<"__nvvm_sust_b_3d_v2i64_trap">;
4062 def int_nvvm_sust_b_3d_v4i8_trap
4063 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4064 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4065 "llvm.nvvm.sust.b.3d.v4i8.trap">,
4066 ClangBuiltin<"__nvvm_sust_b_3d_v4i8_trap">;
4067 def int_nvvm_sust_b_3d_v4i16_trap
4068 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4069 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4070 "llvm.nvvm.sust.b.3d.v4i16.trap">,
4071 ClangBuiltin<"__nvvm_sust_b_3d_v4i16_trap">;
4072 def int_nvvm_sust_b_3d_v4i32_trap
4073 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4074 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4075 "llvm.nvvm.sust.b.3d.v4i32.trap">,
4076 ClangBuiltin<"__nvvm_sust_b_3d_v4i32_trap">;
4077
4078
4079 // .zero variant
// llvm.nvvm.sust.b.1d.*.zero
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, one i32, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. The v4 forms exist for i8/i16/i32 only.
// NOTE(review): presumably `.zero` selects the PTX sust out-of-bounds mode in
// the same way `.trap` does for the sibling definitions -- confirm against
// the PTX ISA.
4080 def int_nvvm_sust_b_1d_i8_zero
4081 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4082 "llvm.nvvm.sust.b.1d.i8.zero">,
4083 ClangBuiltin<"__nvvm_sust_b_1d_i8_zero">;
4084 def int_nvvm_sust_b_1d_i16_zero
4085 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4086 "llvm.nvvm.sust.b.1d.i16.zero">,
4087 ClangBuiltin<"__nvvm_sust_b_1d_i16_zero">;
4088 def int_nvvm_sust_b_1d_i32_zero
4089 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
4090 "llvm.nvvm.sust.b.1d.i32.zero">,
4091 ClangBuiltin<"__nvvm_sust_b_1d_i32_zero">;
4092 def int_nvvm_sust_b_1d_i64_zero
4093 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
4094 "llvm.nvvm.sust.b.1d.i64.zero">,
4095 ClangBuiltin<"__nvvm_sust_b_1d_i64_zero">;
4096 def int_nvvm_sust_b_1d_v2i8_zero
4097 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4098 "llvm.nvvm.sust.b.1d.v2i8.zero">,
4099 ClangBuiltin<"__nvvm_sust_b_1d_v2i8_zero">;
4100 def int_nvvm_sust_b_1d_v2i16_zero
4101 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4102 "llvm.nvvm.sust.b.1d.v2i16.zero">,
4103 ClangBuiltin<"__nvvm_sust_b_1d_v2i16_zero">;
4104 def int_nvvm_sust_b_1d_v2i32_zero
4105 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4106 "llvm.nvvm.sust.b.1d.v2i32.zero">,
4107 ClangBuiltin<"__nvvm_sust_b_1d_v2i32_zero">;
4108 def int_nvvm_sust_b_1d_v2i64_zero
4109 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
4110 "llvm.nvvm.sust.b.1d.v2i64.zero">,
4111 ClangBuiltin<"__nvvm_sust_b_1d_v2i64_zero">;
4112 def int_nvvm_sust_b_1d_v4i8_zero
4113 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4114 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4115 "llvm.nvvm.sust.b.1d.v4i8.zero">,
4116 ClangBuiltin<"__nvvm_sust_b_1d_v4i8_zero">;
4117 def int_nvvm_sust_b_1d_v4i16_zero
4118 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4119 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4120 "llvm.nvvm.sust.b.1d.v4i16.zero">,
4121 ClangBuiltin<"__nvvm_sust_b_1d_v4i16_zero">;
4122 def int_nvvm_sust_b_1d_v4i32_zero
4123 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4124 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4125 "llvm.nvvm.sust.b.1d.v4i32.zero">,
4126 ClangBuiltin<"__nvvm_sust_b_1d_v4i32_zero">;
4127
4128
// llvm.nvvm.sust.b.1d.array.*.zero
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, two i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. The v4 forms exist for i8/i16/i32 only.
// NOTE(review): presumably the two i32s are the array index and the x
// coordinate -- confirm operand order against the NVPTX lowering.
4129 def int_nvvm_sust_b_1d_array_i8_zero
4130 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4131 "llvm.nvvm.sust.b.1d.array.i8.zero">,
4132 ClangBuiltin<"__nvvm_sust_b_1d_array_i8_zero">;
4133 def int_nvvm_sust_b_1d_array_i16_zero
4134 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4135 "llvm.nvvm.sust.b.1d.array.i16.zero">,
4136 ClangBuiltin<"__nvvm_sust_b_1d_array_i16_zero">;
4137 def int_nvvm_sust_b_1d_array_i32_zero
4138 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4139 "llvm.nvvm.sust.b.1d.array.i32.zero">,
4140 ClangBuiltin<"__nvvm_sust_b_1d_array_i32_zero">;
4141 def int_nvvm_sust_b_1d_array_i64_zero
4142 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
4143 "llvm.nvvm.sust.b.1d.array.i64.zero">,
4144 ClangBuiltin<"__nvvm_sust_b_1d_array_i64_zero">;
4145 def int_nvvm_sust_b_1d_array_v2i8_zero
4146 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4147 llvm_i16_ty, llvm_i16_ty], [],
4148 "llvm.nvvm.sust.b.1d.array.v2i8.zero">,
4149 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">;
4150 def int_nvvm_sust_b_1d_array_v2i16_zero
4151 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4152 llvm_i16_ty, llvm_i16_ty], [],
4153 "llvm.nvvm.sust.b.1d.array.v2i16.zero">,
4154 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">;
4155 def int_nvvm_sust_b_1d_array_v2i32_zero
4156 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4157 llvm_i32_ty, llvm_i32_ty], [],
4158 "llvm.nvvm.sust.b.1d.array.v2i32.zero">,
4159 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">;
4160 def int_nvvm_sust_b_1d_array_v2i64_zero
4161 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4162 llvm_i64_ty, llvm_i64_ty], [],
4163 "llvm.nvvm.sust.b.1d.array.v2i64.zero">,
4164 ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">;
4165 def int_nvvm_sust_b_1d_array_v4i8_zero
4166 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4167 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4168 "llvm.nvvm.sust.b.1d.array.v4i8.zero">,
4169 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">;
4170 def int_nvvm_sust_b_1d_array_v4i16_zero
4171 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4172 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4173 "llvm.nvvm.sust.b.1d.array.v4i16.zero">,
4174 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">;
4175 def int_nvvm_sust_b_1d_array_v4i32_zero
4176 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4177 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4178 "llvm.nvvm.sust.b.1d.array.v4i32.zero">,
4179 ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">;
4180
4181
// llvm.nvvm.sust.b.2d.*.zero
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, two i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. The v4 forms exist for i8/i16/i32 only.
// NOTE(review): presumably the two i32s are the x/y coordinates -- confirm
// against the NVPTX lowering.
4182 def int_nvvm_sust_b_2d_i8_zero
4183 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4184 "llvm.nvvm.sust.b.2d.i8.zero">,
4185 ClangBuiltin<"__nvvm_sust_b_2d_i8_zero">;
4186 def int_nvvm_sust_b_2d_i16_zero
4187 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4188 "llvm.nvvm.sust.b.2d.i16.zero">,
4189 ClangBuiltin<"__nvvm_sust_b_2d_i16_zero">;
4190 def int_nvvm_sust_b_2d_i32_zero
4191 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4192 "llvm.nvvm.sust.b.2d.i32.zero">,
4193 ClangBuiltin<"__nvvm_sust_b_2d_i32_zero">;
4194 def int_nvvm_sust_b_2d_i64_zero
4195 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
4196 "llvm.nvvm.sust.b.2d.i64.zero">,
4197 ClangBuiltin<"__nvvm_sust_b_2d_i64_zero">;
4198 def int_nvvm_sust_b_2d_v2i8_zero
4199 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4200 llvm_i16_ty, llvm_i16_ty], [],
4201 "llvm.nvvm.sust.b.2d.v2i8.zero">,
4202 ClangBuiltin<"__nvvm_sust_b_2d_v2i8_zero">;
4203 def int_nvvm_sust_b_2d_v2i16_zero
4204 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4205 llvm_i16_ty, llvm_i16_ty], [],
4206 "llvm.nvvm.sust.b.2d.v2i16.zero">,
4207 ClangBuiltin<"__nvvm_sust_b_2d_v2i16_zero">;
4208 def int_nvvm_sust_b_2d_v2i32_zero
4209 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4210 llvm_i32_ty, llvm_i32_ty], [],
4211 "llvm.nvvm.sust.b.2d.v2i32.zero">,
4212 ClangBuiltin<"__nvvm_sust_b_2d_v2i32_zero">;
4213 def int_nvvm_sust_b_2d_v2i64_zero
4214 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4215 llvm_i64_ty, llvm_i64_ty], [],
4216 "llvm.nvvm.sust.b.2d.v2i64.zero">,
4217 ClangBuiltin<"__nvvm_sust_b_2d_v2i64_zero">;
4218 def int_nvvm_sust_b_2d_v4i8_zero
4219 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4220 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4221 "llvm.nvvm.sust.b.2d.v4i8.zero">,
4222 ClangBuiltin<"__nvvm_sust_b_2d_v4i8_zero">;
4223 def int_nvvm_sust_b_2d_v4i16_zero
4224 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4225 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4226 "llvm.nvvm.sust.b.2d.v4i16.zero">,
4227 ClangBuiltin<"__nvvm_sust_b_2d_v4i16_zero">;
4228 def int_nvvm_sust_b_2d_v4i32_zero
4229 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4230 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4231 "llvm.nvvm.sust.b.2d.v4i32.zero">,
4232 ClangBuiltin<"__nvvm_sust_b_2d_v4i32_zero">;
4233
4234
// llvm.nvvm.sust.b.2d.array.*.zero
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, three i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. The v4 forms exist for i8/i16/i32 only.
// NOTE(review): presumably the three i32s are the array index plus x/y
// coordinates -- confirm operand order against the NVPTX lowering.
4235 def int_nvvm_sust_b_2d_array_i8_zero
4236 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4237 llvm_i32_ty, llvm_i16_ty], [],
4238 "llvm.nvvm.sust.b.2d.array.i8.zero">,
4239 ClangBuiltin<"__nvvm_sust_b_2d_array_i8_zero">;
4240 def int_nvvm_sust_b_2d_array_i16_zero
4241 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4242 llvm_i32_ty, llvm_i16_ty], [],
4243 "llvm.nvvm.sust.b.2d.array.i16.zero">,
4244 ClangBuiltin<"__nvvm_sust_b_2d_array_i16_zero">;
4245 def int_nvvm_sust_b_2d_array_i32_zero
4246 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4247 llvm_i32_ty, llvm_i32_ty], [],
4248 "llvm.nvvm.sust.b.2d.array.i32.zero">,
4249 ClangBuiltin<"__nvvm_sust_b_2d_array_i32_zero">;
4250 def int_nvvm_sust_b_2d_array_i64_zero
4251 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4252 llvm_i32_ty, llvm_i64_ty], [],
4253 "llvm.nvvm.sust.b.2d.array.i64.zero">,
4254 ClangBuiltin<"__nvvm_sust_b_2d_array_i64_zero">;
4255 def int_nvvm_sust_b_2d_array_v2i8_zero
4256 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4257 llvm_i16_ty, llvm_i16_ty], [],
4258 "llvm.nvvm.sust.b.2d.array.v2i8.zero">,
4259 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">;
4260 def int_nvvm_sust_b_2d_array_v2i16_zero
4261 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4262 llvm_i16_ty, llvm_i16_ty], [],
4263 "llvm.nvvm.sust.b.2d.array.v2i16.zero">,
4264 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">;
4265 def int_nvvm_sust_b_2d_array_v2i32_zero
4266 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4267 llvm_i32_ty, llvm_i32_ty], [],
4268 "llvm.nvvm.sust.b.2d.array.v2i32.zero">,
4269 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">;
4270 def int_nvvm_sust_b_2d_array_v2i64_zero
4271 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4272 llvm_i64_ty, llvm_i64_ty], [],
4273 "llvm.nvvm.sust.b.2d.array.v2i64.zero">,
4274 ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">;
4275 def int_nvvm_sust_b_2d_array_v4i8_zero
4276 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4277 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4278 "llvm.nvvm.sust.b.2d.array.v4i8.zero">,
4279 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">;
4280 def int_nvvm_sust_b_2d_array_v4i16_zero
4281 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4282 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4283 "llvm.nvvm.sust.b.2d.array.v4i16.zero">,
4284 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">;
4285 def int_nvvm_sust_b_2d_array_v4i32_zero
4286 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4287 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4288 "llvm.nvvm.sust.b.2d.array.v4i32.zero">,
4289 ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">;
4290
4291
// llvm.nvvm.sust.b.3d.*.zero
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, three i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. The v4 forms exist for i8/i16/i32 only.
// NOTE(review): presumably the three i32s are the x/y/z coordinates --
// confirm against the NVPTX lowering.
4292 def int_nvvm_sust_b_3d_i8_zero
4293 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4294 llvm_i32_ty, llvm_i16_ty], [],
4295 "llvm.nvvm.sust.b.3d.i8.zero">,
4296 ClangBuiltin<"__nvvm_sust_b_3d_i8_zero">;
4297 def int_nvvm_sust_b_3d_i16_zero
4298 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4299 llvm_i32_ty, llvm_i16_ty], [],
4300 "llvm.nvvm.sust.b.3d.i16.zero">,
4301 ClangBuiltin<"__nvvm_sust_b_3d_i16_zero">;
4302 def int_nvvm_sust_b_3d_i32_zero
4303 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4304 llvm_i32_ty, llvm_i32_ty], [],
4305 "llvm.nvvm.sust.b.3d.i32.zero">,
4306 ClangBuiltin<"__nvvm_sust_b_3d_i32_zero">;
4307 def int_nvvm_sust_b_3d_i64_zero
4308 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4309 llvm_i32_ty, llvm_i64_ty], [],
4310 "llvm.nvvm.sust.b.3d.i64.zero">,
4311 ClangBuiltin<"__nvvm_sust_b_3d_i64_zero">;
4312 def int_nvvm_sust_b_3d_v2i8_zero
4313 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4314 llvm_i16_ty, llvm_i16_ty], [],
4315 "llvm.nvvm.sust.b.3d.v2i8.zero">,
4316 ClangBuiltin<"__nvvm_sust_b_3d_v2i8_zero">;
4317 def int_nvvm_sust_b_3d_v2i16_zero
4318 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4319 llvm_i16_ty, llvm_i16_ty], [],
4320 "llvm.nvvm.sust.b.3d.v2i16.zero">,
4321 ClangBuiltin<"__nvvm_sust_b_3d_v2i16_zero">;
4322 def int_nvvm_sust_b_3d_v2i32_zero
4323 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4324 llvm_i32_ty, llvm_i32_ty], [],
4325 "llvm.nvvm.sust.b.3d.v2i32.zero">,
4326 ClangBuiltin<"__nvvm_sust_b_3d_v2i32_zero">;
4327 def int_nvvm_sust_b_3d_v2i64_zero
4328 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4329 llvm_i64_ty, llvm_i64_ty], [],
4330 "llvm.nvvm.sust.b.3d.v2i64.zero">,
4331 ClangBuiltin<"__nvvm_sust_b_3d_v2i64_zero">;
4332 def int_nvvm_sust_b_3d_v4i8_zero
4333 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4334 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4335 "llvm.nvvm.sust.b.3d.v4i8.zero">,
4336 ClangBuiltin<"__nvvm_sust_b_3d_v4i8_zero">;
4337 def int_nvvm_sust_b_3d_v4i16_zero
4338 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4339 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4340 "llvm.nvvm.sust.b.3d.v4i16.zero">,
4341 ClangBuiltin<"__nvvm_sust_b_3d_v4i16_zero">;
4342 def int_nvvm_sust_b_3d_v4i32_zero
4343 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4344 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4345 "llvm.nvvm.sust.b.3d.v4i32.zero">,
4346 ClangBuiltin<"__nvvm_sust_b_3d_v4i32_zero">;
4347
4348
4349
4350 // Formatted
4351
// llvm.nvvm.sust.p.1d.*.trap
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, one i32, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. Only i8/i16/i32 element forms are defined in this group; it
// has no i64 or v2i64 definitions.
// NOTE(review): "Formatted" presumably refers to the PTX `sust.p` store
// mode -- confirm against the PTX ISA.
4352 def int_nvvm_sust_p_1d_i8_trap
4353 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4354 "llvm.nvvm.sust.p.1d.i8.trap">,
4355 ClangBuiltin<"__nvvm_sust_p_1d_i8_trap">;
4356 def int_nvvm_sust_p_1d_i16_trap
4357 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4358 "llvm.nvvm.sust.p.1d.i16.trap">,
4359 ClangBuiltin<"__nvvm_sust_p_1d_i16_trap">;
4360 def int_nvvm_sust_p_1d_i32_trap
4361 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
4362 "llvm.nvvm.sust.p.1d.i32.trap">,
4363 ClangBuiltin<"__nvvm_sust_p_1d_i32_trap">;
4364 def int_nvvm_sust_p_1d_v2i8_trap
4365 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4366 "llvm.nvvm.sust.p.1d.v2i8.trap">,
4367 ClangBuiltin<"__nvvm_sust_p_1d_v2i8_trap">;
4368 def int_nvvm_sust_p_1d_v2i16_trap
4369 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4370 "llvm.nvvm.sust.p.1d.v2i16.trap">,
4371 ClangBuiltin<"__nvvm_sust_p_1d_v2i16_trap">;
4372 def int_nvvm_sust_p_1d_v2i32_trap
4373 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4374 "llvm.nvvm.sust.p.1d.v2i32.trap">,
4375 ClangBuiltin<"__nvvm_sust_p_1d_v2i32_trap">;
4376 def int_nvvm_sust_p_1d_v4i8_trap
4377 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4378 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4379 "llvm.nvvm.sust.p.1d.v4i8.trap">,
4380 ClangBuiltin<"__nvvm_sust_p_1d_v4i8_trap">;
4381 def int_nvvm_sust_p_1d_v4i16_trap
4382 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4383 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4384 "llvm.nvvm.sust.p.1d.v4i16.trap">,
4385 ClangBuiltin<"__nvvm_sust_p_1d_v4i16_trap">;
4386 def int_nvvm_sust_p_1d_v4i32_trap
4387 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4388 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4389 "llvm.nvvm.sust.p.1d.v4i32.trap">,
4390 ClangBuiltin<"__nvvm_sust_p_1d_v4i32_trap">;
4391
4392
// llvm.nvvm.sust.p.1d.array.*.trap
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, two i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. Only i8/i16/i32 element forms are defined in this group.
// NOTE(review): presumably the two i32s are the array index and the x
// coordinate -- confirm operand order against the NVPTX lowering.
4393 def int_nvvm_sust_p_1d_array_i8_trap
4394 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4395 "llvm.nvvm.sust.p.1d.array.i8.trap">,
4396 ClangBuiltin<"__nvvm_sust_p_1d_array_i8_trap">;
4397 def int_nvvm_sust_p_1d_array_i16_trap
4398 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4399 "llvm.nvvm.sust.p.1d.array.i16.trap">,
4400 ClangBuiltin<"__nvvm_sust_p_1d_array_i16_trap">;
4401 def int_nvvm_sust_p_1d_array_i32_trap
4402 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4403 "llvm.nvvm.sust.p.1d.array.i32.trap">,
4404 ClangBuiltin<"__nvvm_sust_p_1d_array_i32_trap">;
4405 def int_nvvm_sust_p_1d_array_v2i8_trap
4406 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4407 llvm_i16_ty, llvm_i16_ty], [],
4408 "llvm.nvvm.sust.p.1d.array.v2i8.trap">,
4409 ClangBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">;
4410 def int_nvvm_sust_p_1d_array_v2i16_trap
4411 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4412 llvm_i16_ty, llvm_i16_ty], [],
4413 "llvm.nvvm.sust.p.1d.array.v2i16.trap">,
4414 ClangBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">;
4415 def int_nvvm_sust_p_1d_array_v2i32_trap
4416 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4417 llvm_i32_ty, llvm_i32_ty], [],
4418 "llvm.nvvm.sust.p.1d.array.v2i32.trap">,
4419 ClangBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">;
4420 def int_nvvm_sust_p_1d_array_v4i8_trap
4421 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4422 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4423 "llvm.nvvm.sust.p.1d.array.v4i8.trap">,
4424 ClangBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">;
4425 def int_nvvm_sust_p_1d_array_v4i16_trap
4426 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4427 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4428 "llvm.nvvm.sust.p.1d.array.v4i16.trap">,
4429 ClangBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">;
4430 def int_nvvm_sust_p_1d_array_v4i32_trap
4431 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4432 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4433 "llvm.nvvm.sust.p.1d.array.v4i32.trap">,
4434 ClangBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">;
4435
4436
// llvm.nvvm.sust.p.2d.*.trap
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, two i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. Only i8/i16/i32 element forms are defined in this group.
// NOTE(review): presumably the two i32s are the x/y coordinates -- confirm
// against the NVPTX lowering.
4437 def int_nvvm_sust_p_2d_i8_trap
4438 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4439 "llvm.nvvm.sust.p.2d.i8.trap">,
4440 ClangBuiltin<"__nvvm_sust_p_2d_i8_trap">;
4441 def int_nvvm_sust_p_2d_i16_trap
4442 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4443 "llvm.nvvm.sust.p.2d.i16.trap">,
4444 ClangBuiltin<"__nvvm_sust_p_2d_i16_trap">;
4445 def int_nvvm_sust_p_2d_i32_trap
4446 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4447 "llvm.nvvm.sust.p.2d.i32.trap">,
4448 ClangBuiltin<"__nvvm_sust_p_2d_i32_trap">;
4449 def int_nvvm_sust_p_2d_v2i8_trap
4450 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4451 llvm_i16_ty, llvm_i16_ty], [],
4452 "llvm.nvvm.sust.p.2d.v2i8.trap">,
4453 ClangBuiltin<"__nvvm_sust_p_2d_v2i8_trap">;
4454 def int_nvvm_sust_p_2d_v2i16_trap
4455 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4456 llvm_i16_ty, llvm_i16_ty], [],
4457 "llvm.nvvm.sust.p.2d.v2i16.trap">,
4458 ClangBuiltin<"__nvvm_sust_p_2d_v2i16_trap">;
4459 def int_nvvm_sust_p_2d_v2i32_trap
4460 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4461 llvm_i32_ty, llvm_i32_ty], [],
4462 "llvm.nvvm.sust.p.2d.v2i32.trap">,
4463 ClangBuiltin<"__nvvm_sust_p_2d_v2i32_trap">;
4464 def int_nvvm_sust_p_2d_v4i8_trap
4465 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4466 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4467 "llvm.nvvm.sust.p.2d.v4i8.trap">,
4468 ClangBuiltin<"__nvvm_sust_p_2d_v4i8_trap">;
4469 def int_nvvm_sust_p_2d_v4i16_trap
4470 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4471 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4472 "llvm.nvvm.sust.p.2d.v4i16.trap">,
4473 ClangBuiltin<"__nvvm_sust_p_2d_v4i16_trap">;
4474 def int_nvvm_sust_p_2d_v4i32_trap
4475 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4476 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4477 "llvm.nvvm.sust.p.2d.v4i32.trap">,
4478 ClangBuiltin<"__nvvm_sust_p_2d_v4i32_trap">;
4479
4480
// llvm.nvvm.sust.p.2d.array.*.trap
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, three i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. Only i8/i16/i32 element forms are defined in this group.
// NOTE(review): presumably the three i32s are the array index plus x/y
// coordinates -- confirm operand order against the NVPTX lowering.
4481 def int_nvvm_sust_p_2d_array_i8_trap
4482 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4483 llvm_i32_ty, llvm_i16_ty], [],
4484 "llvm.nvvm.sust.p.2d.array.i8.trap">,
4485 ClangBuiltin<"__nvvm_sust_p_2d_array_i8_trap">;
4486 def int_nvvm_sust_p_2d_array_i16_trap
4487 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4488 llvm_i32_ty, llvm_i16_ty], [],
4489 "llvm.nvvm.sust.p.2d.array.i16.trap">,
4490 ClangBuiltin<"__nvvm_sust_p_2d_array_i16_trap">;
4491 def int_nvvm_sust_p_2d_array_i32_trap
4492 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4493 llvm_i32_ty, llvm_i32_ty], [],
4494 "llvm.nvvm.sust.p.2d.array.i32.trap">,
4495 ClangBuiltin<"__nvvm_sust_p_2d_array_i32_trap">;
4496 def int_nvvm_sust_p_2d_array_v2i8_trap
4497 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4498 llvm_i16_ty, llvm_i16_ty], [],
4499 "llvm.nvvm.sust.p.2d.array.v2i8.trap">,
4500 ClangBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">;
4501 def int_nvvm_sust_p_2d_array_v2i16_trap
4502 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4503 llvm_i16_ty, llvm_i16_ty], [],
4504 "llvm.nvvm.sust.p.2d.array.v2i16.trap">,
4505 ClangBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">;
4506 def int_nvvm_sust_p_2d_array_v2i32_trap
4507 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4508 llvm_i32_ty, llvm_i32_ty], [],
4509 "llvm.nvvm.sust.p.2d.array.v2i32.trap">,
4510 ClangBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">;
4511 def int_nvvm_sust_p_2d_array_v4i8_trap
4512 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4513 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4514 "llvm.nvvm.sust.p.2d.array.v4i8.trap">,
4515 ClangBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">;
4516 def int_nvvm_sust_p_2d_array_v4i16_trap
4517 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4518 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4519 "llvm.nvvm.sust.p.2d.array.v4i16.trap">,
4520 ClangBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">;
4521 def int_nvvm_sust_p_2d_array_v4i32_trap
4522 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4523 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4524 "llvm.nvvm.sust.p.2d.array.v4i32.trap">,
4525 ClangBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">;
4526
4527
// llvm.nvvm.sust.p.3d.*.trap
// Void intrinsics (empty result list, empty property list). Operands are one
// i64, three i32s, then 1, 2 or 4 value operands for the scalar, v2 and v4
// forms. The i8 and i16 forms both declare their value operands as
// llvm_i16_ty. Only i8/i16/i32 element forms are defined in this group.
// NOTE(review): presumably the three i32s are the x/y/z coordinates --
// confirm against the NVPTX lowering.
4528 def int_nvvm_sust_p_3d_i8_trap
4529 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4530 llvm_i32_ty, llvm_i16_ty], [],
4531 "llvm.nvvm.sust.p.3d.i8.trap">,
4532 ClangBuiltin<"__nvvm_sust_p_3d_i8_trap">;
4533 def int_nvvm_sust_p_3d_i16_trap
4534 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4535 llvm_i32_ty, llvm_i16_ty], [],
4536 "llvm.nvvm.sust.p.3d.i16.trap">,
4537 ClangBuiltin<"__nvvm_sust_p_3d_i16_trap">;
4538 def int_nvvm_sust_p_3d_i32_trap
4539 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4540 llvm_i32_ty, llvm_i32_ty], [],
4541 "llvm.nvvm.sust.p.3d.i32.trap">,
4542 ClangBuiltin<"__nvvm_sust_p_3d_i32_trap">;
4543 def int_nvvm_sust_p_3d_v2i8_trap
4544 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4545 llvm_i16_ty, llvm_i16_ty], [],
4546 "llvm.nvvm.sust.p.3d.v2i8.trap">,
4547 ClangBuiltin<"__nvvm_sust_p_3d_v2i8_trap">;
4548 def int_nvvm_sust_p_3d_v2i16_trap
4549 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4550 llvm_i16_ty, llvm_i16_ty], [],
4551 "llvm.nvvm.sust.p.3d.v2i16.trap">,
4552 ClangBuiltin<"__nvvm_sust_p_3d_v2i16_trap">;
4553 def int_nvvm_sust_p_3d_v2i32_trap
4554 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4555 llvm_i32_ty, llvm_i32_ty], [],
4556 "llvm.nvvm.sust.p.3d.v2i32.trap">,
4557 ClangBuiltin<"__nvvm_sust_p_3d_v2i32_trap">;
4558 def int_nvvm_sust_p_3d_v4i8_trap
4559 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4560 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4561 "llvm.nvvm.sust.p.3d.v4i8.trap">,
4562 ClangBuiltin<"__nvvm_sust_p_3d_v4i8_trap">;
4563 def int_nvvm_sust_p_3d_v4i16_trap
4564 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4565 llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4566 "llvm.nvvm.sust.p.3d.v4i16.trap">,
4567 ClangBuiltin<"__nvvm_sust_p_3d_v4i16_trap">;
4568 def int_nvvm_sust_p_3d_v4i32_trap
4569 : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4570 llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4571 "llvm.nvvm.sust.p.3d.v4i32.trap">,
4572 ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
4573
// llvm.nvvm.swap.lo.hi.b64: takes one i64 and returns one i64. Declared with
// default attributes plus IntrNoMem and IntrSpeculatable, and exposed to
// Clang as __nvvm_swap_lo_hi_b64.
// NOTE(review): the name suggests it exchanges the low and high 32-bit
// halves of the operand -- confirm against the NVPTX lowering.
4574 def int_nvvm_swap_lo_hi_b64
4575 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
4576 [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
4577 ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
4578
4579
4580 // Accessing special registers.
4581
// Base class for a 32-bit special-register read: no operands, one i32
// result, default attributes plus IntrNoMem, IntrSpeculatable and NoUndef on
// the return value. It attaches no ClangBuiltin.
4582 class PTXReadSRegIntrinsicNB_r32
4583 : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
// Same as PTXReadSRegIntrinsicNB_r32, plus a Clang builtin whose name is
// "__nvvm_read_ptx_sreg_" followed by `name`.
4584 class PTXReadSRegIntrinsic_r32<string name>
4585 : PTXReadSRegIntrinsicNB_r32, ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4586
// Defines four i32 special-register reads, one per suffix _x, _y, _z and _w.
// Each is a PTXReadSRegIntrinsic_r32 whose builtin name is built from
// `regname` followed by the suffix. Despite the "_v4i32" in the multiclass
// name, no vector-typed record is defined; both wider variants below are
// commented out.
// NOTE(review): the commented-out def names do not match their result types
// (`_r64` returns llvm_i128_ty, `_v4i16` returns llvm_v4i32_ty) -- fix the
// names before enabling either one.
4587 multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
4588 // FIXME: Do we need the 128-bit integer type version?
4589 // def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
4590
4591 // FIXME: Enable this once v4i32 support is enabled in back-end.
4592 // def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
4593 foreach suffix = ["_x", "_y", "_z", "_w"] in
4594 def suffix : PTXReadSRegIntrinsic_r32<regname # suffix>;
4595 }
4596
4597 // Same, but without automatic clang builtins. It will be used for
4598 // registers that require particular GPU or PTX version.
// Defines the _x, _y, _z and _w records as PTXReadSRegIntrinsicNB_r32, so
// this multiclass takes no register-name parameter.
4599 multiclass PTXReadSRegIntrinsicNB_v4i32 {
4600 foreach suffix = ["_x", "_y", "_z", "_w"] in
4601 def suffix : PTXReadSRegIntrinsicNB_r32;
4602 }
4603
// 64-bit special-register read: no operands, one i64 result, default
// attributes plus IntrNoMem, IntrSpeculatable and NoUndef on the return
// value. The Clang builtin name is "__nvvm_read_ptx_sreg_" followed by
// `name`.
4604 class PTXReadSRegIntrinsic_r64<string name>
4605 : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
4606 ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4607
4608 // Intrinsics to read registers with non-constant values, i.e. values that
4609 // change over the kernel lifetime. Such reads should not be CSE'd.
// 32-bit form. Derives from Intrinsic rather than DefaultAttrsIntrinsic and
// is marked IntrInaccessibleMemOnly and IntrNoCallback instead of IntrNoMem,
// with NoUndef on the return value.
4610 class PTXReadNCSRegIntrinsic_r32<string name>
4611 : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
4612 ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// 64-bit read of a special register: no operands, one i64 result. Derives
// from Intrinsic with IntrInaccessibleMemOnly, IntrNoCallback and NoUndef on
// the return value. The Clang builtin name is "__nvvm_read_ptx_sreg_"
// followed by `name`.
4613 class PTXReadNCSRegIntrinsic_r64<string name>
4614 : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
4615 ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4616
// Special-register reads that carry a __nvvm_read_ptx_sreg_* Clang builtin.
// The `defm` lines expand to _x/_y/_z/_w records via
// PTXReadSRegIntrinsic_v4i32. clock, clock64, globaltimer and pm0..pm3 use
// the PTXReadNCSReg* classes; every other register here uses the
// IntrNoMem/IntrSpeculatable PTXReadSReg* classes.
4617 defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
4618 defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
4619
4620 def int_nvvm_read_ptx_sreg_laneid : PTXReadSRegIntrinsic_r32<"laneid">;
4621 def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32<"warpid">;
4622 def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32<"nwarpid">;
4623
4624 defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<"ctaid">;
4625 defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<"nctaid">;
4626
4627 def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32<"smid">;
4628 def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32<"nsmid">;
4629 def int_nvvm_read_ptx_sreg_gridid : PTXReadSRegIntrinsic_r32<"gridid">;
4630
4631 def int_nvvm_read_ptx_sreg_lanemask_eq :
4632 PTXReadSRegIntrinsic_r32<"lanemask_eq">;
4633 def int_nvvm_read_ptx_sreg_lanemask_le :
4634 PTXReadSRegIntrinsic_r32<"lanemask_le">;
4635 def int_nvvm_read_ptx_sreg_lanemask_lt :
4636 PTXReadSRegIntrinsic_r32<"lanemask_lt">;
4637 def int_nvvm_read_ptx_sreg_lanemask_ge :
4638 PTXReadSRegIntrinsic_r32<"lanemask_ge">;
4639 def int_nvvm_read_ptx_sreg_lanemask_gt :
4640 PTXReadSRegIntrinsic_r32<"lanemask_gt">;
4641
4642 def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
4643 def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
4644
4645 def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64<"globaltimer">;
4646
4647 def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
4648 def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
4649 def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
4650 def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
4651
4652 def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
4653
4654 // sm90+, PTX7.8+
// These use the "NB" (no builtin) classes, so no clang builtin is attached
// here. Each defm expands to _x, _y, _z, _w records.
4655 defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32;
4656 defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32;
4657 defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32;
4658 defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32;
4659
// Scalar 32-bit cluster registers, also without clang builtins.
4660 def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
4661 def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
4662
4663 //
4664 // SHUFFLE
4665 //
4666 // Generate intrinsics for all variants of shfl instruction.
// Iterates over every combination of sync x mode x type x return_pred. The
// innermost single-element foreach only binds `i` to the SHFL_INFO record for
// that combination; the record supplies Name, Builtin, RetTy, ArgsTy and
// IntrName. SHFL_INFO itself is defined outside this section.
4667 foreach sync = [false, true] in {
4668 foreach mode = ["up", "down", "bfly", "idx"] in {
4669 foreach type = ["i32", "f32"] in {
4670 foreach return_pred = [false, true] in {
4671 foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
// Variant with a clang builtin attached.
4672 if i.withGccBuiltin then {
4673 def i.Name : ClangBuiltin<i.Builtin>,
4674 Intrinsic<i.RetTy, i.ArgsTy,
4675 [IntrInaccessibleMemOnly, IntrConvergent,
4676 IntrNoCallback],
4677 i.IntrName>;
4678 }
// Same intrinsic without a clang builtin.
// NOTE(review): both branches define i.Name, so the two flags are presumably
// mutually exclusive -- confirm against SHFL_INFO.
4679 if i.withoutGccBuiltin then {
4680 def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
4681 [IntrInaccessibleMemOnly, IntrConvergent,
4682 IntrNoCallback], i.IntrName>;
4683 }
4684 }
4685 }
4686 }
4687 }
4688 }
4689
4690 //
4691 // VOTE
4692 //
// Non-sync vote intrinsics. Each takes a single i1 predicate. all/any/uni
// return i1; ballot returns i32. All are IntrConvergent and have both an
// explicit LLVM name string and a clang builtin.
4693
4694 // vote.all pred
4695 def int_nvvm_vote_all :
4696 Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
4697 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all">,
4698 ClangBuiltin<"__nvvm_vote_all">;
4699 // vote.any pred
4700 def int_nvvm_vote_any :
4701 Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
4702 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any">,
4703 ClangBuiltin<"__nvvm_vote_any">;
4704 // vote.uni pred
4705 def int_nvvm_vote_uni :
4706 Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
4707 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni">,
4708 ClangBuiltin<"__nvvm_vote_uni">;
4709 // vote.ballot pred
4710 def int_nvvm_vote_ballot :
4711 Intrinsic<[llvm_i32_ty], [llvm_i1_ty],
4712 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot">,
4713 ClangBuiltin<"__nvvm_vote_ballot">;
4714
4715 //
4716 // VOTE.SYNC
4717 //
// Sync vote intrinsics. Operands are (i32 mask, i1 pred), in that order.
// Result types mirror the non-sync forms: i1 for all/any/uni, i32 for ballot.
4718
4719 // vote.sync.all mask, pred
4720 def int_nvvm_vote_all_sync :
4721 Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
4722 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all.sync">,
4723 ClangBuiltin<"__nvvm_vote_all_sync">;
4724 // vote.sync.any mask, pred
4725 def int_nvvm_vote_any_sync :
4726 Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
4727 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any.sync">,
4728 ClangBuiltin<"__nvvm_vote_any_sync">;
4729 // vote.sync.uni mask, pred
4730 def int_nvvm_vote_uni_sync :
4731 Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
4732 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni.sync">,
4733 ClangBuiltin<"__nvvm_vote_uni_sync">;
4734 // vote.sync.ballot mask, pred
4735 def int_nvvm_vote_ballot_sync :
4736 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty],
4737 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot.sync">,
4738 ClangBuiltin<"__nvvm_vote_ballot_sync">;
4739
4740 //
4741 // ACTIVEMASK
4742 //
// Takes no operands and returns an i32. In addition to the attributes used by
// the vote intrinsics it carries IntrHasSideEffects.
4743 def int_nvvm_activemask :
4744 Intrinsic<[llvm_i32_ty], [],
4745 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback, IntrHasSideEffects], "llvm.nvvm.activemask">,
4746 ClangBuiltin<"__nvvm_activemask">;
4747
4748 //
4749 // MATCH.SYNC
4750 //
// Operands are (i32 mask, value), where value is i32 or i64 depending on the
// variant. The match.any forms return an i32 and have clang builtins.
4751 // match.any.sync.b32 mask, value
4752 def int_nvvm_match_any_sync_i32 :
4753 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4754 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i32">,
4755 ClangBuiltin<"__nvvm_match_any_sync_i32">;
4756 // match.any.sync.b64 mask, value
4757 def int_nvvm_match_any_sync_i64 :
4758 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
4759 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i64">,
4760 ClangBuiltin<"__nvvm_match_any_sync_i64">;
4761
4762 // match.all instructions have two variants -- one returns a single value, another
4763 // returns a pair {value, predicate}. We currently only implement the latter as
4764 // that's the variant exposed by CUDA API.
// The pair is modelled as two results, {i32, i1}. No clang builtin is attached
// to the match.all records.
4765
4766 // match.all.sync.b32p mask, value
4767 def int_nvvm_match_all_sync_i32p :
4768 Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
4769 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i32p">;
4770 // match.all.sync.b64p mask, value
4771 def int_nvvm_match_all_sync_i64p :
4772 Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
4773 [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i64p">;
4774
4775 //
4776 // ELECT.SYNC
4777 //
4778 // elect.sync dst|pred, membermask
// Takes the i32 membermask and produces two results, {i32, i1}, matching the
// dst|pred pair in the syntax line above. Uses DefaultAttrsIntrinsic and has
// no clang builtin or explicit name string.
4779 def int_nvvm_elect_sync :
4780 DefaultAttrsIntrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty],
4781 [IntrInaccessibleMemOnly, IntrConvergent]>;
4782
4783 //
4784 // REDUX.SYNC
4785 //
// All eight records share one signature: i32 result, operands (i32 src,
// i32 membermask) per the syntax comments, and the attribute set
// IntrConvergent + IntrInaccessibleMemOnly + IntrNoCallback. They differ only
// in the reduction named by the record and by its clang builtin.
4786 // redux.sync.min.u32 dst, src, membermask;
4787 def int_nvvm_redux_sync_umin : ClangBuiltin<"__nvvm_redux_sync_umin">,
4788 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4789 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4790
4791 // redux.sync.max.u32 dst, src, membermask;
4792 def int_nvvm_redux_sync_umax : ClangBuiltin<"__nvvm_redux_sync_umax">,
4793 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4794 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4795
4796 // redux.sync.add.s32 dst, src, membermask;
4797 def int_nvvm_redux_sync_add : ClangBuiltin<"__nvvm_redux_sync_add">,
4798 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4799 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4800
4801 // redux.sync.min.s32 dst, src, membermask;
4802 def int_nvvm_redux_sync_min : ClangBuiltin<"__nvvm_redux_sync_min">,
4803 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4804 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4805
4806 // redux.sync.max.s32 dst, src, membermask;
4807 def int_nvvm_redux_sync_max : ClangBuiltin<"__nvvm_redux_sync_max">,
4808 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4809 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4810
4811 // redux.sync.and.b32 dst, src, membermask;
4812 def int_nvvm_redux_sync_and : ClangBuiltin<"__nvvm_redux_sync_and">,
4813 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4814 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4815
4816 // redux.sync.xor.b32 dst, src, membermask;
4817 def int_nvvm_redux_sync_xor : ClangBuiltin<"__nvvm_redux_sync_xor">,
4818 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4819 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4820
4821 // redux.sync.or.b32 dst, src, membermask;
4822 def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
4823 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4824 [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4825
4826 //
4827 // WGMMA fence instructions
4828 //
// None of these return a value; all are IntrConvergent and declare no memory
// attribute. Note that the fence record passes no explicit name string, while
// commit_group and wait_group do.
4829 // wgmma.fence.sync.aligned;
4830 def int_nvvm_wgmma_fence_sync_aligned
4831 : Intrinsic<[], [], [IntrConvergent]>;
4832
4833 // wgmma.commit_group.sync.aligned;
4834 def int_nvvm_wgmma_commit_group_sync_aligned
4835 : Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.commit_group.sync.aligned">;
4836
4837 // wgmma.wait_group.sync.aligned N;
// N is an i64 operand and must be an immediate (ImmArg on argument 0).
4838 def int_nvvm_wgmma_wait_group_sync_aligned
4839 : Intrinsic<[], [llvm_i64_ty], [IntrConvergent, ImmArg<ArgIndex<0>>], "llvm.nvvm.wgmma.wait_group.sync.aligned">;
4840
4841 //
4842 // WMMA instructions
4843 //
4844 // WMMA.LOAD
// Load of one WMMA fragment.
//   Frag       - fragment descriptor; Frag.regs is the list of result types.
//   Layout     - only forwarded to WMMA_NAME_LDST to form the intrinsic name.
//   WithStride - when non-zero an extra i32 operand follows the pointer.
// The pointer (argument 0) is overloaded (anyptr), read-only and not captured.
4845 class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
4846 : Intrinsic<Frag.regs,
4847 !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
4848 [IntrWillReturn, IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
4849 WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
4850
4851 // WMMA.STORE.D
// Store of one WMMA fragment; no result.
// Operand list is built as: pointer (anyptr), then the fragment registers
// (Frag.regs), then an i32 operand only when WithStride is non-zero.
// The pointer (argument 0) is write-only and not captured.
4852 class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
4853 : Intrinsic<[],
4854 !listconcat(
4855 [llvm_anyptr_ty],
4856 Frag.regs,
4857 !if(WithStride, [llvm_i32_ty], [])),
4858 [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
4859 WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
4860
4861 // Create all load/store variants
// For each layout (row/col) and each stride flag (0/1), one record is defined
// per fragment that NVVM_WMMA_LDST_SUPPORTED accepts for that layout. Loads
// iterate NVVM_MMA_OPS.all_ld_ops and stores NVVM_MMA_OPS.all_st_ops. The
// record name comes from WMMA_NAME_LDST<...>.record.
4862 foreach layout = ["row", "col"] in {
4863 foreach stride = [0, 1] in {
4864 foreach frag = NVVM_MMA_OPS.all_ld_ops in
4865 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
4866 def WMMA_NAME_LDST<"load", frag, layout, stride>.record
4867 : NVVM_WMMA_LD<frag, layout, stride>;
4868 foreach frag = NVVM_MMA_OPS.all_st_ops in
4869 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
4870 def WMMA_NAME_LDST<"store", frag, layout, stride>.record
4871 : NVVM_WMMA_ST<frag, layout, stride>;
4872 }
4873 }
4874
4875 // WMMA.MMA
// Results are D.regs; operands are the concatenation A.regs, B.regs, C.regs in
// that order. ALayout, BLayout, Satfinite, rnd and b1op are used only to build
// the intrinsic name through WMMA_NAME<...>.llvm. Marked IntrNoMem.
4876 class NVVM_WMMA_MMA<string ALayout, string BLayout, int Satfinite, string rnd, string b1op,
4877 WMMA_REGS A, WMMA_REGS B,
4878 WMMA_REGS C, WMMA_REGS D>
4879 : Intrinsic<D.regs,
4880 !listconcat(A.regs, B.regs, C.regs),
4881 [IntrNoMem, IntrNoCallback],
4882 WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, A, B, C, D>.llvm>;
4883
// Instantiate NVVM_WMMA_MMA for every combination of A/B layout, satfinite
// flag, rounding mode ("" means none given) and op tuple, further expanded by
// the b1op values NVVM_MMA_B1OPS yields for that op. Combinations rejected by
// NVVM_WMMA_SUPPORTED are skipped. op[0..3] are passed as the A, B, C, D
// fragments.
4884 foreach layout_a = ["row", "col"] in {
4885 foreach layout_b = ["row", "col"] in {
4886 foreach satf = [0, 1] in {
4887 foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
4888 foreach op = NVVM_MMA_OPS.all_wmma_ops in {
4889 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
4890 if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
4891 def WMMA_NAME<layout_a, layout_b, satf, rnd, b1op,
4892 op[0], op[1], op[2], op[3]>.record
4893 : NVVM_WMMA_MMA<layout_a, layout_b, satf, rnd, b1op,
4894 op[0], op[1], op[2], op[3]>;
4895 }
4896 } // b1op
4897 } // op
4898 } // rnd
4899 } // satf
4900 } // layout_b
4901 } // layout_a
4902
4903 // MMA
// Same shape as NVVM_WMMA_MMA but without a rounding-mode parameter: results
// are D.regs, operands are A.regs ++ B.regs ++ C.regs, and the name comes from
// MMA_NAME<...>.llvm.
4904 class NVVM_MMA<string ALayout, string BLayout, int Satfinite, string b1op,
4905 WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D>
4906 : Intrinsic<D.regs,
4907 !listconcat(A.regs, B.regs, C.regs),
4908 [IntrNoMem, IntrNoCallback],
4909 MMA_NAME<ALayout, BLayout, Satfinite, b1op, A, B, C, D>.llvm>;
4910
// Instantiate NVVM_MMA for every layout pair, satfinite flag, op tuple from
// NVVM_MMA_OPS.all_mma_ops and b1op value, keeping only the combinations
// NVVM_MMA_SUPPORTED accepts. op[0..3] are the A, B, C, D fragments.
4911 foreach layout_a = ["row", "col"] in {
4912 foreach layout_b = ["row", "col"] in {
4913 foreach satf = [0, 1] in {
4914 foreach op = NVVM_MMA_OPS.all_mma_ops in {
4915 foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
4916 if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
4917 def MMA_NAME<layout_a, layout_b, satf, b1op, op[0], op[1], op[2], op[3]>.record
4918 : NVVM_MMA<layout_a, layout_b, satf, b1op, op[0], op[1], op[2], op[3]>;
4919 }
4920 } // b1op
4921 } // op
4922 } // satf
4923 } // layout_b
4924 } // layout_a
4925
4926 // LDMATRIX
// Results are Frag.regs; the only operand is an overloaded pointer that is
// read-only and not captured. Transposed is used only to build the intrinsic
// name through LDMATRIX_NAME<...>.intr.
4927 class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed>
4928 : Intrinsic<Frag.regs, [llvm_anyptr_ty],
4929 [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>,
4930 NoCapture<ArgIndex<0>>],
4931 LDMATRIX_NAME<Frag, Transposed>.intr>;
4932
// Define one LDMATRIX record per (transposed flag, fragment) pair, skipping
// fragments that NVVM_LDMATRIX_SUPPORTED rejects.
4933 foreach transposed = [0, 1] in {
4934 foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in {
4935 if NVVM_LDMATRIX_SUPPORTED<frag>.ret then {
4936 def LDMATRIX_NAME<frag, transposed>.record
4937 : NVVM_LDMATRIX<frag, transposed>;
4938 }
4939 }
4940 }
4941
// Cluster address/rank queries. All are IntrNoMem + IntrSpeculatable. The
// pointer-taking ones mark argument 0 NoCapture and come in two forms: one on
// llvm_ptr_ty and one on llvm_shared_ptr_ty.
// mapa: (pointer, i32) -> pointer of the same kind as the input.
4942 def int_nvvm_mapa
4943 : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
4944 [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4945 "llvm.nvvm.mapa">;
4946 def int_nvvm_mapa_shared_cluster
4947 : DefaultAttrsIntrinsic<[llvm_shared_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
4948 [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4949 "llvm.nvvm.mapa.shared.cluster">;
// getctarank: (pointer) -> i32.
4950 def int_nvvm_getctarank
4951 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty],
4952 [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4953 "llvm.nvvm.getctarank">;
4954 def int_nvvm_getctarank_shared_cluster
4955 : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty],
4956 [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4957 "llvm.nvvm.getctarank.shared.cluster">;
// is_explicit_cluster: no operands, returns an i1 that is never undef.
4958 def int_nvvm_is_explicit_cluster
4959 : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
4960 [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
4961 "llvm.nvvm.is_explicit_cluster">;
4962
4963 // Setmaxnreg inc/dec intrinsics
// Both take a single i32 that must be an immediate (ImmArg on argument 0) and
// return nothing. They are IntrNoMem yet IntrHasSideEffects, and convergent.
4964 def int_nvvm_setmaxnreg_inc_sync_aligned_u32
4965 : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
4966 [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
4967 "llvm.nvvm.setmaxnreg.inc.sync.aligned.u32">;
4968 def int_nvvm_setmaxnreg_dec_sync_aligned_u32
4969 : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
4970 [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
4971 "llvm.nvvm.setmaxnreg.dec.sync.aligned.u32">;
4972
4973 // Exit
// No operands, no result. Marked IntrNoReturn, plus IntrConvergent and
// IntrInaccessibleMemOnly; exposed as the clang builtin __nvvm_exit.
4974 def int_nvvm_exit : ClangBuiltin<"__nvvm_exit">,
4975 Intrinsic<[], [], [IntrConvergent, IntrInaccessibleMemOnly, IntrNoReturn]>;
4976
4977 // Intrinsics for Tensor Copy using TMA
4978 // G2S -> From Global to Shared memory variants
4979 // S2G -> From Shared to Global memory variants
// For each dimension 1..5 and each mode ("tile" always; "im2col" only when
// dim >= 3), three records are defined: G2S, S2G and prefetch. Each
// single-element foreach only binds a helper record that supplies the record
// name, operand types and property list. None of the intrinsics return a value.
4980 foreach dim = [1, 2, 3, 4, 5] in {
4981 foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
4982 foreach g2s = [CP_ASYNC_BULK_TENSOR_G2S_INTR<dim, mode>] in
4983 def g2s.Name : DefaultAttrsIntrinsic<[], g2s.ArgsTy, g2s.IntrProp>;
4984 foreach s2g = [CP_ASYNC_BULK_TENSOR_S2G_INTR<dim, mode>] in
4985 def s2g.Name : DefaultAttrsIntrinsic<[], s2g.ArgsTy, s2g.IntrProp>;
4986 foreach prefetch = [CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<dim, mode>] in
4987 def prefetch.Name : DefaultAttrsIntrinsic<[], prefetch.ArgsTy, prefetch.IntrProp>;
4988 }
4989 }
4990
4991 // Intrinsics for TMA Copy with reduction
// Same dim/mode iteration as the plain tensor copies, additionally crossed
// with the eight reduction ops listed below. Name, operand types and
// properties come from CP_ASYNC_BULK_TENSOR_REDUCE_INTR.
4992 foreach dim = [1, 2, 3, 4, 5] in {
4993 foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
4994 foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in {
4995 foreach reduce = [CP_ASYNC_BULK_TENSOR_REDUCE_INTR<dim, mode, red_op>] in
4996 def reduce.Name : DefaultAttrsIntrinsic<[], reduce.ArgsTy, reduce.IntrProp>;
4997 }
4998 }
4999 }
5000
5001 // Intrinsics for Bulk Copy using TMA (non-tensor)
5002 // From Global to Shared Cluster
// Eight operands, no result. The destination (arg 0) is write-only and the
// source (arg 2) is read-only; all three pointers are NoCapture. The two
// trailing i1 flags (args 6 and 7) must be immediates; per the operand
// comments they flag the cta_mask and cache_hint operands.
5003 def int_nvvm_cp_async_bulk_global_to_shared_cluster
5004 : DefaultAttrsIntrinsic<[],
5005 [llvm_shared_ptr_ty, // dst_smem_ptr
5006 llvm_shared_ptr_ty, // mbarrier_ptr
5007 llvm_global_ptr_ty, // src_gmem_ptr
5008 llvm_i32_ty, // copy_size
5009 llvm_i16_ty, // cta_mask
5010 llvm_i64_ty, // cache_hint
5011 llvm_i1_ty, // Flag for cta_mask
5012 llvm_i1_ty], // Flag for cache_hint
5013 [IntrConvergent, IntrArgMemOnly,
5014 WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
5015 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
5016 NoCapture<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
5017 ImmArg<ArgIndex<7>>]>;
5018
5019 // From Shared CTA to Shared Cluster
// Four operands, no result, and no cta_mask / cache_hint operands. The
// destination (arg 0) is write-only and the source (arg 2) is read-only; all
// three pointers are NoCapture.
5020 def int_nvvm_cp_async_bulk_shared_cta_to_cluster
5021 : DefaultAttrsIntrinsic<[],
5022 [llvm_shared_ptr_ty, // dst_smem_ptr
5023 llvm_shared_ptr_ty, // mbarrier_ptr
5024 llvm_shared_ptr_ty, // src_smem_ptr
5025 llvm_i32_ty], // copy_size
5026 [IntrConvergent, IntrArgMemOnly,
5027 WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
5028 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
5029 NoCapture<ArgIndex<2>>]>;
5030
5031 // From Shared CTA to Global memory
// Five operands, no result, and no mbarrier operand. The destination (arg 0)
// is write-only and the source (arg 1) is read-only; both are NoCapture. The
// trailing i1 flag (arg 4) must be an immediate.
5032 def int_nvvm_cp_async_bulk_shared_cta_to_global
5033 : DefaultAttrsIntrinsic<[],
5034 [llvm_global_ptr_ty, // dst_gmem_ptr
5035 llvm_shared_ptr_ty, // src_smem_ptr
5036 llvm_i32_ty, // copy_size
5037 llvm_i64_ty, // cache_hint
5038 llvm_i1_ty], // Flag for cache_hint
5039 [IntrConvergent, IntrArgMemOnly,
5040 WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
5041 NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
5042 ImmArg<ArgIndex<4>>]>;
5043
5044 // Intrinsics for Bulk Copy Prefetch L2
// Four operands, no result. The only pointer is the global source (arg 0),
// which is read-only and NoCapture. The trailing i1 flag (arg 3) must be an
// immediate.
5045 def int_nvvm_cp_async_bulk_prefetch_L2
5046 : DefaultAttrsIntrinsic<[],
5047 [llvm_global_ptr_ty, // src_gmem_ptr
5048 llvm_i32_ty, // copy_size
5049 llvm_i64_ty, // cache_hint
5050 llvm_i1_ty], // Flag for cache_hint
5051 [IntrConvergent, IntrArgMemOnly,
5052 NoCapture<ArgIndex<0>>, ReadOnly<ArgIndex<0>>,
5053 ImmArg<ArgIndex<3>>]>;
5054
// griddepcontrol launch_dependents / wait: no operands, no result.
// Declared IntrNoMem but IntrHasSideEffects.
5055 def int_nvvm_griddepcontrol_launch_dependents: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
5056 def int_nvvm_griddepcontrol_wait: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
5057
5058 } // let TargetPrefix = "nvvm"