Back to home page

EIC code displayed by LXR

 
 

    


Warning, /include/llvm/IR/IntrinsicsNVVM.td is written in an unsupported language. File is not indexed.

0001 //===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file defines all of the NVVM-specific intrinsics for use with NVPTX.
0010 //
0011 //===----------------------------------------------------------------------===//
0012 
0013 // The following intrinsics were once defined here, but are now auto-upgraded
0014 // to target-generic LLVM intrinsics.
0015 //
0016 //   * llvm.nvvm.brev32  --> llvm.bitreverse.i32
0017 //   * llvm.nvvm.brev64  --> llvm.bitreverse.i64
0018 //   * llvm.nvvm.clz.i   --> llvm.ctlz.i32
0019 //   * llvm.nvvm.clz.ll  --> trunc i64 llvm.ctlz.i64(x) to i32
0020 //   * llvm.nvvm.popc.i  --> llvm.ctpop.i32
0021 //   * llvm.nvvm.popc.ll --> trunc i64 llvm.ctpop.i64 to i32
0022 //   * llvm.nvvm.abs.i   --> select(x >= -x, x, -x)
0023 //   * llvm.nvvm.abs.ll  --> ibid.
0024 //   * llvm.nvvm.max.i   --> select(x sge y, x, y)
0025 //   * llvm.nvvm.max.ll  --> ibid.
0026 //   * llvm.nvvm.max.ui  --> select(x uge y, x, y)
0027 //   * llvm.nvvm.max.ull --> ibid.
0028 //   * llvm.nvvm.min.i   --> select(x sle y, x, y)
0029 //   * llvm.nvvm.min.ll  --> ibid.
0030 //   * llvm.nvvm.min.ui  --> select(x ule y, x, y)
0031 //   * llvm.nvvm.min.ull --> ibid.
0032 //   * llvm.nvvm.h2f     --> llvm.convert.to.fp16.f32
0033 //   * llvm.nvvm.bitcast.f2i         --> bitcast
0034 //   * llvm.nvvm.bitcast.i2f         --> ibid.
0035 //   * llvm.nvvm.bitcast.d2ll        --> ibid.
0036 //   * llvm.nvvm.bitcast.ll2d        --> ibid.
0037 //   * llvm.nvvm.ptr.gen.to.global   --> addrspacecast
0038 //   * llvm.nvvm.ptr.gen.to.shared   --> ibid.
0039 //   * llvm.nvvm.ptr.gen.to.constant --> ibid.
0040 //   * llvm.nvvm.ptr.gen.to.local    --> ibid.
0041 //   * llvm.nvvm.ptr.global.to.gen   --> ibid.
0042 //   * llvm.nvvm.ptr.shared.to.gen   --> ibid.
0043 //   * llvm.nvvm.ptr.constant.to.gen --> ibid.
0044 //   * llvm.nvvm.ptr.local.to.gen    --> ibid.
0045 //   * llvm.nvvm.ldg.global.i        --> load addrspace(1) !load.invariant
0046 //   * llvm.nvvm.ldg.global.f        --> ibid.
0047 //   * llvm.nvvm.ldg.global.p        --> ibid.
0048 
0049 def llvm_global_ptr_ty  : LLVMQualPointerType<1>;  // (global)ptr: pointer qualified with address space 1
0050 def llvm_shared_ptr_ty  : LLVMQualPointerType<3>;  // (shared)ptr: pointer qualified with address space 3
0051 
0052 //
0053 // MISC
0054 //
0055 
0056 // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
0057 // Geom: m<M>n<N>k<K>. E.g. m8n32k16 (ldmatrix entries use m<M>n<N>, e.g. m8n8)
0058 // Frag: [a|b|c|d] ([x1|x2|x4] for ldmatrix)
0059 // PtxEltType: PTX type for the element.
0060 class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
0061   string geom = Geom;
0062   string frag = Frag;
0063   string ptx_elt_type = PtxEltType;
0064   string gft = Geom#":"#Frag#":"#ptx_elt_type;  // "<geom>:<frag>:<type>" lookup key
0065   string ft = frag#":"#ptx_elt_type;  // "<frag>:<type>" key for geometry-independent entries
0066   list<LLVMType> regs = !cond(  // LLVM types of the fragment's registers; the first matching case wins
0067     // mma fp ops use smaller fragments than wmma fp ops
0068     !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2),
0069     !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2),
0070     !eq(gft,"m16n8k8:a:f16") : !listsplat(llvm_v2f16_ty, 2),
0071     !eq(gft,"m16n8k8:b:f16") : [llvm_v2f16_ty],
0072     !eq(gft,"m16n8k8:c:f16") : !listsplat(llvm_v2f16_ty, 2),
0073     !eq(gft,"m16n8k8:d:f16") : !listsplat(llvm_v2f16_ty, 2),
0074     !eq(gft,"m16n8k8:c:f32") : !listsplat(llvm_float_ty, 4),
0075     !eq(gft,"m16n8k8:d:f32") : !listsplat(llvm_float_ty, 4),
0076     !eq(gft,"m16n8k16:a:f16") : !listsplat(llvm_v2f16_ty, 4),
0077     !eq(gft,"m16n8k16:b:f16") : !listsplat(llvm_v2f16_ty, 2),
0078     !eq(gft,"m16n8k16:c:f16") : !listsplat(llvm_v2f16_ty, 2),
0079     !eq(gft,"m16n8k16:d:f16") : !listsplat(llvm_v2f16_ty, 2),
0080     !eq(gft,"m16n8k16:c:f32") : !listsplat(llvm_float_ty, 4),
0081     !eq(gft,"m16n8k16:d:f32") : !listsplat(llvm_float_ty, 4),
0082     !eq(gft,"m16n8k4:c:f32") : !listsplat(llvm_float_ty, 4),
0083     !eq(gft,"m16n8k4:d:f32") : !listsplat(llvm_float_ty, 4),
0084 
0085     // wmma fp16 -> fp16/fp32 @  m16n16k16/m8n32k16/m32n8k16
0086     // All other supported geometries use the same fragment format for f32 and
0087     // f16, so we only need to consider {fragment, type}.
0088     !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8),
0089     !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8),
0090     !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4),
0091     !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4),
0092     !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8),
0093     !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8),
0094 
0095     // wmma tf32 -> f32 @ m16n16k8 (a/b fragments only; c/d use the f32 entries above)
0096     !eq(gft,"m16n16k8:a:tf32") : !listsplat(llvm_i32_ty, 4),
0097     !eq(gft,"m16n16k8:b:tf32") : !listsplat(llvm_i32_ty, 4),
0098 
0099     // mma tf32 -> f32 @ m16n8k4/m16n8k8 (a/b fragments only)
0100     !eq(gft,"m16n8k4:a:tf32") : !listsplat(llvm_i32_ty, 2),
0101     !eq(gft,"m16n8k4:b:tf32") : [llvm_i32_ty],
0102     !eq(gft,"m16n8k8:a:tf32") : !listsplat(llvm_i32_ty, 4),
0103     !eq(gft,"m16n8k8:b:tf32") : !listsplat(llvm_i32_ty, 2),
0104 
0105     !eq(gft,"m8n8k4:a:f64") : [llvm_double_ty],  // f64 -> f64 @ m8n8k4
0106     !eq(gft,"m8n8k4:b:f64") : [llvm_double_ty],
0107     !eq(gft,"m8n8k4:c:f64") : !listsplat(llvm_double_ty, 2),
0108     !eq(gft,"m8n8k4:d:f64") : !listsplat(llvm_double_ty, 2),
0109 
0110     // wmma bf16 -> f32 @ m16n16k16/m8n32k16/m32n8k16 (a/b fragments only)
0111     !eq(gft,"m16n16k16:a:bf16") : !listsplat(llvm_i32_ty, 4),
0112     !eq(gft,"m16n16k16:b:bf16") : !listsplat(llvm_i32_ty, 4),
0113     !eq(gft,"m8n32k16:a:bf16") : !listsplat(llvm_i32_ty, 2),
0114     !eq(gft,"m8n32k16:b:bf16") : !listsplat(llvm_i32_ty, 8),
0115     !eq(gft,"m32n8k16:a:bf16") : !listsplat(llvm_i32_ty, 8),
0116     !eq(gft,"m32n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2),
0117 
0118     // mma bf16 -> f32 @ m16n8k16/m16n8k8 (a/b fragments only)
0119     !eq(gft,"m16n8k16:a:bf16") : !listsplat(llvm_i32_ty, 4),
0120     !eq(gft,"m16n8k16:b:bf16") : !listsplat(llvm_i32_ty, 2),
0121     !eq(gft,"m16n8k8:a:bf16") : !listsplat(llvm_i32_ty, 2),
0122     !eq(gft,"m16n8k8:b:bf16") : [llvm_i32_ty],
0123 
0124     // wmma u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
0125     !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2),
0126     !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2),
0127     !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2),
0128     !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2),
0129     !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8),
0130     !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8),
0131 
0132     !eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty],
0133     !eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty],
0134     !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4),
0135     !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4),
0136     !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8),
0137     !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8),
0138 
0139     !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4),
0140     !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4),
0141     !eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty],
0142     !eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty],
0143     !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8),
0144     !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8),
0145 
0146     // mma u8/s8 -> s32 @ m8n8k16/m16n8k16/m16n8k32
0147     !eq(gft,"m8n8k16:a:u8") : [llvm_i32_ty],
0148     !eq(gft,"m8n8k16:a:s8") : [llvm_i32_ty],
0149     !eq(gft,"m8n8k16:b:u8") : [llvm_i32_ty],
0150     !eq(gft,"m8n8k16:b:s8") : [llvm_i32_ty],
0151     !eq(gft,"m8n8k16:c:s32") : !listsplat(llvm_i32_ty, 2),
0152     !eq(gft,"m8n8k16:d:s32") : !listsplat(llvm_i32_ty, 2),
0153 
0154     !eq(gft,"m16n8k16:a:u8") : !listsplat(llvm_i32_ty, 2),
0155     !eq(gft,"m16n8k16:a:s8") : !listsplat(llvm_i32_ty, 2),
0156     !eq(gft,"m16n8k16:b:u8") : [llvm_i32_ty],
0157     !eq(gft,"m16n8k16:b:s8") : [llvm_i32_ty],
0158     !eq(gft,"m16n8k16:c:s32") : !listsplat(llvm_i32_ty, 4),
0159     !eq(gft,"m16n8k16:d:s32") : !listsplat(llvm_i32_ty, 4),
0160 
0161     !eq(gft,"m16n8k32:a:u8") : !listsplat(llvm_i32_ty, 4),
0162     !eq(gft,"m16n8k32:a:s8") : !listsplat(llvm_i32_ty, 4),
0163     !eq(gft,"m16n8k32:b:u8") : !listsplat(llvm_i32_ty, 2),
0164     !eq(gft,"m16n8k32:b:s8") : !listsplat(llvm_i32_ty, 2),
0165     !eq(gft,"m16n8k32:c:s32") : !listsplat(llvm_i32_ty, 4),
0166     !eq(gft,"m16n8k32:d:s32") : !listsplat(llvm_i32_ty, 4),
0167 
0168     // wmma/mma u4/s4 -> s32 @ m8n8k32 (u4/s4)
0169     !eq(gft,"m8n8k32:a:u4") : [llvm_i32_ty],
0170     !eq(gft,"m8n8k32:a:s4") : [llvm_i32_ty],
0171     !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty],
0172     !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty],
0173     !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2),
0174     !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2),
0175 
0176     !eq(gft,"m16n8k32:a:u4") : !listsplat(llvm_i32_ty, 2),
0177     !eq(gft,"m16n8k32:a:s4") : !listsplat(llvm_i32_ty, 2),
0178     !eq(gft,"m16n8k32:b:u4") : [llvm_i32_ty],
0179     !eq(gft,"m16n8k32:b:s4") : [llvm_i32_ty],
0180     !eq(gft,"m16n8k32:c:s32") : !listsplat(llvm_i32_ty, 4),  // same key and value as the u8/s8 m16n8k32 entry above, which matches first
0181     !eq(gft,"m16n8k32:d:s32") : !listsplat(llvm_i32_ty, 4),  // likewise a repeat of the earlier m16n8k32:d:s32 entry
0182 
0183     !eq(gft,"m16n8k64:a:u4") : !listsplat(llvm_i32_ty, 4),
0184     !eq(gft,"m16n8k64:a:s4") : !listsplat(llvm_i32_ty, 4),
0185     !eq(gft,"m16n8k64:b:u4") : !listsplat(llvm_i32_ty, 2),
0186     !eq(gft,"m16n8k64:b:s4") : !listsplat(llvm_i32_ty, 2),
0187     !eq(gft,"m16n8k64:c:s32") : !listsplat(llvm_i32_ty, 4),
0188     !eq(gft,"m16n8k64:d:s32") : !listsplat(llvm_i32_ty, 4),
0189 
0190     // wmma/mma b1 -> s32 @ m8n8k128(b1)
0191     !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty],
0192     !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty],
0193     !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2),
0194     !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2),
0195 
0196     !eq(gft,"m16n8k128:a:b1") : !listsplat(llvm_i32_ty, 2),
0197     !eq(gft,"m16n8k128:b:b1") : [llvm_i32_ty],
0198     !eq(gft,"m16n8k128:c:s32") : !listsplat(llvm_i32_ty, 4),
0199     !eq(gft,"m16n8k128:d:s32") : !listsplat(llvm_i32_ty, 4),
0200 
0201     !eq(gft,"m16n8k256:a:b1") : !listsplat(llvm_i32_ty, 4),
0202     !eq(gft,"m16n8k256:b:b1") : !listsplat(llvm_i32_ty, 2),
0203     !eq(gft,"m16n8k256:c:s32") : !listsplat(llvm_i32_ty, 4),
0204     !eq(gft,"m16n8k256:d:s32") : !listsplat(llvm_i32_ty, 4),
0205 
0206     // ldmatrix b16 -> s32 @ m8n8
0207     !eq(gft,"m8n8:x1:b16") : !listsplat(llvm_i32_ty, 1),
0208     !eq(gft,"m8n8:x2:b16") : !listsplat(llvm_i32_ty, 2),
0209     !eq(gft,"m8n8:x4:b16") : !listsplat(llvm_i32_ty, 4),
0210   );
0211 }
0212 // Builds the intrinsic name (intr) and record name (record) for WMMA operation Op on fragment Frag.
0213 class WMMA_NAME_LDST<string Op, WMMA_REGS Frag, string Layout, int WithStride> {
0214   string intr = "llvm.nvvm.wmma."
0215                 # Frag.geom
0216                 # "." # Op
0217                 # "." # Frag.frag
0218                 # "." # Layout
0219                 # !if(WithStride, ".stride", "")
0220                 # "." # Frag.ptx_elt_type
0221                 ;
0222   // TODO(tra): record name should ideally use the same field order as the intrinsic.
0223   // E.g. string record = !subst("llvm", "int",
0224   //                      !subst(".", "_", intr));
0225   string record = "int_nvvm_wmma_"  // NB: element type precedes layout/stride here, unlike in intr
0226                 # Frag.geom
0227                 # "_" # Op
0228                 # "_" # Frag.frag
0229                 # "_" # Frag.ptx_elt_type
0230                 # "_" # Layout
0231                 # !if(WithStride, "_stride", "");
0232 }
0233 
0234 class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
0235   // Fragments whose element types identify the op: f16 ops use the result and
0236   // accumulator (D, C); all other ops use the inputs, listing B only when
0237   // its element type differs from A's.
0238   list<WMMA_REGS> id_frags =
0239      !if(!eq(A.ptx_elt_type, "f16"), [D, C],
0240          !if(!eq(A.ptx_elt_type, B.ptx_elt_type), [A], [A, B]));
0241   // Appends "." followed by the element type for each identifying fragment.
0242   string ret = !foldl("", id_frags, acc, frag, !strconcat(acc, ".", frag.ptx_elt_type));
0243 }
0244 // Builds the intrinsic name (llvm) and record name (record) of a WMMA mma op from its layouts, options and fragments.
0245 class WMMA_NAME<string ALayout, string BLayout, int Satfinite, string Rnd, string b1op,
0246                 WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
0247   string signature = MMA_SIGNATURE<A, B, C, D>.ret;  // ".<type>" for each identifying fragment
0248   string llvm = "llvm.nvvm.wmma."
0249                 # A.geom
0250                 # ".mma"
0251                 # b1op
0252                 # "." # ALayout
0253                 # "." # BLayout
0254                 # !if(!ne(Rnd, ""), !strconcat(".", Rnd), "")  // ".<Rnd>" only when Rnd is non-empty
0255                 # signature
0256                 # !if(Satfinite, ".satfinite", "");  // here .satfinite follows the signature
0257 
0258   string record = !subst(".", "_",  // llvm with "llvm." -> "int_" and every "." -> "_"
0259                   !subst("llvm.", "int_", llvm));
0260 }
0261 // Builds the intrinsic name (llvm) and record name (record) of an MMA op from its layouts, options and fragments.
0262 class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op,
0263                WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> {
0264   string signature = MMA_SIGNATURE<A, B, C, D>.ret;  // ".<type>" for each identifying fragment
0265   string llvm = "llvm.nvvm.mma"
0266                 # b1op
0267                 # "." # A.geom
0268                 # "." # ALayout
0269                 # "." # BLayout
0270                 # !if(Satfinite, ".satfinite", "")  // here .satfinite precedes the signature
0271                 # signature;
0272   string record = !subst(".", "_",  // llvm with "llvm." -> "int_" and every "." -> "_"
0273                   !subst("llvm.", "int_", llvm));
0274 }
0275 
0276 class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
0277   // llvm.nvvm.ldmatrix.sync.aligned.<geom>.<frag>[.trans].<ptx_elt_type>
0278   string intr = !strconcat("llvm.nvvm.ldmatrix.sync.aligned",
0279                            ".", Frag.geom,
0280                            ".", Frag.frag,
0281                            !if(Trans, ".trans", ""),
0282                            ".", Frag.ptx_elt_type);
0283   // Record name: intr with "llvm." replaced by "int_" and every "." by "_".
0284   string record = !subst(".", "_", !subst("llvm.", "int_", intr));
0285 }
0286 
0287 // Generates list of 4-tuples of WMMA_REGS ([a, b, c, d]) representing a valid MMA op.
0288 //   Geom: list of supported geometries.
0289 //   TypeN: list of PTX element types allowed for the corresponding fragment.
0290 //   TypeB and TypeD may be empty, in which case B uses TypeA's type and D uses TypeC's.
0291 class MMA_OPS<list<string> Geom, list<string> TypeA, list<string> TypeB,
0292             list<string> TypeC, list<string> TypeD> {
0293   list<list<WMMA_REGS>> ret =  // nested folds enumerate every geom x type_a x type_b x type_c x type_d combination
0294      !foldl([]<list<WMMA_REGS>>, Geom, t1, geom, !listconcat(t1,
0295      !foldl([]<list<WMMA_REGS>>, TypeA, t2, type_a, !listconcat(t2,
0296      !foldl([]<list<WMMA_REGS>>, !if(!size(TypeB), TypeB, [type_a]), t3, type_b, !listconcat(t3,
0297      !foldl([]<list<WMMA_REGS>>, TypeC, t4, type_c, !listconcat(t4,
0298      !foldl([]<list<WMMA_REGS>>, !if(!size(TypeD), TypeD, [type_c]), t5, type_d, !listconcat(t5,
0299             [[WMMA_REGS<geom, "a", type_a>,
0300               WMMA_REGS<geom, "b", type_b>,
0301               WMMA_REGS<geom, "c", type_c>,
0302               WMMA_REGS<geom, "d", type_d>]]))))))))));
0303    // Debugging aid for readable representation of the list above.
0304    list<list<string>> ops = !foreach(x, ret, [x[0].gft, x[1].gft, x[2].gft, x[3].gft]);
0305 }
0306 // Generates a flat list of WMMA_REGS, one for every combination of geometry, fragment and element type.
0307 class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
0308   list<WMMA_REGS> ret =  // iteration order: Geom outermost, then Frags, then Types
0309      !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
0310      !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
0311      !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
0312             [WMMA_REGS<geom, frag, type>]))))));
0313    // Debugging aid for readable representation of the list above.
0314    list<string> ops = !foreach(x, ret, x.gft);
0315 }
0316 // Generates a flat list of WMMA_REGS for ldmatrix; the body is the same as MMA_LDST_OPS.
0317 class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
0318   list<WMMA_REGS> ret =  // iteration order: Geom outermost, then Frags, then Types
0319      !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
0320      !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
0321      !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
0322             [WMMA_REGS<geom, frag, type>]))))));
0323    // Debugging aid for readable representation of the list above.
0324    list<string> ops = !foreach(x, ret, x.gft);
0325 }
0326 
0327 // Creates the lists of valid combinations of fragments. These are the main lists
0328 // that drive generation of the corresponding intrinsics and instructions.
0329 class NVVM_MMA_OPS {  // *_wmma_ops / *_mma_ops entries are [a, b, c, d] fragment tuples built by MMA_OPS
0330   list<list<WMMA_REGS>> tf32_wmma_ops = MMA_OPS<
0331             ["m16n16k8"],
0332             ["tf32"], [], ["f32"], []>.ret;
0333   list<list<WMMA_REGS>> bf16_wmma_ops = MMA_OPS<
0334             ["m16n16k16", "m32n8k16", "m8n32k16"],
0335             ["bf16"], [], ["f32"], []>.ret;
0336   list<list<WMMA_REGS>> f64_wmma_ops = MMA_OPS<
0337             ["m8n8k4"],
0338             ["f64"], [], ["f64"], []>.ret;
0339   list<list<WMMA_REGS>> fp_wmma_ops = MMA_OPS<
0340             ["m16n16k16", "m32n8k16", "m8n32k16"],
0341             ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
0342   list<list<WMMA_REGS>> int_wmma_ops = MMA_OPS<
0343             ["m16n16k16", "m32n8k16", "m8n32k16"],
0344             ["s8", "u8"], [], ["s32"], []>.ret;
0345   list<list<WMMA_REGS>> subint_wmma_ops = MMA_OPS<
0346             ["m8n8k32"],
0347             ["s4", "u4"], [], ["s32"], []>.ret;
0348   list<list<WMMA_REGS>> bit_wmma_ops = MMA_OPS<
0349             ["m8n8k128"],
0350             ["b1"], [], ["s32"], []>.ret;
0351   list<list<WMMA_REGS>> all_wmma_ops = !listconcat(  // union of all WMMA op lists above
0352             tf32_wmma_ops, bf16_wmma_ops, f64_wmma_ops,
0353             fp_wmma_ops, int_wmma_ops, subint_wmma_ops, bit_wmma_ops);
0354 
0355   list<list<WMMA_REGS>> tf32_mma_ops = MMA_OPS<
0356             ["m16n8k4", "m16n8k8"],
0357             ["tf32"], [], ["f32"], []>.ret;
0358   list<list<WMMA_REGS>> bf16_mma_ops = MMA_OPS<
0359             ["m16n8k16", "m16n8k8"],
0360             ["bf16"], [], ["f32"], []>.ret;
0361   list<list<WMMA_REGS>> f64_mma_ops = MMA_OPS<
0362             ["m8n8k4"],
0363             ["f64"], [], ["f64"], []>.ret;
0364   list<list<WMMA_REGS>> fp_mma_ops = MMA_OPS<
0365             ["m8n8k4", "m16n8k8", "m16n8k16"],
0366             ["f16"], [], ["f16", "f32"], ["f16", "f32"]>.ret;
0367   list<list<WMMA_REGS>> int_mma_ops = MMA_OPS<  // unlike int_wmma_ops, A and B types vary independently
0368             ["m8n8k16", "m16n8k16", "m16n8k32"],
0369             ["s8", "u8"], ["s8", "u8"], ["s32"], []>.ret;
0370   list<list<WMMA_REGS>> subint_mma_ops = MMA_OPS<  // A and B types vary independently here as well
0371             ["m8n8k32", "m16n8k32", "m16n8k64"],
0372             ["s4", "u4"], ["s4", "u4"], ["s32"], []>.ret;
0373   list<list<WMMA_REGS>> bit_mma_ops = MMA_OPS<
0374             ["m8n8k128", "m16n8k128", "m16n8k256"],
0375             ["b1"], [], ["s32"], []>.ret;
0376   list<list<WMMA_REGS>> all_mma_ops = !listconcat(  // union of all MMA op lists above
0377             tf32_mma_ops, bf16_mma_ops, f64_mma_ops,
0378             fp_mma_ops, int_mma_ops, subint_mma_ops, bit_mma_ops);
0379 
0380   list<WMMA_REGS> ldst_ab_ops = MMA_LDST_OPS<
0381             ["m16n16k16", "m32n8k16", "m8n32k16"],
0382             ["a", "b"], ["f16", "u8", "s8", "bf16"]>.ret;
0383   list<WMMA_REGS> ldst_cd_ops = MMA_LDST_OPS<
0384             ["m16n16k16", "m32n8k16", "m8n32k16"],
0385             ["c", "d"], ["f16", "f32", "s32"]>.ret;
0386   list<WMMA_REGS> ldst_tf32_ab_ops = MMA_LDST_OPS<
0387             ["m16n16k8"],
0388             ["a", "b"], ["tf32"]>.ret;
0389   list<WMMA_REGS> ldst_tf32_cd_ops = MMA_LDST_OPS<
0390             ["m16n16k8"],
0391             ["c", "d"], ["f32"]>.ret;
0392   list<WMMA_REGS> ldst_f64_abcd_ops = MMA_LDST_OPS<
0393             ["m8n8k4"],
0394             ["a", "b", "c", "d"], ["f64"]>.ret;
0395   list<WMMA_REGS> ldst_subint_ab_ops = MMA_LDST_OPS<
0396             ["m8n8k32"], ["a", "b"], ["s4","u4"]>.ret;
0397   list<WMMA_REGS> ldst_bit_ab_ops = MMA_LDST_OPS<
0398             ["m8n8k128"], ["a", "b"], ["b1"]>.ret;
0399   list<WMMA_REGS> ldst_subint_cd_ops = MMA_LDST_OPS<  // despite the name, also covers the b1 geometry m8n8k128
0400             ["m8n8k32", "m8n8k128"],  ["c", "d"], ["s32"]>.ret;
0401   list<WMMA_REGS> all_ldst_ops = !listconcat(ldst_ab_ops, ldst_cd_ops,
0402                                              ldst_tf32_ab_ops,
0403                                              ldst_tf32_cd_ops,
0404                                              ldst_f64_abcd_ops,
0405                                              ldst_subint_ab_ops,
0406                                              ldst_bit_ab_ops,
0407                                              ldst_subint_cd_ops);
0408   // Separate A/B/C fragments (loads) from D (stores).
0409   list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
0410   list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
0411 
0412   list<WMMA_REGS> ldmatrix_b16_ops = LDMATRIX_OPS<
0413     ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
0414   list<WMMA_REGS> all_ldmatrix_ops = ldmatrix_b16_ops;  // b16 is the only ldmatrix element type listed
0415 }
0416 
0417 def NVVM_MMA_OPS : NVVM_MMA_OPS;  // record of the same name, so the lists can be read as NVVM_MMA_OPS.<field>
0418 
0419 // Tells whether a WMMA load/store op exists for the given fragment and
0420 // layout: ret is true for supported combinations and false otherwise.
0421 // E.g.
0422 // if NVVM_WMMA_LDST_SUPPORTED<...>.ret then
0423 //   def : FOO<>; // The record will only be defined for supported ops.
0424 //
0425 class NVVM_WMMA_LDST_SUPPORTED<WMMA_REGS frag, string layout> {
0426   string f = frag.frag;
0427   string t = frag.ptx_elt_type;
0428 
0429   // Sub-byte element types (s4/u4/b1) can only be loaded or stored with a
0430   // row-layout A fragment and a column-layout B fragment. Every other
0431   // fragment/type/layout combination is accepted.
0432   bit ret = !if(
0433     !and(!or(!eq(t, "s4"),
0434              !eq(t, "u4"),
0435              !eq(t, "b1")),
0436          !or(!and(!eq(f, "a"), !ne(layout, "row")),
0437              !and(!eq(f, "b"), !ne(layout, "col")))),
0438     false,
0439     true
0440   );
0441 }
0442 
0443 // Returns true if this combination of layout/satf/rnd for WMMA ops is
0444 // supported; false otherwise.
0445 // E.g.
0446 // if NVVM_WMMA_SUPPORTED<...>.ret then
0447 //   def : FOO<>; // The record will only be defined for supported ops.
0448 //
0449 class NVVM_WMMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, int satf, string rnd> {
0450   // WMMA ops check both layouts.
0451   string layout = layout_a # ":" # layout_b;
0452   string t = frags[0].ptx_elt_type;  // element type of the first fragment (presumably A; confirm against callers)
0453 
0454   bit ret = !cond(  // cases are tried in order; the first one that holds decides
0455     // only f64 wmma functions support rnd options
0456     // any non f64 type that uses a rnd value is invalid
0457     !and(!ne(t, "f64"), !ne(rnd, "")) : false,
0458 
0459     // satf is only valid for select types (s8, u8, s4, u4 and f16)
0460     !and(!eq(satf, 1),
0461          !ne(t, "s8"),
0462          !ne(t, "u8"),
0463          !ne(t, "s4"),
0464          !ne(t, "u4"),
0465          !ne(t, "f16")): false,
0466 
0467     // Sub-int (s4/u4/b1) wmma requires row/column layout
0468     !and(!or(!eq(t, "s4"),
0469              !eq(t, "u4"),
0470              !eq(t, "b1")),
0471          !ne(layout, "row:col")) : false,
0472     true: true
0473   );
0474 }
0475 
0476 class NVVM_MMA_B1OPS<list<WMMA_REGS> frags> {
0477   // When the first fragment's element type is b1, two b1op name suffixes are
0478   // produced; every other element type gets a single empty suffix.
0479   list<string> ret = !if(!eq(frags[0].ptx_elt_type, "b1"),
0480                          [".xor.popc", ".and.popc"], [""]);
0481 }
0482 
0483 // Returns true if this combination of layout/satf for MMA ops is supported;
0484 // false otherwise.
0485 // E.g.
0486 // if NVVM_MMA_SUPPORTED<...>.ret then
0487 //   def : FOO<>; // The record will only be defined for supported ops.
0488 // frags holds the A, B, C and D fragments, in that order.
0489 class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b, int satf> {
0490   // MMA ops check both layouts.
0491   string layout = layout_a # ":" # layout_b;
0492   string a_type = frags[0].ptx_elt_type;
0493   string b_type = frags[1].ptx_elt_type;
0494   string c_type = frags[2].ptx_elt_type;
0495   string d_type = frags[3].ptx_elt_type;
0496   string geom = frags[0].geom;
0497 
0498   // gcd is a shortcut used to identify instructions that depend on
0499   // geom+frag_c+frag_d.
0500   string gcd = geom # ":" # c_type # d_type;
0501   bit ret = !cond(
0502 
0503     // Limit satf to valid types (A of type s8, u8, s4 or u4)
0504     !and(!eq(satf, 1),
0505          !ne(a_type, "s8"),
0506          !ne(a_type, "u8"),
0507          !ne(a_type, "s4"),
0508          !ne(a_type, "u4")): false,
0509 
0510     // m8n8k4 has no C=f32 D=f16 variant.
0511     !eq(gcd, "m8n8k4:f32f16"): false,
0512 
0513     // only m8n8k4 for f16 does not require row:col layout
0514     !and(!ne(layout, "row:col"),
0515          !or(!ne(geom, "m8n8k4"),
0516              !ne(a_type, "f16"))) : false,
0517 
0518     // m16n8k8 requires A and B to be the same type and C and D to be the same
0519     // type.
0520     !and(!eq(geom, "m16n8k8"),
0521          !or(!ne(a_type, b_type),
0522              !ne(c_type, d_type))): false,
0527 
0528     // All other are OK.
0529     true: true
0530   );
0531 }
0532 
0533 // Tells whether ldmatrix ops support the given fragment: ret is true for a
0534 // supported fragment and false otherwise.
0535 // E.g.
0536 // if NVVM_LDMATRIX_SUPPORTED<...>.ret then
0537 //   def : FOO<>; // The record will only be defined for supported ops.
0538 //
0539 class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag> {
0540   string g = frag.geom;
0541   string t = frag.ptx_elt_type;
0542 
0543   // Only the m8n8 geometry with b16 elements is currently supported; every
0544   // other geometry/type combination is rejected.
0545   bit ret = !if(!and(!eq(g, "m8n8"), !eq(t, "b16")),
0546                 true,
0547                 false);
0548 }
0549 // Derives the names and the operand/result types of one shfl intrinsic variant from its sync/mode/type/return_pred flags.
0550 class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
0551   string Suffix = !if(sync, "sync_", "")  // [sync_]<mode>_<type>[p]
0552                   # mode # "_"
0553                   # type
0554                   # !if(return_pred, "p", "");
0555 
0556   string Name = "int_nvvm_shfl_" # Suffix;
0557   string Builtin = "__nvvm_shfl_" # Suffix;
0558   string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix);  // same suffix with "_" turned into "."
0559   bit withGccBuiltin = !not(return_pred);  // exact complement of withoutGccBuiltin
0560   bit withoutGccBuiltin = return_pred;
0561   LLVMType OpType = !cond(  // only "i32" and "f32" are handled; there is no default case
0562     !eq(type,"i32"): llvm_i32_ty,
0563     !eq(type,"f32"): llvm_float_ty);
0564   list<LLVMType> RetTy = !if(return_pred, [OpType, llvm_i1_ty], [OpType]);  // extra i1 result when return_pred is set
0565   list<LLVMType> ArgsTy = !if(sync,
0566     [llvm_i32_ty, OpType, llvm_i32_ty, llvm_i32_ty],  // sync variants take one extra leading i32
0567     [OpType, llvm_i32_ty, llvm_i32_ty]);
0568 }
0569 // Computes the record name, argument types and properties of the cp.async.bulk.tensor g2s intrinsic for a given dim and mode.
0570 class CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, string mode> {
0571   string Name = "int_nvvm_cp_async_bulk_tensor_g2s_" # mode # "_" # dim # "d";
0572 
0573   bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0);
0574   int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0);  // dim - 2 offsets in im2col mode, none otherwise
0575   list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
0576   list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0577   list<LLVMType> ArgsTy = !listconcat(
0578                           [llvm_shared_ptr_ty,  // dst_smem_ptr
0579                            llvm_shared_ptr_ty,  // mbarrier_smem_ptr
0580                            llvm_ptr_ty],        // tensormap_ptr
0581                            TensorDimsTy,        // actual tensor dims
0582                            Im2ColOffsetsTy,     // im2col offsets
0583                           [llvm_i16_ty,         // cta_mask
0584                            llvm_i64_ty,         // cache_hint
0585                            llvm_i1_ty,          // Flag for cta_mask
0586                            llvm_i1_ty]          // Flag for cache_hint
0587                           );
0588 
0589   int TempFlagsStartIdx = !add(dim, 5);  // 3 pointers + dim tensor dims + cta_mask + cache_hint
0590   int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);  // index of the cta_mask flag; the cache_hint flag follows it
0591   list<IntrinsicProperty> IntrProp = [IntrConvergent,
0592         WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>,
0593         NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, NoCapture<ArgIndex<2>>,
0594         ImmArg<ArgIndex<FlagsStartIdx>>,
0595         ImmArg<ArgIndex<!add(FlagsStartIdx, 1)>>];
0596 }
0597 // Computes the record name, argument types and properties of the cp.async.bulk.tensor s2g intrinsic for a given dim and mode.
0598 class CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, string mode> {
0599   string Name = "int_nvvm_cp_async_bulk_tensor_s2g_" # mode # "_" # dim # "d";
0600 
0601   list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0602   list<LLVMType> ArgsTy = !listconcat(
0603                           [llvm_shared_ptr_ty,  // src_smem_ptr
0604                            llvm_ptr_ty],        // tensormap_ptr
0605                            TensorDimsTy,        // actual tensor dims
0606                           [llvm_i64_ty,         // cache_hint
0607                            llvm_i1_ty]          // Flag for cache_hint
0608                           );
0609   int FlagsStartIdx = !add(dim, 3);  // 2 pointers + dim tensor dims + cache_hint precede the flag
0610   list<IntrinsicProperty> IntrProp = [IntrConvergent,
0611         ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
0612         NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
0613         ImmArg<ArgIndex<FlagsStartIdx>>];
0614 }
0615 // Computes the record name, argument types and properties of the cp.async.bulk.tensor prefetch intrinsic for a given dim and mode.
0616 class CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
0617   string Name = "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # "d";
0618 
0619   bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0);
0620   int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0);  // dim - 2 offsets in im2col mode, none otherwise
0621   list<LLVMType> Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets);
0622   list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0623   list<LLVMType> ArgsTy = !listconcat(
0624                           [llvm_ptr_ty],     // tensormap_ptr
0625                            TensorDimsTy,     // actual tensor dims
0626                            Im2ColOffsetsTy,  // im2col offsets
0627                           [llvm_i64_ty,      // cache_hint
0628                            llvm_i1_ty]       // Flag for cache_hint
0629                           );
0630 
0631   int TempFlagsStartIdx = !add(dim, 2);  // 1 pointer + dim tensor dims + cache_hint
0632   int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets);  // index of the cache_hint flag
0633   list<IntrinsicProperty> IntrProp = [IntrConvergent,
0634         ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
0635         ImmArg<ArgIndex<FlagsStartIdx>>];
0636 }
0637 // Computes the record name, argument types and properties of the cp.async.bulk.tensor reduce intrinsic for a given dim, mode and op.
0638 class CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, string mode, string op> {
0639   string Suffix = op # "_" # mode # "_" # dim # "d";
0640   string Name = "int_nvvm_cp_async_bulk_tensor_reduce_" # Suffix;
0641 
0642   list<LLVMType> TensorDimsTy = !listsplat(llvm_i32_ty, dim);
0643   list<LLVMType> ArgsTy = !listconcat(
0644                           [llvm_shared_ptr_ty,  // src_smem_ptr
0645                            llvm_ptr_ty],        // tensormap_ptr
0646                            TensorDimsTy,        // actual tensor dims
0647                           [llvm_i64_ty,         // cache_hint
0648                            llvm_i1_ty]          // Flag for cache_hint
0649                           );
0650   int FlagsStartIdx = !add(dim, 3);  // 2 pointers + dim tensor dims + cache_hint precede the flag
0651   list<IntrinsicProperty> IntrProp = [IntrConvergent,
0652         ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>,
0653         NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
0654         ImmArg<ArgIndex<FlagsStartIdx>>];
0655 }
0656 
0657 let TargetPrefix = "nvvm" in {
0658   def int_nvvm_prmt : ClangBuiltin<"__nvvm_prmt">,
0659       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
0660         [IntrNoMem, IntrSpeculatable]>;
0661 
0662   def int_nvvm_nanosleep : ClangBuiltin<"__nvvm_nanosleep">,
0663       DefaultAttrsIntrinsic<[], [llvm_i32_ty],
0664                             [IntrConvergent, IntrNoMem, IntrHasSideEffects]>;
0665 
0666 //
0667 // Min Max
0668 // For each of min and max: double, float, half, v2f16, bfloat and v2bf16 records. Every record
0669 // takes two operands of its result type and is IntrNoMem, IntrSpeculatable and Commutative.
0670   foreach operation = ["min", "max"] in {
0671     def int_nvvm_f # operation # _d :
0672       ClangBuiltin<!strconcat("__nvvm_f", operation, "_d")>,
0673       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0674         [IntrNoMem, IntrSpeculatable, Commutative]>;
0675 
0676     foreach variant = ["_f", "_ftz_f", "_nan_f", "_ftz_nan_f",
0677       "_xorsign_abs_f", "_ftz_xorsign_abs_f", "_nan_xorsign_abs_f",
0678       "_ftz_nan_xorsign_abs_f"] in {
0679       def int_nvvm_f # operation # variant :
0680         ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
0681         DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0682           [IntrNoMem, IntrSpeculatable, Commutative]>;
0683     }
0684     // half variants: these records carry no ClangBuiltin.
0685     foreach variant = ["_f16", "_ftz_f16", "_nan_f16", "_ftz_nan_f16",
0686       "_xorsign_abs_f16", "_ftz_xorsign_abs_f16", "_nan_xorsign_abs_f16",
0687       "_ftz_nan_xorsign_abs_f16"] in {
0688       def int_nvvm_f # operation # variant :
0689         DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty],
0690           [IntrNoMem, IntrSpeculatable, Commutative]>;
0691     }
0692     // v2f16 variants: likewise no ClangBuiltin.
0693     foreach variant = ["_f16x2", "_ftz_f16x2", "_nan_f16x2",
0694       "_ftz_nan_f16x2", "_xorsign_abs_f16x2", "_ftz_xorsign_abs_f16x2",
0695       "_nan_xorsign_abs_f16x2", "_ftz_nan_xorsign_abs_f16x2"] in {
0696       def int_nvvm_f # operation # variant :
0697         DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty],
0698           [IntrNoMem, IntrSpeculatable, Commutative]>;
0699     }
0700 
0701     foreach variant = ["_bf16", "_ftz_bf16", "_nan_bf16", "_ftz_nan_bf16",
0702       "_xorsign_abs_bf16", "_ftz_xorsign_abs_bf16", "_nan_xorsign_abs_bf16",
0703       "_ftz_nan_xorsign_abs_bf16"] in {
0704       def int_nvvm_f # operation # variant :
0705         ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
0706         DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty],
0707           [IntrNoMem, IntrSpeculatable, Commutative]>;
0708     }
0709 
0710     foreach variant = ["_bf16x2", "_ftz_bf16x2", "_nan_bf16x2",
0711       "_ftz_nan_bf16x2", "_xorsign_abs_bf16x2", "_ftz_xorsign_abs_bf16x2",
0712       "_nan_xorsign_abs_bf16x2", "_ftz_nan_xorsign_abs_bf16x2"]  in {
0713       def int_nvvm_f # operation # variant :
0714         ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
0715         DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty],
0716           [IntrNoMem, IntrSpeculatable, Commutative]>;
0717     }
0718   }
0719 
0720 //
0721 // Multiplication
0722 // Every record below takes two operands of its result type and is IntrNoMem, IntrSpeculatable, Commutative.
0723   // mulhi_*: i16 for _s/_us, i32 for _i/_ui, i64 for _ll/_ull.
0724   def int_nvvm_mulhi_s : ClangBuiltin<"__nvvm_mulhi_s">,
0725       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
0726         [IntrNoMem, IntrSpeculatable, Commutative]>;
0727   def int_nvvm_mulhi_us : ClangBuiltin<"__nvvm_mulhi_us">,
0728       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
0729         [IntrNoMem, IntrSpeculatable, Commutative]>;
0730 
0731   def int_nvvm_mulhi_i : ClangBuiltin<"__nvvm_mulhi_i">,
0732       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0733         [IntrNoMem, IntrSpeculatable, Commutative]>;
0734   def int_nvvm_mulhi_ui : ClangBuiltin<"__nvvm_mulhi_ui">,
0735       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0736         [IntrNoMem, IntrSpeculatable, Commutative]>;
0737 
0738   def int_nvvm_mulhi_ll : ClangBuiltin<"__nvvm_mulhi_ll">,
0739       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
0740         [IntrNoMem, IntrSpeculatable, Commutative]>;
0741   def int_nvvm_mulhi_ull : ClangBuiltin<"__nvvm_mulhi_ull">,
0742       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
0743         [IntrNoMem, IntrSpeculatable, Commutative]>;
0744   // mul_<rn|rz|rm|rp>[_ftz]_f: float operands and result.
0745   def int_nvvm_mul_rn_ftz_f : ClangBuiltin<"__nvvm_mul_rn_ftz_f">,
0746       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0747         [IntrNoMem, IntrSpeculatable, Commutative]>;
0748   def int_nvvm_mul_rn_f : ClangBuiltin<"__nvvm_mul_rn_f">,
0749       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0750         [IntrNoMem, IntrSpeculatable, Commutative]>;
0751   def int_nvvm_mul_rz_ftz_f : ClangBuiltin<"__nvvm_mul_rz_ftz_f">,
0752       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0753         [IntrNoMem, IntrSpeculatable, Commutative]>;
0754   def int_nvvm_mul_rz_f : ClangBuiltin<"__nvvm_mul_rz_f">,
0755       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0756         [IntrNoMem, IntrSpeculatable, Commutative]>;
0757   def int_nvvm_mul_rm_ftz_f : ClangBuiltin<"__nvvm_mul_rm_ftz_f">,
0758       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0759         [IntrNoMem, IntrSpeculatable, Commutative]>;
0760   def int_nvvm_mul_rm_f : ClangBuiltin<"__nvvm_mul_rm_f">,
0761       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0762         [IntrNoMem, IntrSpeculatable, Commutative]>;
0763   def int_nvvm_mul_rp_ftz_f : ClangBuiltin<"__nvvm_mul_rp_ftz_f">,
0764       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0765         [IntrNoMem, IntrSpeculatable, Commutative]>;
0766   def int_nvvm_mul_rp_f : ClangBuiltin<"__nvvm_mul_rp_f">,
0767       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0768         [IntrNoMem, IntrSpeculatable, Commutative]>;
0769   // mul_<rn|rz|rm|rp>_d: double operands and result.
0770   def int_nvvm_mul_rn_d : ClangBuiltin<"__nvvm_mul_rn_d">,
0771       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0772         [IntrNoMem, IntrSpeculatable, Commutative]>;
0773   def int_nvvm_mul_rz_d : ClangBuiltin<"__nvvm_mul_rz_d">,
0774       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0775         [IntrNoMem, IntrSpeculatable, Commutative]>;
0776   def int_nvvm_mul_rm_d : ClangBuiltin<"__nvvm_mul_rm_d">,
0777       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0778         [IntrNoMem, IntrSpeculatable, Commutative]>;
0779   def int_nvvm_mul_rp_d : ClangBuiltin<"__nvvm_mul_rp_d">,
0780       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0781         [IntrNoMem, IntrSpeculatable, Commutative]>;
0782   // mul24_i / mul24_ui: i32 operands and result.
0783   def int_nvvm_mul24_i : ClangBuiltin<"__nvvm_mul24_i">,
0784       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0785         [IntrNoMem, IntrSpeculatable, Commutative]>;
0786   def int_nvvm_mul24_ui : ClangBuiltin<"__nvvm_mul24_ui">,
0787       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
0788         [IntrNoMem, IntrSpeculatable, Commutative]>;
0789 
0790 //
0791 // Div
0792 // Two operands of the result type; IntrNoMem only (no IntrSpeculatable or Commutative here).
0793 
0794   def int_nvvm_div_approx_ftz_f : ClangBuiltin<"__nvvm_div_approx_ftz_f">,
0795       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0796         [IntrNoMem]>;
0797   def int_nvvm_div_approx_f : ClangBuiltin<"__nvvm_div_approx_f">,
0798       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0799         [IntrNoMem]>;
0800 
0801   def int_nvvm_div_rn_ftz_f : ClangBuiltin<"__nvvm_div_rn_ftz_f">,
0802       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0803         [IntrNoMem]>;
0804   def int_nvvm_div_rn_f : ClangBuiltin<"__nvvm_div_rn_f">,
0805       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0806         [IntrNoMem]>;
0807 
0808   def int_nvvm_div_rz_ftz_f : ClangBuiltin<"__nvvm_div_rz_ftz_f">,
0809       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0810         [IntrNoMem]>;
0811   def int_nvvm_div_rz_f : ClangBuiltin<"__nvvm_div_rz_f">,
0812       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0813         [IntrNoMem]>;
0814 
0815   def int_nvvm_div_rm_ftz_f : ClangBuiltin<"__nvvm_div_rm_ftz_f">,
0816       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0817         [IntrNoMem]>;
0818   def int_nvvm_div_rm_f : ClangBuiltin<"__nvvm_div_rm_f">,
0819       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0820         [IntrNoMem]>;
0821 
0822   def int_nvvm_div_rp_ftz_f : ClangBuiltin<"__nvvm_div_rp_ftz_f">,
0823       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0824         [IntrNoMem]>;
0825   def int_nvvm_div_rp_f : ClangBuiltin<"__nvvm_div_rp_f">,
0826       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0827         [IntrNoMem]>;
0828   // div_<rn|rz|rm|rp>_d: double operands and result.
0829   def int_nvvm_div_rn_d : ClangBuiltin<"__nvvm_div_rn_d">,
0830       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0831         [IntrNoMem]>;
0832   def int_nvvm_div_rz_d : ClangBuiltin<"__nvvm_div_rz_d">,
0833       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0834         [IntrNoMem]>;
0835   def int_nvvm_div_rm_d : ClangBuiltin<"__nvvm_div_rm_d">,
0836       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0837         [IntrNoMem]>;
0838   def int_nvvm_div_rp_d : ClangBuiltin<"__nvvm_div_rp_d">,
0839       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
0840         [IntrNoMem]>;
0841   // div_full / div_full_ftz: float operands and result.
0842   def int_nvvm_div_full : ClangBuiltin<"__nvvm_div_full">,
0843       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0844         [IntrNoMem]>;
0845   def int_nvvm_div_full_ftz : ClangBuiltin<"__nvvm_div_full_ftz">,
0846       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
0847         [IntrNoMem]>;
0848 
0849 //
0850 // Sad
0851 // Three operands of the result type (i16, i32 or i64); IntrNoMem, Commutative, IntrSpeculatable.
0852 
0853   def int_nvvm_sad_s : ClangBuiltin<"__nvvm_sad_s">,
0854       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
0855         [IntrNoMem, Commutative, IntrSpeculatable]>;
0856   def int_nvvm_sad_us : ClangBuiltin<"__nvvm_sad_us">,
0857       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
0858         [IntrNoMem, Commutative, IntrSpeculatable]>;
0859 
0860   def int_nvvm_sad_i : ClangBuiltin<"__nvvm_sad_i">,
0861       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
0862         [IntrNoMem, Commutative, IntrSpeculatable]>;
0863   def int_nvvm_sad_ui : ClangBuiltin<"__nvvm_sad_ui">,
0864       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
0865         [IntrNoMem, Commutative, IntrSpeculatable]>;
0866 
0867   def int_nvvm_sad_ll : ClangBuiltin<"__nvvm_sad_ll">,
0868       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
0869         [IntrNoMem, Commutative, IntrSpeculatable]>;
0870   def int_nvvm_sad_ull : ClangBuiltin<"__nvvm_sad_ull">,
0871       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
0872         [IntrNoMem, Commutative, IntrSpeculatable]>;
0873 
0874 
0875 //
0876 // Floor  Ceil
0877 // Unary float/double records; IntrNoMem and IntrSpeculatable.
0878 
0879   def int_nvvm_floor_ftz_f : ClangBuiltin<"__nvvm_floor_ftz_f">,
0880       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0881   def int_nvvm_floor_f : ClangBuiltin<"__nvvm_floor_f">,
0882       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0883   def int_nvvm_floor_d : ClangBuiltin<"__nvvm_floor_d">,
0884       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
0885 
0886   def int_nvvm_ceil_ftz_f : ClangBuiltin<"__nvvm_ceil_ftz_f">,
0887       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0888   def int_nvvm_ceil_f : ClangBuiltin<"__nvvm_ceil_f">,
0889       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0890   def int_nvvm_ceil_d : ClangBuiltin<"__nvvm_ceil_d">,
0891       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
0892 
0893 //
0894 // Abs
0895 // Unary float/double records; IntrNoMem and IntrSpeculatable.
0896 
0897   def int_nvvm_fabs_ftz_f : ClangBuiltin<"__nvvm_fabs_ftz_f">,
0898       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0899   def int_nvvm_fabs_f : ClangBuiltin<"__nvvm_fabs_f">,
0900       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0901   def int_nvvm_fabs_d : ClangBuiltin<"__nvvm_fabs_d">,
0902       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
0903 
0904 //
0905 // Abs, Neg bf16, bf16x2
0906 // For abs and neg: one bfloat and one v2bf16 unary record each; IntrNoMem only.
0907 
0908   foreach unary = ["abs", "neg"] in {
0909     def int_nvvm_ # unary # _bf16 :
0910       ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16")>,
0911       DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty], [IntrNoMem]>;
0912     def int_nvvm_ # unary # _bf16x2 :
0913       ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16x2")>,
0914       DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty], [IntrNoMem]>;
0915   }
0916 
0917 //
0918 // Round
0919 // Unary float/double records; IntrNoMem and IntrSpeculatable.
0920 
0921   def int_nvvm_round_ftz_f : ClangBuiltin<"__nvvm_round_ftz_f">,
0922       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0923   def int_nvvm_round_f : ClangBuiltin<"__nvvm_round_f">,
0924       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0925 
0926   def int_nvvm_round_d : ClangBuiltin<"__nvvm_round_d">,
0927       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
0928 
0929 //
0930 // Trunc
0931 // Unary float/double records; IntrNoMem and IntrSpeculatable.
0932 
0933   def int_nvvm_trunc_ftz_f : ClangBuiltin<"__nvvm_trunc_ftz_f">,
0934       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0935   def int_nvvm_trunc_f : ClangBuiltin<"__nvvm_trunc_f">,
0936       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0937 
0938   def int_nvvm_trunc_d : ClangBuiltin<"__nvvm_trunc_d">,
0939       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
0940 
0941 //
0942 // Saturate
0943 // Unary float/double records; IntrNoMem and IntrSpeculatable.
0944 
0945   def int_nvvm_saturate_ftz_f : ClangBuiltin<"__nvvm_saturate_ftz_f">,
0946       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0947   def int_nvvm_saturate_f : ClangBuiltin<"__nvvm_saturate_f">,
0948       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
0949 
0950   def int_nvvm_saturate_d : ClangBuiltin<"__nvvm_saturate_d">,
0951       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
0952 
0953 //
0954 // Exp2  Log2
0955 // Unary records; IntrNoMem only. The f16 and f16x2 ex2 records carry no ClangBuiltin.
0956 
0957   def int_nvvm_ex2_approx_ftz_f : ClangBuiltin<"__nvvm_ex2_approx_ftz_f">,
0958       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0959   def int_nvvm_ex2_approx_f : ClangBuiltin<"__nvvm_ex2_approx_f">,
0960       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0961   def int_nvvm_ex2_approx_d : ClangBuiltin<"__nvvm_ex2_approx_d">,
0962       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
0963   def int_nvvm_ex2_approx_f16 :
0964       DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
0965   def int_nvvm_ex2_approx_f16x2 :
0966       DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
0967 
0968   def int_nvvm_lg2_approx_ftz_f : ClangBuiltin<"__nvvm_lg2_approx_ftz_f">,
0969       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0970   def int_nvvm_lg2_approx_f : ClangBuiltin<"__nvvm_lg2_approx_f">,
0971       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0972   def int_nvvm_lg2_approx_d : ClangBuiltin<"__nvvm_lg2_approx_d">,
0973       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
0974 
0975 //
0976 // Sin  Cos
0977 // Unary float records; IntrNoMem only.
0978 
0979   def int_nvvm_sin_approx_ftz_f : ClangBuiltin<"__nvvm_sin_approx_ftz_f">,
0980       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0981   def int_nvvm_sin_approx_f : ClangBuiltin<"__nvvm_sin_approx_f">,
0982       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0983 
0984   def int_nvvm_cos_approx_ftz_f : ClangBuiltin<"__nvvm_cos_approx_ftz_f">,
0985       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0986   def int_nvvm_cos_approx_f : ClangBuiltin<"__nvvm_cos_approx_f">,
0987       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
0988 
0989 //
0990 // Fma
0991 // Three operands of the result type; IntrNoMem and IntrSpeculatable (not marked Commutative).
0992   // half variants: no ClangBuiltin is attached.
0993   foreach variant = ["_rn_f16", "_rn_ftz_f16", "_rn_sat_f16",
0994     "_rn_ftz_sat_f16", "_rn_relu_f16", "_rn_ftz_relu_f16"] in {
0995     def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_half_ty],
0996       [llvm_half_ty, llvm_half_ty, llvm_half_ty],
0997       [IntrNoMem, IntrSpeculatable]>;
0998   }
0999   // v2f16 variants: likewise no ClangBuiltin.
1000   foreach variant = ["_rn_f16x2", "_rn_ftz_f16x2", "_rn_sat_f16x2",
1001     "_rn_ftz_sat_f16x2", "_rn_relu_f16x2", "_rn_ftz_relu_f16x2"] in {
1002     def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_v2f16_ty],
1003       [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
1004       [IntrNoMem, IntrSpeculatable]>;
1005   }
1006   // From here on each record's ClangBuiltin name is __nvvm_fma followed by the variant suffix.
1007   foreach variant = ["_rn_bf16", "_rn_ftz_bf16", "_rn_sat_bf16",
1008     "_rn_ftz_sat_bf16", "_rn_relu_bf16", "_rn_ftz_relu_bf16"] in {
1009     def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1010       DefaultAttrsIntrinsic<[llvm_bfloat_ty],
1011         [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty],
1012         [IntrNoMem, IntrSpeculatable]>;
1013   }
1014 
1015   foreach variant = ["_rn_bf16x2", "_rn_ftz_bf16x2", "_rn_sat_bf16x2",
1016     "_rn_ftz_sat_bf16x2", "_rn_relu_bf16x2", "_rn_ftz_relu_bf16x2"] in {
1017     def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1018       DefaultAttrsIntrinsic<[llvm_v2bf16_ty],
1019         [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty],
1020         [IntrNoMem, IntrSpeculatable]>;
1021   }
1022 
1023   foreach variant = ["_rn_ftz_f", "_rn_f", "_rz_ftz_f", "_rz_f", "_rm_ftz_f",
1024     "_rm_f", "_rp_ftz_f", "_rp_f"] in {
1025     def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1026       DefaultAttrsIntrinsic<[llvm_float_ty],
1027         [llvm_float_ty, llvm_float_ty, llvm_float_ty],
1028         [IntrNoMem, IntrSpeculatable]>;
1029   }
1030 
1031   foreach variant = ["_rn_d", "_rz_d", "_rm_d", "_rp_d"] in {
1032     def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
1033       DefaultAttrsIntrinsic<[llvm_double_ty],
1034         [llvm_double_ty, llvm_double_ty, llvm_double_ty],
1035         [IntrNoMem, IntrSpeculatable]>;
1036   }
1037 
1038 //
1039 // Rcp
1040 // Unary float/double records; IntrNoMem only.
1041 
1042   def int_nvvm_rcp_rn_ftz_f : ClangBuiltin<"__nvvm_rcp_rn_ftz_f">,
1043       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1044   def int_nvvm_rcp_rn_f : ClangBuiltin<"__nvvm_rcp_rn_f">,
1045       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1046   def int_nvvm_rcp_rz_ftz_f : ClangBuiltin<"__nvvm_rcp_rz_ftz_f">,
1047       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1048   def int_nvvm_rcp_rz_f : ClangBuiltin<"__nvvm_rcp_rz_f">,
1049       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1050   def int_nvvm_rcp_rm_ftz_f : ClangBuiltin<"__nvvm_rcp_rm_ftz_f">,
1051       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1052   def int_nvvm_rcp_rm_f : ClangBuiltin<"__nvvm_rcp_rm_f">,
1053       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1054   def int_nvvm_rcp_rp_ftz_f : ClangBuiltin<"__nvvm_rcp_rp_ftz_f">,
1055       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1056   def int_nvvm_rcp_rp_f : ClangBuiltin<"__nvvm_rcp_rp_f">,
1057       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1058 
1059   def int_nvvm_rcp_rn_d : ClangBuiltin<"__nvvm_rcp_rn_d">,
1060       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1061   def int_nvvm_rcp_rz_d : ClangBuiltin<"__nvvm_rcp_rz_d">,
1062       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1063   def int_nvvm_rcp_rm_d : ClangBuiltin<"__nvvm_rcp_rm_d">,
1064       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1065   def int_nvvm_rcp_rp_d : ClangBuiltin<"__nvvm_rcp_rp_d">,
1066       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1067 
1068   def int_nvvm_rcp_approx_ftz_f : ClangBuiltin<"__nvvm_rcp_approx_ftz_f">,
1069       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1070   def int_nvvm_rcp_approx_ftz_d : ClangBuiltin<"__nvvm_rcp_approx_ftz_d">,
1071       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1072 
1073 //
1074 // Sqrt
1075 // Unary float/double records; IntrNoMem only.
1076 
1077   def int_nvvm_sqrt_f : ClangBuiltin<"__nvvm_sqrt_f">,
1078       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1079   def int_nvvm_sqrt_rn_ftz_f : ClangBuiltin<"__nvvm_sqrt_rn_ftz_f">,
1080       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1081   def int_nvvm_sqrt_rn_f : ClangBuiltin<"__nvvm_sqrt_rn_f">,
1082       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1083   def int_nvvm_sqrt_rz_ftz_f : ClangBuiltin<"__nvvm_sqrt_rz_ftz_f">,
1084       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1085   def int_nvvm_sqrt_rz_f : ClangBuiltin<"__nvvm_sqrt_rz_f">,
1086       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1087   def int_nvvm_sqrt_rm_ftz_f : ClangBuiltin<"__nvvm_sqrt_rm_ftz_f">,
1088       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1089   def int_nvvm_sqrt_rm_f : ClangBuiltin<"__nvvm_sqrt_rm_f">,
1090       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1091   def int_nvvm_sqrt_rp_ftz_f : ClangBuiltin<"__nvvm_sqrt_rp_ftz_f">,
1092       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1093   def int_nvvm_sqrt_rp_f : ClangBuiltin<"__nvvm_sqrt_rp_f">,
1094       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1095   def int_nvvm_sqrt_approx_ftz_f : ClangBuiltin<"__nvvm_sqrt_approx_ftz_f">,
1096       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1097   def int_nvvm_sqrt_approx_f : ClangBuiltin<"__nvvm_sqrt_approx_f">,
1098       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1099 
1100   def int_nvvm_sqrt_rn_d : ClangBuiltin<"__nvvm_sqrt_rn_d">,
1101       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1102   def int_nvvm_sqrt_rz_d : ClangBuiltin<"__nvvm_sqrt_rz_d">,
1103       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1104   def int_nvvm_sqrt_rm_d : ClangBuiltin<"__nvvm_sqrt_rm_d">,
1105       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1106   def int_nvvm_sqrt_rp_d : ClangBuiltin<"__nvvm_sqrt_rp_d">,
1107       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1108 
1109 //
1110 // Rsqrt
1111 // Unary float/double records; IntrNoMem only.
1112 
1113   def int_nvvm_rsqrt_approx_ftz_f : ClangBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
1114       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1115   def int_nvvm_rsqrt_approx_ftz_d : ClangBuiltin<"__nvvm_rsqrt_approx_ftz_d">,
1116       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1117   def int_nvvm_rsqrt_approx_f : ClangBuiltin<"__nvvm_rsqrt_approx_f">,
1118       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
1119   def int_nvvm_rsqrt_approx_d : ClangBuiltin<"__nvvm_rsqrt_approx_d">,
1120       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
1121 
1122 //
1123 // Add
1124 // Two operands of the result type; IntrNoMem, IntrSpeculatable and Commutative.
1125   // add_<rn|rz|rm|rp>[_ftz]_f: float operands and result.
1126   def int_nvvm_add_rn_ftz_f : ClangBuiltin<"__nvvm_add_rn_ftz_f">,
1127       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1128         [IntrNoMem, IntrSpeculatable, Commutative]>;
1129   def int_nvvm_add_rn_f : ClangBuiltin<"__nvvm_add_rn_f">,
1130       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1131         [IntrNoMem, IntrSpeculatable, Commutative]>;
1132   def int_nvvm_add_rz_ftz_f : ClangBuiltin<"__nvvm_add_rz_ftz_f">,
1133       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1134         [IntrNoMem, IntrSpeculatable, Commutative]>;
1135   def int_nvvm_add_rz_f : ClangBuiltin<"__nvvm_add_rz_f">,
1136       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1137         [IntrNoMem, IntrSpeculatable, Commutative]>;
1138   def int_nvvm_add_rm_ftz_f : ClangBuiltin<"__nvvm_add_rm_ftz_f">,
1139       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1140         [IntrNoMem, IntrSpeculatable, Commutative]>;
1141   def int_nvvm_add_rm_f : ClangBuiltin<"__nvvm_add_rm_f">,
1142       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1143         [IntrNoMem, IntrSpeculatable, Commutative]>;
1144   def int_nvvm_add_rp_ftz_f : ClangBuiltin<"__nvvm_add_rp_ftz_f">,
1145       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1146         [IntrNoMem, IntrSpeculatable, Commutative]>;
1147   def int_nvvm_add_rp_f : ClangBuiltin<"__nvvm_add_rp_f">,
1148       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
1149         [IntrNoMem, IntrSpeculatable, Commutative]>;
1150   // add_<rn|rz|rm|rp>_d: double operands and result.
1151   def int_nvvm_add_rn_d : ClangBuiltin<"__nvvm_add_rn_d">,
1152       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1153         [IntrNoMem, IntrSpeculatable, Commutative]>;
1154   def int_nvvm_add_rz_d : ClangBuiltin<"__nvvm_add_rz_d">,
1155       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1156         [IntrNoMem, IntrSpeculatable, Commutative]>;
1157   def int_nvvm_add_rm_d : ClangBuiltin<"__nvvm_add_rm_d">,
1158       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1159         [IntrNoMem, IntrSpeculatable, Commutative]>;
1160   def int_nvvm_add_rp_d : ClangBuiltin<"__nvvm_add_rp_d">,
1161       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
1162         [IntrNoMem, IntrSpeculatable, Commutative]>;
1163 
1164 //
1165 // Dot Product
1166 // One idp4a and one idp2a record per (a_type, b_type) in {s,u} x {s,u}; no ClangBuiltin attached.
1167   foreach a_type = ["s", "u"] in {
1168     foreach b_type = ["s", "u"] in {
1169       def int_nvvm_idp4a_ # a_type # _ # b_type :
1170           DefaultAttrsIntrinsic<[llvm_i32_ty],
1171               [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1172               [IntrNoMem, IntrSpeculatable]>;
1173       def int_nvvm_idp2a_ # a_type # _ # b_type :
1174           DefaultAttrsIntrinsic<[llvm_i32_ty],
1175             [llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
1176             [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>;  // operand 2 (the i1) is an immediate
1177     }
1178   }
1179 
1180 //
1181 // Funnel-shift
1182 // fshl_clamp / fshr_clamp: overloaded on one integer type shared by the result and all three operands.
1183   foreach direction = ["l", "r"] in
1184     def int_nvvm_fsh # direction # _clamp :
1185       DefaultAttrsIntrinsic<[llvm_anyint_ty],
1186         [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
1187         [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
1188 
1189 //
1190 // FLO - Find Leading One
1191 // flo_s / flo_u: i32 result from an overloaded integer operand plus an i1 that must be an immediate.
1192   foreach sign = ["s", "u"] in
1193     def int_nvvm_flo_ # sign :
1194       DefaultAttrsIntrinsic<[llvm_i32_ty],
1195         [llvm_anyint_ty, llvm_i1_ty],
1196         [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<1>>]>;
1197 
1198 //
1199 // Convert
1200 //
1201 
1202   def int_nvvm_d2f_rn_ftz : ClangBuiltin<"__nvvm_d2f_rn_ftz">,
1203       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1204   def int_nvvm_d2f_rn : ClangBuiltin<"__nvvm_d2f_rn">,
1205       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1206   def int_nvvm_d2f_rz_ftz : ClangBuiltin<"__nvvm_d2f_rz_ftz">,
1207       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1208   def int_nvvm_d2f_rz : ClangBuiltin<"__nvvm_d2f_rz">,
1209       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1210   def int_nvvm_d2f_rm_ftz : ClangBuiltin<"__nvvm_d2f_rm_ftz">,
1211       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1212   def int_nvvm_d2f_rm : ClangBuiltin<"__nvvm_d2f_rm">,
1213       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1214   def int_nvvm_d2f_rp_ftz : ClangBuiltin<"__nvvm_d2f_rp_ftz">,
1215       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1216   def int_nvvm_d2f_rp : ClangBuiltin<"__nvvm_d2f_rp">,
1217       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1218 
1219   def int_nvvm_d2i_rn : ClangBuiltin<"__nvvm_d2i_rn">,
1220       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1221   def int_nvvm_d2i_rz : ClangBuiltin<"__nvvm_d2i_rz">,
1222       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1223   def int_nvvm_d2i_rm : ClangBuiltin<"__nvvm_d2i_rm">,
1224       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1225   def int_nvvm_d2i_rp : ClangBuiltin<"__nvvm_d2i_rp">,
1226       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1227 
1228   def int_nvvm_d2ui_rn : ClangBuiltin<"__nvvm_d2ui_rn">,
1229       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1230   def int_nvvm_d2ui_rz : ClangBuiltin<"__nvvm_d2ui_rz">,
1231       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1232   def int_nvvm_d2ui_rm : ClangBuiltin<"__nvvm_d2ui_rm">,
1233       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1234   def int_nvvm_d2ui_rp : ClangBuiltin<"__nvvm_d2ui_rp">,
1235       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1236 
1237   def int_nvvm_i2d_rn : ClangBuiltin<"__nvvm_i2d_rn">,
1238       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1239   def int_nvvm_i2d_rz : ClangBuiltin<"__nvvm_i2d_rz">,
1240       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1241   def int_nvvm_i2d_rm : ClangBuiltin<"__nvvm_i2d_rm">,
1242       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1243   def int_nvvm_i2d_rp : ClangBuiltin<"__nvvm_i2d_rp">,
1244       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1245 
1246   def int_nvvm_ui2d_rn : ClangBuiltin<"__nvvm_ui2d_rn">,
1247       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1248   def int_nvvm_ui2d_rz : ClangBuiltin<"__nvvm_ui2d_rz">,
1249       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1250   def int_nvvm_ui2d_rm : ClangBuiltin<"__nvvm_ui2d_rm">,
1251       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1252   def int_nvvm_ui2d_rp : ClangBuiltin<"__nvvm_ui2d_rp">,
1253       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1254 
       // f2i family: float -> i32, IntrNoMem + IntrSpeculatable. One def per
       // rounding suffix (rn/rz/rm/rp), each with and without an _ftz suffix
       // (presumably flush-to-zero -- see the PTX ISA `cvt` modifiers).
1255   def int_nvvm_f2i_rn_ftz : ClangBuiltin<"__nvvm_f2i_rn_ftz">,
1256       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1257   def int_nvvm_f2i_rn : ClangBuiltin<"__nvvm_f2i_rn">,
1258       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1259   def int_nvvm_f2i_rz_ftz : ClangBuiltin<"__nvvm_f2i_rz_ftz">,
1260       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1261   def int_nvvm_f2i_rz : ClangBuiltin<"__nvvm_f2i_rz">,
1262       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1263   def int_nvvm_f2i_rm_ftz : ClangBuiltin<"__nvvm_f2i_rm_ftz">,
1264       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1265   def int_nvvm_f2i_rm : ClangBuiltin<"__nvvm_f2i_rm">,
1266       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1267   def int_nvvm_f2i_rp_ftz : ClangBuiltin<"__nvvm_f2i_rp_ftz">,
1268       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1269   def int_nvvm_f2i_rp : ClangBuiltin<"__nvvm_f2i_rp">,
1270       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1271 
       // f2ui family: float -> i32 with the same IR signature and attributes as
       // the f2i defs; only the name marks the result as unsigned. Same eight
       // rounding / _ftz variants.
1272   def int_nvvm_f2ui_rn_ftz : ClangBuiltin<"__nvvm_f2ui_rn_ftz">,
1273       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1274   def int_nvvm_f2ui_rn : ClangBuiltin<"__nvvm_f2ui_rn">,
1275       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1276   def int_nvvm_f2ui_rz_ftz : ClangBuiltin<"__nvvm_f2ui_rz_ftz">,
1277       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1278   def int_nvvm_f2ui_rz : ClangBuiltin<"__nvvm_f2ui_rz">,
1279       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1280   def int_nvvm_f2ui_rm_ftz : ClangBuiltin<"__nvvm_f2ui_rm_ftz">,
1281       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1282   def int_nvvm_f2ui_rm : ClangBuiltin<"__nvvm_f2ui_rm">,
1283       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1284   def int_nvvm_f2ui_rp_ftz : ClangBuiltin<"__nvvm_f2ui_rp_ftz">,
1285       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1286   def int_nvvm_f2ui_rp : ClangBuiltin<"__nvvm_f2ui_rp">,
1287       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1288 
       // i2f family: i32 -> float, IntrNoMem + IntrSpeculatable, one def per
       // rounding suffix (rn/rz/rm/rp).
1289   def int_nvvm_i2f_rn : ClangBuiltin<"__nvvm_i2f_rn">,
1290       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1291   def int_nvvm_i2f_rz : ClangBuiltin<"__nvvm_i2f_rz">,
1292       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1293   def int_nvvm_i2f_rm : ClangBuiltin<"__nvvm_i2f_rm">,
1294       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1295   def int_nvvm_i2f_rp : ClangBuiltin<"__nvvm_i2f_rp">,
1296       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1297 
       // ui2f family: i32 -> float, same IR signature and attributes as the i2f
       // defs; the unsigned interpretation lives only in the name.
1298   def int_nvvm_ui2f_rn : ClangBuiltin<"__nvvm_ui2f_rn">,
1299       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1300   def int_nvvm_ui2f_rz : ClangBuiltin<"__nvvm_ui2f_rz">,
1301       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1302   def int_nvvm_ui2f_rm : ClangBuiltin<"__nvvm_ui2f_rm">,
1303       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1304   def int_nvvm_ui2f_rp : ClangBuiltin<"__nvvm_ui2f_rp">,
1305       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
1306 
       // Builds a double from two i32 operands (IntrNoMem, IntrSpeculatable).
       // Going by the name and by the d2i_lo / d2i_hi extractors defined right
       // after it, the operands are the low and the high 32-bit half, in that
       // order. Operand order therefore matters, so this intrinsic must NOT be
       // marked Commutative: that would let optimizers swap lo and hi.
1307   def int_nvvm_lohi_i2d : ClangBuiltin<"__nvvm_lohi_i2d">,
1308       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
1309         [IntrNoMem, IntrSpeculatable]>;
1310 
       // d2i_lo / d2i_hi: double -> i32, IntrNoMem + IntrSpeculatable. By name
       // these presumably return the low / high 32 bits of the operand's bit
       // pattern rather than a numeric conversion -- confirm against the PTX
       // lowering.
1311   def int_nvvm_d2i_lo : ClangBuiltin<"__nvvm_d2i_lo">,
1312       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1313   def int_nvvm_d2i_hi : ClangBuiltin<"__nvvm_d2i_hi">,
1314       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1315 
       // f2ll family: float -> i64, IntrNoMem + IntrSpeculatable. Eight defs:
       // rounding suffix rn/rz/rm/rp, each with and without _ftz.
1316   def int_nvvm_f2ll_rn_ftz : ClangBuiltin<"__nvvm_f2ll_rn_ftz">,
1317       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1318   def int_nvvm_f2ll_rn : ClangBuiltin<"__nvvm_f2ll_rn">,
1319       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1320   def int_nvvm_f2ll_rz_ftz : ClangBuiltin<"__nvvm_f2ll_rz_ftz">,
1321       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1322   def int_nvvm_f2ll_rz : ClangBuiltin<"__nvvm_f2ll_rz">,
1323       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1324   def int_nvvm_f2ll_rm_ftz : ClangBuiltin<"__nvvm_f2ll_rm_ftz">,
1325       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1326   def int_nvvm_f2ll_rm : ClangBuiltin<"__nvvm_f2ll_rm">,
1327       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1328   def int_nvvm_f2ll_rp_ftz : ClangBuiltin<"__nvvm_f2ll_rp_ftz">,
1329       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1330   def int_nvvm_f2ll_rp : ClangBuiltin<"__nvvm_f2ll_rp">,
1331       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1332 
       // f2ull family: float -> i64 with the same IR signature and attributes as
       // the f2ll defs; only the name marks the result as unsigned.
1333   def int_nvvm_f2ull_rn_ftz : ClangBuiltin<"__nvvm_f2ull_rn_ftz">,
1334       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1335   def int_nvvm_f2ull_rn : ClangBuiltin<"__nvvm_f2ull_rn">,
1336       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1337   def int_nvvm_f2ull_rz_ftz : ClangBuiltin<"__nvvm_f2ull_rz_ftz">,
1338       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1339   def int_nvvm_f2ull_rz : ClangBuiltin<"__nvvm_f2ull_rz">,
1340       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1341   def int_nvvm_f2ull_rm_ftz : ClangBuiltin<"__nvvm_f2ull_rm_ftz">,
1342       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1343   def int_nvvm_f2ull_rm : ClangBuiltin<"__nvvm_f2ull_rm">,
1344       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1345   def int_nvvm_f2ull_rp_ftz : ClangBuiltin<"__nvvm_f2ull_rp_ftz">,
1346       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1347   def int_nvvm_f2ull_rp : ClangBuiltin<"__nvvm_f2ull_rp">,
1348       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1349 
       // d2ll family: double -> i64, IntrNoMem + IntrSpeculatable, one def per
       // rounding suffix (no _ftz variants are defined for double sources).
1350   def int_nvvm_d2ll_rn : ClangBuiltin<"__nvvm_d2ll_rn">,
1351       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1352   def int_nvvm_d2ll_rz : ClangBuiltin<"__nvvm_d2ll_rz">,
1353       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1354   def int_nvvm_d2ll_rm : ClangBuiltin<"__nvvm_d2ll_rm">,
1355       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1356   def int_nvvm_d2ll_rp : ClangBuiltin<"__nvvm_d2ll_rp">,
1357       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1358 
       // d2ull family: double -> i64, identical IR signature to the d2ll defs;
       // the unsigned result interpretation is carried only by the name.
1359   def int_nvvm_d2ull_rn : ClangBuiltin<"__nvvm_d2ull_rn">,
1360       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1361   def int_nvvm_d2ull_rz : ClangBuiltin<"__nvvm_d2ull_rz">,
1362       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1363   def int_nvvm_d2ull_rm : ClangBuiltin<"__nvvm_d2ull_rm">,
1364       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1365   def int_nvvm_d2ull_rp : ClangBuiltin<"__nvvm_d2ull_rp">,
1366       DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
1367 
       // ll2f / ull2f families: i64 -> float, IntrNoMem + IntrSpeculatable, one
       // def per rounding suffix. The signed (ll) and unsigned (ull) variants
       // share the same IR signature and differ only by name.
1368   def int_nvvm_ll2f_rn : ClangBuiltin<"__nvvm_ll2f_rn">,
1369       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1370   def int_nvvm_ll2f_rz : ClangBuiltin<"__nvvm_ll2f_rz">,
1371       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1372   def int_nvvm_ll2f_rm : ClangBuiltin<"__nvvm_ll2f_rm">,
1373       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1374   def int_nvvm_ll2f_rp : ClangBuiltin<"__nvvm_ll2f_rp">,
1375       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1376   def int_nvvm_ull2f_rn : ClangBuiltin<"__nvvm_ull2f_rn">,
1377       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1378   def int_nvvm_ull2f_rz : ClangBuiltin<"__nvvm_ull2f_rz">,
1379       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1380   def int_nvvm_ull2f_rm : ClangBuiltin<"__nvvm_ull2f_rm">,
1381       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1382   def int_nvvm_ull2f_rp : ClangBuiltin<"__nvvm_ull2f_rp">,
1383       DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1384 
       // ll2d / ull2d families: i64 -> double, IntrNoMem + IntrSpeculatable, one
       // def per rounding suffix. Signed and unsigned variants share the same IR
       // signature and differ only by name.
1385   def int_nvvm_ll2d_rn : ClangBuiltin<"__nvvm_ll2d_rn">,
1386       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1387   def int_nvvm_ll2d_rz : ClangBuiltin<"__nvvm_ll2d_rz">,
1388       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1389   def int_nvvm_ll2d_rm : ClangBuiltin<"__nvvm_ll2d_rm">,
1390       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1391   def int_nvvm_ll2d_rp : ClangBuiltin<"__nvvm_ll2d_rp">,
1392       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1393   def int_nvvm_ull2d_rn : ClangBuiltin<"__nvvm_ull2d_rn">,
1394       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1395   def int_nvvm_ull2d_rz : ClangBuiltin<"__nvvm_ull2d_rz">,
1396       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1397   def int_nvvm_ull2d_rm : ClangBuiltin<"__nvvm_ull2d_rm">,
1398       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1399   def int_nvvm_ull2d_rp : ClangBuiltin<"__nvvm_ull2d_rp">,
1400       DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
1401 
       // f2h: float -> i16, IntrNoMem + IntrSpeculatable. The result type is an
       // integer, not `half`; presumably it holds the raw fp16 bit pattern --
       // confirm against the lowering.
1402   def int_nvvm_f2h_rn_ftz : ClangBuiltin<"__nvvm_f2h_rn_ftz">,
1403       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1404   def int_nvvm_f2h_rn : ClangBuiltin<"__nvvm_f2h_rn">,
1405       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
1406 
       // bf2h: bfloat -> i16, IntrNoMem + IntrSpeculatable. As with f2h, the
       // result is typed i16 rather than `half`.
1407   def int_nvvm_bf2h_rn_ftz : ClangBuiltin<"__nvvm_bf2h_rn_ftz">,
1408       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>;
1409   def int_nvvm_bf2h_rn : ClangBuiltin<"__nvvm_bf2h_rn">,
1410       DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>;
1411 
       // ff2bf16x2 family: two float operands -> <2 x bfloat> result. Variants
       // are named by rounding suffix (rn/rz) with an optional _relu suffix.
       // All four are declared with plain Intrinsic and the same attribute list,
       // [IntrNoMem, IntrNoCallback], so that every variant is treated alike by
       // attribute-driven analyses.
1412   def int_nvvm_ff2bf16x2_rn : ClangBuiltin<"__nvvm_ff2bf16x2_rn">,
1413       Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1414   def int_nvvm_ff2bf16x2_rn_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rn_relu">,
1415       Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1416   def int_nvvm_ff2bf16x2_rz : ClangBuiltin<"__nvvm_ff2bf16x2_rz">,
1417       Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1418   def int_nvvm_ff2bf16x2_rz_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rz_relu">,
1419       Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1420 
       // ff2f16x2 family: two float operands -> <2 x half> result, attributes
       // [IntrNoMem, IntrNoCallback]. Variants: rn/rz, each with optional _relu.
1421   def int_nvvm_ff2f16x2_rn : ClangBuiltin<"__nvvm_ff2f16x2_rn">,
1422       Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1423   def int_nvvm_ff2f16x2_rn_relu : ClangBuiltin<"__nvvm_ff2f16x2_rn_relu">,
1424       Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1425   def int_nvvm_ff2f16x2_rz : ClangBuiltin<"__nvvm_ff2f16x2_rz">,
1426       Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1427   def int_nvvm_ff2f16x2_rz_relu : ClangBuiltin<"__nvvm_ff2f16x2_rz_relu">,
1428       Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1429 
       // f2bf16 family: single float -> bfloat, attributes
       // [IntrNoMem, IntrNoCallback]. Variants: rn/rz, each with optional _relu.
1430   def int_nvvm_f2bf16_rn : ClangBuiltin<"__nvvm_f2bf16_rn">,
1431       Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1432   def int_nvvm_f2bf16_rn_relu : ClangBuiltin<"__nvvm_f2bf16_rn_relu">,
1433       Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1434   def int_nvvm_f2bf16_rz : ClangBuiltin<"__nvvm_f2bf16_rz">,
1435       Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1436   def int_nvvm_f2bf16_rz_relu : ClangBuiltin<"__nvvm_f2bf16_rz_relu">,
1437        Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1438 
       // f2tf32 family: float -> i32, attributes [IntrNoMem, IntrNoCallback].
       // The result is typed as a plain i32 (presumably the tf32 encoding held
       // in a 32-bit container -- confirm). Variants combine a rounding suffix
       // (rna/rn/rz) with optional _relu and _satfinite suffixes; note that rna
       // has no _relu variants defined here.
1439   def int_nvvm_f2tf32_rna : ClangBuiltin<"__nvvm_f2tf32_rna">,
1440       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1441   def int_nvvm_f2tf32_rna_satfinite : ClangBuiltin<"__nvvm_f2tf32_rna_satfinite">,
1442       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1443   def int_nvvm_f2tf32_rn : ClangBuiltin<"__nvvm_f2tf32_rn">,
1444       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1445   def int_nvvm_f2tf32_rn_relu : ClangBuiltin<"__nvvm_f2tf32_rn_relu">,
1446       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1447   def int_nvvm_f2tf32_rn_satfinite : ClangBuiltin<"__nvvm_f2tf32_rn_satfinite">,
1448       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1449   def int_nvvm_f2tf32_rn_relu_satfinite : ClangBuiltin<"__nvvm_f2tf32_rn_relu_satfinite">,
1450       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1451   def int_nvvm_f2tf32_rz : ClangBuiltin<"__nvvm_f2tf32_rz">,
1452       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1453   def int_nvvm_f2tf32_rz_relu : ClangBuiltin<"__nvvm_f2tf32_rz_relu">,
1454       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1455   def int_nvvm_f2tf32_rz_satfinite : ClangBuiltin<"__nvvm_f2tf32_rz_satfinite">,
1456       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1457   def int_nvvm_f2tf32_rz_relu_satfinite : ClangBuiltin<"__nvvm_f2tf32_rz_relu_satfinite">,
1458       Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1459 
       // ff_to_e4m3x2 / ff_to_e5m2x2: two float operands -> i16, attributes
       // [IntrNoMem, IntrNoCallback]. By name the i16 presumably holds two
       // packed 8-bit floats in the e4m3 / e5m2 format -- confirm.
1460   def int_nvvm_ff_to_e4m3x2_rn : ClangBuiltin<"__nvvm_ff_to_e4m3x2_rn">,
1461       Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1462   def int_nvvm_ff_to_e4m3x2_rn_relu : ClangBuiltin<"__nvvm_ff_to_e4m3x2_rn_relu">,
1463       Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1464   def int_nvvm_ff_to_e5m2x2_rn : ClangBuiltin<"__nvvm_ff_to_e5m2x2_rn">,
1465       Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1466   def int_nvvm_ff_to_e5m2x2_rn_relu : ClangBuiltin<"__nvvm_ff_to_e5m2x2_rn_relu">,
1467       Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
1468 
       // f16x2_to_e4m3x2 / f16x2_to_e5m2x2: one <2 x half> operand -> i16,
       // attributes [IntrNoMem, IntrNoCallback]; rn with optional _relu.
1469   def int_nvvm_f16x2_to_e4m3x2_rn : ClangBuiltin<"__nvvm_f16x2_to_e4m3x2_rn">,
1470       Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1471   def int_nvvm_f16x2_to_e4m3x2_rn_relu : ClangBuiltin<"__nvvm_f16x2_to_e4m3x2_rn_relu">,
1472       Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1473   def int_nvvm_f16x2_to_e5m2x2_rn : ClangBuiltin<"__nvvm_f16x2_to_e5m2x2_rn">,
1474       Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1475   def int_nvvm_f16x2_to_e5m2x2_rn_relu : ClangBuiltin<"__nvvm_f16x2_to_e5m2x2_rn_relu">,
1476       Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>;
1477 
       // e4m3x2_to_f16x2 / e5m2x2_to_f16x2: the reverse direction of the
       // f16x2_to_* defs -- one i16 operand -> <2 x half>, attributes
       // [IntrNoMem, IntrNoCallback]; rn with optional _relu.
1478   def int_nvvm_e4m3x2_to_f16x2_rn : ClangBuiltin<"__nvvm_e4m3x2_to_f16x2_rn">,
1479       Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1480   def int_nvvm_e4m3x2_to_f16x2_rn_relu : ClangBuiltin<"__nvvm_e4m3x2_to_f16x2_rn_relu">,
1481       Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1482   def int_nvvm_e5m2x2_to_f16x2_rn : ClangBuiltin<"__nvvm_e5m2x2_to_f16x2_rn">,
1483       Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1484   def int_nvvm_e5m2x2_to_f16x2_rn_relu : ClangBuiltin<"__nvvm_e5m2x2_to_f16x2_rn_relu">,
1485       Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>;
1486 
1487 // FNS
     // Single intrinsic bound to the __nvvm_fns builtin: three i32 operands and
     // an i32 result, marked IntrNoMem. The operand meanings are not visible
     // here; see the PTX ISA `fns` instruction.
1488 
1489   def int_nvvm_fns : ClangBuiltin<"__nvvm_fns">,
1490       DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1491                 [IntrNoMem]>;
1492 
1493 // Atomics not available as llvm intrinsics.
     // Both take an overloaded pointer (llvm_anyptr_ty) plus an i32 and return
     // i32. They may only access memory through their pointer argument
     // (IntrArgMemOnly) and do not capture it (NoCapture on operand 0). No
     // ClangBuiltin is attached to these two.
1494   def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
1495           [llvm_anyptr_ty, llvm_i32_ty],
1496                                       [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1497   def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
1498           [llvm_anyptr_ty, llvm_i32_ty],
1499                                       [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1500 
       // Signature templates for the scoped atomics defined further down.
       //   SCOPED_ATOMIC2_impl<elty>: elty op(ptr, elty)        -- one value operand
       //   SCOPED_ATOMIC3_impl<elty>: elty op(ptr, elty, elty)  -- two value operands
       // `elty` is the (possibly overloaded) result type; LLVMMatchType<0> forces
       // every value operand to have that same type. The pointer operand is
       // overloaded, not captured, and is the only memory the intrinsic may touch.
1501   class SCOPED_ATOMIC2_impl<LLVMType elty>
1502         : Intrinsic<[elty],
1503           [llvm_anyptr_ty, LLVMMatchType<0>],
1504           [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1505   class SCOPED_ATOMIC3_impl<LLVMType elty>
1506         : Intrinsic<[elty],
1507           [llvm_anyptr_ty, LLVMMatchType<0>,
1508            LLVMMatchType<0>],
1509           [IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
1510 
       // Each multiclass stamps out one record per scope suffix (_cta and _sys)
       // from the matching SCOPED_ATOMIC*_impl signature. Record names are formed
       // by appending the suffix to the defm name, e.g.
       //   defm int_nvvm_atomic_inc_gen_i -> int_nvvm_atomic_inc_gen_i_cta / _sys.
1511   multiclass PTXAtomicWithScope2<LLVMType elty> {
1512     def _cta : SCOPED_ATOMIC2_impl<elty>;
1513     def _sys : SCOPED_ATOMIC2_impl<elty>;
1514   }
1515   multiclass PTXAtomicWithScope3<LLVMType elty> {
1516     def _cta : SCOPED_ATOMIC3_impl<elty>;
1517     def _sys : SCOPED_ATOMIC3_impl<elty>;
1518   }
       // Float-and-int wrapper: adds an _f (any float) and an _i (any int) layer
       // in front of the scope suffix, giving four records per defm
       // (<name>_f_cta, <name>_f_sys, <name>_i_cta, <name>_i_sys).
1519   multiclass PTXAtomicWithScope2_fi {
1520     defm _f: PTXAtomicWithScope2<llvm_anyfloat_ty>;
1521     defm _i: PTXAtomicWithScope2<llvm_anyint_ty>;
1522   }
       // Instantiations. Only `add` gets float variants; every other operation is
       // integer-only. `cas` uses the two-value-operand (Scope3) signature.
1523   defm int_nvvm_atomic_add_gen   : PTXAtomicWithScope2_fi;
1524   defm int_nvvm_atomic_inc_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1525   defm int_nvvm_atomic_dec_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1526   defm int_nvvm_atomic_exch_gen_i: PTXAtomicWithScope2<llvm_anyint_ty>;
1527   defm int_nvvm_atomic_xor_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1528   defm int_nvvm_atomic_max_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1529   defm int_nvvm_atomic_min_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1530   defm int_nvvm_atomic_or_gen_i  : PTXAtomicWithScope2<llvm_anyint_ty>;
1531   defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
1532   defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3<llvm_anyint_ty>;
1533 
1534 // Bar.Sync
     // All barrier intrinsics in this group are IntrConvergent + IntrNoCallback
     // and deliberately carry no memory attribute.
1535 
1536   // The builtin for "bar.sync 0" is called __syncthreads.  Unlike most of the
1537   // intrinsics in this file, this one is a user-facing API.
1538   def int_nvvm_barrier0 : ClangBuiltin<"__syncthreads">,
1539       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1540   // Synchronize all threads in the CTA at barrier 'n'.
1541   def int_nvvm_barrier_n : ClangBuiltin<"__nvvm_bar_n">,
1542       Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1543   // Synchronize 'm', a multiple of warp size, (arg 2) threads in
1544   // the CTA at barrier 'n' (arg 1).
1545   def int_nvvm_barrier : ClangBuiltin<"__nvvm_bar">,
1546       Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
       // barrier0 variants that take one i32 and return an i32. By name they
       // presumably reduce a per-thread predicate across the CTA (population
       // count / and / or) -- see PTX `bar.red`; confirm.
1547   def int_nvvm_barrier0_popc : ClangBuiltin<"__nvvm_bar0_popc">,
1548       Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1549   def int_nvvm_barrier0_and : ClangBuiltin<"__nvvm_bar0_and">,
1550       Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1551   def int_nvvm_barrier0_or : ClangBuiltin<"__nvvm_bar0_or">,
1552       Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
1553 
       // bar_sync / bar_warp_sync: one i32 operand, no result, convergent. (The
       // operand is presumably a barrier id for bar_sync and a lane mask for
       // bar_warp_sync -- confirm against the PTX ISA.)
1554   def int_nvvm_bar_sync :
1555       Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1556       ClangBuiltin<"__nvvm_bar_sync">;
1557   def int_nvvm_bar_warp_sync :
1558       Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1559       ClangBuiltin<"__nvvm_bar_warp_sync">;
1560 
1561   // barrier.sync id[, cnt]
       // The one-operand form takes only `id`; the _cnt form adds the optional
       // `cnt` operand. Both are i32-only, return nothing, and are convergent.
1562   def int_nvvm_barrier_sync :
1563       Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1564       ClangBuiltin<"__nvvm_barrier_sync">;
1565   def int_nvvm_barrier_sync_cnt :
1566       Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
1567       ClangBuiltin<"__nvvm_barrier_sync_cnt">;
1568 
1569   // barrier.cluster.[wait, arrive, arrive.relaxed]
       // No operands, no result, convergent. No ClangBuiltin is attached, so
       // these are reachable only as LLVM intrinsics from this file.
1570   def int_nvvm_barrier_cluster_arrive :
1571       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1572   def int_nvvm_barrier_cluster_arrive_relaxed :
1573       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1574   def int_nvvm_barrier_cluster_wait :
1575       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1576 
1577   // 'aligned' versions of the above barrier.cluster.* intrinsics
       // Same signatures and attributes as the non-aligned defs.
1578   def int_nvvm_barrier_cluster_arrive_aligned :
1579       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1580   def int_nvvm_barrier_cluster_arrive_relaxed_aligned :
1581       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1582   def int_nvvm_barrier_cluster_wait_aligned :
1583       Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
1584 
1585   // Membar
       // Memory barriers at cta / gl / sys scope plus a cluster-scope fence. They
       // take no operands and return nothing; the only attribute is
       // IntrNoCallback, i.e. no memory attribute is given, so they are treated
       // as possibly reading and writing any memory. Unlike the barriers above
       // they are not marked IntrConvergent. fence_sc_cluster has no ClangBuiltin.
1586   def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
1587       Intrinsic<[], [], [IntrNoCallback]>;
1588   def int_nvvm_membar_gl : ClangBuiltin<"__nvvm_membar_gl">,
1589       Intrinsic<[], [], [IntrNoCallback]>;
1590   def int_nvvm_membar_sys : ClangBuiltin<"__nvvm_membar_sys">,
1591       Intrinsic<[], [], [IntrNoCallback]>;
1592   def int_nvvm_fence_sc_cluster:
1593       Intrinsic<[], [], [IntrNoCallback]>;
1594 
1595 // Proxy fence (uni-directional)
     // For each scope in {cta, cluster, gpu, sys} this loop defines a release and
     // an acquire fence. The record name and the explicit LLVM name (4th
     // template argument) are both built by pasting the scope string on the end.
1596 foreach scope = ["cta", "cluster", "gpu", "sys"] in {
1597 
       // Release: no operands, no result.
1598   def int_nvvm_fence_proxy_tensormap_generic_release_ # scope:
1599         Intrinsic<[], [], [IntrNoCallback],
1600         "llvm.nvvm.fence.proxy.tensormap_generic.release." # scope>;
1601 
       // Acquire: takes a pointer and an i32 that must be an immediate
       // (ImmArg on operand 1); may only access memory through its argument.
1602   def int_nvvm_fence_proxy_tensormap_generic_acquire_ # scope:
1603         Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
1604                   [IntrNoCallback, IntrArgMemOnly, ImmArg<ArgIndex<1>>],
1605                   "llvm.nvvm.fence.proxy.tensormap_generic.acquire." # scope>;
1606 
1607 }
1608 
1609 // Async Copy
     // cp.async mbarrier-arrive intrinsics: one pointer operand, no result,
     // convergent. Each operation comes in a generic-pointer form (llvm_ptr_ty)
     // and a `_shared` form whose operand is typed llvm_shared_ptr_ty; the
     // `_noinc` pair has the same signatures.
1610 def int_nvvm_cp_async_mbarrier_arrive :
1611     ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive">,
1612     Intrinsic<[],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1613 def int_nvvm_cp_async_mbarrier_arrive_shared :
1614     ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">,
1615     Intrinsic<[],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1616 def int_nvvm_cp_async_mbarrier_arrive_noinc :
1617     ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">,
1618     Intrinsic<[],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1619 def int_nvvm_cp_async_mbarrier_arrive_noinc_shared :
1620     ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">,
1621     Intrinsic<[],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1622 
     // Template for cp.async copies with a shared-memory destination (operand 0,
     // WriteOnly) and a global-memory source (operand 1, ReadOnly); both
     // pointers are NoAlias and are the only memory accessed.
     //   n  - string pasted into the LLVM name after "shared.global."
     //        (presumably the copy size in bytes -- confirm)
     //   cc - string pasted into the LLVM name after "cp.async."
     //        (presumably the cache operator, e.g. "ca"/"cg" -- confirm)
     // Two records are produced per defm: one named exactly like the defm (NAME)
     // and an `_s` variant that takes one additional i32 operand and whose LLVM
     // name ends in ".s".
1623 multiclass CP_ASYNC_SHARED_GLOBAL<string n, string cc> {
1624   def NAME: Intrinsic<[],[llvm_shared_ptr_ty, llvm_global_ptr_ty],
1625         [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
1626         WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
1627         "llvm.nvvm.cp.async." # cc # ".shared.global." # n>;
1628   def _s: Intrinsic<[],[llvm_shared_ptr_ty, llvm_global_ptr_ty, llvm_i32_ty],
1629         [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
1630         WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
1631         "llvm.nvvm.cp.async." # cc # ".shared.global." # n # ".s">;
1632 }
1633 
     // Instantiations of CP_ASYNC_SHARED_GLOBAL: "ca" with n = 4, 8, 16 and "cg"
     // with n = 16 only. Each line yields the base record and its `_s` variant.
1634 defm int_nvvm_cp_async_ca_shared_global_4 : CP_ASYNC_SHARED_GLOBAL<"4", "ca">;
1635 defm int_nvvm_cp_async_ca_shared_global_8 : CP_ASYNC_SHARED_GLOBAL<"8", "ca">;
1636 defm int_nvvm_cp_async_ca_shared_global_16 : CP_ASYNC_SHARED_GLOBAL<"16", "ca">;
1637 defm int_nvvm_cp_async_cg_shared_global_16 : CP_ASYNC_SHARED_GLOBAL<"16", "cg">;
1638 
     // cp.async group management. commit_group and wait_all take no operands;
     // wait_group takes one i32 that must be an immediate (ImmArg). No other
     // attributes are specified for any of the three.
1639 def int_nvvm_cp_async_commit_group :
1640     ClangBuiltin<"__nvvm_cp_async_commit_group">,
1641     Intrinsic<[],[],[]>;
1642 
1643 def int_nvvm_cp_async_wait_group :
1644     ClangBuiltin<"__nvvm_cp_async_wait_group">,
1645     Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1646 
1647 def int_nvvm_cp_async_wait_all :
1648     ClangBuiltin<"__nvvm_cp_async_wait_all">,
1649     Intrinsic<[],[],[]>;
1650 
1651 // cp.async.bulk variants of the commit/wait group
     // Same shapes as the non-bulk defs (the wait forms take one immediate i32),
     // plus a wait_group_read form. None has a ClangBuiltin attached.
1652 def int_nvvm_cp_async_bulk_commit_group :
1653     Intrinsic<[],[],[]>;
1654 
1655 def int_nvvm_cp_async_bulk_wait_group :
1656     Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1657 
1658 def int_nvvm_cp_async_bulk_wait_group_read :
1659     Intrinsic<[],[llvm_i32_ty],[ImmArg<ArgIndex<0>>]>;
1660 
1661 // mbarrier
     // mbarrier_init: (pointer, i32) -> void, convergent. The i32 is presumably
     // the expected arrival count -- confirm against the PTX ISA. The `_shared`
     // form differs only in taking an llvm_shared_ptr_ty pointer.
1662 def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">,
1663     Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1664 def int_nvvm_mbarrier_init_shared :
1665     ClangBuiltin<"__nvvm_mbarrier_init_shared">,
1666     Intrinsic<[],[llvm_shared_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1667 
     // mbarrier_inval: (pointer) -> void. More tightly annotated than the other
     // mbarrier ops: it only writes memory (IntrWriteMem), only through its
     // argument (IntrArgMemOnly, WriteOnly on operand 0), and does not capture
     // the pointer. Generic and `_shared` pointer forms.
1668 def int_nvvm_mbarrier_inval : ClangBuiltin<"__nvvm_mbarrier_inval">,
1669     Intrinsic<[],[llvm_ptr_ty],
1670     [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
1671     WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
1672 def int_nvvm_mbarrier_inval_shared :
1673     ClangBuiltin<"__nvvm_mbarrier_inval_shared">,
1674     Intrinsic<[],[llvm_shared_ptr_ty],
1675     [IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
1676     WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
1677 
     // mbarrier_arrive: (pointer) -> i64, convergent. The `_noComplete` forms
     // take an additional i32 operand. Each comes in generic and `_shared`
     // pointer flavours. The i64 result is presumably the barrier state token
     // consumed by test_wait -- confirm.
1678 def int_nvvm_mbarrier_arrive : ClangBuiltin<"__nvvm_mbarrier_arrive">,
1679     Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1680 def int_nvvm_mbarrier_arrive_shared :
1681     ClangBuiltin<"__nvvm_mbarrier_arrive_shared">,
1682     Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1683 def int_nvvm_mbarrier_arrive_noComplete :
1684     ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
1685     Intrinsic<[llvm_i64_ty],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1686 def int_nvvm_mbarrier_arrive_noComplete_shared :
1687     ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
1688     Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty,
1689     llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1690 
     // mbarrier_arrive_drop: same four signatures as the mbarrier_arrive defs
     // ((pointer) -> i64, and (pointer, i32) -> i64 for `_noComplete`; generic
     // and `_shared` pointer forms), all convergent.
1691 def int_nvvm_mbarrier_arrive_drop :
1692     ClangBuiltin<"__nvvm_mbarrier_arrive_drop">,
1693     Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1694 def int_nvvm_mbarrier_arrive_drop_shared :
1695     ClangBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
1696     Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
1697 def int_nvvm_mbarrier_arrive_drop_noComplete :
1698     ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
1699     Intrinsic<[llvm_i64_ty],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1700 def int_nvvm_mbarrier_arrive_drop_noComplete_shared :
1701     ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
1702     Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty,
1703     llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
1704 
     // mbarrier_test_wait: (pointer, i64) -> i1, convergent; generic and
     // `_shared` pointer forms. The i64 operand presumably is the state value
     // returned by an arrive intrinsic -- confirm.
1705 def int_nvvm_mbarrier_test_wait :
1706     ClangBuiltin<"__nvvm_mbarrier_test_wait">,
1707     Intrinsic<[llvm_i1_ty],[llvm_ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
1708 def int_nvvm_mbarrier_test_wait_shared :
1709     ClangBuiltin<"__nvvm_mbarrier_test_wait_shared">,
1710     Intrinsic<[llvm_i1_ty],[llvm_shared_ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
1711 
     // mbarrier_pending_count: i64 -> i32. It takes a value, not a pointer, and
     // is marked IntrNoMem; it is nevertheless also marked convergent.
1712 def int_nvvm_mbarrier_pending_count :
1713     ClangBuiltin<"__nvvm_mbarrier_pending_count">,
1714     Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent, IntrNoCallback]>;
1715 
1716 // Generated within nvvm. Use for ldu on sm_20 or later.  Second arg is the
1717 // pointer's alignment.
     // Three overloaded loads that differ only in the kind of result: any
     // integer (_i), any float (_f) or any pointer (_p). Each reads (never
     // writes) memory through its pointer argument only, does not capture it,
     // and is marked IntrWillReturn. The LLVM name is given explicitly as the
     // last template argument.
1718 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
1719   [llvm_anyptr_ty, llvm_i32_ty],
1720   [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
1721   "llvm.nvvm.ldu.global.i">;
1722 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
1723   [llvm_anyptr_ty, llvm_i32_ty],
1724   [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
1725   "llvm.nvvm.ldu.global.f">;
1726 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
1727   [llvm_anyptr_ty, llvm_i32_ty],
1728   [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
1729   "llvm.nvvm.ldu.global.p">;
1730 
1731 // Used in nvvm internally to help address space opt and ptx code generation
1732 // This is for params that are passed to kernel functions by pointer by-val.
     // Pointer -> pointer, both overloaded (so source and result may be in
     // different address spaces); no memory access, speculatable.
1733 def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
1734                                      [llvm_anyptr_ty],
1735                                    [IntrNoMem, IntrSpeculatable, IntrNoCallback],
1736                                    "llvm.nvvm.ptr.gen.to.param">;
1737 
1738 // sm70+, PTX7.7+
     // Reverse direction of ptr.gen.to.param: overloaded pointer -> overloaded
     // pointer, no memory access, speculatable. Declared with
     // DefaultAttrsIntrinsic, whereas gen.to.param uses plain Intrinsic.
1739 def int_nvvm_ptr_param_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
1740                                      [llvm_anyptr_ty],
1741                                    [IntrNoMem, IntrSpeculatable, IntrNoCallback],
1742                                    "llvm.nvvm.ptr.param.to.gen">;
1743 
1744 // Move intrinsics, used in nvvm internally
     // One def per value kind (i16, i32, i64, float, double, pointer). Each takes
     // a single operand and returns a value of the same kind, with IntrNoMem.
     // The pointer form is overloaded on both result and operand and marks its
     // operand NoCapture. No ClangBuiltin is attached to any of them.
1745 
1746 def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
1747   "llvm.nvvm.move.i16">;
1748 def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
1749   "llvm.nvvm.move.i32">;
1750 def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
1751   "llvm.nvvm.move.i64">;
1752 def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
1753   [IntrNoMem], "llvm.nvvm.move.float">;
1754 def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
1755   [IntrNoMem], "llvm.nvvm.move.double">;
1756 def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
1757   [IntrNoMem, NoCapture<ArgIndex<0>>], "llvm.nvvm.move.ptr">;
1758 
1759 
1760 // For getting the handle from a texture or surface variable
     // Both return the handle as an i64 and are IntrNoMem. The public form takes
     // a metadata operand plus an overloaded pointer; the `.internal` form takes
     // only the pointer.
1761 def int_nvvm_texsurf_handle
1762   : Intrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyptr_ty],
1763               [IntrNoMem], "llvm.nvvm.texsurf.handle">;
1764 def int_nvvm_texsurf_handle_internal
1765   : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty],
1766               [IntrNoMem], "llvm.nvvm.texsurf.handle.internal">;
1767 
1768 /// Error / Warn
     // Each takes one overloaded pointer (presumably to the message text --
     // confirm) and returns nothing. The attribute list is empty, so no
     // memory or side-effect guarantees are given.
1769 def int_nvvm_compiler_error :
1770     Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
1771 def int_nvvm_compiler_warn :
1772     Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
1773 
     // __nvvm_reflect: takes a pointer, returns i32, marked IntrNoMem. (The
     // pointer presumably addresses a query string that a dedicated pass folds
     // to a constant -- confirm against the NVPTX usage guide.)
1774 def int_nvvm_reflect :
1775   Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem], "llvm.nvvm.reflect">,
1776   ClangBuiltin<"__nvvm_reflect">;
1777 
1778 // isspacep.{const, global, local, shared}
     // Address-space predicates: take a generic pointer and return i1. They do
     // not access memory, are speculatable, and do not capture the pointer. The
     // const/global/local/shared forms have Clang builtins; the shared.cluster
     // form at the end does not.
1779 def int_nvvm_isspacep_const
1780   : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1781               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1782               "llvm.nvvm.isspacep.const">,
1783     ClangBuiltin<"__nvvm_isspacep_const">;
1784 def int_nvvm_isspacep_global
1785   : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1786               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1787               "llvm.nvvm.isspacep.global">,
1788     ClangBuiltin<"__nvvm_isspacep_global">;
1789 def int_nvvm_isspacep_local
1790   : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1791               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1792               "llvm.nvvm.isspacep.local">,
1793     ClangBuiltin<"__nvvm_isspacep_local">;
1794 def int_nvvm_isspacep_shared
1795   : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1796               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1797               "llvm.nvvm.isspacep.shared">,
1798     ClangBuiltin<"__nvvm_isspacep_shared">;
1799 def int_nvvm_isspacep_shared_cluster
1800   : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
1801               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
1802               "llvm.nvvm.isspacep.shared.cluster">;
1803 // Environment register read
1804 // 32 defs, envreg0 through envreg31. Each has no operands and an i32 result, is IntrNoMem + IntrSpeculatable with NoUndef on the result, and is bound to the Clang builtin __nvvm_read_ptx_sreg_envregN of the same index.
1805 def int_nvvm_read_ptx_sreg_envreg0
1806   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1807               "llvm.nvvm.read.ptx.sreg.envreg0">,
1808     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
1809 def int_nvvm_read_ptx_sreg_envreg1
1810   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1811               "llvm.nvvm.read.ptx.sreg.envreg1">,
1812     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
1813 def int_nvvm_read_ptx_sreg_envreg2
1814   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1815               "llvm.nvvm.read.ptx.sreg.envreg2">,
1816     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
1817 def int_nvvm_read_ptx_sreg_envreg3
1818   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1819               "llvm.nvvm.read.ptx.sreg.envreg3">,
1820     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg3">;
1821 def int_nvvm_read_ptx_sreg_envreg4
1822   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1823               "llvm.nvvm.read.ptx.sreg.envreg4">,
1824     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg4">;
1825 def int_nvvm_read_ptx_sreg_envreg5
1826   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1827               "llvm.nvvm.read.ptx.sreg.envreg5">,
1828     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg5">;
1829 def int_nvvm_read_ptx_sreg_envreg6
1830   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1831               "llvm.nvvm.read.ptx.sreg.envreg6">,
1832     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg6">;
1833 def int_nvvm_read_ptx_sreg_envreg7
1834   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1835               "llvm.nvvm.read.ptx.sreg.envreg7">,
1836     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg7">;
1837 def int_nvvm_read_ptx_sreg_envreg8
1838   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1839               "llvm.nvvm.read.ptx.sreg.envreg8">,
1840     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg8">;
1841 def int_nvvm_read_ptx_sreg_envreg9
1842   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1843               "llvm.nvvm.read.ptx.sreg.envreg9">,
1844     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg9">;
1845 def int_nvvm_read_ptx_sreg_envreg10
1846   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1847               "llvm.nvvm.read.ptx.sreg.envreg10">,
1848     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg10">;
1849 def int_nvvm_read_ptx_sreg_envreg11
1850   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1851               "llvm.nvvm.read.ptx.sreg.envreg11">,
1852     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg11">;
1853 def int_nvvm_read_ptx_sreg_envreg12
1854   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1855               "llvm.nvvm.read.ptx.sreg.envreg12">,
1856     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg12">;
1857 def int_nvvm_read_ptx_sreg_envreg13
1858   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1859               "llvm.nvvm.read.ptx.sreg.envreg13">,
1860     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg13">;
1861 def int_nvvm_read_ptx_sreg_envreg14
1862   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1863               "llvm.nvvm.read.ptx.sreg.envreg14">,
1864     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg14">;
1865 def int_nvvm_read_ptx_sreg_envreg15
1866   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1867               "llvm.nvvm.read.ptx.sreg.envreg15">,
1868     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg15">;
1869 def int_nvvm_read_ptx_sreg_envreg16
1870   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1871               "llvm.nvvm.read.ptx.sreg.envreg16">,
1872     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg16">;
1873 def int_nvvm_read_ptx_sreg_envreg17
1874   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1875               "llvm.nvvm.read.ptx.sreg.envreg17">,
1876     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg17">;
1877 def int_nvvm_read_ptx_sreg_envreg18
1878   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1879               "llvm.nvvm.read.ptx.sreg.envreg18">,
1880     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg18">;
1881 def int_nvvm_read_ptx_sreg_envreg19
1882   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1883               "llvm.nvvm.read.ptx.sreg.envreg19">,
1884     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg19">;
1885 def int_nvvm_read_ptx_sreg_envreg20
1886   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1887               "llvm.nvvm.read.ptx.sreg.envreg20">,
1888     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg20">;
1889 def int_nvvm_read_ptx_sreg_envreg21
1890   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1891               "llvm.nvvm.read.ptx.sreg.envreg21">,
1892     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg21">;
1893 def int_nvvm_read_ptx_sreg_envreg22
1894   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1895               "llvm.nvvm.read.ptx.sreg.envreg22">,
1896     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg22">;
1897 def int_nvvm_read_ptx_sreg_envreg23
1898   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1899               "llvm.nvvm.read.ptx.sreg.envreg23">,
1900     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg23">;
1901 def int_nvvm_read_ptx_sreg_envreg24
1902   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1903               "llvm.nvvm.read.ptx.sreg.envreg24">,
1904     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg24">;
1905 def int_nvvm_read_ptx_sreg_envreg25
1906   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1907               "llvm.nvvm.read.ptx.sreg.envreg25">,
1908     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg25">;
1909 def int_nvvm_read_ptx_sreg_envreg26
1910   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1911               "llvm.nvvm.read.ptx.sreg.envreg26">,
1912     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg26">;
1913 def int_nvvm_read_ptx_sreg_envreg27
1914   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1915               "llvm.nvvm.read.ptx.sreg.envreg27">,
1916     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg27">;
1917 def int_nvvm_read_ptx_sreg_envreg28
1918   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1919               "llvm.nvvm.read.ptx.sreg.envreg28">,
1920     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg28">;
1921 def int_nvvm_read_ptx_sreg_envreg29
1922   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1923               "llvm.nvvm.read.ptx.sreg.envreg29">,
1924     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg29">;
1925 def int_nvvm_read_ptx_sreg_envreg30
1926   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1927               "llvm.nvvm.read.ptx.sreg.envreg30">,
1928     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg30">;
1929 def int_nvvm_read_ptx_sreg_envreg31
1930   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
1931               "llvm.nvvm.read.ptx.sreg.envreg31">,
1932     ClangBuiltin<"__nvvm_read_ptx_sreg_envreg31">;
1933 // Texture Fetch
1934 // texmode_independent: each def starts with two i64 operands and passes an empty attribute list. NOTE(review): the two i64 operands are presumably the texture and sampler handles -- confirm.
1935 // Result naming: v4f32 returns four floats; v4s32 and v4u32 both return four i32 (signedness appears only in the name).
1936 // 1d: one coordinate (i32 for .s32, float for .f32); .level adds one float operand, .grad adds two float operands.
1937 def int_nvvm_tex_1d_v4f32_s32
1938   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1939               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [],
1940               "llvm.nvvm.tex.1d.v4f32.s32">;
1941 def int_nvvm_tex_1d_v4f32_f32
1942   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1943               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [],
1944               "llvm.nvvm.tex.1d.v4f32.f32">;
1945 def int_nvvm_tex_1d_level_v4f32_f32
1946   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1947               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
1948               "llvm.nvvm.tex.1d.level.v4f32.f32">;
1949 def int_nvvm_tex_1d_grad_v4f32_f32
1950   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1951               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
1952                llvm_float_ty], [],
1953               "llvm.nvvm.tex.1d.grad.v4f32.f32">;
1954 def int_nvvm_tex_1d_v4s32_s32
1955   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1956               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [],
1957               "llvm.nvvm.tex.1d.v4s32.s32">;
1958 def int_nvvm_tex_1d_v4s32_f32
1959   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1960               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [],
1961               "llvm.nvvm.tex.1d.v4s32.f32">;
1962 def int_nvvm_tex_1d_level_v4s32_f32
1963   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1964               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
1965               "llvm.nvvm.tex.1d.level.v4s32.f32">;
1966 def int_nvvm_tex_1d_grad_v4s32_f32
1967   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1968               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
1969                llvm_float_ty], [],
1970               "llvm.nvvm.tex.1d.grad.v4s32.f32">;
1971 def int_nvvm_tex_1d_v4u32_s32
1972   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1973               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [],
1974               "llvm.nvvm.tex.1d.v4u32.s32">;
1975 def int_nvvm_tex_1d_v4u32_f32
1976   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1977               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], [],
1978               "llvm.nvvm.tex.1d.v4u32.f32">;
1979 def int_nvvm_tex_1d_level_v4u32_f32
1980   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1981               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
1982               "llvm.nvvm.tex.1d.level.v4u32.f32">;
1983 def int_nvvm_tex_1d_grad_v4u32_f32
1984   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1985               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
1986                llvm_float_ty], [],
1987               "llvm.nvvm.tex.1d.grad.v4u32.f32">;
1988 // 1d array: two i64 operands, then an i32 operand, then one coordinate (i32 for .s32, float for .f32); .level adds one float, .grad adds two floats. NOTE(review): the extra i32 is presumably the array layer index -- confirm.
1989 def int_nvvm_tex_1d_array_v4f32_s32
1990   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1991               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
1992               "llvm.nvvm.tex.1d.array.v4f32.s32">;
1993 def int_nvvm_tex_1d_array_v4f32_f32
1994   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1995               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
1996               "llvm.nvvm.tex.1d.array.v4f32.f32">;
1997 def int_nvvm_tex_1d_array_level_v4f32_f32
1998   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
1999               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2000                llvm_float_ty], [],
2001               "llvm.nvvm.tex.1d.array.level.v4f32.f32">;
2002 def int_nvvm_tex_1d_array_grad_v4f32_f32
2003   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2004               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2005                llvm_float_ty, llvm_float_ty], [],
2006               "llvm.nvvm.tex.1d.array.grad.v4f32.f32">;
2007 def int_nvvm_tex_1d_array_v4s32_s32
2008   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2009               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2010               "llvm.nvvm.tex.1d.array.v4s32.s32">;
2011 def int_nvvm_tex_1d_array_v4s32_f32
2012   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2013               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2014               "llvm.nvvm.tex.1d.array.v4s32.f32">;
2015 def int_nvvm_tex_1d_array_level_v4s32_f32
2016   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2017               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2018                llvm_float_ty], [],
2019               "llvm.nvvm.tex.1d.array.level.v4s32.f32">;
2020 def int_nvvm_tex_1d_array_grad_v4s32_f32
2021   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2022               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2023                llvm_float_ty, llvm_float_ty], [],
2024               "llvm.nvvm.tex.1d.array.grad.v4s32.f32">;
2025 def int_nvvm_tex_1d_array_v4u32_s32
2026   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2027               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2028               "llvm.nvvm.tex.1d.array.v4u32.s32">;
2029 def int_nvvm_tex_1d_array_v4u32_f32
2030   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2031               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2032               "llvm.nvvm.tex.1d.array.v4u32.f32">;
2033 def int_nvvm_tex_1d_array_level_v4u32_f32
2034   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2035               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2036                llvm_float_ty], [],
2037               "llvm.nvvm.tex.1d.array.level.v4u32.f32">;
2038 def int_nvvm_tex_1d_array_grad_v4u32_f32
2039   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2040               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2041                llvm_float_ty, llvm_float_ty], [],
2042               "llvm.nvvm.tex.1d.array.grad.v4u32.f32">;
2043 // 2d: two i64 operands, then two coordinates (i32 for .s32, float for .f32); .level adds one float operand, .grad adds four float operands.
2044 def int_nvvm_tex_2d_v4f32_s32
2045   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2046               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2047               "llvm.nvvm.tex.2d.v4f32.s32">;
2048 def int_nvvm_tex_2d_v4f32_f32
2049   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2050               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2051               "llvm.nvvm.tex.2d.v4f32.f32">;
2052 def int_nvvm_tex_2d_level_v4f32_f32
2053   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2054               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2055                llvm_float_ty], [],
2056               "llvm.nvvm.tex.2d.level.v4f32.f32">;
2057 def int_nvvm_tex_2d_grad_v4f32_f32
2058   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2059               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2060                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2061               "llvm.nvvm.tex.2d.grad.v4f32.f32">;
2062 def int_nvvm_tex_2d_v4s32_s32
2063   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2064               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2065               "llvm.nvvm.tex.2d.v4s32.s32">;
2066 def int_nvvm_tex_2d_v4s32_f32
2067   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2068               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2069               "llvm.nvvm.tex.2d.v4s32.f32">;
2070 def int_nvvm_tex_2d_level_v4s32_f32
2071   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2072               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2073                llvm_float_ty], [],
2074               "llvm.nvvm.tex.2d.level.v4s32.f32">;
2075 def int_nvvm_tex_2d_grad_v4s32_f32
2076   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2077               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2078                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2079               "llvm.nvvm.tex.2d.grad.v4s32.f32">;
2080 def int_nvvm_tex_2d_v4u32_s32
2081   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2082               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2083               "llvm.nvvm.tex.2d.v4u32.s32">;
2084 def int_nvvm_tex_2d_v4u32_f32
2085   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2086               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2087               "llvm.nvvm.tex.2d.v4u32.f32">;
2088 def int_nvvm_tex_2d_level_v4u32_f32
2089   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2090               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2091                llvm_float_ty], [],
2092               "llvm.nvvm.tex.2d.level.v4u32.f32">;
2093 def int_nvvm_tex_2d_grad_v4u32_f32
2094   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2095               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2096                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2097               "llvm.nvvm.tex.2d.grad.v4u32.f32">;
2098 // 2d array: two i64 operands, then an i32 operand (i32 even in the .f32 forms), then two coordinates; .level adds one float operand, .grad adds four float operands.
2099 def int_nvvm_tex_2d_array_v4f32_s32
2100   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2101               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2102                llvm_i32_ty], [],
2103               "llvm.nvvm.tex.2d.array.v4f32.s32">;
2104 def int_nvvm_tex_2d_array_v4f32_f32
2105   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2106               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2107                llvm_float_ty], [],
2108               "llvm.nvvm.tex.2d.array.v4f32.f32">;
2109 def int_nvvm_tex_2d_array_level_v4f32_f32
2110   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2111               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2112                llvm_float_ty, llvm_float_ty], [],
2113               "llvm.nvvm.tex.2d.array.level.v4f32.f32">;
2114 def int_nvvm_tex_2d_array_grad_v4f32_f32
2115   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2116               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2117                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2118                llvm_float_ty], [],
2119               "llvm.nvvm.tex.2d.array.grad.v4f32.f32">;
2120 def int_nvvm_tex_2d_array_v4s32_s32
2121   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2122               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2123                llvm_i32_ty], [],
2124               "llvm.nvvm.tex.2d.array.v4s32.s32">;
2125 def int_nvvm_tex_2d_array_v4s32_f32
2126   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2127               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2128                llvm_float_ty], [],
2129               "llvm.nvvm.tex.2d.array.v4s32.f32">;
2130 def int_nvvm_tex_2d_array_level_v4s32_f32
2131   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2132               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2133                llvm_float_ty, llvm_float_ty], [],
2134               "llvm.nvvm.tex.2d.array.level.v4s32.f32">;
2135 def int_nvvm_tex_2d_array_grad_v4s32_f32
2136   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2137               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2138                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2139                llvm_float_ty], [],
2140               "llvm.nvvm.tex.2d.array.grad.v4s32.f32">;
2141 def int_nvvm_tex_2d_array_v4u32_s32
2142   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2143               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2144                llvm_i32_ty], [],
2145               "llvm.nvvm.tex.2d.array.v4u32.s32">;
2146 def int_nvvm_tex_2d_array_v4u32_f32
2147   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2148               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2149                llvm_float_ty], [],
2150               "llvm.nvvm.tex.2d.array.v4u32.f32">;
2151 def int_nvvm_tex_2d_array_level_v4u32_f32
2152   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2153               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2154                llvm_float_ty, llvm_float_ty], [],
2155               "llvm.nvvm.tex.2d.array.level.v4u32.f32">;
2156 def int_nvvm_tex_2d_array_grad_v4u32_f32
2157   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2158               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2159                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2160                llvm_float_ty], [],
2161               "llvm.nvvm.tex.2d.array.grad.v4u32.f32">;
2162 // 3d: two i64 operands, then three coordinates (i32 for .s32, float for .f32); .level adds one float operand, .grad adds six float operands.
2163 def int_nvvm_tex_3d_v4f32_s32
2164   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2165               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2166               [], "llvm.nvvm.tex.3d.v4f32.s32">;
2167 def int_nvvm_tex_3d_v4f32_f32
2168   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2169               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2170                llvm_float_ty], [],
2171               "llvm.nvvm.tex.3d.v4f32.f32">;
2172 def int_nvvm_tex_3d_level_v4f32_f32
2173   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2174               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2175                llvm_float_ty, llvm_float_ty], [],
2176               "llvm.nvvm.tex.3d.level.v4f32.f32">;
2177 def int_nvvm_tex_3d_grad_v4f32_f32
2178   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2179               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2180                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2181                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2182               "llvm.nvvm.tex.3d.grad.v4f32.f32">;
2183 def int_nvvm_tex_3d_v4s32_s32
2184   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2185               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2186               [], "llvm.nvvm.tex.3d.v4s32.s32">;
2187 def int_nvvm_tex_3d_v4s32_f32
2188   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2189               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2190                llvm_float_ty], [],
2191               "llvm.nvvm.tex.3d.v4s32.f32">;
2192 def int_nvvm_tex_3d_level_v4s32_f32
2193   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2194               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2195                llvm_float_ty, llvm_float_ty], [],
2196               "llvm.nvvm.tex.3d.level.v4s32.f32">;
2197 def int_nvvm_tex_3d_grad_v4s32_f32
2198   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2199               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2200                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2201                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2202               "llvm.nvvm.tex.3d.grad.v4s32.f32">;
2203 def int_nvvm_tex_3d_v4u32_s32
2204   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2205               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2206               [], "llvm.nvvm.tex.3d.v4u32.s32">;
2207 def int_nvvm_tex_3d_v4u32_f32
2208   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2209               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2210                llvm_float_ty], [],
2211               "llvm.nvvm.tex.3d.v4u32.f32">;
2212 def int_nvvm_tex_3d_level_v4u32_f32
2213   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2214               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2215                llvm_float_ty, llvm_float_ty], [],
2216               "llvm.nvvm.tex.3d.level.v4u32.f32">;
2217 def int_nvvm_tex_3d_grad_v4u32_f32
2218   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2219               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2220                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2221                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2222               "llvm.nvvm.tex.3d.grad.v4u32.f32">;
2223 // cube: two i64 operands, then three float coordinates; .level adds one float operand. This group defines only .f32-coordinate forms and has no .grad variant.
2224 def int_nvvm_tex_cube_v4f32_f32
2225   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2226               [llvm_i64_ty, llvm_i64_ty,
2227                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2228               "llvm.nvvm.tex.cube.v4f32.f32">;
2229 def int_nvvm_tex_cube_level_v4f32_f32
2230   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2231               [llvm_i64_ty, llvm_i64_ty,
2232                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2233               "llvm.nvvm.tex.cube.level.v4f32.f32">;
2234 def int_nvvm_tex_cube_v4s32_f32
2235   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2236               [llvm_i64_ty, llvm_i64_ty,
2237                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2238               "llvm.nvvm.tex.cube.v4s32.f32">;
2239 def int_nvvm_tex_cube_level_v4s32_f32
2240   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2241               [llvm_i64_ty, llvm_i64_ty,
2242                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2243               "llvm.nvvm.tex.cube.level.v4s32.f32">;
2244 def int_nvvm_tex_cube_v4u32_f32
2245   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2246               [llvm_i64_ty, llvm_i64_ty,
2247                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2248               "llvm.nvvm.tex.cube.v4u32.f32">;
2249 def int_nvvm_tex_cube_level_v4u32_f32
2250   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2251               [llvm_i64_ty, llvm_i64_ty,
2252                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2253               "llvm.nvvm.tex.cube.level.v4u32.f32">;
2254 // cube array: two i64 operands, then an i32 operand, then three float coordinates; .level adds one float operand.
2255 def int_nvvm_tex_cube_array_v4f32_f32
2256   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2257               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2258                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2259               "llvm.nvvm.tex.cube.array.v4f32.f32">;
2260 def int_nvvm_tex_cube_array_level_v4f32_f32
2261   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2262               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2263                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2264               "llvm.nvvm.tex.cube.array.level.v4f32.f32">;
2265 def int_nvvm_tex_cube_array_v4s32_f32
2266   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2267               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2268                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2269               "llvm.nvvm.tex.cube.array.v4s32.f32">;
2270 def int_nvvm_tex_cube_array_level_v4s32_f32
2271   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2272               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2273                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2274               "llvm.nvvm.tex.cube.array.level.v4s32.f32">;
2275 def int_nvvm_tex_cube_array_v4u32_f32
2276   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2277               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2278                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2279               "llvm.nvvm.tex.cube.array.v4u32.f32">;
2280 def int_nvvm_tex_cube_array_level_v4u32_f32
2281   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2282               [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty,
2283                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2284               "llvm.nvvm.tex.cube.array.level.v4u32.f32">;
2285 // tld4.{r,g,b,a}.2d: two i64 operands followed by two float coordinates; four results (float for v4f32, i32 for v4s32 and v4u32). The r/g/b/a variants share identical signatures and differ only in name.
2286 def int_nvvm_tld4_r_2d_v4f32_f32
2287   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2288               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2289               "llvm.nvvm.tld4.r.2d.v4f32.f32">;
2290 def int_nvvm_tld4_g_2d_v4f32_f32
2291   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2292               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2293               "llvm.nvvm.tld4.g.2d.v4f32.f32">;
2294 def int_nvvm_tld4_b_2d_v4f32_f32
2295   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2296               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2297               "llvm.nvvm.tld4.b.2d.v4f32.f32">;
2298 def int_nvvm_tld4_a_2d_v4f32_f32
2299   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2300               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2301               "llvm.nvvm.tld4.a.2d.v4f32.f32">;
2302 def int_nvvm_tld4_r_2d_v4s32_f32
2303   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2304               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2305               "llvm.nvvm.tld4.r.2d.v4s32.f32">;
2306 def int_nvvm_tld4_g_2d_v4s32_f32
2307   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2308               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2309               "llvm.nvvm.tld4.g.2d.v4s32.f32">;
2310 def int_nvvm_tld4_b_2d_v4s32_f32
2311   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2312               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2313               "llvm.nvvm.tld4.b.2d.v4s32.f32">;
2314 def int_nvvm_tld4_a_2d_v4s32_f32
2315   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2316               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2317               "llvm.nvvm.tld4.a.2d.v4s32.f32">;
2318 def int_nvvm_tld4_r_2d_v4u32_f32
2319   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2320               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2321               "llvm.nvvm.tld4.r.2d.v4u32.f32">;
2322 def int_nvvm_tld4_g_2d_v4u32_f32
2323   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2324               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2325               "llvm.nvvm.tld4.g.2d.v4u32.f32">;
2326 def int_nvvm_tld4_b_2d_v4u32_f32
2327   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2328               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2329               "llvm.nvvm.tld4.b.2d.v4u32.f32">;
2330 def int_nvvm_tld4_a_2d_v4u32_f32
2331   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2332               [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2333               "llvm.nvvm.tld4.a.2d.v4u32.f32">;
2334 
2335 
2336 // texmode_unified
// tex.unified.1d intrinsics. Each record yields four scalar results
// (llvm_float_ty for v4f32, llvm_i32_ty for both v4s32 and v4u32) and takes a
// single leading llvm_i64_ty operand followed by the lookup operands:
//   *_s32   : one llvm_i32_ty
//   *_f32   : one llvm_float_ty
//   level   : two llvm_float_ty
//   grad    : three llvm_float_ty
// NOTE(review): the i64 is presumably the unified texture handle, and the
// extra floats the LOD (level) or the two derivatives (grad) -- confirm
// against the NVPTX backend / PTX ISA.
2337 def int_nvvm_tex_unified_1d_v4f32_s32
2338   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2339               [llvm_i64_ty, llvm_i32_ty], [],
2340               "llvm.nvvm.tex.unified.1d.v4f32.s32">;
2341 def int_nvvm_tex_unified_1d_v4f32_f32
2342   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2343               [llvm_i64_ty, llvm_float_ty], [],
2344               "llvm.nvvm.tex.unified.1d.v4f32.f32">;
2345 def int_nvvm_tex_unified_1d_level_v4f32_f32
2346   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2347               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2348               "llvm.nvvm.tex.unified.1d.level.v4f32.f32">;
2349 def int_nvvm_tex_unified_1d_grad_v4f32_f32
2350   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2351               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2352                llvm_float_ty], [],
2353               "llvm.nvvm.tex.unified.1d.grad.v4f32.f32">;
2354 def int_nvvm_tex_unified_1d_v4s32_s32
2355   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2356               [llvm_i64_ty, llvm_i32_ty], [],
2357               "llvm.nvvm.tex.unified.1d.v4s32.s32">;
2358 def int_nvvm_tex_unified_1d_v4s32_f32
2359   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2360               [llvm_i64_ty, llvm_float_ty], [],
2361               "llvm.nvvm.tex.unified.1d.v4s32.f32">;
2362 def int_nvvm_tex_unified_1d_level_v4s32_f32
2363   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2364               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2365               "llvm.nvvm.tex.unified.1d.level.v4s32.f32">;
2366 def int_nvvm_tex_unified_1d_grad_v4s32_f32
2367   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2368               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2369                llvm_float_ty], [],
2370               "llvm.nvvm.tex.unified.1d.grad.v4s32.f32">;
2371 def int_nvvm_tex_unified_1d_v4u32_s32
2372   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2373               [llvm_i64_ty, llvm_i32_ty], [],
2374               "llvm.nvvm.tex.unified.1d.v4u32.s32">;
2375 def int_nvvm_tex_unified_1d_v4u32_f32
2376   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2377               [llvm_i64_ty, llvm_float_ty], [],
2378               "llvm.nvvm.tex.unified.1d.v4u32.f32">;
2379 def int_nvvm_tex_unified_1d_level_v4u32_f32
2380   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2381               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2382               "llvm.nvvm.tex.unified.1d.level.v4u32.f32">;
2383 def int_nvvm_tex_unified_1d_grad_v4u32_f32
2384   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2385               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2386                llvm_float_ty], [],
2387               "llvm.nvvm.tex.unified.1d.grad.v4u32.f32">;
2388 
// tex.unified.1d.array intrinsics. Same result flavours as the plain 1d
// records (four llvm_float_ty, or four llvm_i32_ty for v4s32/v4u32), but every
// operand list carries an llvm_i32_ty immediately after the leading
// llvm_i64_ty. After those two operands:
//   *_s32   : one llvm_i32_ty
//   *_f32   : one llvm_float_ty
//   level   : two llvm_float_ty
//   grad    : three llvm_float_ty
// NOTE(review): the extra i32 is presumably the array layer index -- confirm
// against the NVPTX backend / PTX ISA.
2389 def int_nvvm_tex_unified_1d_array_v4f32_s32
2390   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2391               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2392               "llvm.nvvm.tex.unified.1d.array.v4f32.s32">;
2393 def int_nvvm_tex_unified_1d_array_v4f32_f32
2394   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2395               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2396               "llvm.nvvm.tex.unified.1d.array.v4f32.f32">;
2397 def int_nvvm_tex_unified_1d_array_level_v4f32_f32
2398   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2399               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2400                llvm_float_ty], [],
2401               "llvm.nvvm.tex.unified.1d.array.level.v4f32.f32">;
2402 def int_nvvm_tex_unified_1d_array_grad_v4f32_f32
2403   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2404               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2405                llvm_float_ty, llvm_float_ty], [],
2406               "llvm.nvvm.tex.unified.1d.array.grad.v4f32.f32">;
2407 def int_nvvm_tex_unified_1d_array_v4s32_s32
2408   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2409               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2410               "llvm.nvvm.tex.unified.1d.array.v4s32.s32">;
2411 def int_nvvm_tex_unified_1d_array_v4s32_f32
2412   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2413               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2414               "llvm.nvvm.tex.unified.1d.array.v4s32.f32">;
2415 def int_nvvm_tex_unified_1d_array_level_v4s32_f32
2416   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2417               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2418                llvm_float_ty], [],
2419               "llvm.nvvm.tex.unified.1d.array.level.v4s32.f32">;
2420 def int_nvvm_tex_unified_1d_array_grad_v4s32_f32
2421   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2422               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2423                llvm_float_ty, llvm_float_ty], [],
2424               "llvm.nvvm.tex.unified.1d.array.grad.v4s32.f32">;
2425 def int_nvvm_tex_unified_1d_array_v4u32_s32
2426   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2427               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2428               "llvm.nvvm.tex.unified.1d.array.v4u32.s32">;
2429 def int_nvvm_tex_unified_1d_array_v4u32_f32
2430   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2431               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], [],
2432               "llvm.nvvm.tex.unified.1d.array.v4u32.f32">;
2433 def int_nvvm_tex_unified_1d_array_level_v4u32_f32
2434   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2435               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2436                llvm_float_ty], [],
2437               "llvm.nvvm.tex.unified.1d.array.level.v4u32.f32">;
2438 def int_nvvm_tex_unified_1d_array_grad_v4u32_f32
2439   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2440               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2441                llvm_float_ty, llvm_float_ty], [],
2442               "llvm.nvvm.tex.unified.1d.array.grad.v4u32.f32">;
2443 
// tex.unified.2d intrinsics. Four scalar results per record (llvm_float_ty
// for v4f32, llvm_i32_ty for v4s32/v4u32). After the leading llvm_i64_ty
// operand:
//   *_s32   : two llvm_i32_ty
//   *_f32   : two llvm_float_ty
//   level   : three llvm_float_ty
//   grad    : six llvm_float_ty
// NOTE(review): presumably two coordinates, plus one LOD (level) or two
// 2-component derivatives (grad) -- confirm against the PTX ISA.
2444 def int_nvvm_tex_unified_2d_v4f32_s32
2445   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2446               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2447               "llvm.nvvm.tex.unified.2d.v4f32.s32">;
2448 def int_nvvm_tex_unified_2d_v4f32_f32
2449   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2450               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2451               "llvm.nvvm.tex.unified.2d.v4f32.f32">;
2452 def int_nvvm_tex_unified_2d_level_v4f32_f32
2453   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2454               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2455                llvm_float_ty], [],
2456               "llvm.nvvm.tex.unified.2d.level.v4f32.f32">;
2457 def int_nvvm_tex_unified_2d_grad_v4f32_f32
2458   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2459               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2460                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2461               "llvm.nvvm.tex.unified.2d.grad.v4f32.f32">;
2462 def int_nvvm_tex_unified_2d_v4s32_s32
2463   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2464               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2465               "llvm.nvvm.tex.unified.2d.v4s32.s32">;
2466 def int_nvvm_tex_unified_2d_v4s32_f32
2467   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2468               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2469               "llvm.nvvm.tex.unified.2d.v4s32.f32">;
2470 def int_nvvm_tex_unified_2d_level_v4s32_f32
2471   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2472               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2473                llvm_float_ty], [],
2474               "llvm.nvvm.tex.unified.2d.level.v4s32.f32">;
2475 def int_nvvm_tex_unified_2d_grad_v4s32_f32
2476   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2477               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2478                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2479               "llvm.nvvm.tex.unified.2d.grad.v4s32.f32">;
2480 def int_nvvm_tex_unified_2d_v4u32_s32
2481   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2482               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2483               "llvm.nvvm.tex.unified.2d.v4u32.s32">;
2484 def int_nvvm_tex_unified_2d_v4u32_f32
2485   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2486               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2487               "llvm.nvvm.tex.unified.2d.v4u32.f32">;
2488 def int_nvvm_tex_unified_2d_level_v4u32_f32
2489   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2490               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2491                llvm_float_ty], [],
2492               "llvm.nvvm.tex.unified.2d.level.v4u32.f32">;
2493 def int_nvvm_tex_unified_2d_grad_v4u32_f32
2494   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2495               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2496                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2497               "llvm.nvvm.tex.unified.2d.grad.v4u32.f32">;
2498 
// tex.unified.2d.array intrinsics. Four scalar results per record
// (llvm_float_ty for v4f32, llvm_i32_ty for v4s32/v4u32). Every operand list
// starts with llvm_i64_ty followed by llvm_i32_ty; after those two:
//   *_s32   : two llvm_i32_ty
//   *_f32   : two llvm_float_ty
//   level   : three llvm_float_ty
//   grad    : six llvm_float_ty
// NOTE(review): the i32 after the i64 is presumably the array layer index --
// confirm against the NVPTX backend / PTX ISA.
2499 def int_nvvm_tex_unified_2d_array_v4f32_s32
2500   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2501               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2502                llvm_i32_ty], [],
2503               "llvm.nvvm.tex.unified.2d.array.v4f32.s32">;
2504 def int_nvvm_tex_unified_2d_array_v4f32_f32
2505   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2506               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2507                llvm_float_ty], [],
2508               "llvm.nvvm.tex.unified.2d.array.v4f32.f32">;
2509 def int_nvvm_tex_unified_2d_array_level_v4f32_f32
2510   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2511               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2512                llvm_float_ty, llvm_float_ty], [],
2513               "llvm.nvvm.tex.unified.2d.array.level.v4f32.f32">;
2514 def int_nvvm_tex_unified_2d_array_grad_v4f32_f32
2515   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2516               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2517                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2518                llvm_float_ty], [],
2519               "llvm.nvvm.tex.unified.2d.array.grad.v4f32.f32">;
2520 def int_nvvm_tex_unified_2d_array_v4s32_s32
2521   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2522               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2523                llvm_i32_ty], [],
2524               "llvm.nvvm.tex.unified.2d.array.v4s32.s32">;
2525 def int_nvvm_tex_unified_2d_array_v4s32_f32
2526   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2527               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2528                llvm_float_ty], [],
2529               "llvm.nvvm.tex.unified.2d.array.v4s32.f32">;
2530 def int_nvvm_tex_unified_2d_array_level_v4s32_f32
2531   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2532               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2533                llvm_float_ty, llvm_float_ty], [],
2534               "llvm.nvvm.tex.unified.2d.array.level.v4s32.f32">;
2535 def int_nvvm_tex_unified_2d_array_grad_v4s32_f32
2536   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2537               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2538                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2539                llvm_float_ty], [],
2540               "llvm.nvvm.tex.unified.2d.array.grad.v4s32.f32">;
2541 def int_nvvm_tex_unified_2d_array_v4u32_s32
2542   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2543               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
2544                llvm_i32_ty], [],
2545               "llvm.nvvm.tex.unified.2d.array.v4u32.s32">;
2546 def int_nvvm_tex_unified_2d_array_v4u32_f32
2547   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2548               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2549                llvm_float_ty], [],
2550               "llvm.nvvm.tex.unified.2d.array.v4u32.f32">;
2551 def int_nvvm_tex_unified_2d_array_level_v4u32_f32
2552   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2553               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2554                llvm_float_ty, llvm_float_ty], [],
2555               "llvm.nvvm.tex.unified.2d.array.level.v4u32.f32">;
2556 def int_nvvm_tex_unified_2d_array_grad_v4u32_f32
2557   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2558               [llvm_i64_ty, llvm_i32_ty, llvm_float_ty,
2559                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2560                llvm_float_ty], [],
2561               "llvm.nvvm.tex.unified.2d.array.grad.v4u32.f32">;
2562 
// tex.unified.3d intrinsics. Four scalar results per record (llvm_float_ty
// for v4f32, llvm_i32_ty for v4s32/v4u32). After the leading llvm_i64_ty
// operand:
//   *_s32   : three llvm_i32_ty
//   *_f32   : three llvm_float_ty
//   level   : four llvm_float_ty
//   grad    : nine llvm_float_ty
// NOTE(review): presumably three coordinates, plus one LOD (level) or two
// 3-component derivatives (grad) -- confirm against the PTX ISA.
2563 def int_nvvm_tex_unified_3d_v4f32_s32
2564   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2565               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2566               [], "llvm.nvvm.tex.unified.3d.v4f32.s32">;
2567 def int_nvvm_tex_unified_3d_v4f32_f32
2568   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2569               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2570                llvm_float_ty], [],
2571               "llvm.nvvm.tex.unified.3d.v4f32.f32">;
2572 def int_nvvm_tex_unified_3d_level_v4f32_f32
2573   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2574               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2575                llvm_float_ty, llvm_float_ty], [],
2576               "llvm.nvvm.tex.unified.3d.level.v4f32.f32">;
2577 def int_nvvm_tex_unified_3d_grad_v4f32_f32
2578   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2579               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2580                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2581                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2582               "llvm.nvvm.tex.unified.3d.grad.v4f32.f32">;
2583 def int_nvvm_tex_unified_3d_v4s32_s32
2584   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2585               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2586               [], "llvm.nvvm.tex.unified.3d.v4s32.s32">;
2587 def int_nvvm_tex_unified_3d_v4s32_f32
2588   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2589               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2590                llvm_float_ty], [],
2591               "llvm.nvvm.tex.unified.3d.v4s32.f32">;
2592 def int_nvvm_tex_unified_3d_level_v4s32_f32
2593   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2594               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2595                llvm_float_ty, llvm_float_ty], [],
2596               "llvm.nvvm.tex.unified.3d.level.v4s32.f32">;
2597 def int_nvvm_tex_unified_3d_grad_v4s32_f32
2598   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2599               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2600                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2601                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2602               "llvm.nvvm.tex.unified.3d.grad.v4s32.f32">;
2603 def int_nvvm_tex_unified_3d_v4u32_s32
2604   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2605               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2606               [], "llvm.nvvm.tex.unified.3d.v4u32.s32">;
2607 def int_nvvm_tex_unified_3d_v4u32_f32
2608   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2609               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2610                llvm_float_ty], [],
2611               "llvm.nvvm.tex.unified.3d.v4u32.f32">;
2612 def int_nvvm_tex_unified_3d_level_v4u32_f32
2613   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2614               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2615                llvm_float_ty, llvm_float_ty], [],
2616               "llvm.nvvm.tex.unified.3d.level.v4u32.f32">;
2617 def int_nvvm_tex_unified_3d_grad_v4u32_f32
2618   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2619               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2620                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2621                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2622               "llvm.nvvm.tex.unified.3d.grad.v4u32.f32">;
2623 
// tex.unified.cube intrinsics (plain and .level). Only *_f32 operand variants
// are defined in this group. Four scalar results per record (llvm_float_ty for
// v4f32, llvm_i32_ty for v4s32/v4u32). After the leading llvm_i64_ty operand:
//   plain   : three llvm_float_ty
//   level   : four llvm_float_ty
// NOTE(review): presumably three direction coordinates plus an LOD for
// .level -- confirm against the PTX ISA.
2624 def int_nvvm_tex_unified_cube_v4f32_f32
2625   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2626               [llvm_i64_ty,
2627                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2628               "llvm.nvvm.tex.unified.cube.v4f32.f32">;
2629 def int_nvvm_tex_unified_cube_level_v4f32_f32
2630   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2631               [llvm_i64_ty,
2632                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2633               "llvm.nvvm.tex.unified.cube.level.v4f32.f32">;
2634 def int_nvvm_tex_unified_cube_v4s32_f32
2635   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2636               [llvm_i64_ty,
2637                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2638               "llvm.nvvm.tex.unified.cube.v4s32.f32">;
2639 def int_nvvm_tex_unified_cube_level_v4s32_f32
2640   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2641               [llvm_i64_ty,
2642                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2643               "llvm.nvvm.tex.unified.cube.level.v4s32.f32">;
2644 def int_nvvm_tex_unified_cube_v4u32_f32
2645   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2646               [llvm_i64_ty,
2647                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2648               "llvm.nvvm.tex.unified.cube.v4u32.f32">;
2649 def int_nvvm_tex_unified_cube_level_v4u32_f32
2650   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2651               [llvm_i64_ty,
2652                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2653               "llvm.nvvm.tex.unified.cube.level.v4u32.f32">;
2654 
// tex.unified.cube.array intrinsics (plain and .level). Same shape as the
// non-array cube records, with an llvm_i32_ty inserted right after the leading
// llvm_i64_ty. After those two operands:
//   plain   : three llvm_float_ty
//   level   : four llvm_float_ty
// NOTE(review): the i32 is presumably the array layer index -- confirm
// against the NVPTX backend / PTX ISA.
2655 def int_nvvm_tex_unified_cube_array_v4f32_f32
2656   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2657               [llvm_i64_ty, llvm_i32_ty,
2658                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2659               "llvm.nvvm.tex.unified.cube.array.v4f32.f32">;
2660 def int_nvvm_tex_unified_cube_array_level_v4f32_f32
2661   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2662               [llvm_i64_ty, llvm_i32_ty,
2663                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2664               "llvm.nvvm.tex.unified.cube.array.level.v4f32.f32">;
2665 def int_nvvm_tex_unified_cube_array_v4s32_f32
2666   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2667               [llvm_i64_ty, llvm_i32_ty,
2668                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2669               "llvm.nvvm.tex.unified.cube.array.v4s32.f32">;
2670 def int_nvvm_tex_unified_cube_array_level_v4s32_f32
2671   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2672               [llvm_i64_ty, llvm_i32_ty,
2673                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2674               "llvm.nvvm.tex.unified.cube.array.level.v4s32.f32">;
2675 def int_nvvm_tex_unified_cube_array_v4u32_f32
2676   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2677               [llvm_i64_ty, llvm_i32_ty,
2678                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2679               "llvm.nvvm.tex.unified.cube.array.v4u32.f32">;
2680 def int_nvvm_tex_unified_cube_array_level_v4u32_f32
2681   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2682               [llvm_i64_ty, llvm_i32_ty,
2683                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2684               "llvm.nvvm.tex.unified.cube.array.level.v4u32.f32">;
2685 
// tex.unified.cube.grad intrinsics, one per result flavour (v4f32 -> four
// llvm_float_ty; v4s32/v4u32 -> four llvm_i32_ty). Each takes the leading
// llvm_i64_ty operand followed by nine llvm_float_ty operands.
// NOTE(review): presumably three coordinates plus two 3-component
// derivatives -- confirm against the PTX ISA.
2686 def int_nvvm_tex_unified_cube_grad_v4f32_f32
2687   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2688               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2689                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2690                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2691               "llvm.nvvm.tex.unified.cube.grad.v4f32.f32">;
2692 def int_nvvm_tex_unified_cube_grad_v4s32_f32
2693   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2694               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2695                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2696                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2697               "llvm.nvvm.tex.unified.cube.grad.v4s32.f32">;
2698 def int_nvvm_tex_unified_cube_grad_v4u32_f32
2699   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2700               [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
2701                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2702                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2703               "llvm.nvvm.tex.unified.cube.grad.v4u32.f32">;
2704 
// tex.unified.cube.array.grad intrinsics, one per result flavour (v4f32 ->
// four llvm_float_ty; v4s32/v4u32 -> four llvm_i32_ty). Each takes
// llvm_i64_ty, llvm_i32_ty, then nine llvm_float_ty operands (split 2 + 4 + 3
// across the continuation lines below).
// NOTE(review): the i32 is presumably the array layer index and the floats
// three coordinates plus two 3-component derivatives -- confirm against the
// PTX ISA.
2705 def int_nvvm_tex_unified_cube_array_grad_v4f32_f32
2706   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2707               [llvm_i64_ty, llvm_i32_ty,
2708               llvm_float_ty, llvm_float_ty,
2709                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2710                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2711               "llvm.nvvm.tex.unified.cube.array.grad.v4f32.f32">;
2712 def int_nvvm_tex_unified_cube_array_grad_v4s32_f32
2713   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2714               [llvm_i64_ty, llvm_i32_ty,
2715               llvm_float_ty, llvm_float_ty,
2716                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2717                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2718               "llvm.nvvm.tex.unified.cube.array.grad.v4s32.f32">;
2719 def int_nvvm_tex_unified_cube_array_grad_v4u32_f32
2720   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2721               [llvm_i64_ty, llvm_i32_ty,
2722               llvm_float_ty, llvm_float_ty,
2723                llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
2724                llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
2725               "llvm.nvvm.tex.unified.cube.array.grad.v4u32.f32">;
2726 
// tld4.unified 2d intrinsics: one record per component selector (r/g/b/a, as
// spelled in the name) and per result flavour (v4f32/v4s32/v4u32). Unlike the
// non-unified tld4 records, these take a single llvm_i64_ty operand followed
// by two llvm_float_ty operands. Results are four llvm_float_ty (v4f32) or
// four llvm_i32_ty (v4s32 and v4u32 alike).
// NOTE(review): the i64 is presumably the unified texture handle and the
// floats the 2d coordinates -- confirm against the NVPTX backend.
2727 def int_nvvm_tld4_unified_r_2d_v4f32_f32
2728   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2729               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2730               "llvm.nvvm.tld4.unified.r.2d.v4f32.f32">;
2731 def int_nvvm_tld4_unified_g_2d_v4f32_f32
2732   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2733               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2734               "llvm.nvvm.tld4.unified.g.2d.v4f32.f32">;
2735 def int_nvvm_tld4_unified_b_2d_v4f32_f32
2736   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2737               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2738               "llvm.nvvm.tld4.unified.b.2d.v4f32.f32">;
2739 def int_nvvm_tld4_unified_a_2d_v4f32_f32
2740   : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
2741               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2742               "llvm.nvvm.tld4.unified.a.2d.v4f32.f32">;
2743 def int_nvvm_tld4_unified_r_2d_v4s32_f32
2744   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2745               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2746               "llvm.nvvm.tld4.unified.r.2d.v4s32.f32">;
2747 def int_nvvm_tld4_unified_g_2d_v4s32_f32
2748   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2749               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2750               "llvm.nvvm.tld4.unified.g.2d.v4s32.f32">;
2751 def int_nvvm_tld4_unified_b_2d_v4s32_f32
2752   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2753               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2754               "llvm.nvvm.tld4.unified.b.2d.v4s32.f32">;
2755 def int_nvvm_tld4_unified_a_2d_v4s32_f32
2756   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2757               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2758               "llvm.nvvm.tld4.unified.a.2d.v4s32.f32">;
2759 def int_nvvm_tld4_unified_r_2d_v4u32_f32
2760   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2761               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2762               "llvm.nvvm.tld4.unified.r.2d.v4u32.f32">;
2763 def int_nvvm_tld4_unified_g_2d_v4u32_f32
2764   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2765               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2766               "llvm.nvvm.tld4.unified.g.2d.v4u32.f32">;
2767 def int_nvvm_tld4_unified_b_2d_v4u32_f32
2768   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2769               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2770               "llvm.nvvm.tld4.unified.b.2d.v4u32.f32">;
2771 def int_nvvm_tld4_unified_a_2d_v4u32_f32
2772   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2773               [llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
2774               "llvm.nvvm.tld4.unified.a.2d.v4u32.f32">;
2775 
2776 
2777 //=== Surface Load
2778 // .clamp variants
// suld.1d.*.clamp intrinsics. Every record takes [llvm_i64_ty, llvm_i32_ty]
// and differs only in its result list:
//   i8 / i16         -> one llvm_i16_ty (the i8 variant also uses i16)
//   i32 / i64        -> one result of that width
//   v2* / v4*        -> two / four scalar results of the element type, with
//                       i8 elements again declared as llvm_i16_ty
// There is no v4i64 record in this group.
// NOTE(review): the i64 operand is presumably the surface handle and the i32
// the x coordinate -- confirm against the NVPTX backend / PTX ISA.
2779 def int_nvvm_suld_1d_i8_clamp
2780   : Intrinsic<[llvm_i16_ty],
2781               [llvm_i64_ty, llvm_i32_ty], [],
2782               "llvm.nvvm.suld.1d.i8.clamp">;
2783 def int_nvvm_suld_1d_i16_clamp
2784   : Intrinsic<[llvm_i16_ty],
2785               [llvm_i64_ty, llvm_i32_ty], [],
2786               "llvm.nvvm.suld.1d.i16.clamp">;
2787 def int_nvvm_suld_1d_i32_clamp
2788   : Intrinsic<[llvm_i32_ty],
2789               [llvm_i64_ty, llvm_i32_ty], [],
2790               "llvm.nvvm.suld.1d.i32.clamp">;
2791 def int_nvvm_suld_1d_i64_clamp
2792   : Intrinsic<[llvm_i64_ty],
2793               [llvm_i64_ty, llvm_i32_ty], [],
2794               "llvm.nvvm.suld.1d.i64.clamp">;
2795 def int_nvvm_suld_1d_v2i8_clamp
2796   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2797               [llvm_i64_ty, llvm_i32_ty], [],
2798               "llvm.nvvm.suld.1d.v2i8.clamp">;
2799 def int_nvvm_suld_1d_v2i16_clamp
2800   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2801               [llvm_i64_ty, llvm_i32_ty], [],
2802               "llvm.nvvm.suld.1d.v2i16.clamp">;
2803 def int_nvvm_suld_1d_v2i32_clamp
2804   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2805               [llvm_i64_ty, llvm_i32_ty], [],
2806               "llvm.nvvm.suld.1d.v2i32.clamp">;
2807 def int_nvvm_suld_1d_v2i64_clamp
2808   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2809               [llvm_i64_ty, llvm_i32_ty], [],
2810               "llvm.nvvm.suld.1d.v2i64.clamp">;
2811 def int_nvvm_suld_1d_v4i8_clamp
2812   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2813               [llvm_i64_ty, llvm_i32_ty], [],
2814               "llvm.nvvm.suld.1d.v4i8.clamp">;
2815 def int_nvvm_suld_1d_v4i16_clamp
2816   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2817               [llvm_i64_ty, llvm_i32_ty], [],
2818               "llvm.nvvm.suld.1d.v4i16.clamp">;
2819 def int_nvvm_suld_1d_v4i32_clamp
2820   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2821               [llvm_i64_ty, llvm_i32_ty], [],
2822               "llvm.nvvm.suld.1d.v4i32.clamp">;
2823 
// suld.1d.array.*.clamp intrinsics. Every record takes
// [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty] and differs only in its result list:
//   i8 / i16         -> one llvm_i16_ty (the i8 variant also uses i16)
//   i32 / i64        -> one result of that width
//   v2* / v4*        -> two / four scalar results of the element type, with
//                       i8 elements again declared as llvm_i16_ty
// There is no v4i64 record in this group.
// NOTE(review): the i64 is presumably the surface handle and the two i32s the
// array index and x coordinate (order unverified) -- confirm against the
// NVPTX backend / PTX ISA.
2824 def int_nvvm_suld_1d_array_i8_clamp
2825   : Intrinsic<[llvm_i16_ty],
2826               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2827               "llvm.nvvm.suld.1d.array.i8.clamp">;
2828 def int_nvvm_suld_1d_array_i16_clamp
2829   : Intrinsic<[llvm_i16_ty],
2830               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2831               "llvm.nvvm.suld.1d.array.i16.clamp">;
2832 def int_nvvm_suld_1d_array_i32_clamp
2833   : Intrinsic<[llvm_i32_ty],
2834               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2835               "llvm.nvvm.suld.1d.array.i32.clamp">;
2836 def int_nvvm_suld_1d_array_i64_clamp
2837   : Intrinsic<[llvm_i64_ty],
2838               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2839               "llvm.nvvm.suld.1d.array.i64.clamp">;
2840 def int_nvvm_suld_1d_array_v2i8_clamp
2841   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2842               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2843               "llvm.nvvm.suld.1d.array.v2i8.clamp">;
2844 def int_nvvm_suld_1d_array_v2i16_clamp
2845   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2846               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2847               "llvm.nvvm.suld.1d.array.v2i16.clamp">;
2848 def int_nvvm_suld_1d_array_v2i32_clamp
2849   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2850               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2851               "llvm.nvvm.suld.1d.array.v2i32.clamp">;
2852 def int_nvvm_suld_1d_array_v2i64_clamp
2853   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2854               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2855               "llvm.nvvm.suld.1d.array.v2i64.clamp">;
2856 def int_nvvm_suld_1d_array_v4i8_clamp
2857   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2858               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2859               "llvm.nvvm.suld.1d.array.v4i8.clamp">;
2860 def int_nvvm_suld_1d_array_v4i16_clamp
2861   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2862               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2863               "llvm.nvvm.suld.1d.array.v4i16.clamp">;
2864 def int_nvvm_suld_1d_array_v4i32_clamp
2865   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2866               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2867               "llvm.nvvm.suld.1d.array.v4i32.clamp">;
2868 
// suld.2d.*.clamp intrinsics. Every record takes
// [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty] and differs only in its result list:
//   i8 / i16         -> one llvm_i16_ty (the i8 variant also uses i16)
//   i32 / i64        -> one result of that width
//   v2* / v4*        -> two / four scalar results of the element type, with
//                       i8 elements again declared as llvm_i16_ty
// NOTE(review): the i64 is presumably the surface handle and the two i32s the
// x and y coordinates -- confirm against the NVPTX backend / PTX ISA.
2869 def int_nvvm_suld_2d_i8_clamp
2870   : Intrinsic<[llvm_i16_ty],
2871               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2872               "llvm.nvvm.suld.2d.i8.clamp">;
2873 def int_nvvm_suld_2d_i16_clamp
2874   : Intrinsic<[llvm_i16_ty],
2875               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2876               "llvm.nvvm.suld.2d.i16.clamp">;
2877 def int_nvvm_suld_2d_i32_clamp
2878   : Intrinsic<[llvm_i32_ty],
2879               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2880               "llvm.nvvm.suld.2d.i32.clamp">;
2881 def int_nvvm_suld_2d_i64_clamp
2882   : Intrinsic<[llvm_i64_ty],
2883               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2884               "llvm.nvvm.suld.2d.i64.clamp">;
2885 def int_nvvm_suld_2d_v2i8_clamp
2886   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2887               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2888               "llvm.nvvm.suld.2d.v2i8.clamp">;
2889 def int_nvvm_suld_2d_v2i16_clamp
2890   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2891               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2892               "llvm.nvvm.suld.2d.v2i16.clamp">;
2893 def int_nvvm_suld_2d_v2i32_clamp
2894   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2895               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2896               "llvm.nvvm.suld.2d.v2i32.clamp">;
2897 def int_nvvm_suld_2d_v2i64_clamp
2898   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2899               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2900               "llvm.nvvm.suld.2d.v2i64.clamp">;
2901 def int_nvvm_suld_2d_v4i8_clamp
2902   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2903               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2904               "llvm.nvvm.suld.2d.v4i8.clamp">;
2905 def int_nvvm_suld_2d_v4i16_clamp
2906   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2907               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2908               "llvm.nvvm.suld.2d.v4i16.clamp">;
2909 def int_nvvm_suld_2d_v4i32_clamp
2910   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2911               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
2912               "llvm.nvvm.suld.2d.v4i32.clamp">;
2913 
// suld 2d.array, .clamp: eleven records, one per element form (i8, i16, i32,
// i64, v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle, a layer index and
// x, y coordinates; .clamp presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
2914 def int_nvvm_suld_2d_array_i8_clamp
2915   : Intrinsic<[llvm_i16_ty],
2916               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2917               "llvm.nvvm.suld.2d.array.i8.clamp">;
2918 def int_nvvm_suld_2d_array_i16_clamp
2919   : Intrinsic<[llvm_i16_ty],
2920               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2921               "llvm.nvvm.suld.2d.array.i16.clamp">;
2922 def int_nvvm_suld_2d_array_i32_clamp
2923   : Intrinsic<[llvm_i32_ty],
2924               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2925               "llvm.nvvm.suld.2d.array.i32.clamp">;
2926 def int_nvvm_suld_2d_array_i64_clamp
2927   : Intrinsic<[llvm_i64_ty],
2928               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2929               "llvm.nvvm.suld.2d.array.i64.clamp">;
2930 def int_nvvm_suld_2d_array_v2i8_clamp
2931   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2932               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2933               "llvm.nvvm.suld.2d.array.v2i8.clamp">;
2934 def int_nvvm_suld_2d_array_v2i16_clamp
2935   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2936               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2937               "llvm.nvvm.suld.2d.array.v2i16.clamp">;
2938 def int_nvvm_suld_2d_array_v2i32_clamp
2939   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2940               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2941               "llvm.nvvm.suld.2d.array.v2i32.clamp">;
2942 def int_nvvm_suld_2d_array_v2i64_clamp
2943   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2944               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2945               "llvm.nvvm.suld.2d.array.v2i64.clamp">;
2946 def int_nvvm_suld_2d_array_v4i8_clamp
2947   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2948               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2949               "llvm.nvvm.suld.2d.array.v4i8.clamp">;
2950 def int_nvvm_suld_2d_array_v4i16_clamp
2951   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2952               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2953               "llvm.nvvm.suld.2d.array.v4i16.clamp">;
2954 def int_nvvm_suld_2d_array_v4i32_clamp
2955   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2956               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2957               "llvm.nvvm.suld.2d.array.v4i32.clamp">;
2958 
// suld 3d, .clamp: eleven records, one per element form (i8, i16, i32, i64,
// v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle followed by x, y, z
// coordinates; .clamp presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
2959 def int_nvvm_suld_3d_i8_clamp
2960   : Intrinsic<[llvm_i16_ty],
2961               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2962               "llvm.nvvm.suld.3d.i8.clamp">;
2963 def int_nvvm_suld_3d_i16_clamp
2964   : Intrinsic<[llvm_i16_ty],
2965               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2966               "llvm.nvvm.suld.3d.i16.clamp">;
2967 def int_nvvm_suld_3d_i32_clamp
2968   : Intrinsic<[llvm_i32_ty],
2969               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2970               "llvm.nvvm.suld.3d.i32.clamp">;
2971 def int_nvvm_suld_3d_i64_clamp
2972   : Intrinsic<[llvm_i64_ty],
2973               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2974               "llvm.nvvm.suld.3d.i64.clamp">;
2975 def int_nvvm_suld_3d_v2i8_clamp
2976   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2977               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2978               "llvm.nvvm.suld.3d.v2i8.clamp">;
2979 def int_nvvm_suld_3d_v2i16_clamp
2980   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
2981               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2982               "llvm.nvvm.suld.3d.v2i16.clamp">;
2983 def int_nvvm_suld_3d_v2i32_clamp
2984   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
2985               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2986               "llvm.nvvm.suld.3d.v2i32.clamp">;
2987 def int_nvvm_suld_3d_v2i64_clamp
2988   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
2989               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2990               "llvm.nvvm.suld.3d.v2i64.clamp">;
2991 def int_nvvm_suld_3d_v4i8_clamp
2992   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2993               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2994               "llvm.nvvm.suld.3d.v4i8.clamp">;
2995 def int_nvvm_suld_3d_v4i16_clamp
2996   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
2997               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
2998               "llvm.nvvm.suld.3d.v4i16.clamp">;
2999 def int_nvvm_suld_3d_v4i32_clamp
3000   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3001               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3002               "llvm.nvvm.suld.3d.v4i32.clamp">;
3003 
3004 // .trap variants
// suld 1d, .trap: eleven records, one per element form (i8, i16, i32, i64,
// v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32) and have an empty property list. The i8 forms return
// i16 results, same as the i16 forms; a vN form returns N separate scalar
// results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle followed by an x
// coordinate; .trap presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3005 def int_nvvm_suld_1d_i8_trap
3006   : Intrinsic<[llvm_i16_ty],
3007               [llvm_i64_ty, llvm_i32_ty], [],
3008               "llvm.nvvm.suld.1d.i8.trap">;
3009 def int_nvvm_suld_1d_i16_trap
3010   : Intrinsic<[llvm_i16_ty],
3011               [llvm_i64_ty, llvm_i32_ty], [],
3012               "llvm.nvvm.suld.1d.i16.trap">;
3013 def int_nvvm_suld_1d_i32_trap
3014   : Intrinsic<[llvm_i32_ty],
3015               [llvm_i64_ty, llvm_i32_ty], [],
3016               "llvm.nvvm.suld.1d.i32.trap">;
3017 def int_nvvm_suld_1d_i64_trap
3018   : Intrinsic<[llvm_i64_ty],
3019               [llvm_i64_ty, llvm_i32_ty], [],
3020               "llvm.nvvm.suld.1d.i64.trap">;
3021 def int_nvvm_suld_1d_v2i8_trap
3022   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3023               [llvm_i64_ty, llvm_i32_ty], [],
3024               "llvm.nvvm.suld.1d.v2i8.trap">;
3025 def int_nvvm_suld_1d_v2i16_trap
3026   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3027               [llvm_i64_ty, llvm_i32_ty], [],
3028               "llvm.nvvm.suld.1d.v2i16.trap">;
3029 def int_nvvm_suld_1d_v2i32_trap
3030   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3031               [llvm_i64_ty, llvm_i32_ty], [],
3032               "llvm.nvvm.suld.1d.v2i32.trap">;
3033 def int_nvvm_suld_1d_v2i64_trap
3034   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3035               [llvm_i64_ty, llvm_i32_ty], [],
3036               "llvm.nvvm.suld.1d.v2i64.trap">;
3037 def int_nvvm_suld_1d_v4i8_trap
3038   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3039               [llvm_i64_ty, llvm_i32_ty], [],
3040               "llvm.nvvm.suld.1d.v4i8.trap">;
3041 def int_nvvm_suld_1d_v4i16_trap
3042   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3043               [llvm_i64_ty, llvm_i32_ty], [],
3044               "llvm.nvvm.suld.1d.v4i16.trap">;
3045 def int_nvvm_suld_1d_v4i32_trap
3046   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3047               [llvm_i64_ty, llvm_i32_ty], [],
3048               "llvm.nvvm.suld.1d.v4i32.trap">;
3049 
// suld 1d.array, .trap: eleven records, one per element form (i8, i16, i32,
// i64, v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle, a layer index and
// an x coordinate; .trap presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3050 def int_nvvm_suld_1d_array_i8_trap
3051   : Intrinsic<[llvm_i16_ty],
3052               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3053               "llvm.nvvm.suld.1d.array.i8.trap">;
3054 def int_nvvm_suld_1d_array_i16_trap
3055   : Intrinsic<[llvm_i16_ty],
3056               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3057               "llvm.nvvm.suld.1d.array.i16.trap">;
3058 def int_nvvm_suld_1d_array_i32_trap
3059   : Intrinsic<[llvm_i32_ty],
3060               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3061               "llvm.nvvm.suld.1d.array.i32.trap">;
3062 def int_nvvm_suld_1d_array_i64_trap
3063   : Intrinsic<[llvm_i64_ty],
3064               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3065               "llvm.nvvm.suld.1d.array.i64.trap">;
3066 def int_nvvm_suld_1d_array_v2i8_trap
3067   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3068               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3069               "llvm.nvvm.suld.1d.array.v2i8.trap">;
3070 def int_nvvm_suld_1d_array_v2i16_trap
3071   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3072               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3073               "llvm.nvvm.suld.1d.array.v2i16.trap">;
3074 def int_nvvm_suld_1d_array_v2i32_trap
3075   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3076               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3077               "llvm.nvvm.suld.1d.array.v2i32.trap">;
3078 def int_nvvm_suld_1d_array_v2i64_trap
3079   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3080               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3081               "llvm.nvvm.suld.1d.array.v2i64.trap">;
3082 def int_nvvm_suld_1d_array_v4i8_trap
3083   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3084               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3085               "llvm.nvvm.suld.1d.array.v4i8.trap">;
3086 def int_nvvm_suld_1d_array_v4i16_trap
3087   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3088               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3089               "llvm.nvvm.suld.1d.array.v4i16.trap">;
3090 def int_nvvm_suld_1d_array_v4i32_trap
3091   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3092               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3093               "llvm.nvvm.suld.1d.array.v4i32.trap">;
3094 
// suld 2d, .trap: eleven records, one per element form (i8, i16, i32, i64,
// v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle followed by x, y
// coordinates; .trap presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3095 def int_nvvm_suld_2d_i8_trap
3096   : Intrinsic<[llvm_i16_ty],
3097               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3098               "llvm.nvvm.suld.2d.i8.trap">;
3099 def int_nvvm_suld_2d_i16_trap
3100   : Intrinsic<[llvm_i16_ty],
3101               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3102               "llvm.nvvm.suld.2d.i16.trap">;
3103 def int_nvvm_suld_2d_i32_trap
3104   : Intrinsic<[llvm_i32_ty],
3105               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3106               "llvm.nvvm.suld.2d.i32.trap">;
3107 def int_nvvm_suld_2d_i64_trap
3108   : Intrinsic<[llvm_i64_ty],
3109               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3110               "llvm.nvvm.suld.2d.i64.trap">;
3111 def int_nvvm_suld_2d_v2i8_trap
3112   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3113               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3114               "llvm.nvvm.suld.2d.v2i8.trap">;
3115 def int_nvvm_suld_2d_v2i16_trap
3116   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3117               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3118               "llvm.nvvm.suld.2d.v2i16.trap">;
3119 def int_nvvm_suld_2d_v2i32_trap
3120   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3121               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3122               "llvm.nvvm.suld.2d.v2i32.trap">;
3123 def int_nvvm_suld_2d_v2i64_trap
3124   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3125               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3126               "llvm.nvvm.suld.2d.v2i64.trap">;
3127 def int_nvvm_suld_2d_v4i8_trap
3128   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3129               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3130               "llvm.nvvm.suld.2d.v4i8.trap">;
3131 def int_nvvm_suld_2d_v4i16_trap
3132   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3133               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3134               "llvm.nvvm.suld.2d.v4i16.trap">;
3135 def int_nvvm_suld_2d_v4i32_trap
3136   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3137               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3138               "llvm.nvvm.suld.2d.v4i32.trap">;
3139 
// suld 2d.array, .trap: eleven records, one per element form (i8, i16, i32,
// i64, v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle, a layer index and
// x, y coordinates; .trap presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3140 def int_nvvm_suld_2d_array_i8_trap
3141   : Intrinsic<[llvm_i16_ty],
3142               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3143               "llvm.nvvm.suld.2d.array.i8.trap">;
3144 def int_nvvm_suld_2d_array_i16_trap
3145   : Intrinsic<[llvm_i16_ty],
3146               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3147               "llvm.nvvm.suld.2d.array.i16.trap">;
3148 def int_nvvm_suld_2d_array_i32_trap
3149   : Intrinsic<[llvm_i32_ty],
3150               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3151               "llvm.nvvm.suld.2d.array.i32.trap">;
3152 def int_nvvm_suld_2d_array_i64_trap
3153   : Intrinsic<[llvm_i64_ty],
3154               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3155               "llvm.nvvm.suld.2d.array.i64.trap">;
3156 def int_nvvm_suld_2d_array_v2i8_trap
3157   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3158               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3159               "llvm.nvvm.suld.2d.array.v2i8.trap">;
3160 def int_nvvm_suld_2d_array_v2i16_trap
3161   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3162               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3163               "llvm.nvvm.suld.2d.array.v2i16.trap">;
3164 def int_nvvm_suld_2d_array_v2i32_trap
3165   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3166               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3167               "llvm.nvvm.suld.2d.array.v2i32.trap">;
3168 def int_nvvm_suld_2d_array_v2i64_trap
3169   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3170               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3171               "llvm.nvvm.suld.2d.array.v2i64.trap">;
3172 def int_nvvm_suld_2d_array_v4i8_trap
3173   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3174               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3175               "llvm.nvvm.suld.2d.array.v4i8.trap">;
3176 def int_nvvm_suld_2d_array_v4i16_trap
3177   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3178               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3179               "llvm.nvvm.suld.2d.array.v4i16.trap">;
3180 def int_nvvm_suld_2d_array_v4i32_trap
3181   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3182               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3183               "llvm.nvvm.suld.2d.array.v4i32.trap">;
3184 
// suld 3d, .trap: eleven records, one per element form (i8, i16, i32, i64,
// v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle followed by x, y, z
// coordinates; .trap presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3185 def int_nvvm_suld_3d_i8_trap
3186   : Intrinsic<[llvm_i16_ty],
3187               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3188               "llvm.nvvm.suld.3d.i8.trap">;
3189 def int_nvvm_suld_3d_i16_trap
3190   : Intrinsic<[llvm_i16_ty],
3191               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3192               "llvm.nvvm.suld.3d.i16.trap">;
3193 def int_nvvm_suld_3d_i32_trap
3194   : Intrinsic<[llvm_i32_ty],
3195               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3196               "llvm.nvvm.suld.3d.i32.trap">;
3197 def int_nvvm_suld_3d_i64_trap
3198   : Intrinsic<[llvm_i64_ty],
3199               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3200               "llvm.nvvm.suld.3d.i64.trap">;
3201 def int_nvvm_suld_3d_v2i8_trap
3202   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3203               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3204               "llvm.nvvm.suld.3d.v2i8.trap">;
3205 def int_nvvm_suld_3d_v2i16_trap
3206   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3207               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3208               "llvm.nvvm.suld.3d.v2i16.trap">;
3209 def int_nvvm_suld_3d_v2i32_trap
3210   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3211               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3212               "llvm.nvvm.suld.3d.v2i32.trap">;
3213 def int_nvvm_suld_3d_v2i64_trap
3214   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3215               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3216               "llvm.nvvm.suld.3d.v2i64.trap">;
3217 def int_nvvm_suld_3d_v4i8_trap
3218   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3219               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3220               "llvm.nvvm.suld.3d.v4i8.trap">;
3221 def int_nvvm_suld_3d_v4i16_trap
3222   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3223               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3224               "llvm.nvvm.suld.3d.v4i16.trap">;
3225 def int_nvvm_suld_3d_v4i32_trap
3226   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3227               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3228               "llvm.nvvm.suld.3d.v4i32.trap">;
3229 
3230 // .zero variants
// suld 1d, .zero: eleven records, one per element form (i8, i16, i32, i64,
// v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32) and have an empty property list. The i8 forms return
// i16 results, same as the i16 forms; a vN form returns N separate scalar
// results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle followed by an x
// coordinate; .zero presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3231 def int_nvvm_suld_1d_i8_zero
3232   : Intrinsic<[llvm_i16_ty],
3233               [llvm_i64_ty, llvm_i32_ty], [],
3234               "llvm.nvvm.suld.1d.i8.zero">;
3235 def int_nvvm_suld_1d_i16_zero
3236   : Intrinsic<[llvm_i16_ty],
3237               [llvm_i64_ty, llvm_i32_ty], [],
3238               "llvm.nvvm.suld.1d.i16.zero">;
3239 def int_nvvm_suld_1d_i32_zero
3240   : Intrinsic<[llvm_i32_ty],
3241               [llvm_i64_ty, llvm_i32_ty], [],
3242               "llvm.nvvm.suld.1d.i32.zero">;
3243 def int_nvvm_suld_1d_i64_zero
3244   : Intrinsic<[llvm_i64_ty],
3245               [llvm_i64_ty, llvm_i32_ty], [],
3246               "llvm.nvvm.suld.1d.i64.zero">;
3247 def int_nvvm_suld_1d_v2i8_zero
3248   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3249               [llvm_i64_ty, llvm_i32_ty], [],
3250               "llvm.nvvm.suld.1d.v2i8.zero">;
3251 def int_nvvm_suld_1d_v2i16_zero
3252   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3253               [llvm_i64_ty, llvm_i32_ty], [],
3254               "llvm.nvvm.suld.1d.v2i16.zero">;
3255 def int_nvvm_suld_1d_v2i32_zero
3256   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3257               [llvm_i64_ty, llvm_i32_ty], [],
3258               "llvm.nvvm.suld.1d.v2i32.zero">;
3259 def int_nvvm_suld_1d_v2i64_zero
3260   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3261               [llvm_i64_ty, llvm_i32_ty], [],
3262               "llvm.nvvm.suld.1d.v2i64.zero">;
3263 def int_nvvm_suld_1d_v4i8_zero
3264   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3265               [llvm_i64_ty, llvm_i32_ty], [],
3266               "llvm.nvvm.suld.1d.v4i8.zero">;
3267 def int_nvvm_suld_1d_v4i16_zero
3268   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3269               [llvm_i64_ty, llvm_i32_ty], [],
3270               "llvm.nvvm.suld.1d.v4i16.zero">;
3271 def int_nvvm_suld_1d_v4i32_zero
3272   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3273               [llvm_i64_ty, llvm_i32_ty], [],
3274               "llvm.nvvm.suld.1d.v4i32.zero">;
3275 
// suld 1d.array, .zero: eleven records, one per element form (i8, i16, i32,
// i64, v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle, a layer index and
// an x coordinate; .zero presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3276 def int_nvvm_suld_1d_array_i8_zero
3277   : Intrinsic<[llvm_i16_ty],
3278               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3279               "llvm.nvvm.suld.1d.array.i8.zero">;
3280 def int_nvvm_suld_1d_array_i16_zero
3281   : Intrinsic<[llvm_i16_ty],
3282               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3283               "llvm.nvvm.suld.1d.array.i16.zero">;
3284 def int_nvvm_suld_1d_array_i32_zero
3285   : Intrinsic<[llvm_i32_ty],
3286               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3287               "llvm.nvvm.suld.1d.array.i32.zero">;
3288 def int_nvvm_suld_1d_array_i64_zero
3289   : Intrinsic<[llvm_i64_ty],
3290               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3291               "llvm.nvvm.suld.1d.array.i64.zero">;
3292 def int_nvvm_suld_1d_array_v2i8_zero
3293   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3294               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3295               "llvm.nvvm.suld.1d.array.v2i8.zero">;
3296 def int_nvvm_suld_1d_array_v2i16_zero
3297   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3298               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3299               "llvm.nvvm.suld.1d.array.v2i16.zero">;
3300 def int_nvvm_suld_1d_array_v2i32_zero
3301   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3302               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3303               "llvm.nvvm.suld.1d.array.v2i32.zero">;
3304 def int_nvvm_suld_1d_array_v2i64_zero
3305   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3306               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3307               "llvm.nvvm.suld.1d.array.v2i64.zero">;
3308 def int_nvvm_suld_1d_array_v4i8_zero
3309   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3310               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3311               "llvm.nvvm.suld.1d.array.v4i8.zero">;
3312 def int_nvvm_suld_1d_array_v4i16_zero
3313   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3314               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3315               "llvm.nvvm.suld.1d.array.v4i16.zero">;
3316 def int_nvvm_suld_1d_array_v4i32_zero
3317   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3318               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3319               "llvm.nvvm.suld.1d.array.v4i32.zero">;
3320 
// suld 2d, .zero: eleven records, one per element form (i8, i16, i32, i64,
// v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle followed by x, y
// coordinates; .zero presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3321 def int_nvvm_suld_2d_i8_zero
3322   : Intrinsic<[llvm_i16_ty],
3323               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3324               "llvm.nvvm.suld.2d.i8.zero">;
3325 def int_nvvm_suld_2d_i16_zero
3326   : Intrinsic<[llvm_i16_ty],
3327               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3328               "llvm.nvvm.suld.2d.i16.zero">;
3329 def int_nvvm_suld_2d_i32_zero
3330   : Intrinsic<[llvm_i32_ty],
3331               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3332               "llvm.nvvm.suld.2d.i32.zero">;
3333 def int_nvvm_suld_2d_i64_zero
3334   : Intrinsic<[llvm_i64_ty],
3335               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3336               "llvm.nvvm.suld.2d.i64.zero">;
3337 def int_nvvm_suld_2d_v2i8_zero
3338   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3339               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3340               "llvm.nvvm.suld.2d.v2i8.zero">;
3341 def int_nvvm_suld_2d_v2i16_zero
3342   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3343               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3344               "llvm.nvvm.suld.2d.v2i16.zero">;
3345 def int_nvvm_suld_2d_v2i32_zero
3346   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3347               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3348               "llvm.nvvm.suld.2d.v2i32.zero">;
3349 def int_nvvm_suld_2d_v2i64_zero
3350   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3351               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3352               "llvm.nvvm.suld.2d.v2i64.zero">;
3353 def int_nvvm_suld_2d_v4i8_zero
3354   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3355               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3356               "llvm.nvvm.suld.2d.v4i8.zero">;
3357 def int_nvvm_suld_2d_v4i16_zero
3358   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3359               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3360               "llvm.nvvm.suld.2d.v4i16.zero">;
3361 def int_nvvm_suld_2d_v4i32_zero
3362   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3363               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3364               "llvm.nvvm.suld.2d.v4i32.zero">;
3365 
// suld 2d.array, .zero: eleven records, one per element form (i8, i16, i32,
// i64, v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle, a layer index and
// x, y coordinates; .zero presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3366 def int_nvvm_suld_2d_array_i8_zero
3367   : Intrinsic<[llvm_i16_ty],
3368               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3369               "llvm.nvvm.suld.2d.array.i8.zero">;
3370 def int_nvvm_suld_2d_array_i16_zero
3371   : Intrinsic<[llvm_i16_ty],
3372               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3373               "llvm.nvvm.suld.2d.array.i16.zero">;
3374 def int_nvvm_suld_2d_array_i32_zero
3375   : Intrinsic<[llvm_i32_ty],
3376               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3377               "llvm.nvvm.suld.2d.array.i32.zero">;
3378 def int_nvvm_suld_2d_array_i64_zero
3379   : Intrinsic<[llvm_i64_ty],
3380               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3381               "llvm.nvvm.suld.2d.array.i64.zero">;
3382 def int_nvvm_suld_2d_array_v2i8_zero
3383   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3384               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3385               "llvm.nvvm.suld.2d.array.v2i8.zero">;
3386 def int_nvvm_suld_2d_array_v2i16_zero
3387   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3388               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3389               "llvm.nvvm.suld.2d.array.v2i16.zero">;
3390 def int_nvvm_suld_2d_array_v2i32_zero
3391   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3392               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3393               "llvm.nvvm.suld.2d.array.v2i32.zero">;
3394 def int_nvvm_suld_2d_array_v2i64_zero
3395   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3396               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3397               "llvm.nvvm.suld.2d.array.v2i64.zero">;
3398 def int_nvvm_suld_2d_array_v4i8_zero
3399   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3400               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3401               "llvm.nvvm.suld.2d.array.v4i8.zero">;
3402 def int_nvvm_suld_2d_array_v4i16_zero
3403   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3404               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3405               "llvm.nvvm.suld.2d.array.v4i16.zero">;
3406 def int_nvvm_suld_2d_array_v4i32_zero
3407   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3408               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3409               "llvm.nvvm.suld.2d.array.v4i32.zero">;
3410 
// suld 3d, .zero: eleven records, one per element form (i8, i16, i32, i64,
// v2i8, v2i16, v2i32, v2i64, v4i8, v4i16, v4i32).
// All take (i64, i32, i32, i32) and have an empty property list. The i8 forms
// return i16 results, same as the i16 forms; a vN form returns N separate
// scalar results, not a vector type. There is no v4i64 form.
// NOTE(review): operands are presumably a surface handle followed by x, y, z
// coordinates; .zero presumably names the out-of-bounds mode -- confirm
// against the PTX ISA.
3411 def int_nvvm_suld_3d_i8_zero
3412   : Intrinsic<[llvm_i16_ty],
3413               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3414               "llvm.nvvm.suld.3d.i8.zero">;
3415 def int_nvvm_suld_3d_i16_zero
3416   : Intrinsic<[llvm_i16_ty],
3417               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3418               "llvm.nvvm.suld.3d.i16.zero">;
3419 def int_nvvm_suld_3d_i32_zero
3420   : Intrinsic<[llvm_i32_ty],
3421               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3422               "llvm.nvvm.suld.3d.i32.zero">;
3423 def int_nvvm_suld_3d_i64_zero
3424   : Intrinsic<[llvm_i64_ty],
3425               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3426               "llvm.nvvm.suld.3d.i64.zero">;
3427 def int_nvvm_suld_3d_v2i8_zero
3428   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3429               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3430               "llvm.nvvm.suld.3d.v2i8.zero">;
3431 def int_nvvm_suld_3d_v2i16_zero
3432   : Intrinsic<[llvm_i16_ty, llvm_i16_ty],
3433               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3434               "llvm.nvvm.suld.3d.v2i16.zero">;
3435 def int_nvvm_suld_3d_v2i32_zero
3436   : Intrinsic<[llvm_i32_ty, llvm_i32_ty],
3437               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3438               "llvm.nvvm.suld.3d.v2i32.zero">;
3439 def int_nvvm_suld_3d_v2i64_zero
3440   : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
3441               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3442               "llvm.nvvm.suld.3d.v2i64.zero">;
3443 def int_nvvm_suld_3d_v4i8_zero
3444   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3445               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3446               "llvm.nvvm.suld.3d.v4i8.zero">;
3447 def int_nvvm_suld_3d_v4i16_zero
3448   : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
3449               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3450               "llvm.nvvm.suld.3d.v4i16.zero">;
3451 def int_nvvm_suld_3d_v4i32_zero
3452   : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
3453               [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3454               "llvm.nvvm.suld.3d.v4i32.zero">;
3455 
3456 //===- Texture Query ------------------------------------------------------===//
3457 
// txq queries: eight records (channel order, channel data type, width,
// height, depth, array size, number of samples, number of mipmap levels).
// Each takes a single i64, returns an i32, is marked IntrNoMem, and is also
// exposed as the Clang builtin __nvvm_txq_<attribute>.
// NOTE(review): the i64 operand is presumably a texture handle -- confirm
// against the NVPTX backend.
3458 def int_nvvm_txq_channel_order
3459   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3460               "llvm.nvvm.txq.channel.order">,
3461     ClangBuiltin<"__nvvm_txq_channel_order">;
3462 def int_nvvm_txq_channel_data_type
3463   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3464               "llvm.nvvm.txq.channel.data.type">,
3465     ClangBuiltin<"__nvvm_txq_channel_data_type">;
3466 def int_nvvm_txq_width
3467   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3468               "llvm.nvvm.txq.width">,
3469     ClangBuiltin<"__nvvm_txq_width">;
3470 def int_nvvm_txq_height
3471   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3472               "llvm.nvvm.txq.height">,
3473     ClangBuiltin<"__nvvm_txq_height">;
3474 def int_nvvm_txq_depth
3475   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3476               "llvm.nvvm.txq.depth">,
3477     ClangBuiltin<"__nvvm_txq_depth">;
3478 def int_nvvm_txq_array_size
3479   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3480               "llvm.nvvm.txq.array.size">,
3481     ClangBuiltin<"__nvvm_txq_array_size">;
3482 def int_nvvm_txq_num_samples
3483   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3484               "llvm.nvvm.txq.num.samples">,
3485     ClangBuiltin<"__nvvm_txq_num_samples">;
3486 def int_nvvm_txq_num_mipmap_levels
3487   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3488               "llvm.nvvm.txq.num.mipmap.levels">,
3489     ClangBuiltin<"__nvvm_txq_num_mipmap_levels">;
3490 
3491 //===- Surface Query ------------------------------------------------------===//
3492 
// suq queries: six records (channel order, channel data type, width, height,
// depth, array size). Unlike the txq set there are no num_samples or
// num_mipmap_levels records here.
// Each takes a single i64, returns an i32, is marked IntrNoMem, and is also
// exposed as the Clang builtin __nvvm_suq_<attribute>.
// NOTE(review): the i64 operand is presumably a surface handle -- confirm
// against the NVPTX backend.
3493 def int_nvvm_suq_channel_order
3494   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3495               "llvm.nvvm.suq.channel.order">,
3496     ClangBuiltin<"__nvvm_suq_channel_order">;
3497 def int_nvvm_suq_channel_data_type
3498   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3499               "llvm.nvvm.suq.channel.data.type">,
3500     ClangBuiltin<"__nvvm_suq_channel_data_type">;
3501 def int_nvvm_suq_width
3502   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3503               "llvm.nvvm.suq.width">,
3504     ClangBuiltin<"__nvvm_suq_width">;
3505 def int_nvvm_suq_height
3506   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3507               "llvm.nvvm.suq.height">,
3508     ClangBuiltin<"__nvvm_suq_height">;
3509 def int_nvvm_suq_depth
3510   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3511               "llvm.nvvm.suq.depth">,
3512     ClangBuiltin<"__nvvm_suq_depth">;
3513 def int_nvvm_suq_array_size
3514   : Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem],
3515               "llvm.nvvm.suq.array.size">,
3516     ClangBuiltin<"__nvvm_suq_array_size">;
3517 
3518 
3519 //===- Handle Query -------------------------------------------------------===//
3520 
// llvm.nvvm.istypep.{sampler,surface,texture} handle query intrinsics.
// Every record has the same shape: one i64 operand, one i1 result, the
// IntrNoMem attribute, and a Clang builtin named __nvvm_istypep_<kind>.
// NOTE(review): presumably the i1 result tells whether the i64 handle refers
// to an object of the named kind -- confirm against the PTX istypep
// instruction.
3521 def int_nvvm_istypep_sampler
3522   : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3523               "llvm.nvvm.istypep.sampler">,
3524     ClangBuiltin<"__nvvm_istypep_sampler">;
3525 def int_nvvm_istypep_surface
3526   : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3527               "llvm.nvvm.istypep.surface">,
3528     ClangBuiltin<"__nvvm_istypep_surface">;
3529 def int_nvvm_istypep_texture
3530   : Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem],
3531               "llvm.nvvm.istypep.texture">,
3532     ClangBuiltin<"__nvvm_istypep_texture">;
3533 
3534 
3535 
3536 //===- Surface Stores -----------------------------------------------------===//
3537 
3538 // Unformatted
3539 // .clamp variant
// llvm.nvvm.sust.b.1d.<type>.clamp surface store intrinsics.
// Operand layout: one i64, one i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the i32 the x
// coordinate -- confirm against the NVPTX backend lowering.
3540 def int_nvvm_sust_b_1d_i8_clamp
3541   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3542               "llvm.nvvm.sust.b.1d.i8.clamp">,
3543     ClangBuiltin<"__nvvm_sust_b_1d_i8_clamp">;
3544 def int_nvvm_sust_b_1d_i16_clamp
3545   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3546               "llvm.nvvm.sust.b.1d.i16.clamp">,
3547     ClangBuiltin<"__nvvm_sust_b_1d_i16_clamp">;
3548 def int_nvvm_sust_b_1d_i32_clamp
3549   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3550               "llvm.nvvm.sust.b.1d.i32.clamp">,
3551     ClangBuiltin<"__nvvm_sust_b_1d_i32_clamp">;
3552 def int_nvvm_sust_b_1d_i64_clamp
3553   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
3554               "llvm.nvvm.sust.b.1d.i64.clamp">,
3555     ClangBuiltin<"__nvvm_sust_b_1d_i64_clamp">;
3556 def int_nvvm_sust_b_1d_v2i8_clamp
3557   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3558               "llvm.nvvm.sust.b.1d.v2i8.clamp">,
3559     ClangBuiltin<"__nvvm_sust_b_1d_v2i8_clamp">;
3560 def int_nvvm_sust_b_1d_v2i16_clamp
3561   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3562               "llvm.nvvm.sust.b.1d.v2i16.clamp">,
3563     ClangBuiltin<"__nvvm_sust_b_1d_v2i16_clamp">;
3564 def int_nvvm_sust_b_1d_v2i32_clamp
3565   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3566               "llvm.nvvm.sust.b.1d.v2i32.clamp">,
3567     ClangBuiltin<"__nvvm_sust_b_1d_v2i32_clamp">;
3568 def int_nvvm_sust_b_1d_v2i64_clamp
3569   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
3570               "llvm.nvvm.sust.b.1d.v2i64.clamp">,
3571     ClangBuiltin<"__nvvm_sust_b_1d_v2i64_clamp">;
3572 def int_nvvm_sust_b_1d_v4i8_clamp
3573   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3574                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3575               "llvm.nvvm.sust.b.1d.v4i8.clamp">,
3576     ClangBuiltin<"__nvvm_sust_b_1d_v4i8_clamp">;
3577 def int_nvvm_sust_b_1d_v4i16_clamp
3578   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3579                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3580               "llvm.nvvm.sust.b.1d.v4i16.clamp">,
3581     ClangBuiltin<"__nvvm_sust_b_1d_v4i16_clamp">;
3582 def int_nvvm_sust_b_1d_v4i32_clamp
3583   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3584                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3585               "llvm.nvvm.sust.b.1d.v4i32.clamp">,
3586     ClangBuiltin<"__nvvm_sust_b_1d_v4i32_clamp">;
3587 
3588 
// llvm.nvvm.sust.b.1d.array.<type>.clamp surface store intrinsics.
// Operand layout: one i64, two i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the two i32 are
// the array index and the x coordinate -- confirm order against the NVPTX
// backend lowering.
3589 def int_nvvm_sust_b_1d_array_i8_clamp
3590   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3591               "llvm.nvvm.sust.b.1d.array.i8.clamp">,
3592     ClangBuiltin<"__nvvm_sust_b_1d_array_i8_clamp">;
3593 def int_nvvm_sust_b_1d_array_i16_clamp
3594   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3595               "llvm.nvvm.sust.b.1d.array.i16.clamp">,
3596     ClangBuiltin<"__nvvm_sust_b_1d_array_i16_clamp">;
3597 def int_nvvm_sust_b_1d_array_i32_clamp
3598   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3599               "llvm.nvvm.sust.b.1d.array.i32.clamp">,
3600     ClangBuiltin<"__nvvm_sust_b_1d_array_i32_clamp">;
3601 def int_nvvm_sust_b_1d_array_i64_clamp
3602   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3603               "llvm.nvvm.sust.b.1d.array.i64.clamp">,
3604     ClangBuiltin<"__nvvm_sust_b_1d_array_i64_clamp">;
3605 def int_nvvm_sust_b_1d_array_v2i8_clamp
3606   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3607                    llvm_i16_ty, llvm_i16_ty], [],
3608               "llvm.nvvm.sust.b.1d.array.v2i8.clamp">,
3609     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_clamp">;
3610 def int_nvvm_sust_b_1d_array_v2i16_clamp
3611   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3612                    llvm_i16_ty, llvm_i16_ty], [],
3613               "llvm.nvvm.sust.b.1d.array.v2i16.clamp">,
3614     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_clamp">;
3615 def int_nvvm_sust_b_1d_array_v2i32_clamp
3616   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3617                    llvm_i32_ty, llvm_i32_ty], [],
3618               "llvm.nvvm.sust.b.1d.array.v2i32.clamp">,
3619     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_clamp">;
3620 def int_nvvm_sust_b_1d_array_v2i64_clamp
3621   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3622                    llvm_i64_ty, llvm_i64_ty], [],
3623               "llvm.nvvm.sust.b.1d.array.v2i64.clamp">,
3624     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_clamp">;
3625 def int_nvvm_sust_b_1d_array_v4i8_clamp
3626   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3627                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3628               "llvm.nvvm.sust.b.1d.array.v4i8.clamp">,
3629     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_clamp">;
3630 def int_nvvm_sust_b_1d_array_v4i16_clamp
3631   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3632                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3633               "llvm.nvvm.sust.b.1d.array.v4i16.clamp">,
3634     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_clamp">;
3635 def int_nvvm_sust_b_1d_array_v4i32_clamp
3636   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3637                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3638               "llvm.nvvm.sust.b.1d.array.v4i32.clamp">,
3639     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_clamp">;
3640 
3641 
// llvm.nvvm.sust.b.2d.<type>.clamp surface store intrinsics.
// Operand layout: one i64, two i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the two i32 are
// the x and y coordinates -- confirm against the NVPTX backend lowering.
3642 def int_nvvm_sust_b_2d_i8_clamp
3643   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3644               "llvm.nvvm.sust.b.2d.i8.clamp">,
3645     ClangBuiltin<"__nvvm_sust_b_2d_i8_clamp">;
3646 def int_nvvm_sust_b_2d_i16_clamp
3647   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3648               "llvm.nvvm.sust.b.2d.i16.clamp">,
3649     ClangBuiltin<"__nvvm_sust_b_2d_i16_clamp">;
3650 def int_nvvm_sust_b_2d_i32_clamp
3651   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3652               "llvm.nvvm.sust.b.2d.i32.clamp">,
3653     ClangBuiltin<"__nvvm_sust_b_2d_i32_clamp">;
3654 def int_nvvm_sust_b_2d_i64_clamp
3655   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3656               "llvm.nvvm.sust.b.2d.i64.clamp">,
3657     ClangBuiltin<"__nvvm_sust_b_2d_i64_clamp">;
3658 def int_nvvm_sust_b_2d_v2i8_clamp
3659   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3660                    llvm_i16_ty, llvm_i16_ty], [],
3661               "llvm.nvvm.sust.b.2d.v2i8.clamp">,
3662     ClangBuiltin<"__nvvm_sust_b_2d_v2i8_clamp">;
3663 def int_nvvm_sust_b_2d_v2i16_clamp
3664   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3665                    llvm_i16_ty, llvm_i16_ty], [],
3666               "llvm.nvvm.sust.b.2d.v2i16.clamp">,
3667     ClangBuiltin<"__nvvm_sust_b_2d_v2i16_clamp">;
3668 def int_nvvm_sust_b_2d_v2i32_clamp
3669   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3670                    llvm_i32_ty, llvm_i32_ty], [],
3671               "llvm.nvvm.sust.b.2d.v2i32.clamp">,
3672     ClangBuiltin<"__nvvm_sust_b_2d_v2i32_clamp">;
3673 def int_nvvm_sust_b_2d_v2i64_clamp
3674   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3675                    llvm_i64_ty, llvm_i64_ty], [],
3676               "llvm.nvvm.sust.b.2d.v2i64.clamp">,
3677     ClangBuiltin<"__nvvm_sust_b_2d_v2i64_clamp">;
3678 def int_nvvm_sust_b_2d_v4i8_clamp
3679   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3680                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3681               "llvm.nvvm.sust.b.2d.v4i8.clamp">,
3682     ClangBuiltin<"__nvvm_sust_b_2d_v4i8_clamp">;
3683 def int_nvvm_sust_b_2d_v4i16_clamp
3684   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3685                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3686               "llvm.nvvm.sust.b.2d.v4i16.clamp">,
3687     ClangBuiltin<"__nvvm_sust_b_2d_v4i16_clamp">;
3688 def int_nvvm_sust_b_2d_v4i32_clamp
3689   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3690                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3691               "llvm.nvvm.sust.b.2d.v4i32.clamp">,
3692     ClangBuiltin<"__nvvm_sust_b_2d_v4i32_clamp">;
3693 
3694 
// llvm.nvvm.sust.b.2d.array.<type>.clamp surface store intrinsics.
// Operand layout: one i64, three i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the three i32
// are the array index followed by the x and y coordinates -- confirm order
// against the NVPTX backend lowering.
3695 def int_nvvm_sust_b_2d_array_i8_clamp
3696   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3697                    llvm_i32_ty, llvm_i16_ty], [],
3698               "llvm.nvvm.sust.b.2d.array.i8.clamp">,
3699     ClangBuiltin<"__nvvm_sust_b_2d_array_i8_clamp">;
3700 def int_nvvm_sust_b_2d_array_i16_clamp
3701   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3702                    llvm_i32_ty, llvm_i16_ty], [],
3703               "llvm.nvvm.sust.b.2d.array.i16.clamp">,
3704     ClangBuiltin<"__nvvm_sust_b_2d_array_i16_clamp">;
3705 def int_nvvm_sust_b_2d_array_i32_clamp
3706   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3707                    llvm_i32_ty, llvm_i32_ty], [],
3708               "llvm.nvvm.sust.b.2d.array.i32.clamp">,
3709     ClangBuiltin<"__nvvm_sust_b_2d_array_i32_clamp">;
3710 def int_nvvm_sust_b_2d_array_i64_clamp
3711   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3712                    llvm_i32_ty, llvm_i64_ty], [],
3713               "llvm.nvvm.sust.b.2d.array.i64.clamp">,
3714     ClangBuiltin<"__nvvm_sust_b_2d_array_i64_clamp">;
3715 def int_nvvm_sust_b_2d_array_v2i8_clamp
3716   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3717                    llvm_i16_ty, llvm_i16_ty], [],
3718               "llvm.nvvm.sust.b.2d.array.v2i8.clamp">,
3719     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_clamp">;
3720 def int_nvvm_sust_b_2d_array_v2i16_clamp
3721   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3722                    llvm_i16_ty, llvm_i16_ty], [],
3723               "llvm.nvvm.sust.b.2d.array.v2i16.clamp">,
3724     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_clamp">;
3725 def int_nvvm_sust_b_2d_array_v2i32_clamp
3726   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3727                    llvm_i32_ty, llvm_i32_ty], [],
3728               "llvm.nvvm.sust.b.2d.array.v2i32.clamp">,
3729     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_clamp">;
3730 def int_nvvm_sust_b_2d_array_v2i64_clamp
3731   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3732                    llvm_i64_ty, llvm_i64_ty], [],
3733               "llvm.nvvm.sust.b.2d.array.v2i64.clamp">,
3734     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_clamp">;
3735 def int_nvvm_sust_b_2d_array_v4i8_clamp
3736   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3737                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3738               "llvm.nvvm.sust.b.2d.array.v4i8.clamp">,
3739     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_clamp">;
3740 def int_nvvm_sust_b_2d_array_v4i16_clamp
3741   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3742                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3743               "llvm.nvvm.sust.b.2d.array.v4i16.clamp">,
3744     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_clamp">;
3745 def int_nvvm_sust_b_2d_array_v4i32_clamp
3746   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3747                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3748               "llvm.nvvm.sust.b.2d.array.v4i32.clamp">,
3749     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_clamp">;
3750 
3751 
// llvm.nvvm.sust.b.3d.<type>.clamp surface store intrinsics.
// Operand layout: one i64, three i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the three i32
// are the x, y and z coordinates -- confirm against the NVPTX backend
// lowering.
3752 def int_nvvm_sust_b_3d_i8_clamp
3753   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3754                    llvm_i32_ty, llvm_i16_ty], [],
3755               "llvm.nvvm.sust.b.3d.i8.clamp">,
3756     ClangBuiltin<"__nvvm_sust_b_3d_i8_clamp">;
3757 def int_nvvm_sust_b_3d_i16_clamp
3758   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3759                    llvm_i32_ty, llvm_i16_ty], [],
3760               "llvm.nvvm.sust.b.3d.i16.clamp">,
3761     ClangBuiltin<"__nvvm_sust_b_3d_i16_clamp">;
3762 def int_nvvm_sust_b_3d_i32_clamp
3763   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3764                    llvm_i32_ty, llvm_i32_ty], [],
3765               "llvm.nvvm.sust.b.3d.i32.clamp">,
3766     ClangBuiltin<"__nvvm_sust_b_3d_i32_clamp">;
3767 def int_nvvm_sust_b_3d_i64_clamp
3768   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3769                    llvm_i32_ty, llvm_i64_ty], [],
3770               "llvm.nvvm.sust.b.3d.i64.clamp">,
3771     ClangBuiltin<"__nvvm_sust_b_3d_i64_clamp">;
3772 def int_nvvm_sust_b_3d_v2i8_clamp
3773   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3774                    llvm_i16_ty, llvm_i16_ty], [],
3775               "llvm.nvvm.sust.b.3d.v2i8.clamp">,
3776     ClangBuiltin<"__nvvm_sust_b_3d_v2i8_clamp">;
3777 def int_nvvm_sust_b_3d_v2i16_clamp
3778   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3779                    llvm_i16_ty, llvm_i16_ty], [],
3780               "llvm.nvvm.sust.b.3d.v2i16.clamp">,
3781     ClangBuiltin<"__nvvm_sust_b_3d_v2i16_clamp">;
3782 def int_nvvm_sust_b_3d_v2i32_clamp
3783   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3784                    llvm_i32_ty, llvm_i32_ty], [],
3785               "llvm.nvvm.sust.b.3d.v2i32.clamp">,
3786     ClangBuiltin<"__nvvm_sust_b_3d_v2i32_clamp">;
3787 def int_nvvm_sust_b_3d_v2i64_clamp
3788   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3789                    llvm_i64_ty, llvm_i64_ty], [],
3790               "llvm.nvvm.sust.b.3d.v2i64.clamp">,
3791     ClangBuiltin<"__nvvm_sust_b_3d_v2i64_clamp">;
3792 def int_nvvm_sust_b_3d_v4i8_clamp
3793   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3794                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3795               "llvm.nvvm.sust.b.3d.v4i8.clamp">,
3796     ClangBuiltin<"__nvvm_sust_b_3d_v4i8_clamp">;
3797 def int_nvvm_sust_b_3d_v4i16_clamp
3798   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3799                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3800               "llvm.nvvm.sust.b.3d.v4i16.clamp">,
3801     ClangBuiltin<"__nvvm_sust_b_3d_v4i16_clamp">;
3802 def int_nvvm_sust_b_3d_v4i32_clamp
3803   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3804                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3805               "llvm.nvvm.sust.b.3d.v4i32.clamp">,
3806     ClangBuiltin<"__nvvm_sust_b_3d_v4i32_clamp">;
3807 
3808 
3809 // .trap variant
// llvm.nvvm.sust.b.1d.<type>.trap surface store intrinsics.
// Operand layout: one i64, one i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the i32 the x
// coordinate -- confirm against the NVPTX backend lowering.
3810 def int_nvvm_sust_b_1d_i8_trap
3811   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3812               "llvm.nvvm.sust.b.1d.i8.trap">,
3813     ClangBuiltin<"__nvvm_sust_b_1d_i8_trap">;
3814 def int_nvvm_sust_b_1d_i16_trap
3815   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
3816               "llvm.nvvm.sust.b.1d.i16.trap">,
3817     ClangBuiltin<"__nvvm_sust_b_1d_i16_trap">;
3818 def int_nvvm_sust_b_1d_i32_trap
3819   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
3820               "llvm.nvvm.sust.b.1d.i32.trap">,
3821     ClangBuiltin<"__nvvm_sust_b_1d_i32_trap">;
3822 def int_nvvm_sust_b_1d_i64_trap
3823   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
3824               "llvm.nvvm.sust.b.1d.i64.trap">,
3825     ClangBuiltin<"__nvvm_sust_b_1d_i64_trap">;
3826 def int_nvvm_sust_b_1d_v2i8_trap
3827   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3828               "llvm.nvvm.sust.b.1d.v2i8.trap">,
3829     ClangBuiltin<"__nvvm_sust_b_1d_v2i8_trap">;
3830 def int_nvvm_sust_b_1d_v2i16_trap
3831   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
3832               "llvm.nvvm.sust.b.1d.v2i16.trap">,
3833     ClangBuiltin<"__nvvm_sust_b_1d_v2i16_trap">;
3834 def int_nvvm_sust_b_1d_v2i32_trap
3835   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3836               "llvm.nvvm.sust.b.1d.v2i32.trap">,
3837     ClangBuiltin<"__nvvm_sust_b_1d_v2i32_trap">;
3838 def int_nvvm_sust_b_1d_v2i64_trap
3839   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
3840               "llvm.nvvm.sust.b.1d.v2i64.trap">,
3841     ClangBuiltin<"__nvvm_sust_b_1d_v2i64_trap">;
3842 def int_nvvm_sust_b_1d_v4i8_trap
3843   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3844                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3845               "llvm.nvvm.sust.b.1d.v4i8.trap">,
3846     ClangBuiltin<"__nvvm_sust_b_1d_v4i8_trap">;
3847 def int_nvvm_sust_b_1d_v4i16_trap
3848   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
3849                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3850               "llvm.nvvm.sust.b.1d.v4i16.trap">,
3851     ClangBuiltin<"__nvvm_sust_b_1d_v4i16_trap">;
3852 def int_nvvm_sust_b_1d_v4i32_trap
3853   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3854                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3855               "llvm.nvvm.sust.b.1d.v4i32.trap">,
3856     ClangBuiltin<"__nvvm_sust_b_1d_v4i32_trap">;
3857 
3858 
// llvm.nvvm.sust.b.1d.array.<type>.trap surface store intrinsics.
// Operand layout: one i64, two i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the two i32 are
// the array index and the x coordinate -- confirm order against the NVPTX
// backend lowering.
3859 def int_nvvm_sust_b_1d_array_i8_trap
3860   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3861               "llvm.nvvm.sust.b.1d.array.i8.trap">,
3862     ClangBuiltin<"__nvvm_sust_b_1d_array_i8_trap">;
3863 def int_nvvm_sust_b_1d_array_i16_trap
3864   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3865               "llvm.nvvm.sust.b.1d.array.i16.trap">,
3866     ClangBuiltin<"__nvvm_sust_b_1d_array_i16_trap">;
3867 def int_nvvm_sust_b_1d_array_i32_trap
3868   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3869               "llvm.nvvm.sust.b.1d.array.i32.trap">,
3870     ClangBuiltin<"__nvvm_sust_b_1d_array_i32_trap">;
3871 def int_nvvm_sust_b_1d_array_i64_trap
3872   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3873               "llvm.nvvm.sust.b.1d.array.i64.trap">,
3874     ClangBuiltin<"__nvvm_sust_b_1d_array_i64_trap">;
3875 def int_nvvm_sust_b_1d_array_v2i8_trap
3876   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3877                    llvm_i16_ty, llvm_i16_ty], [],
3878               "llvm.nvvm.sust.b.1d.array.v2i8.trap">,
3879     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_trap">;
3880 def int_nvvm_sust_b_1d_array_v2i16_trap
3881   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3882                    llvm_i16_ty, llvm_i16_ty], [],
3883               "llvm.nvvm.sust.b.1d.array.v2i16.trap">,
3884     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_trap">;
3885 def int_nvvm_sust_b_1d_array_v2i32_trap
3886   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3887                    llvm_i32_ty, llvm_i32_ty], [],
3888               "llvm.nvvm.sust.b.1d.array.v2i32.trap">,
3889     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_trap">;
3890 def int_nvvm_sust_b_1d_array_v2i64_trap
3891   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3892                    llvm_i64_ty, llvm_i64_ty], [],
3893               "llvm.nvvm.sust.b.1d.array.v2i64.trap">,
3894     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_trap">;
3895 def int_nvvm_sust_b_1d_array_v4i8_trap
3896   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3897                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3898               "llvm.nvvm.sust.b.1d.array.v4i8.trap">,
3899     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_trap">;
3900 def int_nvvm_sust_b_1d_array_v4i16_trap
3901   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3902                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3903               "llvm.nvvm.sust.b.1d.array.v4i16.trap">,
3904     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_trap">;
3905 def int_nvvm_sust_b_1d_array_v4i32_trap
3906   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3907                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3908               "llvm.nvvm.sust.b.1d.array.v4i32.trap">,
3909     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_trap">;
3910 
3911 
// llvm.nvvm.sust.b.2d.<type>.trap surface store intrinsics.
// Operand layout: one i64, two i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the two i32 are
// the x and y coordinates -- confirm against the NVPTX backend lowering.
3912 def int_nvvm_sust_b_2d_i8_trap
3913   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3914               "llvm.nvvm.sust.b.2d.i8.trap">,
3915     ClangBuiltin<"__nvvm_sust_b_2d_i8_trap">;
3916 def int_nvvm_sust_b_2d_i16_trap
3917   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
3918               "llvm.nvvm.sust.b.2d.i16.trap">,
3919     ClangBuiltin<"__nvvm_sust_b_2d_i16_trap">;
3920 def int_nvvm_sust_b_2d_i32_trap
3921   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3922               "llvm.nvvm.sust.b.2d.i32.trap">,
3923     ClangBuiltin<"__nvvm_sust_b_2d_i32_trap">;
3924 def int_nvvm_sust_b_2d_i64_trap
3925   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
3926               "llvm.nvvm.sust.b.2d.i64.trap">,
3927     ClangBuiltin<"__nvvm_sust_b_2d_i64_trap">;
3928 def int_nvvm_sust_b_2d_v2i8_trap
3929   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3930                    llvm_i16_ty, llvm_i16_ty], [],
3931               "llvm.nvvm.sust.b.2d.v2i8.trap">,
3932     ClangBuiltin<"__nvvm_sust_b_2d_v2i8_trap">;
3933 def int_nvvm_sust_b_2d_v2i16_trap
3934   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3935                    llvm_i16_ty, llvm_i16_ty], [],
3936               "llvm.nvvm.sust.b.2d.v2i16.trap">,
3937     ClangBuiltin<"__nvvm_sust_b_2d_v2i16_trap">;
3938 def int_nvvm_sust_b_2d_v2i32_trap
3939   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3940                    llvm_i32_ty, llvm_i32_ty], [],
3941               "llvm.nvvm.sust.b.2d.v2i32.trap">,
3942     ClangBuiltin<"__nvvm_sust_b_2d_v2i32_trap">;
3943 def int_nvvm_sust_b_2d_v2i64_trap
3944   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3945                    llvm_i64_ty, llvm_i64_ty], [],
3946               "llvm.nvvm.sust.b.2d.v2i64.trap">,
3947     ClangBuiltin<"__nvvm_sust_b_2d_v2i64_trap">;
3948 def int_nvvm_sust_b_2d_v4i8_trap
3949   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3950                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3951               "llvm.nvvm.sust.b.2d.v4i8.trap">,
3952     ClangBuiltin<"__nvvm_sust_b_2d_v4i8_trap">;
3953 def int_nvvm_sust_b_2d_v4i16_trap
3954   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
3955                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
3956               "llvm.nvvm.sust.b.2d.v4i16.trap">,
3957     ClangBuiltin<"__nvvm_sust_b_2d_v4i16_trap">;
3958 def int_nvvm_sust_b_2d_v4i32_trap
3959   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3960                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
3961               "llvm.nvvm.sust.b.2d.v4i32.trap">,
3962     ClangBuiltin<"__nvvm_sust_b_2d_v4i32_trap">;
3963 
3964 
// llvm.nvvm.sust.b.2d.array.<type>.trap surface store intrinsics.
// Operand layout: one i64, three i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the three i32
// are the array index followed by the x and y coordinates -- confirm order
// against the NVPTX backend lowering.
3965 def int_nvvm_sust_b_2d_array_i8_trap
3966   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3967                    llvm_i32_ty, llvm_i16_ty], [],
3968               "llvm.nvvm.sust.b.2d.array.i8.trap">,
3969     ClangBuiltin<"__nvvm_sust_b_2d_array_i8_trap">;
3970 def int_nvvm_sust_b_2d_array_i16_trap
3971   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3972                    llvm_i32_ty, llvm_i16_ty], [],
3973               "llvm.nvvm.sust.b.2d.array.i16.trap">,
3974     ClangBuiltin<"__nvvm_sust_b_2d_array_i16_trap">;
3975 def int_nvvm_sust_b_2d_array_i32_trap
3976   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3977                    llvm_i32_ty, llvm_i32_ty], [],
3978               "llvm.nvvm.sust.b.2d.array.i32.trap">,
3979     ClangBuiltin<"__nvvm_sust_b_2d_array_i32_trap">;
3980 def int_nvvm_sust_b_2d_array_i64_trap
3981   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
3982                    llvm_i32_ty, llvm_i64_ty], [],
3983               "llvm.nvvm.sust.b.2d.array.i64.trap">,
3984     ClangBuiltin<"__nvvm_sust_b_2d_array_i64_trap">;
3985 def int_nvvm_sust_b_2d_array_v2i8_trap
3986   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3987                    llvm_i16_ty, llvm_i16_ty], [],
3988               "llvm.nvvm.sust.b.2d.array.v2i8.trap">,
3989     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_trap">;
3990 def int_nvvm_sust_b_2d_array_v2i16_trap
3991   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3992                    llvm_i16_ty, llvm_i16_ty], [],
3993               "llvm.nvvm.sust.b.2d.array.v2i16.trap">,
3994     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_trap">;
3995 def int_nvvm_sust_b_2d_array_v2i32_trap
3996   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
3997                    llvm_i32_ty, llvm_i32_ty], [],
3998               "llvm.nvvm.sust.b.2d.array.v2i32.trap">,
3999     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_trap">;
4000 def int_nvvm_sust_b_2d_array_v2i64_trap
4001   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4002                    llvm_i64_ty, llvm_i64_ty], [],
4003               "llvm.nvvm.sust.b.2d.array.v2i64.trap">,
4004     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_trap">;
4005 def int_nvvm_sust_b_2d_array_v4i8_trap
4006   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4007                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4008               "llvm.nvvm.sust.b.2d.array.v4i8.trap">,
4009     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_trap">;
4010 def int_nvvm_sust_b_2d_array_v4i16_trap
4011   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4012                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4013               "llvm.nvvm.sust.b.2d.array.v4i16.trap">,
4014     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_trap">;
4015 def int_nvvm_sust_b_2d_array_v4i32_trap
4016   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4017                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4018               "llvm.nvvm.sust.b.2d.array.v4i32.trap">,
4019     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_trap">;
4020 
4021 
// llvm.nvvm.sust.b.3d.<type>.trap surface store intrinsics.
// Operand layout: one i64, three i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the three i32
// are the x, y and z coordinates -- confirm against the NVPTX backend
// lowering.
4022 def int_nvvm_sust_b_3d_i8_trap
4023   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4024                    llvm_i32_ty, llvm_i16_ty], [],
4025               "llvm.nvvm.sust.b.3d.i8.trap">,
4026     ClangBuiltin<"__nvvm_sust_b_3d_i8_trap">;
4027 def int_nvvm_sust_b_3d_i16_trap
4028   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4029                    llvm_i32_ty, llvm_i16_ty], [],
4030               "llvm.nvvm.sust.b.3d.i16.trap">,
4031     ClangBuiltin<"__nvvm_sust_b_3d_i16_trap">;
4032 def int_nvvm_sust_b_3d_i32_trap
4033   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4034                    llvm_i32_ty, llvm_i32_ty], [],
4035               "llvm.nvvm.sust.b.3d.i32.trap">,
4036     ClangBuiltin<"__nvvm_sust_b_3d_i32_trap">;
4037 def int_nvvm_sust_b_3d_i64_trap
4038   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4039                    llvm_i32_ty, llvm_i64_ty], [],
4040               "llvm.nvvm.sust.b.3d.i64.trap">,
4041     ClangBuiltin<"__nvvm_sust_b_3d_i64_trap">;
4042 def int_nvvm_sust_b_3d_v2i8_trap
4043   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4044                    llvm_i16_ty, llvm_i16_ty], [],
4045               "llvm.nvvm.sust.b.3d.v2i8.trap">,
4046     ClangBuiltin<"__nvvm_sust_b_3d_v2i8_trap">;
4047 def int_nvvm_sust_b_3d_v2i16_trap
4048   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4049                    llvm_i16_ty, llvm_i16_ty], [],
4050               "llvm.nvvm.sust.b.3d.v2i16.trap">,
4051     ClangBuiltin<"__nvvm_sust_b_3d_v2i16_trap">;
4052 def int_nvvm_sust_b_3d_v2i32_trap
4053   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4054                    llvm_i32_ty, llvm_i32_ty], [],
4055               "llvm.nvvm.sust.b.3d.v2i32.trap">,
4056     ClangBuiltin<"__nvvm_sust_b_3d_v2i32_trap">;
4057 def int_nvvm_sust_b_3d_v2i64_trap
4058   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4059                    llvm_i64_ty, llvm_i64_ty], [],
4060               "llvm.nvvm.sust.b.3d.v2i64.trap">,
4061     ClangBuiltin<"__nvvm_sust_b_3d_v2i64_trap">;
4062 def int_nvvm_sust_b_3d_v4i8_trap
4063   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4064                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4065               "llvm.nvvm.sust.b.3d.v4i8.trap">,
4066     ClangBuiltin<"__nvvm_sust_b_3d_v4i8_trap">;
4067 def int_nvvm_sust_b_3d_v4i16_trap
4068   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4069                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4070               "llvm.nvvm.sust.b.3d.v4i16.trap">,
4071     ClangBuiltin<"__nvvm_sust_b_3d_v4i16_trap">;
4072 def int_nvvm_sust_b_3d_v4i32_trap
4073   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4074                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4075               "llvm.nvvm.sust.b.3d.v4i32.trap">,
4076     ClangBuiltin<"__nvvm_sust_b_3d_v4i32_trap">;
4077 
4078 
4079 // .zero variant
// llvm.nvvm.sust.b.1d.<type>.zero surface store intrinsics.
// Operand layout: one i64, one i32, then 1, 2 or 4 value operands for the
// scalar, v2 and v4 forms respectively. Value operands are i16 for both the
// i8 and i16 element types, i32 for i32 and i64 for i64; no v4i64 form is
// defined here.
// These records produce no result and carry an empty attribute list.
// NOTE(review): presumably the i64 is the surface handle and the i32 the x
// coordinate -- confirm against the NVPTX backend lowering.
4080 def int_nvvm_sust_b_1d_i8_zero
4081   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4082               "llvm.nvvm.sust.b.1d.i8.zero">,
4083     ClangBuiltin<"__nvvm_sust_b_1d_i8_zero">;
4084 def int_nvvm_sust_b_1d_i16_zero
4085   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4086               "llvm.nvvm.sust.b.1d.i16.zero">,
4087     ClangBuiltin<"__nvvm_sust_b_1d_i16_zero">;
4088 def int_nvvm_sust_b_1d_i32_zero
4089   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
4090               "llvm.nvvm.sust.b.1d.i32.zero">,
4091     ClangBuiltin<"__nvvm_sust_b_1d_i32_zero">;
4092 def int_nvvm_sust_b_1d_i64_zero
4093   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [],
4094               "llvm.nvvm.sust.b.1d.i64.zero">,
4095     ClangBuiltin<"__nvvm_sust_b_1d_i64_zero">;
4096 def int_nvvm_sust_b_1d_v2i8_zero
4097   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4098               "llvm.nvvm.sust.b.1d.v2i8.zero">,
4099     ClangBuiltin<"__nvvm_sust_b_1d_v2i8_zero">;
4100 def int_nvvm_sust_b_1d_v2i16_zero
4101   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4102               "llvm.nvvm.sust.b.1d.v2i16.zero">,
4103     ClangBuiltin<"__nvvm_sust_b_1d_v2i16_zero">;
4104 def int_nvvm_sust_b_1d_v2i32_zero
4105   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4106               "llvm.nvvm.sust.b.1d.v2i32.zero">,
4107     ClangBuiltin<"__nvvm_sust_b_1d_v2i32_zero">;
4108 def int_nvvm_sust_b_1d_v2i64_zero
4109   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], [],
4110               "llvm.nvvm.sust.b.1d.v2i64.zero">,
4111     ClangBuiltin<"__nvvm_sust_b_1d_v2i64_zero">;
4112 def int_nvvm_sust_b_1d_v4i8_zero
4113   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4114                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4115               "llvm.nvvm.sust.b.1d.v4i8.zero">,
4116     ClangBuiltin<"__nvvm_sust_b_1d_v4i8_zero">;
4117 def int_nvvm_sust_b_1d_v4i16_zero
4118   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4119                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4120               "llvm.nvvm.sust.b.1d.v4i16.zero">,
4121     ClangBuiltin<"__nvvm_sust_b_1d_v4i16_zero">;
4122 def int_nvvm_sust_b_1d_v4i32_zero
4123   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4124                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4125               "llvm.nvvm.sust.b.1d.v4i32.zero">,
4126     ClangBuiltin<"__nvvm_sust_b_1d_v4i32_zero">;
4127 
4128 
// sust.b.1d.array.*.zero: every record returns nothing and takes an i64
// operand, two i32 operands, and then 1, 2 or 4 value operands. i8 and i16
// elements are both passed as i16.
// NOTE(review): compared with the plain 1d records there is one extra i32,
// presumably the array layer index -- confirm which of the two i32s it is.
4129 def int_nvvm_sust_b_1d_array_i8_zero
4130   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4131               "llvm.nvvm.sust.b.1d.array.i8.zero">,
4132     ClangBuiltin<"__nvvm_sust_b_1d_array_i8_zero">;
4133 def int_nvvm_sust_b_1d_array_i16_zero
4134   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4135               "llvm.nvvm.sust.b.1d.array.i16.zero">,
4136     ClangBuiltin<"__nvvm_sust_b_1d_array_i16_zero">;
4137 def int_nvvm_sust_b_1d_array_i32_zero
4138   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4139               "llvm.nvvm.sust.b.1d.array.i32.zero">,
4140     ClangBuiltin<"__nvvm_sust_b_1d_array_i32_zero">;
4141 def int_nvvm_sust_b_1d_array_i64_zero
4142   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
4143               "llvm.nvvm.sust.b.1d.array.i64.zero">,
4144     ClangBuiltin<"__nvvm_sust_b_1d_array_i64_zero">;
4145 def int_nvvm_sust_b_1d_array_v2i8_zero
4146   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4147                    llvm_i16_ty, llvm_i16_ty], [],
4148               "llvm.nvvm.sust.b.1d.array.v2i8.zero">,
4149     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i8_zero">;
4150 def int_nvvm_sust_b_1d_array_v2i16_zero
4151   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4152                    llvm_i16_ty, llvm_i16_ty], [],
4153               "llvm.nvvm.sust.b.1d.array.v2i16.zero">,
4154     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i16_zero">;
4155 def int_nvvm_sust_b_1d_array_v2i32_zero
4156   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4157                    llvm_i32_ty, llvm_i32_ty], [],
4158               "llvm.nvvm.sust.b.1d.array.v2i32.zero">,
4159     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i32_zero">;
4160 def int_nvvm_sust_b_1d_array_v2i64_zero
4161   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4162                    llvm_i64_ty, llvm_i64_ty], [],
4163               "llvm.nvvm.sust.b.1d.array.v2i64.zero">,
4164     ClangBuiltin<"__nvvm_sust_b_1d_array_v2i64_zero">;
4165 def int_nvvm_sust_b_1d_array_v4i8_zero
4166   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4167                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4168               "llvm.nvvm.sust.b.1d.array.v4i8.zero">,
4169     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i8_zero">;
4170 def int_nvvm_sust_b_1d_array_v4i16_zero
4171   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4172                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4173               "llvm.nvvm.sust.b.1d.array.v4i16.zero">,
4174     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i16_zero">;
4175 def int_nvvm_sust_b_1d_array_v4i32_zero
4176   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4177                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4178               "llvm.nvvm.sust.b.1d.array.v4i32.zero">,
4179     ClangBuiltin<"__nvvm_sust_b_1d_array_v4i32_zero">;
4180 
4181 
// sust.b.2d.*.zero: every record returns nothing and takes an i64 operand,
// two i32 operands, and then 1, 2 or 4 value operands. i8 and i16 elements
// are both passed as i16; i32 and i64 elements use their own width.
// NOTE(review): the two i32s are presumably the x and y coordinates --
// confirm against the PTX `sust` operand order.
4182 def int_nvvm_sust_b_2d_i8_zero
4183   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4184               "llvm.nvvm.sust.b.2d.i8.zero">,
4185     ClangBuiltin<"__nvvm_sust_b_2d_i8_zero">;
4186 def int_nvvm_sust_b_2d_i16_zero
4187   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4188               "llvm.nvvm.sust.b.2d.i16.zero">,
4189     ClangBuiltin<"__nvvm_sust_b_2d_i16_zero">;
4190 def int_nvvm_sust_b_2d_i32_zero
4191   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4192               "llvm.nvvm.sust.b.2d.i32.zero">,
4193     ClangBuiltin<"__nvvm_sust_b_2d_i32_zero">;
4194 def int_nvvm_sust_b_2d_i64_zero
4195   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], [],
4196               "llvm.nvvm.sust.b.2d.i64.zero">,
4197     ClangBuiltin<"__nvvm_sust_b_2d_i64_zero">;
4198 def int_nvvm_sust_b_2d_v2i8_zero
4199   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4200                    llvm_i16_ty, llvm_i16_ty], [],
4201               "llvm.nvvm.sust.b.2d.v2i8.zero">,
4202     ClangBuiltin<"__nvvm_sust_b_2d_v2i8_zero">;
4203 def int_nvvm_sust_b_2d_v2i16_zero
4204   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4205                    llvm_i16_ty, llvm_i16_ty], [],
4206               "llvm.nvvm.sust.b.2d.v2i16.zero">,
4207     ClangBuiltin<"__nvvm_sust_b_2d_v2i16_zero">;
4208 def int_nvvm_sust_b_2d_v2i32_zero
4209   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4210                    llvm_i32_ty, llvm_i32_ty], [],
4211               "llvm.nvvm.sust.b.2d.v2i32.zero">,
4212     ClangBuiltin<"__nvvm_sust_b_2d_v2i32_zero">;
4213 def int_nvvm_sust_b_2d_v2i64_zero
4214   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4215                    llvm_i64_ty, llvm_i64_ty], [],
4216               "llvm.nvvm.sust.b.2d.v2i64.zero">,
4217     ClangBuiltin<"__nvvm_sust_b_2d_v2i64_zero">;
4218 def int_nvvm_sust_b_2d_v4i8_zero
4219   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4220                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4221               "llvm.nvvm.sust.b.2d.v4i8.zero">,
4222     ClangBuiltin<"__nvvm_sust_b_2d_v4i8_zero">;
4223 def int_nvvm_sust_b_2d_v4i16_zero
4224   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4225                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4226               "llvm.nvvm.sust.b.2d.v4i16.zero">,
4227     ClangBuiltin<"__nvvm_sust_b_2d_v4i16_zero">;
4228 def int_nvvm_sust_b_2d_v4i32_zero
4229   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4230                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4231               "llvm.nvvm.sust.b.2d.v4i32.zero">,
4232     ClangBuiltin<"__nvvm_sust_b_2d_v4i32_zero">;
4233 
4234 
// sust.b.2d.array.*.zero: every record returns nothing and takes an i64
// operand, three i32 operands, and then 1, 2 or 4 value operands. i8 and
// i16 elements are both passed as i16.
// NOTE(review): the three i32s are presumably an array layer index plus the
// x and y coordinates -- confirm their order against the PTX ISA.
4235 def int_nvvm_sust_b_2d_array_i8_zero
4236   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4237                    llvm_i32_ty, llvm_i16_ty], [],
4238               "llvm.nvvm.sust.b.2d.array.i8.zero">,
4239     ClangBuiltin<"__nvvm_sust_b_2d_array_i8_zero">;
4240 def int_nvvm_sust_b_2d_array_i16_zero
4241   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4242                    llvm_i32_ty, llvm_i16_ty], [],
4243               "llvm.nvvm.sust.b.2d.array.i16.zero">,
4244     ClangBuiltin<"__nvvm_sust_b_2d_array_i16_zero">;
4245 def int_nvvm_sust_b_2d_array_i32_zero
4246   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4247                    llvm_i32_ty, llvm_i32_ty], [],
4248               "llvm.nvvm.sust.b.2d.array.i32.zero">,
4249     ClangBuiltin<"__nvvm_sust_b_2d_array_i32_zero">;
4250 def int_nvvm_sust_b_2d_array_i64_zero
4251   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4252                    llvm_i32_ty, llvm_i64_ty], [],
4253               "llvm.nvvm.sust.b.2d.array.i64.zero">,
4254     ClangBuiltin<"__nvvm_sust_b_2d_array_i64_zero">;
4255 def int_nvvm_sust_b_2d_array_v2i8_zero
4256   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4257                    llvm_i16_ty, llvm_i16_ty], [],
4258               "llvm.nvvm.sust.b.2d.array.v2i8.zero">,
4259     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i8_zero">;
4260 def int_nvvm_sust_b_2d_array_v2i16_zero
4261   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4262                    llvm_i16_ty, llvm_i16_ty], [],
4263               "llvm.nvvm.sust.b.2d.array.v2i16.zero">,
4264     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i16_zero">;
4265 def int_nvvm_sust_b_2d_array_v2i32_zero
4266   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4267                    llvm_i32_ty, llvm_i32_ty], [],
4268               "llvm.nvvm.sust.b.2d.array.v2i32.zero">,
4269     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i32_zero">;
4270 def int_nvvm_sust_b_2d_array_v2i64_zero
4271   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4272                    llvm_i64_ty, llvm_i64_ty], [],
4273               "llvm.nvvm.sust.b.2d.array.v2i64.zero">,
4274     ClangBuiltin<"__nvvm_sust_b_2d_array_v2i64_zero">;
4275 def int_nvvm_sust_b_2d_array_v4i8_zero
4276   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4277                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4278               "llvm.nvvm.sust.b.2d.array.v4i8.zero">,
4279     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i8_zero">;
4280 def int_nvvm_sust_b_2d_array_v4i16_zero
4281   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4282                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4283               "llvm.nvvm.sust.b.2d.array.v4i16.zero">,
4284     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i16_zero">;
4285 def int_nvvm_sust_b_2d_array_v4i32_zero
4286   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4287                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4288               "llvm.nvvm.sust.b.2d.array.v4i32.zero">,
4289     ClangBuiltin<"__nvvm_sust_b_2d_array_v4i32_zero">;
4290 
4291 
// sust.b.3d.*.zero: every record returns nothing and takes an i64 operand,
// three i32 operands, and then 1, 2 or 4 value operands. i8 and i16 elements
// are both passed as i16; i32 and i64 elements use their own width.
// NOTE(review): the three i32s are presumably the x, y and z coordinates --
// confirm against the PTX `sust` operand order.
4292 def int_nvvm_sust_b_3d_i8_zero
4293   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4294                    llvm_i32_ty, llvm_i16_ty], [],
4295               "llvm.nvvm.sust.b.3d.i8.zero">,
4296     ClangBuiltin<"__nvvm_sust_b_3d_i8_zero">;
4297 def int_nvvm_sust_b_3d_i16_zero
4298   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4299                    llvm_i32_ty, llvm_i16_ty], [],
4300               "llvm.nvvm.sust.b.3d.i16.zero">,
4301     ClangBuiltin<"__nvvm_sust_b_3d_i16_zero">;
4302 def int_nvvm_sust_b_3d_i32_zero
4303   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4304                    llvm_i32_ty, llvm_i32_ty], [],
4305               "llvm.nvvm.sust.b.3d.i32.zero">,
4306     ClangBuiltin<"__nvvm_sust_b_3d_i32_zero">;
4307 def int_nvvm_sust_b_3d_i64_zero
4308   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4309                    llvm_i32_ty, llvm_i64_ty], [],
4310               "llvm.nvvm.sust.b.3d.i64.zero">,
4311     ClangBuiltin<"__nvvm_sust_b_3d_i64_zero">;
4312 def int_nvvm_sust_b_3d_v2i8_zero
4313   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4314                    llvm_i16_ty, llvm_i16_ty], [],
4315               "llvm.nvvm.sust.b.3d.v2i8.zero">,
4316     ClangBuiltin<"__nvvm_sust_b_3d_v2i8_zero">;
4317 def int_nvvm_sust_b_3d_v2i16_zero
4318   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4319                    llvm_i16_ty, llvm_i16_ty], [],
4320               "llvm.nvvm.sust.b.3d.v2i16.zero">,
4321     ClangBuiltin<"__nvvm_sust_b_3d_v2i16_zero">;
4322 def int_nvvm_sust_b_3d_v2i32_zero
4323   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4324                    llvm_i32_ty, llvm_i32_ty], [],
4325               "llvm.nvvm.sust.b.3d.v2i32.zero">,
4326     ClangBuiltin<"__nvvm_sust_b_3d_v2i32_zero">;
4327 def int_nvvm_sust_b_3d_v2i64_zero
4328   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4329                    llvm_i64_ty, llvm_i64_ty], [],
4330               "llvm.nvvm.sust.b.3d.v2i64.zero">,
4331     ClangBuiltin<"__nvvm_sust_b_3d_v2i64_zero">;
4332 def int_nvvm_sust_b_3d_v4i8_zero
4333   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4334                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4335               "llvm.nvvm.sust.b.3d.v4i8.zero">,
4336     ClangBuiltin<"__nvvm_sust_b_3d_v4i8_zero">;
4337 def int_nvvm_sust_b_3d_v4i16_zero
4338   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4339                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4340               "llvm.nvvm.sust.b.3d.v4i16.zero">,
4341     ClangBuiltin<"__nvvm_sust_b_3d_v4i16_zero">;
4342 def int_nvvm_sust_b_3d_v4i32_zero
4343   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4344                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4345               "llvm.nvvm.sust.b.3d.v4i32.zero">,
4346     ClangBuiltin<"__nvvm_sust_b_3d_v4i32_zero">;
4347 
4348 
4349 
4350 // Formatted
// sust.p.1d.*.trap: every record returns nothing and takes an i64 operand,
// one i32 operand, and then 1, 2 or 4 value operands. i8 and i16 elements
// are both passed as i16. Unlike the sust.b groups, this group defines no
// i64 or v2i64 element records.
// NOTE(review): "Formatted" presumably refers to the PTX `sust.p` form, which
// converts the data to the surface's format on store -- confirm.
4351 
4352 def int_nvvm_sust_p_1d_i8_trap
4353   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4354               "llvm.nvvm.sust.p.1d.i8.trap">,
4355     ClangBuiltin<"__nvvm_sust_p_1d_i8_trap">;
4356 def int_nvvm_sust_p_1d_i16_trap
4357   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], [],
4358               "llvm.nvvm.sust.p.1d.i16.trap">,
4359     ClangBuiltin<"__nvvm_sust_p_1d_i16_trap">;
4360 def int_nvvm_sust_p_1d_i32_trap
4361   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], [],
4362               "llvm.nvvm.sust.p.1d.i32.trap">,
4363     ClangBuiltin<"__nvvm_sust_p_1d_i32_trap">;
4364 def int_nvvm_sust_p_1d_v2i8_trap
4365   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4366               "llvm.nvvm.sust.p.1d.v2i8.trap">,
4367     ClangBuiltin<"__nvvm_sust_p_1d_v2i8_trap">;
4368 def int_nvvm_sust_p_1d_v2i16_trap
4369   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], [],
4370               "llvm.nvvm.sust.p.1d.v2i16.trap">,
4371     ClangBuiltin<"__nvvm_sust_p_1d_v2i16_trap">;
4372 def int_nvvm_sust_p_1d_v2i32_trap
4373   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4374               "llvm.nvvm.sust.p.1d.v2i32.trap">,
4375     ClangBuiltin<"__nvvm_sust_p_1d_v2i32_trap">;
4376 def int_nvvm_sust_p_1d_v4i8_trap
4377   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4378                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4379               "llvm.nvvm.sust.p.1d.v4i8.trap">,
4380     ClangBuiltin<"__nvvm_sust_p_1d_v4i8_trap">;
4381 def int_nvvm_sust_p_1d_v4i16_trap
4382   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty,
4383                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4384               "llvm.nvvm.sust.p.1d.v4i16.trap">,
4385     ClangBuiltin<"__nvvm_sust_p_1d_v4i16_trap">;
4386 def int_nvvm_sust_p_1d_v4i32_trap
4387   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4388                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4389               "llvm.nvvm.sust.p.1d.v4i32.trap">,
4390     ClangBuiltin<"__nvvm_sust_p_1d_v4i32_trap">;
4391 
4392 
// sust.p.1d.array.*.trap: every record returns nothing and takes an i64
// operand, two i32 operands, and then 1, 2 or 4 value operands. i8 and i16
// elements are both passed as i16. No i64 or v2i64 element records are
// defined in this group.
// NOTE(review): one of the two i32s is presumably the array layer index --
// confirm which against the PTX ISA.
4393 def int_nvvm_sust_p_1d_array_i8_trap
4394   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4395               "llvm.nvvm.sust.p.1d.array.i8.trap">,
4396     ClangBuiltin<"__nvvm_sust_p_1d_array_i8_trap">;
4397 def int_nvvm_sust_p_1d_array_i16_trap
4398   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4399               "llvm.nvvm.sust.p.1d.array.i16.trap">,
4400     ClangBuiltin<"__nvvm_sust_p_1d_array_i16_trap">;
4401 def int_nvvm_sust_p_1d_array_i32_trap
4402   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4403               "llvm.nvvm.sust.p.1d.array.i32.trap">,
4404     ClangBuiltin<"__nvvm_sust_p_1d_array_i32_trap">;
4405 def int_nvvm_sust_p_1d_array_v2i8_trap
4406   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4407                    llvm_i16_ty, llvm_i16_ty], [],
4408               "llvm.nvvm.sust.p.1d.array.v2i8.trap">,
4409     ClangBuiltin<"__nvvm_sust_p_1d_array_v2i8_trap">;
4410 def int_nvvm_sust_p_1d_array_v2i16_trap
4411   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4412                    llvm_i16_ty, llvm_i16_ty], [],
4413               "llvm.nvvm.sust.p.1d.array.v2i16.trap">,
4414     ClangBuiltin<"__nvvm_sust_p_1d_array_v2i16_trap">;
4415 def int_nvvm_sust_p_1d_array_v2i32_trap
4416   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4417                    llvm_i32_ty, llvm_i32_ty], [],
4418               "llvm.nvvm.sust.p.1d.array.v2i32.trap">,
4419     ClangBuiltin<"__nvvm_sust_p_1d_array_v2i32_trap">;
4420 def int_nvvm_sust_p_1d_array_v4i8_trap
4421   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4422                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4423               "llvm.nvvm.sust.p.1d.array.v4i8.trap">,
4424     ClangBuiltin<"__nvvm_sust_p_1d_array_v4i8_trap">;
4425 def int_nvvm_sust_p_1d_array_v4i16_trap
4426   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4427                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4428               "llvm.nvvm.sust.p.1d.array.v4i16.trap">,
4429     ClangBuiltin<"__nvvm_sust_p_1d_array_v4i16_trap">;
4430 def int_nvvm_sust_p_1d_array_v4i32_trap
4431   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4432                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4433               "llvm.nvvm.sust.p.1d.array.v4i32.trap">,
4434     ClangBuiltin<"__nvvm_sust_p_1d_array_v4i32_trap">;
4435 
4436 
// sust.p.2d.*.trap: every record returns nothing and takes an i64 operand,
// two i32 operands, and then 1, 2 or 4 value operands. i8 and i16 elements
// are both passed as i16. No i64 or v2i64 element records are defined in
// this group.
// NOTE(review): the two i32s are presumably the x and y coordinates --
// confirm against the PTX `sust` operand order.
4437 def int_nvvm_sust_p_2d_i8_trap
4438   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4439               "llvm.nvvm.sust.p.2d.i8.trap">,
4440     ClangBuiltin<"__nvvm_sust_p_2d_i8_trap">;
4441 def int_nvvm_sust_p_2d_i16_trap
4442   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], [],
4443               "llvm.nvvm.sust.p.2d.i16.trap">,
4444     ClangBuiltin<"__nvvm_sust_p_2d_i16_trap">;
4445 def int_nvvm_sust_p_2d_i32_trap
4446   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4447               "llvm.nvvm.sust.p.2d.i32.trap">,
4448     ClangBuiltin<"__nvvm_sust_p_2d_i32_trap">;
4449 def int_nvvm_sust_p_2d_v2i8_trap
4450   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4451                    llvm_i16_ty, llvm_i16_ty], [],
4452               "llvm.nvvm.sust.p.2d.v2i8.trap">,
4453     ClangBuiltin<"__nvvm_sust_p_2d_v2i8_trap">;
4454 def int_nvvm_sust_p_2d_v2i16_trap
4455   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4456                    llvm_i16_ty, llvm_i16_ty], [],
4457               "llvm.nvvm.sust.p.2d.v2i16.trap">,
4458     ClangBuiltin<"__nvvm_sust_p_2d_v2i16_trap">;
4459 def int_nvvm_sust_p_2d_v2i32_trap
4460   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4461                    llvm_i32_ty, llvm_i32_ty], [],
4462               "llvm.nvvm.sust.p.2d.v2i32.trap">,
4463     ClangBuiltin<"__nvvm_sust_p_2d_v2i32_trap">;
4464 def int_nvvm_sust_p_2d_v4i8_trap
4465   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4466                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4467               "llvm.nvvm.sust.p.2d.v4i8.trap">,
4468     ClangBuiltin<"__nvvm_sust_p_2d_v4i8_trap">;
4469 def int_nvvm_sust_p_2d_v4i16_trap
4470   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty,
4471                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4472               "llvm.nvvm.sust.p.2d.v4i16.trap">,
4473     ClangBuiltin<"__nvvm_sust_p_2d_v4i16_trap">;
4474 def int_nvvm_sust_p_2d_v4i32_trap
4475   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4476                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4477               "llvm.nvvm.sust.p.2d.v4i32.trap">,
4478     ClangBuiltin<"__nvvm_sust_p_2d_v4i32_trap">;
4479 
4480 
// sust.p.2d.array.*.trap: every record returns nothing and takes an i64
// operand, three i32 operands, and then 1, 2 or 4 value operands. i8 and
// i16 elements are both passed as i16. No i64 or v2i64 element records are
// defined in this group.
// NOTE(review): the three i32s are presumably an array layer index plus the
// x and y coordinates -- confirm their order against the PTX ISA.
4481 def int_nvvm_sust_p_2d_array_i8_trap
4482   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4483                    llvm_i32_ty, llvm_i16_ty], [],
4484               "llvm.nvvm.sust.p.2d.array.i8.trap">,
4485     ClangBuiltin<"__nvvm_sust_p_2d_array_i8_trap">;
4486 def int_nvvm_sust_p_2d_array_i16_trap
4487   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4488                    llvm_i32_ty, llvm_i16_ty], [],
4489               "llvm.nvvm.sust.p.2d.array.i16.trap">,
4490     ClangBuiltin<"__nvvm_sust_p_2d_array_i16_trap">;
4491 def int_nvvm_sust_p_2d_array_i32_trap
4492   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4493                    llvm_i32_ty, llvm_i32_ty], [],
4494               "llvm.nvvm.sust.p.2d.array.i32.trap">,
4495     ClangBuiltin<"__nvvm_sust_p_2d_array_i32_trap">;
4496 def int_nvvm_sust_p_2d_array_v2i8_trap
4497   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4498                    llvm_i16_ty, llvm_i16_ty], [],
4499               "llvm.nvvm.sust.p.2d.array.v2i8.trap">,
4500     ClangBuiltin<"__nvvm_sust_p_2d_array_v2i8_trap">;
4501 def int_nvvm_sust_p_2d_array_v2i16_trap
4502   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4503                    llvm_i16_ty, llvm_i16_ty], [],
4504               "llvm.nvvm.sust.p.2d.array.v2i16.trap">,
4505     ClangBuiltin<"__nvvm_sust_p_2d_array_v2i16_trap">;
4506 def int_nvvm_sust_p_2d_array_v2i32_trap
4507   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4508                    llvm_i32_ty, llvm_i32_ty], [],
4509               "llvm.nvvm.sust.p.2d.array.v2i32.trap">,
4510     ClangBuiltin<"__nvvm_sust_p_2d_array_v2i32_trap">;
4511 def int_nvvm_sust_p_2d_array_v4i8_trap
4512   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4513                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4514               "llvm.nvvm.sust.p.2d.array.v4i8.trap">,
4515     ClangBuiltin<"__nvvm_sust_p_2d_array_v4i8_trap">;
4516 def int_nvvm_sust_p_2d_array_v4i16_trap
4517   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4518                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4519               "llvm.nvvm.sust.p.2d.array.v4i16.trap">,
4520     ClangBuiltin<"__nvvm_sust_p_2d_array_v4i16_trap">;
4521 def int_nvvm_sust_p_2d_array_v4i32_trap
4522   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4523                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4524               "llvm.nvvm.sust.p.2d.array.v4i32.trap">,
4525     ClangBuiltin<"__nvvm_sust_p_2d_array_v4i32_trap">;
4526 
4527 
// sust.p.3d.*.trap: every record returns nothing and takes an i64 operand,
// three i32 operands, and then 1, 2 or 4 value operands. i8 and i16 elements
// are both passed as i16. No i64 or v2i64 element records are defined in
// this group.
// NOTE(review): the three i32s are presumably the x, y and z coordinates --
// confirm against the PTX `sust` operand order.
4528 def int_nvvm_sust_p_3d_i8_trap
4529   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4530                    llvm_i32_ty, llvm_i16_ty], [],
4531               "llvm.nvvm.sust.p.3d.i8.trap">,
4532     ClangBuiltin<"__nvvm_sust_p_3d_i8_trap">;
4533 def int_nvvm_sust_p_3d_i16_trap
4534   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4535                    llvm_i32_ty, llvm_i16_ty], [],
4536               "llvm.nvvm.sust.p.3d.i16.trap">,
4537     ClangBuiltin<"__nvvm_sust_p_3d_i16_trap">;
4538 def int_nvvm_sust_p_3d_i32_trap
4539   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty,
4540                    llvm_i32_ty, llvm_i32_ty], [],
4541               "llvm.nvvm.sust.p.3d.i32.trap">,
4542     ClangBuiltin<"__nvvm_sust_p_3d_i32_trap">;
4543 def int_nvvm_sust_p_3d_v2i8_trap
4544   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4545                    llvm_i16_ty, llvm_i16_ty], [],
4546               "llvm.nvvm.sust.p.3d.v2i8.trap">,
4547     ClangBuiltin<"__nvvm_sust_p_3d_v2i8_trap">;
4548 def int_nvvm_sust_p_3d_v2i16_trap
4549   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4550                    llvm_i16_ty, llvm_i16_ty], [],
4551               "llvm.nvvm.sust.p.3d.v2i16.trap">,
4552     ClangBuiltin<"__nvvm_sust_p_3d_v2i16_trap">;
4553 def int_nvvm_sust_p_3d_v2i32_trap
4554   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4555                    llvm_i32_ty, llvm_i32_ty], [],
4556               "llvm.nvvm.sust.p.3d.v2i32.trap">,
4557     ClangBuiltin<"__nvvm_sust_p_3d_v2i32_trap">;
4558 def int_nvvm_sust_p_3d_v4i8_trap
4559   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4560                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4561               "llvm.nvvm.sust.p.3d.v4i8.trap">,
4562     ClangBuiltin<"__nvvm_sust_p_3d_v4i8_trap">;
4563 def int_nvvm_sust_p_3d_v4i16_trap
4564   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4565                    llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], [],
4566               "llvm.nvvm.sust.p.3d.v4i16.trap">,
4567     ClangBuiltin<"__nvvm_sust_p_3d_v4i16_trap">;
4568 def int_nvvm_sust_p_3d_v4i32_trap
4569   : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
4570                    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [],
4571               "llvm.nvvm.sust.p.3d.v4i32.trap">,
4572     ClangBuiltin<"__nvvm_sust_p_3d_v4i32_trap">;
4573 
// llvm.nvvm.swap.lo.hi.b64: takes one i64 and returns an i64. Declared with
// the default attribute set plus IntrNoMem and IntrSpeculatable, and exposed
// to Clang as __nvvm_swap_lo_hi_b64.
// NOTE(review): by its name this presumably exchanges the low and high
// 32-bit halves of the operand -- confirm against the NVPTX lowering.
4574 def int_nvvm_swap_lo_hi_b64
4575   : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
4576               [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
4577               ClangBuiltin<"__nvvm_swap_lo_hi_b64">;
4578 
4579 
4580 // Accessing special registers.
4581 
// Base class for a special-register read that takes no operands and returns
// an i32. It carries the default attributes plus IntrNoMem, IntrSpeculatable
// and NoUndef on the return value. This "NB" form attaches no ClangBuiltin.
4582 class PTXReadSRegIntrinsicNB_r32
4583   : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
// Same as PTXReadSRegIntrinsicNB_r32, but also exposes the intrinsic to Clang
// as the builtin "__nvvm_read_ptx_sreg_" followed by `name`.
4584 class PTXReadSRegIntrinsic_r32<string name>
4585   : PTXReadSRegIntrinsicNB_r32, ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4586 
// Defines four i32 special-register reads, one per component suffix
// (_x, _y, _z, _w). Each is a PTXReadSRegIntrinsic_r32 whose Clang builtin
// name is "__nvvm_read_ptx_sreg_" followed by `regname` and the suffix.
// Instantiated with `defm`, so the record names are the defm name plus the
// suffix.
// NOTE(review): the commented-out sketches below are named `_r64` and
// `_v4i16` but use i128 and v4i32 types; the names look stale -- fix them if
// either sketch is ever enabled.
4587 multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
4588 // FIXME: Do we need the 128-bit integer type version?
4589 //    def _r64   : Intrinsic<[llvm_i128_ty],   [], [IntrNoMem, IntrSpeculatable]>;
4590 
4591 // FIXME: Enable this once v4i32 support is enabled in back-end.
4592 //    def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
4593   foreach suffix = ["_x", "_y", "_z", "_w"] in
4594     def suffix : PTXReadSRegIntrinsic_r32<regname # suffix>;
4595 }
4596 
4597 // Same, but without automatic clang builtins. It will be used for
4598 // registers that require particular GPU or PTX version.
// Defines four records with suffixes _x, _y, _z and _w, each deriving from
// PTXReadSRegIntrinsicNB_r32. It takes no template arguments because no
// builtin name has to be built.
4599 multiclass PTXReadSRegIntrinsicNB_v4i32 {
4600   foreach suffix = ["_x", "_y", "_z", "_w"] in
4601     def suffix : PTXReadSRegIntrinsicNB_r32;
4602 }
4603 
// Special-register read that takes no operands and returns an i64. It
// carries the default attributes plus IntrNoMem, IntrSpeculatable and NoUndef
// on the return value, and is exposed to Clang as the builtin
// "__nvvm_read_ptx_sreg_" followed by `name`.
4604 class PTXReadSRegIntrinsic_r64<string name>
4605   : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
4606     ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4607 
4608 // Intrinsics to read registers with non-constant values. E.g. the values that
4609 // do change over the kernel lifetime. Such reads should not be CSE'd.
// i32 read of a special register, with no operands. Unlike the
// PTXReadSRegIntrinsic* classes it derives from plain Intrinsic and is
// marked IntrInaccessibleMemOnly and IntrNoCallback instead of IntrNoMem and
// IntrSpeculatable. The result is NoUndef, and the Clang builtin is
// "__nvvm_read_ptx_sreg_" followed by `name`.
4610 class PTXReadNCSRegIntrinsic_r32<string name>
4611   : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
4612     ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// i64 counterpart of PTXReadNCSRegIntrinsic_r32, with the same attributes and
// builtin naming.
4613 class PTXReadNCSRegIntrinsic_r64<string name>
4614   : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
4615     ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
4616 
// Special-register read intrinsics with Clang builtins. Each `defm` of
// PTXReadSRegIntrinsic_v4i32 expands to four records suffixed _x/_y/_z/_w
// (for example int_nvvm_read_ptx_sreg_tid_x); each `def` is one record.
4617 defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
4618 defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
4619 
4620 def int_nvvm_read_ptx_sreg_laneid : PTXReadSRegIntrinsic_r32<"laneid">;
4621 def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32<"warpid">;
4622 def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32<"nwarpid">;
4623 
4624 defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<"ctaid">;
4625 defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<"nctaid">;
4626 
4627 def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32<"smid">;
4628 def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32<"nsmid">;
4629 def int_nvvm_read_ptx_sreg_gridid : PTXReadSRegIntrinsic_r32<"gridid">;
4630 
4631 def int_nvvm_read_ptx_sreg_lanemask_eq :
4632     PTXReadSRegIntrinsic_r32<"lanemask_eq">;
4633 def int_nvvm_read_ptx_sreg_lanemask_le :
4634     PTXReadSRegIntrinsic_r32<"lanemask_le">;
4635 def int_nvvm_read_ptx_sreg_lanemask_lt :
4636     PTXReadSRegIntrinsic_r32<"lanemask_lt">;
4637 def int_nvvm_read_ptx_sreg_lanemask_ge :
4638     PTXReadSRegIntrinsic_r32<"lanemask_ge">;
4639 def int_nvvm_read_ptx_sreg_lanemask_gt :
4640     PTXReadSRegIntrinsic_r32<"lanemask_gt">;
4641 
// clock, clock64, globaltimer and pm0..pm3 use the PTXReadNCSRegIntrinsic_*
// classes, so they are not IntrNoMem or IntrSpeculatable like the reads above.
4642 def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
4643 def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
4644 
4645 def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64<"globaltimer">;
4646 
4647 def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
4648 def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
4649 def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
4650 def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
4651 
4652 def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
4653 
4654 // sm90+, PTX7.8+
// Cluster-related special registers. They use the "NB" classes, so no Clang
// builtin is attached to these records here. Each `defm` expands to four
// records suffixed _x/_y/_z/_w; each `def` is a single i32 read.
4655 defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32;
4656 defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32;
4657 defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32;
4658 defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32;
4659 
4660 def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
4661 def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
4662 
4663 //
4664 // SHUFFLE
4665 //
4666 // Generate intrinsics for all variants of shfl instruction.
// The nested loops enumerate 2 (sync) x 4 (mode) x 2 (type) x 2 (return_pred)
// = 32 combinations. For each one, SHFL_INFO (defined outside this chunk)
// supplies the record name, builtin name, return/argument types and the
// intrinsic name string.
4667 foreach sync = [false, true] in {
4668   foreach mode = ["up", "down", "bfly", "idx"] in {
4669     foreach type = ["i32", "f32"] in {
4670       foreach return_pred = [false, true] in {
// A one-element foreach is used to bind the SHFL_INFO instance to the
// name `i` so that its fields can be referenced below.
4671         foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in {
// Variant with a Clang builtin attached.
4672           if i.withGccBuiltin then {
4673             def i.Name : ClangBuiltin<i.Builtin>,
4674                          Intrinsic<i.RetTy, i.ArgsTy,
4675                                    [IntrInaccessibleMemOnly, IntrConvergent,
4676                                    IntrNoCallback],
4677                                    i.IntrName>;
4678           }
// Variant without a Clang builtin; the attributes are the same as above.
// NOTE(review): withGccBuiltin and withoutGccBuiltin are presumably
// mutually exclusive, otherwise i.Name would be defined twice -- confirm
// in SHFL_INFO.
4679           if i.withoutGccBuiltin then {
4680             def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
4681                          [IntrInaccessibleMemOnly, IntrConvergent,
4682                          IntrNoCallback], i.IntrName>;
4683           }
4684         }
4685       }
4686     }
4687   }
4688 }
4689 
4690 //
4691 // VOTE
4692 //
// Non-sync vote intrinsics. Each takes a single i1 operand. all/any/uni
// return i1 and ballot returns i32. All four are marked
// IntrInaccessibleMemOnly, IntrConvergent and IntrNoCallback and have a
// matching __nvvm_vote_* Clang builtin.
4693 
4694 // vote.all pred
4695 def int_nvvm_vote_all :
4696   Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
4697             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all">,
4698   ClangBuiltin<"__nvvm_vote_all">;
4699 // vote.any pred
4700 def int_nvvm_vote_any :
4701   Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
4702             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any">,
4703   ClangBuiltin<"__nvvm_vote_any">;
4704 // vote.uni pred
4705 def int_nvvm_vote_uni :
4706   Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
4707             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni">,
4708   ClangBuiltin<"__nvvm_vote_uni">;
4709 // vote.ballot pred
4710 def int_nvvm_vote_ballot :
4711   Intrinsic<[llvm_i32_ty], [llvm_i1_ty],
4712             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot">,
4713   ClangBuiltin<"__nvvm_vote_ballot">;
4714 
4715 //
4716 // VOTE.SYNC
4717 // Each intrinsic takes (i32 mask, i1 pred); all/any/uni return i1 and ballot returns i32. All four share the same attribute list and each has an explicit LLVM name and a Clang builtin.
4718 
4719 // vote.sync.all mask, pred
4720 def int_nvvm_vote_all_sync :
4721   Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
4722             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all.sync">,
4723   ClangBuiltin<"__nvvm_vote_all_sync">;
4724 // vote.sync.any mask, pred
4725 def int_nvvm_vote_any_sync :
4726   Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
4727             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any.sync">,
4728   ClangBuiltin<"__nvvm_vote_any_sync">;
4729 // vote.sync.uni mask, pred
4730 def int_nvvm_vote_uni_sync :
4731   Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
4732             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni.sync">,
4733   ClangBuiltin<"__nvvm_vote_uni_sync">;
4734 // vote.sync.ballot mask, pred -- the only one of the four with an i32 result
4735 def int_nvvm_vote_ballot_sync :
4736   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty],
4737             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot.sync">,
4738   ClangBuiltin<"__nvvm_vote_ballot_sync">;
4739 
4740 //
4741 // ACTIVEMASK
4742 // No operands, i32 result. Its attribute list contains IntrHasSideEffects in addition to the inaccessible-memory-only, convergent and no-callback attributes.
4743 def int_nvvm_activemask :
4744   Intrinsic<[llvm_i32_ty], [],
4745             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback, IntrHasSideEffects], "llvm.nvvm.activemask">,
4746   ClangBuiltin<"__nvvm_activemask">;
4747 
4748 //
4749 // MATCH.SYNC
4750 // Operands are (i32 mask, i32 or i64 value). The match.any forms return i32; the match.all forms return the pair {i32, i1}.
4751 // match.any.sync.b32 mask, value
4752 def int_nvvm_match_any_sync_i32 :
4753   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4754             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i32">,
4755   ClangBuiltin<"__nvvm_match_any_sync_i32">;
4756 // match.any.sync.b64 mask, value -- the value is i64 but the result stays i32
4757 def int_nvvm_match_any_sync_i64 :
4758   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
4759             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i64">,
4760   ClangBuiltin<"__nvvm_match_any_sync_i64">;
4761 
4762 // The match.all instruction has two variants -- one returns a single value, the
4763 // other returns a pair {value, predicate}. We currently only implement the latter
4764 // as that's the variant exposed by the CUDA API.
4765 
4766 // match.all.sync.b32p mask, value -- two results (i32, i1); no ClangBuiltin is attached to this record
4767 def int_nvvm_match_all_sync_i32p :
4768   Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
4769             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i32p">;
4770 // match.all.sync.b64p mask, value -- two results (i32, i1); no ClangBuiltin is attached to this record
4771 def int_nvvm_match_all_sync_i64p :
4772   Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
4773             [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i64p">;
4774 
4775 //
4776 // ELECT.SYNC
4777 // One i32 operand (membermask), two results (i32, i1). Declared with DefaultAttrsIntrinsic and without an explicit LLVM name string or Clang builtin.
4778 // elect.sync dst|pred, membermask
4779 def int_nvvm_elect_sync :
4780   DefaultAttrsIntrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty],
4781                         [IntrInaccessibleMemOnly, IntrConvergent]>;
4782 
4783 //
4784 // REDUX.SYNC
4785 // All eight records share one shape: operands (i32 src, i32 membermask), i32 result, attributes convergent / inaccessible-memory-only / no-callback, plus a Clang builtin. No explicit LLVM name string is given.
4786 // redux.sync.min.u32 dst, src, membermask;
4787 def int_nvvm_redux_sync_umin : ClangBuiltin<"__nvvm_redux_sync_umin">,
4788   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4789             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4790 
4791 // redux.sync.max.u32 dst, src, membermask;
4792 def int_nvvm_redux_sync_umax : ClangBuiltin<"__nvvm_redux_sync_umax">,
4793   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4794             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4795 
4796 // redux.sync.add.s32 dst, src, membermask;
4797 def int_nvvm_redux_sync_add : ClangBuiltin<"__nvvm_redux_sync_add">,
4798   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4799             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4800 
4801 // redux.sync.min.s32 dst, src, membermask;
4802 def int_nvvm_redux_sync_min : ClangBuiltin<"__nvvm_redux_sync_min">,
4803   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4804             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4805 
4806 // redux.sync.max.s32 dst, src, membermask;
4807 def int_nvvm_redux_sync_max : ClangBuiltin<"__nvvm_redux_sync_max">,
4808   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4809             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4810 
4811 // redux.sync.and.b32 dst, src, membermask;
4812 def int_nvvm_redux_sync_and : ClangBuiltin<"__nvvm_redux_sync_and">,
4813   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4814             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4815 
4816 // redux.sync.xor.b32 dst, src, membermask;
4817 def int_nvvm_redux_sync_xor : ClangBuiltin<"__nvvm_redux_sync_xor">,
4818   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4819             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4820 
4821 // redux.sync.or.b32 dst, src, membermask;
4822 def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
4823   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
4824             [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
4825 
4826 //
4827 // WGMMA fence instructions
4828 // All three records return nothing and are marked convergent; none lists a memory attribute or a Clang builtin.
4829 // wgmma.fence.sync.aligned; -- no operands
4830 def int_nvvm_wgmma_fence_sync_aligned 
4831   : Intrinsic<[], [], [IntrConvergent]>; // no explicit LLVM name string, unlike the commit_group and wait_group records
4832 
4833 // wgmma.commit_group.sync.aligned; -- no operands; the explicit name string keeps the underscore in "commit_group"
4834 def int_nvvm_wgmma_commit_group_sync_aligned
4835   : Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.commit_group.sync.aligned">;
4836 
4837 // wgmma.wait_group.sync.aligned N; -- N is an i64 operand that ImmArg<ArgIndex<0>> requires to be an immediate
4838 def int_nvvm_wgmma_wait_group_sync_aligned
4839   : Intrinsic<[], [llvm_i64_ty], [IntrConvergent, ImmArg<ArgIndex<0>>], "llvm.nvvm.wgmma.wait_group.sync.aligned">;
4840 
4841 //
4842 // WMMA instructions
4843 //
4844 // WMMA.LOAD -- intrinsic class for a fragment load. Results are Frag.regs; operands are an overloaded pointer plus, when WithStride is non-zero, one i32 (presumably the stride -- confirm).
4845 class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
4846   : Intrinsic<Frag.regs,
4847               !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
4848               [IntrWillReturn, IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>], // reads argument memory only; operand 0 is readonly and not captured
4849               WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>; // LLVM name string is computed by WMMA_NAME_LDST (defined outside this excerpt); Layout is used only there
4850 
4851 // WMMA.STORE.D -- intrinsic class for a fragment store. No results; operands are an overloaded pointer, then Frag.regs, then one i32 when WithStride is non-zero (presumably the stride -- confirm).
4852 class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
4853   : Intrinsic<[],
4854               !listconcat(
4855                 [llvm_anyptr_ty],
4856                 Frag.regs,
4857                 !if(WithStride, [llvm_i32_ty], [])),
4858               [IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>], // writes argument memory only; operand 0 is writeonly and not captured
4859               WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>; // LLVM name string is computed by WMMA_NAME_LDST (defined outside this excerpt); Layout is used only there
4860 
4861 // Create all load/store variants: for each layout x stride setting, one record per fragment that NVVM_WMMA_LDST_SUPPORTED accepts.
4862 foreach layout = ["row", "col"] in {
4863   foreach stride = [0, 1] in { // passed as WithStride: 0 = pointer-only operand list, 1 = extra i32 operand
4864     foreach frag = NVVM_MMA_OPS.all_ld_ops in
4865       if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then // the support check takes frag and layout only, not stride
4866         def WMMA_NAME_LDST<"load", frag, layout, stride>.record // record name is computed by WMMA_NAME_LDST
4867              : NVVM_WMMA_LD<frag, layout, stride>;
4868     foreach frag = NVVM_MMA_OPS.all_st_ops in
4869       if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
4870         def WMMA_NAME_LDST<"store", frag, layout, stride>.record
4871              : NVVM_WMMA_ST<frag, layout, stride>;
4872   }
4873 }
4874 
4875 // WMMA.MMA -- intrinsic class whose results are D.regs and whose operands are A.regs, B.regs and C.regs concatenated in that order.
4876 class NVVM_WMMA_MMA<string ALayout, string BLayout, int Satfinite, string rnd, string b1op,
4877                     WMMA_REGS A, WMMA_REGS B,
4878                     WMMA_REGS C, WMMA_REGS D>
4879   : Intrinsic<D.regs,
4880               !listconcat(A.regs, B.regs, C.regs),
4881               [IntrNoMem, IntrNoCallback], // declared as accessing no memory
4882               WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, A, B, C, D>.llvm>; // layouts, Satfinite, rnd and b1op only feed the name computed by WMMA_NAME (defined outside this excerpt)
4883 
4884 foreach layout_a = ["row", "col"] in { // enumerate layout_a x layout_b x satf x rnd x op x b1op; a record is defined only where NVVM_WMMA_SUPPORTED accepts the combination
4885   foreach layout_b = ["row", "col"] in {
4886     foreach satf = [0, 1] in {
4887       foreach rnd = ["", "rn", "rz", "rm", "rp"] in { // "" is included as a fifth rounding setting
4888         foreach op = NVVM_MMA_OPS.all_wmma_ops in { // each op is indexed below as op[0..3] and passed as the A, B, C, D fragments
4889           foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
4890             if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then { // the support check does not take b1op
4891               def WMMA_NAME<layout_a, layout_b, satf, rnd, b1op,
4892                                 op[0], op[1], op[2], op[3]>.record
4893                 : NVVM_WMMA_MMA<layout_a, layout_b, satf, rnd, b1op,
4894                                 op[0], op[1], op[2], op[3]>;
4895             }
4896           } // b1op
4897         } // op
4898       } // rnd
4899     } // satf
4900   } // layout_b
4901 } // layout_a
4902 
4903 // MMA -- intrinsic class whose results are D.regs and whose operands are A.regs, B.regs and C.regs concatenated in that order; it has no rounding parameter, unlike NVVM_WMMA_MMA.
4904 class NVVM_MMA<string ALayout, string BLayout, int Satfinite, string b1op,
4905                WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D>
4906   : Intrinsic<D.regs,
4907               !listconcat(A.regs, B.regs, C.regs),
4908               [IntrNoMem, IntrNoCallback], // declared as accessing no memory
4909               MMA_NAME<ALayout, BLayout, Satfinite, b1op, A, B, C, D>.llvm>; // layouts, Satfinite and b1op only feed the name computed by MMA_NAME (defined outside this excerpt)
4910 
4911 foreach layout_a = ["row", "col"] in { // enumerate layout_a x layout_b x satf x op x b1op; a record is defined only where NVVM_MMA_SUPPORTED accepts the combination
4912   foreach layout_b = ["row", "col"] in {
4913     foreach satf = [0, 1] in {
4914       foreach op = NVVM_MMA_OPS.all_mma_ops in { // each op is indexed below as op[0..3] and passed as the A, B, C, D fragments
4915         foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
4916           if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then { // the support check does not take b1op
4917             def MMA_NAME<layout_a, layout_b, satf, b1op, op[0], op[1], op[2], op[3]>.record
4918               : NVVM_MMA<layout_a, layout_b, satf, b1op, op[0], op[1], op[2], op[3]>;
4919           }
4920         } // b1op
4921       } // op
4922     } // satf
4923   } // layout_b
4924 } // layout_a
4925 
4926 // LDMATRIX -- intrinsic class with results Frag.regs and a single overloaded-pointer operand; Transposed only feeds the name computed by LDMATRIX_NAME (defined outside this excerpt).
4927 class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed>
4928   : Intrinsic<Frag.regs, [llvm_anyptr_ty],
4929               [IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, // reads argument memory only; operand 0 is readonly
4930                NoCapture<ArgIndex<0>>], // and not captured
4931               LDMATRIX_NAME<Frag, Transposed>.intr>;
4932 
4933 foreach transposed = [0, 1] in { // both Transposed settings for every fragment that NVVM_LDMATRIX_SUPPORTED accepts
4934   foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in {
4935     if NVVM_LDMATRIX_SUPPORTED<frag>.ret then { // the support check takes the fragment only, not `transposed`
4936       def LDMATRIX_NAME<frag, transposed>.record // record name is computed by LDMATRIX_NAME
4937         : NVVM_LDMATRIX<frag, transposed>;
4938     }
4939   }
4940 }
4941 
4942 def int_nvvm_mapa // (llvm_ptr_ty, i32) -> llvm_ptr_ty
4943   : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
4944               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>], // declared as accessing no memory and speculatable; pointer operand 0 is not captured
4945               "llvm.nvvm.mapa">;
4946 def int_nvvm_mapa_shared_cluster // same shape as int_nvvm_mapa, with llvm_shared_ptr_ty for both the operand and the result
4947   : DefaultAttrsIntrinsic<[llvm_shared_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
4948               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4949               "llvm.nvvm.mapa.shared.cluster">;
4950 def int_nvvm_getctarank // (llvm_ptr_ty) -> i32
4951   : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty],
4952               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4953               "llvm.nvvm.getctarank">;
4954 def int_nvvm_getctarank_shared_cluster // (llvm_shared_ptr_ty) -> i32
4955   : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty],
4956               [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
4957               "llvm.nvvm.getctarank.shared.cluster">;
4958 def int_nvvm_is_explicit_cluster // no operands, i1 result
4959   : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
4960               [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>], // the result is marked noundef
4961               "llvm.nvvm.is_explicit_cluster">; // explicit name string keeps the underscores in "is_explicit_cluster"
4962 
4963 // Setmaxnreg inc/dec intrinsics -- both take one i32 operand that must be an immediate (ImmArg) and return nothing; they are convergent, declared IntrNoMem, and marked as having side effects.
4964 def int_nvvm_setmaxnreg_inc_sync_aligned_u32
4965   : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
4966               [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
4967               "llvm.nvvm.setmaxnreg.inc.sync.aligned.u32">;
4968 def int_nvvm_setmaxnreg_dec_sync_aligned_u32
4969   : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
4970               [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
4971               "llvm.nvvm.setmaxnreg.dec.sync.aligned.u32">;
4972 
4973 // Exit -- no operands, no results; marked IntrNoReturn, convergent and inaccessible-memory-only, and exposed as the Clang builtin __nvvm_exit.
4974 def int_nvvm_exit : ClangBuiltin<"__nvvm_exit">,
4975     Intrinsic<[], [], [IntrConvergent, IntrInaccessibleMemOnly, IntrNoReturn]>;
4976 
4977 // Intrinsics for Tensor Copy using TMA
4978 // G2S -> From Global to Shared memory variants
4979 // S2G -> From Shared to Global memory variants (a third, prefetch, record is also defined per combination below)
4980 foreach dim = [1, 2, 3, 4, 5] in {
4981   foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { // "im2col" is generated only for dim >= 3, giving 8 (dim, mode) pairs in total
4982     foreach g2s = [CP_ASYNC_BULK_TENSOR_G2S_INTR<dim, mode>] in // one-element list: binds the helper record (defined outside this excerpt) to a local name
4983       def g2s.Name : DefaultAttrsIntrinsic<[], g2s.ArgsTy, g2s.IntrProp>; // no results; name, operand types and attributes all come from the helper
4984     foreach s2g = [CP_ASYNC_BULK_TENSOR_S2G_INTR<dim, mode>] in
4985       def s2g.Name : DefaultAttrsIntrinsic<[], s2g.ArgsTy, s2g.IntrProp>;
4986     foreach prefetch = [CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<dim, mode>] in
4987       def prefetch.Name : DefaultAttrsIntrinsic<[], prefetch.ArgsTy, prefetch.IntrProp>;
4988   }
4989 }
4990 
4991 // Intrinsics for TMA Copy with reduction -- one record per (dim, mode, red_op): 8 (dim, mode) pairs x 8 reduction ops = 64 records.
4992 foreach dim = [1, 2, 3, 4, 5] in {
4993   foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { // "im2col" is generated only for dim >= 3
4994     foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in {
4995       foreach reduce = [CP_ASYNC_BULK_TENSOR_REDUCE_INTR<dim, mode, red_op>] in // one-element list: binds the helper record (defined outside this excerpt) to a local name
4996         def reduce.Name : DefaultAttrsIntrinsic<[], reduce.ArgsTy, reduce.IntrProp>; // no results; name, operand types and attributes come from the helper
4997     }
4998   }
4999 }
5000 
5001 // Intrinsics for Bulk Copy using TMA (non-tensor)
5002 // From Global to Shared Cluster -- eight operands, no results; the two trailing i1 flags (operands 6 and 7) must be immediates.
5003 def int_nvvm_cp_async_bulk_global_to_shared_cluster
5004   : DefaultAttrsIntrinsic<[],
5005       [llvm_shared_ptr_ty, // dst_smem_ptr
5006        llvm_shared_ptr_ty, // mbarrier_ptr
5007        llvm_global_ptr_ty, // src_gmem_ptr
5008        llvm_i32_ty,        // copy_size
5009        llvm_i16_ty,        // cta_mask
5010        llvm_i64_ty,        // cache_hint
5011        llvm_i1_ty,         // Flag for cta_mask
5012        llvm_i1_ty],        // Flag for cache_hint
5013       [IntrConvergent, IntrArgMemOnly,
5014        WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>, // destination (operand 0) is writeonly, source (operand 2) is readonly
5015        NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, // none of the three pointer operands is captured
5016        NoCapture<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
5017        ImmArg<ArgIndex<7>>]>;
5018 
5019 // From Shared CTA to Shared Cluster -- four operands, no results; this form has no cta_mask or cache_hint operands and therefore no ImmArg flags.
5020 def int_nvvm_cp_async_bulk_shared_cta_to_cluster
5021   : DefaultAttrsIntrinsic<[],
5022       [llvm_shared_ptr_ty, // dst_smem_ptr
5023        llvm_shared_ptr_ty, // mbarrier_ptr
5024        llvm_shared_ptr_ty, // src_smem_ptr
5025        llvm_i32_ty],       // copy_size
5026       [IntrConvergent, IntrArgMemOnly,
5027        WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<2>>, // destination (operand 0) is writeonly, source (operand 2) is readonly
5028        NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, // none of the three pointer operands is captured
5029        NoCapture<ArgIndex<2>>]>;
5030 
5031 // From Shared CTA to Global memory -- five operands, no results; there is no mbarrier operand here, and the trailing i1 cache_hint flag (operand 4) must be an immediate.
5032 def int_nvvm_cp_async_bulk_shared_cta_to_global
5033   : DefaultAttrsIntrinsic<[],
5034       [llvm_global_ptr_ty, // dst_gmem_ptr
5035        llvm_shared_ptr_ty, // src_smem_ptr
5036        llvm_i32_ty,        // copy_size
5037        llvm_i64_ty,        // cache_hint
5038        llvm_i1_ty],        // Flag for cache_hint
5039       [IntrConvergent, IntrArgMemOnly,
5040        WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, // destination (operand 0) is writeonly, source (operand 1) is readonly
5041        NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, // neither pointer operand is captured
5042        ImmArg<ArgIndex<4>>]>;
5043 
5044 // Intrinsics for Bulk Copy Prefetch L2 -- four operands, no results; the only pointer operand is the global source, and the trailing i1 cache_hint flag (operand 3) must be an immediate.
5045 def int_nvvm_cp_async_bulk_prefetch_L2
5046   : DefaultAttrsIntrinsic<[],
5047       [llvm_global_ptr_ty, // src_gmem_ptr
5048        llvm_i32_ty,        // copy_size
5049        llvm_i64_ty,        // cache_hint
5050        llvm_i1_ty],        // Flag for cache_hint
5051       [IntrConvergent, IntrArgMemOnly,
5052        NoCapture<ArgIndex<0>>, ReadOnly<ArgIndex<0>>, // source pointer is readonly and not captured; no operand is marked writeonly
5053        ImmArg<ArgIndex<3>>]>;
5054 
5055 def int_nvvm_griddepcontrol_launch_dependents: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; // no operands or results; declared IntrNoMem but marked as having side effects
5056 def int_nvvm_griddepcontrol_wait: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; // same shape and attributes as launch_dependents
5057 
5058 } // let TargetPrefix = "nvvm"