Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:27

0001 //===--- AMDHSAKernelDescriptor.h -----------------------------*- C++ -*---===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 /// \file
0010 /// AMDHSA kernel descriptor definitions. For more information, visit
0011 /// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
0012 ///
0013 /// \warning
0014 /// Any changes to this file should also be audited for corresponding changes
0015 /// needed in both the assembler and disassembler, namely:
0016 /// * AMDGPUAsmPrinter.{cpp,h}
0017 /// * AMDGPUTargetStreamer.{cpp,h}
0018 /// * AMDGPUDisassembler.{cpp,h}
0019 //
0020 //===----------------------------------------------------------------------===//
0021 
0022 #ifndef LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
0023 #define LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H
0024 
0025 #include <cstddef>
0026 #include <cstdint>
0027 
// Gets offset of specified member in specified type.
//
// <cstddef> normally provides offsetof already, so this fallback is only
// reached when that definition is not visible. The result is consumed by the
// static_asserts at the end of this file and therefore has to be a constant
// expression; the null-pointer idiom is not guaranteed to be one, so the
// compiler builtin is preferred where it is known to exist.
#ifndef offsetof
#if defined(__GNUC__) || defined(__clang__)
#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
#else
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->MEMBER)
#endif
#endif // offsetof
0032 
// Emits the three enumerators that describe one bit-field packed into an
// integer:
//   <FIELD>_SHIFT  position of the field's least significant bit,
//   <FIELD>_WIDTH  number of bits the field occupies,
//   <FIELD>        mask covering the field, already shifted into place.
// Intended to be used inside an enum body; the caller supplies the separating
// comma between entries.
#ifndef AMDHSA_BITS_ENUM_ENTRY
#define AMDHSA_BITS_ENUM_ENTRY(FIELD, LSB, NBITS)                              \
  FIELD##_SHIFT = (LSB), FIELD##_WIDTH = (NBITS),                              \
  FIELD = (((1 << (NBITS)) - 1) << (LSB))
#endif // AMDHSA_BITS_ENUM_ENTRY
0041 
// Gets bits for specified bit mask from specified source.
//
// SRC may be any integer expression. MSK must be the bare name of a mask
// created with AMDHSA_BITS_ENUM_ENTRY, because the matching <MSK>_SHIFT
// enumerator is formed by token pasting. The result is the field value
// shifted down so that its least significant bit is bit 0.
#ifndef AMDHSA_BITS_GET
// Both operands are parenthesized so that a compound SRC such as `a | b` is
// masked as a whole instead of binding `b & MSK` first.
#define AMDHSA_BITS_GET(SRC, MSK) (((SRC) & (MSK)) >> MSK ## _SHIFT)
#endif // AMDHSA_BITS_GET
0046 
// Sets bits for specified bit mask in specified destination.
//
// DST must be a modifiable integer lvalue. MSK must be the bare name of a mask
// created with AMDHSA_BITS_ENUM_ENTRY, because the matching <MSK>_SHIFT
// enumerator is formed by token pasting. VAL is evaluated exactly once; bits of
// VAL that do not fit in the field are discarded. All other bits of DST are
// left untouched.
#ifndef AMDHSA_BITS_SET
// The temporary carries a macro-specific name so that a caller passing a
// variable that happens to be called `local` does not end up initializing the
// temporary from itself. Arguments are parenthesized so compound expressions
// keep their intended grouping.
#define AMDHSA_BITS_SET(DST, MSK, VAL)                                         \
  do {                                                                         \
    auto AmdhsaBitsSetVal_ = (VAL);                                            \
    (DST) &= ~(MSK);                                                           \
    (DST) |= ((AmdhsaBitsSetVal_ << MSK##_SHIFT) & (MSK));                     \
  } while (0)
#endif // AMDHSA_BITS_SET
0056 
0057 namespace llvm {
0058 namespace amdhsa {
0059 
// Encodings of the floating point rounding modes. Must match hardware
// definition.
enum : uint8_t {
  FLOAT_ROUND_MODE_NEAR_EVEN      = 0,
  FLOAT_ROUND_MODE_PLUS_INFINITY  = 1,
  FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
  FLOAT_ROUND_MODE_ZERO           = 3,
};
0067 
// Encodings of the floating point denorm modes. Must match hardware
// definition.
enum : uint8_t {
  FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
  FLOAT_DENORM_MODE_FLUSH_DST     = 1,
  FLOAT_DENORM_MODE_FLUSH_SRC     = 2,
  FLOAT_DENORM_MODE_FLUSH_NONE    = 3,
};
0075 
// Encodings of the system VGPR workitem ID selections. Must match hardware
// definition.
enum : uint8_t {
  SYSTEM_VGPR_WORKITEM_ID_X         = 0,
  SYSTEM_VGPR_WORKITEM_ID_X_Y       = 1,
  SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     = 2,
  SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
};
0083 
0084 // Compute program resource register 1. Must match hardware definition.
0085 // GFX6+.
0086 #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
0087   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
0088 // [GFX6-GFX8].
0089 #define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
0090   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
0091 // [GFX6-GFX9].
0092 #define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
0093   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
0094 // [GFX6-GFX11].
0095 #define COMPUTE_PGM_RSRC1_GFX6_GFX11(NAME, SHIFT, WIDTH)                       \
0096   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX11_##NAME, SHIFT, WIDTH)
0097 // GFX9+.
0098 #define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
0099   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
0100 // GFX10+.
0101 #define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
0102   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
0103 // GFX12+.
0104 #define COMPUTE_PGM_RSRC1_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
0105   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX12_PLUS_##NAME, SHIFT, WIDTH)
0106 enum : int32_t {
0107   COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
0108   COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
0109   COMPUTE_PGM_RSRC1(PRIORITY, 10, 2),
0110   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_32, 12, 2),
0111   COMPUTE_PGM_RSRC1(FLOAT_ROUND_MODE_16_64, 14, 2),
0112   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_32, 16, 2),
0113   COMPUTE_PGM_RSRC1(FLOAT_DENORM_MODE_16_64, 18, 2),
0114   COMPUTE_PGM_RSRC1(PRIV, 20, 1),
0115   COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_DX10_CLAMP, 21, 1),
0116   COMPUTE_PGM_RSRC1_GFX12_PLUS(ENABLE_WG_RR_EN, 21, 1),
0117   COMPUTE_PGM_RSRC1(DEBUG_MODE, 22, 1),
0118   COMPUTE_PGM_RSRC1_GFX6_GFX11(ENABLE_IEEE_MODE, 23, 1),
0119   COMPUTE_PGM_RSRC1_GFX12_PLUS(DISABLE_PERF, 23, 1),
0120   COMPUTE_PGM_RSRC1(BULKY, 24, 1),
0121   COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
0122   COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
0123   COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
0124   COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
0125   COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
0126   COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
0127   COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
0128   COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
0129 };
0130 #undef COMPUTE_PGM_RSRC1
0131 
0132 // Compute program resource register 2. Must match hardware definition.
0133 // GFX6+.
0134 #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \
0135   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH)
0136 // [GFX6-GFX11].
0137 #define COMPUTE_PGM_RSRC2_GFX6_GFX11(NAME, SHIFT, WIDTH)                       \
0138   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX6_GFX11_##NAME, SHIFT, WIDTH)
0139 // GFX12+.
0140 #define COMPUTE_PGM_RSRC2_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
0141   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_GFX12_PLUS_##NAME, SHIFT, WIDTH)
0142 enum : int32_t {
0143   COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1),
0144   COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5),
0145   COMPUTE_PGM_RSRC2_GFX6_GFX11(ENABLE_TRAP_HANDLER, 6, 1),
0146   COMPUTE_PGM_RSRC2_GFX12_PLUS(RESERVED1, 6, 1),
0147   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
0148   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
0149   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
0150   COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
0151   COMPUTE_PGM_RSRC2(ENABLE_VGPR_WORKITEM_ID, 11, 2),
0152   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
0153   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_MEMORY, 14, 1),
0154   COMPUTE_PGM_RSRC2(GRANULATED_LDS_SIZE, 15, 9),
0155   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
0156   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
0157   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
0158   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
0159   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
0160   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
0161   COMPUTE_PGM_RSRC2(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
0162   COMPUTE_PGM_RSRC2(RESERVED0, 31, 1),
0163 };
0164 #undef COMPUTE_PGM_RSRC2
0165 
0166 // Compute program resource register 3 for GFX90A+. Must match hardware
0167 // definition.
0168 #define COMPUTE_PGM_RSRC3_GFX90A(NAME, SHIFT, WIDTH) \
0169   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX90A_ ## NAME, SHIFT, WIDTH)
0170 enum : int32_t {
0171   COMPUTE_PGM_RSRC3_GFX90A(ACCUM_OFFSET, 0, 6),
0172   COMPUTE_PGM_RSRC3_GFX90A(RESERVED0, 6, 10),
0173   COMPUTE_PGM_RSRC3_GFX90A(TG_SPLIT, 16, 1),
0174   COMPUTE_PGM_RSRC3_GFX90A(RESERVED1, 17, 15),
0175 };
0176 #undef COMPUTE_PGM_RSRC3_GFX90A
0177 
0178 // Compute program resource register 3 for GFX10+. Must match hardware
0179 // definition.
0180 // GFX10+.
0181 #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
0182   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
0183 // [GFX10].
0184 #define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH)                            \
0185   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_##NAME, SHIFT, WIDTH)
0186 // [GFX10-GFX11].
0187 #define COMPUTE_PGM_RSRC3_GFX10_GFX11(NAME, SHIFT, WIDTH)                      \
0188   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_GFX11_##NAME, SHIFT, WIDTH)
0189 // GFX11+.
0190 #define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
0191   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
0192 // [GFX11].
0193 #define COMPUTE_PGM_RSRC3_GFX11(NAME, SHIFT, WIDTH)                            \
0194   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_##NAME, SHIFT, WIDTH)
0195 // GFX12+.
0196 #define COMPUTE_PGM_RSRC3_GFX12_PLUS(NAME, SHIFT, WIDTH)                       \
0197   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX12_PLUS_##NAME, SHIFT, WIDTH)
0198 enum : int32_t {
0199   COMPUTE_PGM_RSRC3_GFX10_GFX11(SHARED_VGPR_COUNT, 0, 4),
0200   COMPUTE_PGM_RSRC3_GFX12_PLUS(RESERVED0, 0, 4),
0201   COMPUTE_PGM_RSRC3_GFX10(RESERVED1, 4, 8),
0202   COMPUTE_PGM_RSRC3_GFX11(INST_PREF_SIZE, 4, 6),
0203   COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_START, 10, 1),
0204   COMPUTE_PGM_RSRC3_GFX11(TRAP_ON_END, 11, 1),
0205   COMPUTE_PGM_RSRC3_GFX12_PLUS(INST_PREF_SIZE, 4, 8),
0206   COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED2, 12, 1),
0207   COMPUTE_PGM_RSRC3_GFX10_GFX11(RESERVED3, 13, 1),
0208   COMPUTE_PGM_RSRC3_GFX12_PLUS(GLG_EN, 13, 1),
0209   COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED4, 14, 17),
0210   COMPUTE_PGM_RSRC3_GFX10(RESERVED5, 31, 1),
0211   COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
0212 };
0213 #undef COMPUTE_PGM_RSRC3_GFX10_PLUS
0214 
0215 // Kernel code properties. Must be kept backwards compatible.
0216 #define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
0217   AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
0218 enum : int32_t {
0219   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 0, 1),
0220   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_PTR, 1, 1),
0221   KERNEL_CODE_PROPERTY(ENABLE_SGPR_QUEUE_PTR, 2, 1),
0222   KERNEL_CODE_PROPERTY(ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3, 1),
0223   KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
0224   KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
0225   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
0226   KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
0227   KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
0228   KERNEL_CODE_PROPERTY(USES_DYNAMIC_STACK, 11, 1),
0229   KERNEL_CODE_PROPERTY(RESERVED1, 12, 4),
0230 };
0231 #undef KERNEL_CODE_PROPERTY
0232 
0233 // Kernarg preload specification.
0234 #define KERNARG_PRELOAD_SPEC(NAME, SHIFT, WIDTH)                               \
0235   AMDHSA_BITS_ENUM_ENTRY(KERNARG_PRELOAD_SPEC_##NAME, SHIFT, WIDTH)
0236 enum : int32_t {
0237   KERNARG_PRELOAD_SPEC(LENGTH, 0, 7),
0238   KERNARG_PRELOAD_SPEC(OFFSET, 7, 9),
0239 };
0240 #undef KERNARG_PRELOAD_SPEC
0241 
// Kernel descriptor. Must be kept backwards compatible.
//
// A fixed 64-byte record. The byte range of each member is noted next to it;
// the same layout is pinned by the *_OFFSET constants and the static_asserts
// that follow this definition.
struct kernel_descriptor_t {
  uint32_t group_segment_fixed_size;     // bytes  0-3
  uint32_t private_segment_fixed_size;   // bytes  4-7
  uint32_t kernarg_size;                 // bytes  8-11
  uint8_t reserved0[4];                  // bytes 12-15
  int64_t kernel_code_entry_byte_offset; // bytes 16-23
  uint8_t reserved1[20];                 // bytes 24-43
  uint32_t compute_pgm_rsrc3;            // bytes 44-47, GFX10+ and GFX90A+
  uint32_t compute_pgm_rsrc1;            // bytes 48-51
  uint32_t compute_pgm_rsrc2;            // bytes 52-55
  uint16_t kernel_code_properties;       // bytes 56-57
  uint16_t kernarg_preload;              // bytes 58-59
  uint8_t reserved3[4];                  // bytes 60-63
};
0257 
// Byte offset of each kernel_descriptor_t member, one constant per member in
// declaration order.
enum : uint32_t {
  GROUP_SEGMENT_FIXED_SIZE_OFFSET      = 0,
  PRIVATE_SEGMENT_FIXED_SIZE_OFFSET    = 4,
  KERNARG_SIZE_OFFSET                  = 8,
  RESERVED0_OFFSET                     = 12,
  KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16,
  RESERVED1_OFFSET                     = 24,
  COMPUTE_PGM_RSRC3_OFFSET             = 44,
  COMPUTE_PGM_RSRC1_OFFSET             = 48,
  COMPUTE_PGM_RSRC2_OFFSET             = 52,
  KERNEL_CODE_PROPERTIES_OFFSET        = 56,
  KERNARG_PRELOAD_OFFSET               = 58,
  RESERVED3_OFFSET                     = 60,
};
0272 
0273 static_assert(
0274     sizeof(kernel_descriptor_t) == 64,
0275     "invalid size for kernel_descriptor_t");
0276 static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) ==
0277                   GROUP_SEGMENT_FIXED_SIZE_OFFSET,
0278               "invalid offset for group_segment_fixed_size");
0279 static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) ==
0280                   PRIVATE_SEGMENT_FIXED_SIZE_OFFSET,
0281               "invalid offset for private_segment_fixed_size");
0282 static_assert(offsetof(kernel_descriptor_t, kernarg_size) ==
0283                   KERNARG_SIZE_OFFSET,
0284               "invalid offset for kernarg_size");
0285 static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET,
0286               "invalid offset for reserved0");
0287 static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) ==
0288                   KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET,
0289               "invalid offset for kernel_code_entry_byte_offset");
0290 static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET,
0291               "invalid offset for reserved1");
0292 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) ==
0293                   COMPUTE_PGM_RSRC3_OFFSET,
0294               "invalid offset for compute_pgm_rsrc3");
0295 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) ==
0296                   COMPUTE_PGM_RSRC1_OFFSET,
0297               "invalid offset for compute_pgm_rsrc1");
0298 static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) ==
0299                   COMPUTE_PGM_RSRC2_OFFSET,
0300               "invalid offset for compute_pgm_rsrc2");
0301 static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) ==
0302                   KERNEL_CODE_PROPERTIES_OFFSET,
0303               "invalid offset for kernel_code_properties");
0304 static_assert(offsetof(kernel_descriptor_t, kernarg_preload) ==
0305                   KERNARG_PRELOAD_OFFSET,
0306               "invalid offset for kernarg_preload");
0307 static_assert(offsetof(kernel_descriptor_t, reserved3) == RESERVED3_OFFSET,
0308               "invalid offset for reserved3");
0309 
0310 } // end namespace amdhsa
0311 } // end namespace llvm
0312 
0313 #endif // LLVM_SUPPORT_AMDHSAKERNELDESCRIPTOR_H