Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 08:54:12

0001 /*
0002  * SPDX-PackageName: "covfie, a part of the ACTS project"
0003  * SPDX-FileCopyrightText: 2022 CERN
0004  * SPDX-License-Identifier: MPL-2.0
0005  */
0006 
0007 #pragma once
0008 
0009 #include <memory>
0010 #include <optional>
0011 
0012 #include <cuda_runtime.h>
0013 
0014 #include <covfie/core/backend/primitive/array.hpp>
0015 #include <covfie/core/backend/transformer/linear.hpp>
0016 #include <covfie/core/concepts.hpp>
0017 #include <covfie/core/parameter_pack.hpp>
0018 #include <covfie/core/utility/nd_map.hpp>
0019 #include <covfie/core/vector.hpp>
0020 #include <covfie/cuda/error_check.hpp>
0021 #include <covfie/cuda/utility/type_conversion.hpp>
0022 
0023 namespace covfie::backend {
0024 enum class cuda_texture_interpolation {
0025     LINEAR,
0026     NEAREST_NEIGHBOUR
0027 };
0028 
0029 template <
0030     concepts::vector_descriptor _input_vector_t,
0031     concepts::vector_descriptor _output_vector_t,
0032     cuda_texture_interpolation _interpolation_method =
0033         cuda_texture_interpolation::LINEAR>
0034 struct cuda_texture {
0035     static_assert(_input_vector_t::size == 2 || _input_vector_t::size == 3);
0036 
0037     using this_t =
0038         cuda_texture<_input_vector_t, _output_vector_t, _interpolation_method>;
0039 
0040     static constexpr bool is_initial = true;
0041 
0042     using contravariant_input_t =
0043         covfie::vector::array_vector_d<_input_vector_t>;
0044     using covariant_output_t = covfie::vector::array_vector_d<_output_vector_t>;
0045 
0046     using channel_t =
0047         typename utility::to_cuda_channel_t<_output_vector_t>::type;
0048 
0049     template <typename T>
0050     using linear_tc = linear<T, float>;
0051 
0052     using configuration_t = std::monostate;
0053 
0054     static constexpr uint32_t IO_MAGIC_HEADER = 0xAB110001;
0055 
0056     struct owning_data_t {
0057         using parent_t = this_t;
0058 
0059         owning_data_t() = default;
0060 
0061         owning_data_t(owning_data_t && o)
0062             : m_array(o.m_array)
0063             , m_tex(o.m_tex)
0064         {
0065             o.m_array = nullptr;
0066             o.m_tex = std::nullopt;
0067         }
0068 
0069         owning_data_t & operator=(owning_data_t && o)
0070         {
0071             if (m_tex.has_value()) {
0072                 cudaErrorCheck(cudaDestroyTextureObject(*m_tex));
0073                 m_tex.reset();
0074             }
0075 
0076             if (m_array != nullptr) {
0077                 cudaErrorCheck(cudaFreeArray(m_array));
0078             }
0079 
0080             m_tex = o.m_tex;
0081             m_array = o.m_array;
0082 
0083             o.m_tex = std::nullopt;
0084             o.m_array = nullptr;
0085 
0086             return *this;
0087         }
0088 
0089         owning_data_t & operator=(const owning_data_t & o)
0090         {
0091             cudaChannelFormatDesc channelDesc =
0092                 cudaCreateChannelDesc<channel_t>();
0093 
0094             cudaExtent extent;
0095 
0096             cudaErrorCheck(
0097                 cudaArrayGetInfo(nullptr, &extent, nullptr, o.m_array)
0098             );
0099 
0100             cudaErrorCheck(cudaMalloc3DArray(&m_array, &channelDesc, extent));
0101 
0102             if constexpr (_input_vector_t::size == 2) {
0103                 cudaErrorCheck(cudaMemcpy2DArrayToArray(
0104                     m_array,
0105                     0,
0106                     0,
0107                     o.m_array,
0108                     0,
0109                     0,
0110                     extent.width * sizeof(channel_t),
0111                     extent.height,
0112                     cudaMemcpyDeviceToDevice
0113                 ));
0114             } else if constexpr (_input_vector_t::size == 3) {
0115                 cudaMemcpy3DParms copyParams;
0116                 // cudaMemcpy3DParms copyParams = {0};
0117                 memset(&copyParams, 0, sizeof(cudaMemcpy3DParms));
0118                 copyParams.srcArray = o.m_array;
0119                 copyParams.dstArray = m_array;
0120                 copyParams.extent = extent;
0121                 copyParams.kind = cudaMemcpyDeviceToDevice;
0122                 cudaErrorCheck(cudaMemcpy3D(&copyParams));
0123             }
0124 
0125             cudaResourceDesc resDesc;
0126             memset(&resDesc, 0, sizeof(cudaResourceDesc));
0127             resDesc.resType = cudaResourceTypeArray;
0128             resDesc.res.array.array = m_array;
0129 
0130             cudaTextureDesc texDesc;
0131             memset(&texDesc, 0, sizeof(cudaTextureDesc));
0132 
0133             for (std::size_t i = 0; i < _input_vector_t::size; ++i) {
0134                 texDesc.addressMode[i] = cudaAddressModeClamp;
0135             }
0136 
0137             // TODO: Make configurable
0138             if (_interpolation_method == cuda_texture_interpolation::LINEAR) {
0139                 texDesc.filterMode = cudaFilterModeLinear;
0140             } else if (_interpolation_method == cuda_texture_interpolation::NEAREST_NEIGHBOUR)
0141             {
0142                 texDesc.filterMode = cudaFilterModePoint;
0143             }
0144             texDesc.readMode = cudaReadModeElementType;
0145 
0146             cudaErrorCheck(
0147                 cudaCreateTextureObject(&(*m_tex), &resDesc, &texDesc, nullptr)
0148             );
0149 
0150             return *this;
0151         }
0152 
0153         owning_data_t(const owning_data_t & o)
0154         {
0155             *this = o;
0156         }
0157 
0158         template <typename T>
0159         requires(
0160             std::unsigned_integral<
0161                 typename T::parent_t::contravariant_input_t::scalar_t> &&
0162             (T::parent_t::contravariant_input_t::dimensions ==
0163              contravariant_input_t::dimensions)
0164         ) owning_data_t(const T & o)
0165             : m_tex(cudaTextureObject_t{})
0166         {
0167             cudaChannelFormatDesc channelDesc =
0168                 cudaCreateChannelDesc<channel_t>();
0169 
0170             typename T::parent_t::non_owning_data_t no(o);
0171 
0172             typename T::parent_t::configuration_t srcSize =
0173                 o.get_configuration();
0174 
0175             cudaExtent extent = make_cudaExtent(
0176                 _input_vector_t::size >= 1 ? srcSize[0] : 0,
0177                 _input_vector_t::size >= 2 ? srcSize[1] : 0,
0178                 _input_vector_t::size >= 3 ? srcSize[2] : 0
0179             );
0180 
0181             cudaErrorCheck(cudaMalloc3DArray(&m_array, &channelDesc, extent));
0182 
0183             std::size_t stage_size = extent.width *
0184                                      std::max(1UL, extent.height) *
0185                                      std::max(1UL, extent.depth);
0186 
0187             std::unique_ptr<channel_t[]> stage =
0188                 std::make_unique<channel_t[]>(stage_size);
0189 
0190             utility::nd_map(
0191                 std::function<void(decltype(srcSize)
0192                 )>([&no, &stage, &srcSize, &stage_size](decltype(srcSize) i
0193                    ) -> void {
0194                     typename T::parent_t::covariant_output_t::vector_t v =
0195                         no.at(i);
0196 
0197                     std::size_t idx = 0;
0198 
0199                     for (std::size_t k = contravariant_input_t::dimensions - 1;
0200                          k <= contravariant_input_t::dimensions;
0201                          --k)
0202                     {
0203                         std::size_t tmp = i[k];
0204 
0205                         for (std::size_t l = k - 1; l < k; --l) {
0206                             tmp *= srcSize[l];
0207                         }
0208 
0209                         idx += tmp;
0210                     }
0211 
0212                     std::size_t idx2 = static_cast<std::size_t>(idx);
0213                     assert(idx2 < stage_size);
0214 
0215                     using stage_scalar_t =
0216                         typename covariant_output_t::scalar_t;
0217 
0218                     if constexpr (covariant_output_t::dimensions == 1) {
0219                         stage[idx2] = static_cast<stage_scalar_t>(v[0]);
0220                     } else if constexpr (covariant_output_t::dimensions == 2) {
0221                         stage[idx2].x = static_cast<stage_scalar_t>(v[0]);
0222                         stage[idx2].y = static_cast<stage_scalar_t>(v[1]);
0223                     } else if constexpr (covariant_output_t::dimensions == 3) {
0224                         stage[idx2].x = static_cast<stage_scalar_t>(v[0]);
0225                         stage[idx2].y = static_cast<stage_scalar_t>(v[1]);
0226                         stage[idx2].z = static_cast<stage_scalar_t>(v[2]);
0227                         stage[idx2].w = static_cast<stage_scalar_t>(0.f);
0228                     } else if constexpr (covariant_output_t::dimensions == 4) {
0229                         stage[idx2].x = static_cast<stage_scalar_t>(v[0]);
0230                         stage[idx2].y = static_cast<stage_scalar_t>(v[1]);
0231                         stage[idx2].z = static_cast<stage_scalar_t>(v[2]);
0232                         stage[idx2].w = static_cast<stage_scalar_t>(v[3]);
0233                     }
0234                 }),
0235                 srcSize
0236             );
0237 
0238             if constexpr (_input_vector_t::size == 2) {
0239                 cudaErrorCheck(cudaMemcpy2DToArray(
0240                     m_array,
0241                     0,
0242                     0,
0243                     stage.get(),
0244                     extent.width * sizeof(channel_t),
0245                     extent.width * sizeof(channel_t),
0246                     extent.height,
0247                     cudaMemcpyHostToDevice
0248                 ));
0249             } else if constexpr (_input_vector_t::size == 3) {
0250                 cudaMemcpy3DParms copyParams;
0251                 // cudaMemcpy3DParms copyParams = {0};
0252                 memset(&copyParams, 0, sizeof(cudaMemcpy3DParms));
0253                 copyParams.srcPtr = make_cudaPitchedPtr(
0254                     stage.get(),
0255                     extent.width * sizeof(channel_t),
0256                     extent.width,
0257                     extent.height
0258                 );
0259                 copyParams.dstArray = m_array;
0260                 copyParams.extent = extent;
0261                 copyParams.kind = cudaMemcpyHostToDevice;
0262                 cudaErrorCheck(cudaMemcpy3D(&copyParams));
0263             }
0264 
0265             cudaResourceDesc resDesc;
0266             memset(&resDesc, 0, sizeof(cudaResourceDesc));
0267             resDesc.resType = cudaResourceTypeArray;
0268             resDesc.res.array.array = m_array;
0269 
0270             cudaTextureDesc texDesc;
0271             memset(&texDesc, 0, sizeof(cudaTextureDesc));
0272 
0273             // TODO: Make configurable
0274             for (std::size_t i = 0; i < _input_vector_t::size; ++i) {
0275                 texDesc.addressMode[i] = cudaAddressModeClamp;
0276             }
0277 
0278             // TODO: Make configurable
0279             if (_interpolation_method == cuda_texture_interpolation::LINEAR) {
0280                 texDesc.filterMode = cudaFilterModeLinear;
0281             } else if (_interpolation_method == cuda_texture_interpolation::NEAREST_NEIGHBOUR)
0282             {
0283                 texDesc.filterMode = cudaFilterModePoint;
0284             }
0285             texDesc.readMode = cudaReadModeElementType;
0286 
0287             cudaErrorCheck(
0288                 cudaCreateTextureObject(&(*m_tex), &resDesc, &texDesc, nullptr)
0289             );
0290         }
0291 
0292         // NOTE: The stream is currently ignored.
0293         template <typename T>
0294         requires(
0295             std::unsigned_integral<
0296                 typename T::parent_t::contravariant_input_t::scalar_t> &&
0297             (T::parent_t::contravariant_input_t::dimensions ==
0298              contravariant_input_t::dimensions)
0299         ) owning_data_t(const T & o, const cudaStream_t &)
0300             : owning_data_t(o)
0301         {
0302         }
0303 
0304         template <typename T>
0305         owning_data_t(parameter_pack<T> && i)
0306             : owning_data_t(std::move(i.x))
0307         {
0308         }
0309 
0310         ~owning_data_t()
0311         {
0312             if (m_tex.has_value()) {
0313                 cudaErrorCheck(cudaDestroyTextureObject(*m_tex));
0314                 m_tex.reset();
0315             }
0316 
0317             if (m_array != nullptr) {
0318                 cudaErrorCheck(cudaFreeArray(m_array));
0319             }
0320         }
0321 
0322         configuration_t get_configuration() const
0323         {
0324             return {};
0325         }
0326 
0327         static owning_data_t read_binary(std::istream & fs)
0328         {
0329             throw std::invalid_argument("Cannot perform IO on texture memory.");
0330 
0331             return owning_data_t();
0332         }
0333 
0334         static void write_binary(std::ostream & fs, const owning_data_t & o)
0335         {
0336             throw std::invalid_argument("Cannot perform IO on texture memory.");
0337         }
0338 
0339         cudaArray_t m_array = nullptr;
0340         std::optional<cudaTextureObject_t> m_tex;
0341         std::optional<cudaStream_t> m_stream;
0342     };
0343 
0344     struct non_owning_data_t {
0345         using parent_t = this_t;
0346 
0347         non_owning_data_t(const owning_data_t & o)
0348             : m_tex(*o.m_tex)
0349         {
0350         }
0351 
0352         COVFIE_HOST_DEVICE typename covariant_output_t::vector_t
0353         at(typename contravariant_input_t::vector_t i) const
0354         {
0355             channel_t r;
0356 
0357             if constexpr (_input_vector_t::size == 1) {
0358                 r = tex1D<channel_t>(m_tex, i[0] + 0.5);
0359             } else if constexpr (_input_vector_t::size == 2) {
0360                 r = tex2D<channel_t>(m_tex, i[0] + 0.5, i[1] + 0.5);
0361             } else if constexpr (_input_vector_t::size == 3) {
0362                 r = tex3D<channel_t>(m_tex, i[0] + 0.5, i[1] + 0.5, i[2] + 0.5);
0363             }
0364 
0365             if constexpr (_output_vector_t::size == 1) {
0366                 return {r.x};
0367             } else if constexpr (_output_vector_t::size == 2) {
0368                 return {r.x, r.y};
0369             } else if constexpr (_output_vector_t::size == 3) {
0370                 return {r.x, r.y, r.z};
0371             } else if constexpr (_output_vector_t::size == 4) {
0372                 return {r.x, r.y, r.z, r.w};
0373             }
0374 
0375             return {};
0376         }
0377 
0378         cudaTextureObject_t m_tex;
0379     };
0380 };
0381 }