File indexing completed on 2025-09-17 08:54:12
0001
0002
0003
0004
0005
0006
0007 #pragma once
0008
0009 #include <memory>
0010 #include <optional>
0011
0012 #include <cuda_runtime.h>
0013
0014 #include <covfie/core/backend/primitive/array.hpp>
0015 #include <covfie/core/backend/transformer/linear.hpp>
0016 #include <covfie/core/concepts.hpp>
0017 #include <covfie/core/parameter_pack.hpp>
0018 #include <covfie/core/utility/nd_map.hpp>
0019 #include <covfie/core/vector.hpp>
0020 #include <covfie/cuda/error_check.hpp>
0021 #include <covfie/cuda/utility/type_conversion.hpp>
0022
0023 namespace covfie::backend {
0024 enum class cuda_texture_interpolation {
0025 LINEAR,
0026 NEAREST_NEIGHBOUR
0027 };
0028
0029 template <
0030 concepts::vector_descriptor _input_vector_t,
0031 concepts::vector_descriptor _output_vector_t,
0032 cuda_texture_interpolation _interpolation_method =
0033 cuda_texture_interpolation::LINEAR>
0034 struct cuda_texture {
0035 static_assert(_input_vector_t::size == 2 || _input_vector_t::size == 3);
0036
0037 using this_t =
0038 cuda_texture<_input_vector_t, _output_vector_t, _interpolation_method>;
0039
0040 static constexpr bool is_initial = true;
0041
0042 using contravariant_input_t =
0043 covfie::vector::array_vector_d<_input_vector_t>;
0044 using covariant_output_t = covfie::vector::array_vector_d<_output_vector_t>;
0045
0046 using channel_t =
0047 typename utility::to_cuda_channel_t<_output_vector_t>::type;
0048
0049 template <typename T>
0050 using linear_tc = linear<T, float>;
0051
0052 using configuration_t = std::monostate;
0053
0054 static constexpr uint32_t IO_MAGIC_HEADER = 0xAB110001;
0055
0056 struct owning_data_t {
0057 using parent_t = this_t;
0058
0059 owning_data_t() = default;
0060
0061 owning_data_t(owning_data_t && o)
0062 : m_array(o.m_array)
0063 , m_tex(o.m_tex)
0064 {
0065 o.m_array = nullptr;
0066 o.m_tex = std::nullopt;
0067 }
0068
0069 owning_data_t & operator=(owning_data_t && o)
0070 {
0071 if (m_tex.has_value()) {
0072 cudaErrorCheck(cudaDestroyTextureObject(*m_tex));
0073 m_tex.reset();
0074 }
0075
0076 if (m_array != nullptr) {
0077 cudaErrorCheck(cudaFreeArray(m_array));
0078 }
0079
0080 m_tex = o.m_tex;
0081 m_array = o.m_array;
0082
0083 o.m_tex = std::nullopt;
0084 o.m_array = nullptr;
0085
0086 return *this;
0087 }
0088
0089 owning_data_t & operator=(const owning_data_t & o)
0090 {
0091 cudaChannelFormatDesc channelDesc =
0092 cudaCreateChannelDesc<channel_t>();
0093
0094 cudaExtent extent;
0095
0096 cudaErrorCheck(
0097 cudaArrayGetInfo(nullptr, &extent, nullptr, o.m_array)
0098 );
0099
0100 cudaErrorCheck(cudaMalloc3DArray(&m_array, &channelDesc, extent));
0101
0102 if constexpr (_input_vector_t::size == 2) {
0103 cudaErrorCheck(cudaMemcpy2DArrayToArray(
0104 m_array,
0105 0,
0106 0,
0107 o.m_array,
0108 0,
0109 0,
0110 extent.width * sizeof(channel_t),
0111 extent.height,
0112 cudaMemcpyDeviceToDevice
0113 ));
0114 } else if constexpr (_input_vector_t::size == 3) {
0115 cudaMemcpy3DParms copyParams;
0116
0117 memset(©Params, 0, sizeof(cudaMemcpy3DParms));
0118 copyParams.srcArray = o.m_array;
0119 copyParams.dstArray = m_array;
0120 copyParams.extent = extent;
0121 copyParams.kind = cudaMemcpyDeviceToDevice;
0122 cudaErrorCheck(cudaMemcpy3D(©Params));
0123 }
0124
0125 cudaResourceDesc resDesc;
0126 memset(&resDesc, 0, sizeof(cudaResourceDesc));
0127 resDesc.resType = cudaResourceTypeArray;
0128 resDesc.res.array.array = m_array;
0129
0130 cudaTextureDesc texDesc;
0131 memset(&texDesc, 0, sizeof(cudaTextureDesc));
0132
0133 for (std::size_t i = 0; i < _input_vector_t::size; ++i) {
0134 texDesc.addressMode[i] = cudaAddressModeClamp;
0135 }
0136
0137
0138 if (_interpolation_method == cuda_texture_interpolation::LINEAR) {
0139 texDesc.filterMode = cudaFilterModeLinear;
0140 } else if (_interpolation_method == cuda_texture_interpolation::NEAREST_NEIGHBOUR)
0141 {
0142 texDesc.filterMode = cudaFilterModePoint;
0143 }
0144 texDesc.readMode = cudaReadModeElementType;
0145
0146 cudaErrorCheck(
0147 cudaCreateTextureObject(&(*m_tex), &resDesc, &texDesc, nullptr)
0148 );
0149
0150 return *this;
0151 }
0152
0153 owning_data_t(const owning_data_t & o)
0154 {
0155 *this = o;
0156 }
0157
0158 template <typename T>
0159 requires(
0160 std::unsigned_integral<
0161 typename T::parent_t::contravariant_input_t::scalar_t> &&
0162 (T::parent_t::contravariant_input_t::dimensions ==
0163 contravariant_input_t::dimensions)
0164 ) owning_data_t(const T & o)
0165 : m_tex(cudaTextureObject_t{})
0166 {
0167 cudaChannelFormatDesc channelDesc =
0168 cudaCreateChannelDesc<channel_t>();
0169
0170 typename T::parent_t::non_owning_data_t no(o);
0171
0172 typename T::parent_t::configuration_t srcSize =
0173 o.get_configuration();
0174
0175 cudaExtent extent = make_cudaExtent(
0176 _input_vector_t::size >= 1 ? srcSize[0] : 0,
0177 _input_vector_t::size >= 2 ? srcSize[1] : 0,
0178 _input_vector_t::size >= 3 ? srcSize[2] : 0
0179 );
0180
0181 cudaErrorCheck(cudaMalloc3DArray(&m_array, &channelDesc, extent));
0182
0183 std::size_t stage_size = extent.width *
0184 std::max(1UL, extent.height) *
0185 std::max(1UL, extent.depth);
0186
0187 std::unique_ptr<channel_t[]> stage =
0188 std::make_unique<channel_t[]>(stage_size);
0189
0190 utility::nd_map(
0191 std::function<void(decltype(srcSize)
0192 )>([&no, &stage, &srcSize, &stage_size](decltype(srcSize) i
0193 ) -> void {
0194 typename T::parent_t::covariant_output_t::vector_t v =
0195 no.at(i);
0196
0197 std::size_t idx = 0;
0198
0199 for (std::size_t k = contravariant_input_t::dimensions - 1;
0200 k <= contravariant_input_t::dimensions;
0201 --k)
0202 {
0203 std::size_t tmp = i[k];
0204
0205 for (std::size_t l = k - 1; l < k; --l) {
0206 tmp *= srcSize[l];
0207 }
0208
0209 idx += tmp;
0210 }
0211
0212 std::size_t idx2 = static_cast<std::size_t>(idx);
0213 assert(idx2 < stage_size);
0214
0215 using stage_scalar_t =
0216 typename covariant_output_t::scalar_t;
0217
0218 if constexpr (covariant_output_t::dimensions == 1) {
0219 stage[idx2] = static_cast<stage_scalar_t>(v[0]);
0220 } else if constexpr (covariant_output_t::dimensions == 2) {
0221 stage[idx2].x = static_cast<stage_scalar_t>(v[0]);
0222 stage[idx2].y = static_cast<stage_scalar_t>(v[1]);
0223 } else if constexpr (covariant_output_t::dimensions == 3) {
0224 stage[idx2].x = static_cast<stage_scalar_t>(v[0]);
0225 stage[idx2].y = static_cast<stage_scalar_t>(v[1]);
0226 stage[idx2].z = static_cast<stage_scalar_t>(v[2]);
0227 stage[idx2].w = static_cast<stage_scalar_t>(0.f);
0228 } else if constexpr (covariant_output_t::dimensions == 4) {
0229 stage[idx2].x = static_cast<stage_scalar_t>(v[0]);
0230 stage[idx2].y = static_cast<stage_scalar_t>(v[1]);
0231 stage[idx2].z = static_cast<stage_scalar_t>(v[2]);
0232 stage[idx2].w = static_cast<stage_scalar_t>(v[3]);
0233 }
0234 }),
0235 srcSize
0236 );
0237
0238 if constexpr (_input_vector_t::size == 2) {
0239 cudaErrorCheck(cudaMemcpy2DToArray(
0240 m_array,
0241 0,
0242 0,
0243 stage.get(),
0244 extent.width * sizeof(channel_t),
0245 extent.width * sizeof(channel_t),
0246 extent.height,
0247 cudaMemcpyHostToDevice
0248 ));
0249 } else if constexpr (_input_vector_t::size == 3) {
0250 cudaMemcpy3DParms copyParams;
0251
0252 memset(©Params, 0, sizeof(cudaMemcpy3DParms));
0253 copyParams.srcPtr = make_cudaPitchedPtr(
0254 stage.get(),
0255 extent.width * sizeof(channel_t),
0256 extent.width,
0257 extent.height
0258 );
0259 copyParams.dstArray = m_array;
0260 copyParams.extent = extent;
0261 copyParams.kind = cudaMemcpyHostToDevice;
0262 cudaErrorCheck(cudaMemcpy3D(©Params));
0263 }
0264
0265 cudaResourceDesc resDesc;
0266 memset(&resDesc, 0, sizeof(cudaResourceDesc));
0267 resDesc.resType = cudaResourceTypeArray;
0268 resDesc.res.array.array = m_array;
0269
0270 cudaTextureDesc texDesc;
0271 memset(&texDesc, 0, sizeof(cudaTextureDesc));
0272
0273
0274 for (std::size_t i = 0; i < _input_vector_t::size; ++i) {
0275 texDesc.addressMode[i] = cudaAddressModeClamp;
0276 }
0277
0278
0279 if (_interpolation_method == cuda_texture_interpolation::LINEAR) {
0280 texDesc.filterMode = cudaFilterModeLinear;
0281 } else if (_interpolation_method == cuda_texture_interpolation::NEAREST_NEIGHBOUR)
0282 {
0283 texDesc.filterMode = cudaFilterModePoint;
0284 }
0285 texDesc.readMode = cudaReadModeElementType;
0286
0287 cudaErrorCheck(
0288 cudaCreateTextureObject(&(*m_tex), &resDesc, &texDesc, nullptr)
0289 );
0290 }
0291
0292
0293 template <typename T>
0294 requires(
0295 std::unsigned_integral<
0296 typename T::parent_t::contravariant_input_t::scalar_t> &&
0297 (T::parent_t::contravariant_input_t::dimensions ==
0298 contravariant_input_t::dimensions)
0299 ) owning_data_t(const T & o, const cudaStream_t &)
0300 : owning_data_t(o)
0301 {
0302 }
0303
0304 template <typename T>
0305 owning_data_t(parameter_pack<T> && i)
0306 : owning_data_t(std::move(i.x))
0307 {
0308 }
0309
0310 ~owning_data_t()
0311 {
0312 if (m_tex.has_value()) {
0313 cudaErrorCheck(cudaDestroyTextureObject(*m_tex));
0314 m_tex.reset();
0315 }
0316
0317 if (m_array != nullptr) {
0318 cudaErrorCheck(cudaFreeArray(m_array));
0319 }
0320 }
0321
0322 configuration_t get_configuration() const
0323 {
0324 return {};
0325 }
0326
0327 static owning_data_t read_binary(std::istream & fs)
0328 {
0329 throw std::invalid_argument("Cannot perform IO on texture memory.");
0330
0331 return owning_data_t();
0332 }
0333
0334 static void write_binary(std::ostream & fs, const owning_data_t & o)
0335 {
0336 throw std::invalid_argument("Cannot perform IO on texture memory.");
0337 }
0338
0339 cudaArray_t m_array = nullptr;
0340 std::optional<cudaTextureObject_t> m_tex;
0341 std::optional<cudaStream_t> m_stream;
0342 };
0343
0344 struct non_owning_data_t {
0345 using parent_t = this_t;
0346
0347 non_owning_data_t(const owning_data_t & o)
0348 : m_tex(*o.m_tex)
0349 {
0350 }
0351
0352 COVFIE_HOST_DEVICE typename covariant_output_t::vector_t
0353 at(typename contravariant_input_t::vector_t i) const
0354 {
0355 channel_t r;
0356
0357 if constexpr (_input_vector_t::size == 1) {
0358 r = tex1D<channel_t>(m_tex, i[0] + 0.5);
0359 } else if constexpr (_input_vector_t::size == 2) {
0360 r = tex2D<channel_t>(m_tex, i[0] + 0.5, i[1] + 0.5);
0361 } else if constexpr (_input_vector_t::size == 3) {
0362 r = tex3D<channel_t>(m_tex, i[0] + 0.5, i[1] + 0.5, i[2] + 0.5);
0363 }
0364
0365 if constexpr (_output_vector_t::size == 1) {
0366 return {r.x};
0367 } else if constexpr (_output_vector_t::size == 2) {
0368 return {r.x, r.y};
0369 } else if constexpr (_output_vector_t::size == 3) {
0370 return {r.x, r.y, r.z};
0371 } else if constexpr (_output_vector_t::size == 4) {
0372 return {r.x, r.y, r.z, r.w};
0373 }
0374
0375 return {};
0376 }
0377
0378 cudaTextureObject_t m_tex;
0379 };
0380 };
0381 }