File indexing completed on 2025-09-17 08:54:12
0001
0002
0003
0004
0005
0006
0007 #pragma once
0008
0009 #include <memory>
0010
0011 #include <cuda_runtime.h>
0012
0013 #include <covfie/core/backend/primitive/array.hpp>
0014 #include <covfie/core/concepts.hpp>
0015 #include <covfie/core/qualifiers.hpp>
0016 #include <covfie/core/vector.hpp>
0017 #include <covfie/cuda/error_check.hpp>
0018 #include <covfie/cuda/utility/memory.hpp>
0019 #include <covfie/cuda/utility/unique_ptr.hpp>
0020
0021 namespace covfie::backend {
0022 template <
0023 concepts::vector_descriptor _output_vector_t,
0024 typename _index_t = std::size_t>
0025 struct cuda_device_array {
0026 using this_t = cuda_device_array<_output_vector_t, _index_t>;
0027
0028 static constexpr bool is_initial = true;
0029
0030 using contravariant_input_t =
0031 covfie::vector::scalar_d<covfie::vector::vector_d<_index_t, 1>>;
0032 using covariant_output_t =
0033 covfie::vector::array_reference_vector_d<_output_vector_t>;
0034
0035 using output_vector_t = _output_vector_t;
0036
0037 using value_t = typename output_vector_t::type[output_vector_t::size];
0038 using vector_t = std::decay_t<typename covariant_output_t::vector_t>;
0039
0040 using configuration_t = utility::nd_size<1>;
0041
0042 static constexpr uint32_t IO_MAGIC_HEADER = 0xAB110000;
0043
0044 struct owning_data_t {
0045 using parent_t = this_t;
0046
0047 owning_data_t()
0048 : m_size(0)
0049 , m_ptr({})
0050 {
0051 }
0052
0053 owning_data_t(owning_data_t &&) = default;
0054 owning_data_t & operator=(owning_data_t &&) = default;
0055
0056 owning_data_t & operator=(const owning_data_t & o)
0057 {
0058 m_size = o.m_size;
0059 m_ptr = utility::cuda::device_copy_d2d(
0060 o.m_ptr.get(), m_size, o.m_stream
0061 );
0062 m_stream = o.m_stream;
0063 return *this;
0064 }
0065
0066 owning_data_t(const owning_data_t & o)
0067 : m_size(o.m_size)
0068 , m_ptr(utility::cuda::device_copy_d2d(
0069 o.m_ptr.get(), m_size, o.m_stream
0070 ))
0071 , m_stream(o.m_stream)
0072 {
0073 assert(m_size == 0 || m_ptr);
0074 }
0075
0076 explicit owning_data_t(parameter_pack<owning_data_t> && args)
0077 : owning_data_t(std::move(args.x))
0078 {
0079 }
0080
0081 explicit owning_data_t(
0082 std::size_t size,
0083 std::unique_ptr<vector_t[]> && ptr,
0084 std::optional<cudaStream_t> stream
0085 )
0086 : m_size(size)
0087 , m_ptr(utility::cuda::device_copy_h2d(ptr.get(), size, stream))
0088 , m_stream(stream)
0089 {
0090 }
0091
0092 explicit owning_data_t(parameter_pack<configuration_t> && args)
0093 : owning_data_t(args.x[0], std::make_unique<vector_t[]>(m_size))
0094 {
0095 }
0096
0097 explicit owning_data_t(
0098 std::size_t size, std::unique_ptr<vector_t[]> && ptr
0099 )
0100 : owning_data_t(size, std::move(ptr), std::nullopt)
0101 {
0102 }
0103
0104 explicit owning_data_t(
0105 parameter_pack<configuration_t> && args, cudaStream_t stream
0106 )
0107 : owning_data_t(
0108 args.x[0], utility::cuda::device_allocate<vector_t[]>(m_size)
0109 )
0110 {
0111 }
0112
0113 explicit owning_data_t(
0114 std::size_t size,
0115 std::unique_ptr<vector_t[]> && ptr,
0116 cudaStream_t stream
0117 )
0118 : owning_data_t(
0119 size, std::move(ptr), std::optional<cudaStream_t>(stream)
0120 )
0121 {
0122 }
0123
0124 template <typename B>
0125 requires(!std::same_as<B, owning_data_t> && concepts::array_1d_like_field_backend<typename B::parent_t>) explicit owning_data_t(
0126 const B & o
0127 )
0128 : owning_data_t(o.get_size(), o.get_host_array())
0129 {
0130 }
0131
0132 template <typename B>
0133 requires(!std::same_as<B, owning_data_t> && concepts::array_1d_like_field_backend<typename B::parent_t>) explicit owning_data_t(
0134 const B & o, cudaStream_t stream
0135 )
0136 : owning_data_t(o.get_size(), o.get_host_array(), stream)
0137 {
0138 }
0139
0140 configuration_t get_configuration() const
0141 {
0142 return {m_size};
0143 }
0144
0145 static owning_data_t read_binary(std::istream & fs)
0146 {
0147 utility::read_io_header(fs, IO_MAGIC_HEADER);
0148
0149 uint32_t float_width = utility::read_binary<uint32_t>(fs);
0150
0151 if (float_width != 4 && float_width != 8) {
0152 throw std::runtime_error(
0153 "Float type is neither IEEE 754 single- nor "
0154 "double-precision, binary input is not supported."
0155 );
0156 }
0157
0158 auto size =
0159 utility::read_binary<std::decay_t<decltype(m_size)>>(fs);
0160 std::unique_ptr<vector_t[]> ptr =
0161 std::make_unique<vector_t[]>(size);
0162
0163 for (std::size_t i = 0; i < size; ++i) {
0164 for (std::size_t j = 0; j < _output_vector_t::size; ++j) {
0165 if (float_width == 4) {
0166 ptr[i][j] = utility::read_binary<float>(fs);
0167 } else if (float_width == 8) {
0168 ptr[i][j] = utility::read_binary<double>(fs);
0169 } else {
0170 throw std::logic_error("Float width is unexpected.");
0171 }
0172 }
0173 }
0174
0175 utility::read_io_footer(fs, IO_MAGIC_HEADER);
0176
0177 return owning_data_t(size, std::move(ptr));
0178 }
0179
0180 static void write_binary(std::ostream & fs, const owning_data_t & o)
0181 {
0182 utility::write_io_header(fs, IO_MAGIC_HEADER);
0183
0184 uint32_t float_width;
0185
0186 if constexpr (std::
0187 is_same_v<typename _output_vector_t::type, float>)
0188 {
0189 float_width = 4;
0190 } else if constexpr (std::is_same_v<
0191 typename _output_vector_t::type,
0192 double>)
0193 {
0194 float_width = 8;
0195 } else {
0196 throw std::logic_error(
0197 "Float type is neither IEEE 754 single- nor "
0198 "double-precision, binary output is not supported."
0199 );
0200 }
0201
0202 fs.write(
0203 reinterpret_cast<const char *>(&float_width),
0204 sizeof(std::decay_t<decltype(float_width)>)
0205 );
0206
0207 fs.write(
0208 reinterpret_cast<const char *>(&o.m_size),
0209 sizeof(std::decay_t<decltype(o.m_size)>)
0210 );
0211
0212 for (std::size_t i = 0; i < o.m_size; ++i) {
0213 for (std::size_t j = 0; j < _output_vector_t::size; ++j) {
0214 fs.write(
0215 reinterpret_cast<const char *>(&o.m_ptr[i][j]),
0216 sizeof(typename _output_vector_t::type)
0217 );
0218 }
0219 }
0220
0221 utility::write_io_footer(fs, IO_MAGIC_HEADER);
0222 }
0223
0224 std::size_t m_size;
0225 utility::cuda::unique_device_ptr<vector_t[]> m_ptr;
0226 std::optional<cudaStream_t> m_stream;
0227 };
0228
0229 struct non_owning_data_t {
0230 using parent_t = this_t;
0231
0232 non_owning_data_t(const owning_data_t & o)
0233 : m_ptr(o.m_ptr.get())
0234 {
0235 }
0236
0237 COVFIE_HOST_DEVICE typename covariant_output_t::vector_t
0238 at(typename contravariant_input_t::vector_t i) const
0239 {
0240 return m_ptr[i];
0241 }
0242
0243 vector_t * m_ptr;
0244 };
0245 };
0246 }