eic-opticks/CSGOptiX/GAS_Builder.cc

0001
0002 #include <cassert>
0003 #include <csignal>
0004 #include <cstring>
0005 #include <iostream>
0006 #include <iomanip>
0007
0008 #include <optix.h>
0009 #include <optix_stubs.h>
0010
0011 #include <cuda_runtime.h>
0012 #include "scuda.h"    // roundUp
0013
0014 #include "CUDA_CHECK.h"
0015 #include "OPTIX_CHECK.h"
0016
0017 #include "Ctx.h"
0018 #include "GAS.h"
0019 #include "GAS_Builder.h"
0020
0021 #include "SLOG.hh"
0022
0023 const plog::Severity GAS_Builder::LEVEL = SLOG::EnvLevel("GAS_Builder", "DEBUG");
0024
0025
0026
0027
0028
0029
0030
0031 /**
0032 GAS_Builder::Build : SCSGPrimSpec --> GAS : Compound Solid (set of Prim level)
0033 -------------------------------------------------------------------------------
0034
0035 Canonically invoked from SBT::createGeom/SBT::createGAS using SCSGPrimSpec from CSGFoundry
0036
0037 GAS& gas
0038    output struct holding vector of BI (currently always one entry)
0039
0040 SCSGPrimSpec& ps
0041    arrays of bbox
0042
0043 **/
0044
0045 void GAS_Builder::Build( GAS& gas, const SCSGPrimSpec& ps )  // static
0046 {
0047     assert( ps.stride_in_bytes % sizeof(float) == 0 );
0048     unsigned stride_in_floats = ps.stride_in_bytes / sizeof(float) ;
0049
0050     LOG(LEVEL)
0051         << " ps.num_prim " << std::setw(4) << ps.num_prim
0052         << " ps.stride_in_bytes " << ps.stride_in_bytes
0053         << " ps.device " << ps.device
0054         << " ps.primitiveIndexOffset " << ps.primitiveIndexOffset
0055         << " stride_in_floats " << stride_in_floats
0056         ;
0057
0058     Build_11N(gas, ps);
0059 }
0060
0061 /**
0062 GAS_Builder::Build_11N GAS:BI:AABB  1:1:N  one BI with multiple AABB
0063 ------------------------------------------------------------------------
0064
0065 11N mode is the default (and now only) mode in which there is always
0066 only one BI in the bis vector.
0067
0068 **/
0069
0070 void GAS_Builder::Build_11N( GAS& gas, const SCSGPrimSpec& ps )
0071 {
0072     BI bi = MakeCustomPrimitivesBI_11N(ps);
0073     gas.bis.push_back(bi);
0074     assert( gas.bis.size() == 1 );
0075     BoilerPlate(gas);
0076 }
0077
0078
0079
0080
0081 /**
0082 GAS_Builder::DevicePointerCast
0083 ---------------------------------
0084
0085 http://www.cudahandbook.com/2013/08/why-does-cuda-cudeviceptr-use-unsigned-int-instead-of-void/
0086 CUdeviceptr is typedef to unsigned long long
0087 uintptr_t is an unsigned integer type that is capable of storing a data pointer.
0088
0089 **/
0090
0091 template<typename T>
0092 CUdeviceptr GAS_Builder::DevicePointerCast( const T* d_ptr ) // static
0093 {
0094     return (CUdeviceptr) (uintptr_t) d_ptr ;
0095 }
0096
0097
0098 /**
0099 GAS_Builder::MakeCustomPrimitivesBI_11N
0100 -----------------------------------------
0101
0102 References to bbox array from SCSGPrimSpec copyied into the BI
0103
0104 Creates buildInput using device refs of pre-uploaded aabb for all prim (aka layers) of the Solid
0105 and arranges for separate SBT records for each prim.
0106
0107 Added primitiveIndexOffset to SCSGPrimSpec in attempt to get identity info
0108 regarding what piece of geometry is intersected/closesthit.
0109
0110 **/
0111
0112 BI GAS_Builder::MakeCustomPrimitivesBI_11N( const SCSGPrimSpec& ps)
0113 {
0114     assert( ps.device == true );
0115     assert( ps.stride_in_bytes % sizeof(float) == 0 );
0116
0117     BI bi = {} ;
0118     bi.mode = 1 ;
0119     bi.flags = new unsigned[ps.num_prim];
0120     for(unsigned i=0 ; i < ps.num_prim ; i++) bi.flags[i] = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT ;
0121
0122
0123     bi.d_aabb = DevicePointerCast<float>( ps.aabb );
0124     bi.d_sbt_index = DevicePointerCast<unsigned>( ps.sbtIndexOffset );
0125
0126     bi.buildInput = {};
0127     bi.buildInput.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
0128     OptixBuildInputCustomPrimitiveArray& buildInputCPA = bi.buildInput.customPrimitiveArray ;
0129
0130     buildInputCPA.aabbBuffers = &bi.d_aabb ;
0131     buildInputCPA.numPrimitives = ps.num_prim  ;
0132     buildInputCPA.strideInBytes = ps.stride_in_bytes ;
0133     buildInputCPA.flags = bi.flags;                                  // flags per sbt record
0134     buildInputCPA.numSbtRecords = ps.num_prim ;                      // number of sbt records available to sbt index offset override.
0135     buildInputCPA.sbtIndexOffsetBuffer  = bi.d_sbt_index ;           // Device pointer to per-primitive local sbt index offset buffer, Every entry must be in range [0,numSbtRecords-1]
0136     buildInputCPA.sbtIndexOffsetSizeInBytes  = sizeof(unsigned);     // Size of type of the sbt index offset. Needs to be 0,     1, 2 or 4
0137     buildInputCPA.sbtIndexOffsetStrideInBytes = ps.stride_in_bytes ; // Stride between the index offsets. If set to zero, the offsets are assumed to be tightly packed.
0138     buildInputCPA.primitiveIndexOffset = ps.primitiveIndexOffset ;   // Primitive index bias, applied in optixGetPrimitiveIndex() see OptiX7Test.cu:__closesthit__ch
0139
0140
0141     LOG(LEVEL)
0142         << std::endl
0143         << " buildInputCPA.primitiveIndexOffset " << buildInputCPA.primitiveIndexOffset
0144         << std::endl
0145         << " buildInputCPA.aabbBuffers[0] "
0146         << " " << std::dec << buildInputCPA.aabbBuffers[0]
0147         << " " << std::hex << buildInputCPA.aabbBuffers[0]  << std::dec
0148         << std::endl
0149         << " buildInputCPA.sbtIndexOffsetBuffer "
0150         << " " << std::dec << buildInputCPA.sbtIndexOffsetBuffer
0151         << " " << std::hex << buildInputCPA.sbtIndexOffsetBuffer << std::dec
0152         << std::endl
0153         << " buildInputCPA.strideInBytes " << buildInputCPA.strideInBytes
0154         << " buildInputCPA.sbtIndexOffsetStrideInBytes " << buildInputCPA.sbtIndexOffsetStrideInBytes
0155         ;
0156
0157     return bi ;
0158 }
0159
0160
0161 void GAS_Builder::DumpAABB( const float* aabb, unsigned num_aabb, unsigned stride_in_bytes )  // static
0162 {
0163     assert( stride_in_bytes % sizeof(float) == 0 );
0164     unsigned stride_in_floats = stride_in_bytes/sizeof(float);
0165
0166     std::cout
0167         << "GAS_Builder::DumpAABB"
0168         << "  num_aabb " << num_aabb
0169         << "  stride_in_bytes " << stride_in_bytes
0170         << "  stride_in_floats " << stride_in_floats
0171         << std::endl
0172         ;
0173     for(unsigned i=0 ; i < num_aabb ; i++)
0174     {
0175         std::cout << std::setw(4) << i << " : " ;
0176         for(unsigned j=0 ; j < 6 ; j++)
0177            std::cout << std::setw(10) << std::fixed << std::setprecision(3) << *(aabb + i*stride_in_floats + j ) << " "  ;
0178         std::cout << std::endl ;
0179     }
0180 }
0181
0182
0183 /**
0184 GAS_Builder::BoilerPlate
0185 ----------------------------
0186
0187 Boilerplate building the GAS from the BI vector.
0188 In the default 11N mode there is always only one BI in the vector.
0189
0190 **/
0191
0192 void GAS_Builder::BoilerPlate(GAS& gas)   // static
0193 {
0194     //std::cout << "GAS_Builder::BoilerPlate" << std::endl ;
0195     unsigned num_bi = gas.bis.size() ;
0196
0197     bool num_bi_expect =  num_bi == 1 ;
0198     assert( num_bi_expect );
0199     if(!num_bi_expect) std::raise(SIGINT);
0200
0201     std::vector<OptixBuildInput> buildInputs ;
0202     for(unsigned i=0 ; i < gas.bis.size() ; i++)
0203     {
0204         const BI& bi = gas.bis[i];
0205         buildInputs.push_back(bi.buildInput);
0206         if(bi.mode == 1) assert( num_bi == 1 );
0207     }
0208
0209     /*
0210     std::cout
0211         << "GAS_Builder::BoilerPlate"
0212         << " num_bi " << num_bi
0213         << " buildInputs.size " << buildInputs.size()
0214         << std::endl
0215         ;
0216     */
0217
0218     OptixAccelBuildOptions accel_options = {};
0219     accel_options.buildFlags =
0220         OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
0221         OPTIX_BUILD_FLAG_ALLOW_COMPACTION ;
0222     accel_options.operation  = OPTIX_BUILD_OPERATION_BUILD;
0223
0224     OptixAccelBufferSizes gas_buffer_sizes;
0225
0226     OPTIX_CHECK( optixAccelComputeMemoryUsage( Ctx::context,
0227                                                &accel_options,
0228                                                buildInputs.data(),
0229                                                buildInputs.size(),
0230                                                &gas_buffer_sizes
0231                                              ) );
0232     CUdeviceptr d_temp_buffer_gas;
0233     CUDA_CHECK( cudaMalloc(
0234                 reinterpret_cast<void**>( &d_temp_buffer_gas ),
0235                 gas_buffer_sizes.tempSizeInBytes
0236                 ) );
0237
0238     // non-compacted output
0239     CUdeviceptr d_buffer_temp_output_gas_and_compacted_size;
0240     size_t      compactedSizeOffset = roundUp<size_t>( gas_buffer_sizes.outputSizeInBytes, 8ull );
0241
0242     CUDA_CHECK( cudaMalloc(
0243                 reinterpret_cast<void**>( &d_buffer_temp_output_gas_and_compacted_size ),
0244                 compactedSizeOffset + 8
0245                 ) );
0246
0247
0248     OptixAccelEmitDesc emitProperty = {};
0249     emitProperty.type               = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
0250     emitProperty.result             = ( CUdeviceptr )( (char*)d_buffer_temp_output_gas_and_compacted_size + compactedSizeOffset );
0251
0252     OPTIX_CHECK( optixAccelBuild( Ctx::context,
0253                                   0,                  // CUDA stream
0254                                   &accel_options,
0255                                   buildInputs.data(),
0256                                   buildInputs.size(),                  // num build inputs
0257                                   d_temp_buffer_gas,
0258                                   gas_buffer_sizes.tempSizeInBytes,
0259                                   d_buffer_temp_output_gas_and_compacted_size,
0260                                   gas_buffer_sizes.outputSizeInBytes,
0261                                   &gas.handle,
0262                                   &emitProperty,      // emitted property list
0263                                   1                   // num emitted properties
0264                                   ) );
0265
0266     CUDA_CHECK( cudaFree( (void*)d_temp_buffer_gas ) );
0267     //CUDA_CHECK( cudaFree( (void*)d_aabb_buffer ) );
0268
0269     size_t compacted_gas_size;
0270     CUDA_CHECK( cudaMemcpy( &compacted_gas_size, (void*)emitProperty.result, sizeof(size_t), cudaMemcpyDeviceToHost ) );
0271
0272     if( compacted_gas_size < gas_buffer_sizes.outputSizeInBytes )
0273     {
0274         CUDA_CHECK( cudaMalloc( reinterpret_cast<void**>( &gas.d_buffer ), compacted_gas_size ) );
0275
0276         // use handle as input and output
0277         OPTIX_CHECK( optixAccelCompact( Ctx::context,
0278                                         0,
0279                                         gas.handle,
0280                                         gas.d_buffer,
0281                                         compacted_gas_size,
0282                                         &gas.handle ) );
0283
0284         CUDA_CHECK( cudaFree( (void*)d_buffer_temp_output_gas_and_compacted_size ) );
0285     }
0286     else
0287     {
0288         gas.d_buffer = d_buffer_temp_output_gas_and_compacted_size;
0289     }
0290 }
0291