Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:46

0001 #pragma once
0002 /**
0003 squad.h
0004 =========
0005 
0006 This builds on CUDA types and functions and scuda.h that extends those::
0007 
0008     /usr/local/cuda/include/vector_types.h
0009     /usr/local/cuda/include/vector_functions.h
0010     /usr/local/cuda/include/vector_functions.hpp
0011 
0012 The vector types include::
0013 
0014     float4
0015     int4
0016     uint4
0017 
0018     double4
0019     longlong4
0020     ulonglong4
0021 
0022 **/
0023 
0024 #if defined(__CUDACC__) || defined(__CUDABE__)
0025 #    define SQUAD_METHOD __host__ __device__ __forceinline__
0026 #else
0027 #    define SQUAD_METHOD inline
0028 #endif
0029 
0030 
0031 #if defined(__CUDACC__) || defined(__CUDABE__)
0032 #else
0033    #include <iostream>
0034    #include <iomanip>
0035    #include <sstream>
0036    #include <vector>
0037    #include <array>
0038    #include <cstring>
0039    #include <cassert>
0040    #include "NP.hh"
0041 #endif
0042 
0043 
0044 union UIF
0045 {
0046    float    f ;
0047    int      i ;
0048    unsigned u ;
0049 };
0050 
0051 
0052 union quad
0053 {
0054     float4 f ;
0055     int4   i ;
0056     uint4  u ;
0057 };
0058 
0059 union hquad
0060 {
0061     short4  s ;
0062     ushort4 u ;
0063 };
0064 
0065 struct hquad2
0066 {
0067     hquad q0 ;
0068     hquad q1 ;
0069 };
0070 
0071 
0072 inline unsigned int_as_unsigned( int value )
0073 {
0074    UIF uif ;
0075    uif.i = value ;
0076    return uif.u ;
0077 }
0078 
0079 inline int unsigned_as_int( unsigned value )
0080 {
0081    UIF uif ;
0082    uif.u = value ;
0083    return uif.i ;
0084 }
0085 
0086 
0087 /**
0088 squad.h/quad2
0089 --------------
0090 
0091 Previous::
0092 
0093     +----------------------+---------------------+------------------------+-----------------------------+
0094     | f:normal_x           | f:normal_y          | f:normal_z             | f:distance                  |
0095     +----------------------+---------------------+------------------------+-----------------------------+
0096     | f:lposcost           | u:iindex            | u:identity             | u:globalPrimIdx_boundary    |
0097     +----------------------+---------------------+------------------------+-----------------------------+
0098 
0099 Current::
0100 
0101     +----------------------+---------------------+------------------------+-----------------------------+
0102     | f:normal_x           | f:normal_y          | f:normal_z             | f:distance                  |
0103     +----------------------+---------------------+------------------------+-----------------------------+
0104     | f:lposcost           | f:lposfphi          | u:iindex_identity      | u:globalPrimIdx_boundary    |
0105     +----------------------+---------------------+------------------------+-----------------------------+
0106 
0107     Rearranged to squeeze in f:lposfphi, as that can avoid transform lookups with future photonlite/hitlite,
0108     as no post-sim CPU transforms would then be needed to get "muon" hits.
0109 
0110 
0111 f:lposcost
0112     Local frame position cos(theta) of intersect,
0113     canonically calculated in CSGOptiX7.cu:__intersection__is
0114     scuda.h:normalize_cost(ray_origin + isect.w*ray_direction )
0115     where scuda.h:normalize_cost is v.z/sqrtf(dot(v, v))
0116 
0117     This is kinda imagining a sphere thru the intersection point
0118     which is likely onto an ellipsoid or a box or anything
0119     to provide a standard way of giving a z-polar measure.
0120 
0121 f:lposfphi
0122     Local frame position normalized phi fraction,
0123     obtained from scuda.h:normalize_fphi
0124 
0125 
0126 u:iindex
0127     see cx:CSGOptiX7.cu:__closesthit__ch
0128     0-based index within IAS (optixGetInstanceIndex)
0129 
0130 u:identity
0131     see cx:CSGOptiX7.cu:__closesthit__ch
0132 
0133     749 extern "C" __global__ void __closesthit__ch()
0134     750 {
0135     751     unsigned iindex = optixGetInstanceIndex() ;
0136     752     unsigned identity = optixGetInstanceId() ;
0137     753     unsigned iindex_identity = (( iindex & 0xffffu ) << 16 ) | ( identity & 0xffffu ) ;
0138     754
0139 
0140     808         prd->set_iindex_identity_( iindex_identity ) ;
0141     809         prd->set_globalPrimIdx_boundary_(  globalPrimIdx_boundary ) ;
0142     810         prd->set_lpos(lposcost, lposfphi);   // __closesthit__ch.WITH_PRD.TRIANGLE
0143     811
0144 
0145 
0146 
0147 
0148 
0149     FROM July 2023
0150         instance_id which is *sensor_identifier+1* (as need lpmtid for QPMT),
0151         zero means not-a-sensor GPU side
0152 
0153         [THIS LESS AMENABLE TO PACKING, AS EXPTS MAY USE WHACKY SENSOR_IDENTIFIER RANGES]
0154 
0155 
0156     FORMERLY
0157         (( prim_idx & 0xffff ) << 16 ) | ( instance_id & 0xffff )
0158 
0159         prim_idx:optixGetPrimitiveIndex
0160             see cx:GAS_Builder::MakeCustomPrimitivesBI_11N
0161             (1+index-of-CSGPrim within CSGSolid/GAS)
0162 
0163         instance_id:optixGetInstanceId
0164             user supplied instanceId, within identity are
0165             restricted to 16 bits::
0166 
0167                 In [1]: 0xffff
0168                 Out[1]: 65535
0169 
0170             * sy:sqat4::get_IAS_OptixInstance_instanceId
0171             * cx:IAS_Builder::CollectInstances
0172 
0173 
0174 
0175 u:boundary
0176     crucial bnd index representing unique (omat,osur,isur,imat)
0177     material/surface sandwich
0178 
0179 **/
0180 struct quad2
0181 {
0182     quad q0 ;
0183     quad q1 ;
0184 
0185 
0186     SQUAD_METHOD void zero();
0187     SQUAD_METHOD float* data() ;
0188     SQUAD_METHOD const float* cdata() const ;
0189 
0190     SQUAD_METHOD float3* normal() ;
0191     SQUAD_METHOD const float3* normal() const ;
0192     SQUAD_METHOD float    distance() const ;
0193     SQUAD_METHOD void     distance_add(float delta);
0194 
0195     SQUAD_METHOD float lposcost() const ;
0196     SQUAD_METHOD float lposfphi() const ;
0197     SQUAD_METHOD void  set_lpos(float lposcost, float lposfphi );
0198 
0199     SQUAD_METHOD void set_iindex_identity_(  unsigned ii_id );
0200     SQUAD_METHOD void set_iindex_identity(unsigned ii, unsigned id );
0201     SQUAD_METHOD unsigned iindex_identity() const ;
0202     SQUAD_METHOD unsigned iindex() const ;
0203     SQUAD_METHOD unsigned identity() const ;
0204 
0205     SQUAD_METHOD void set_globalPrimIdx_boundary_(unsigned globalPrimIdx_boundary);
0206     SQUAD_METHOD void set_globalPrimIdx_boundary(unsigned gp, unsigned bn);
0207     SQUAD_METHOD unsigned globalPrimIdx_boundary() const ;
0208     SQUAD_METHOD unsigned globalPrimIdx() const ;
0209     SQUAD_METHOD unsigned boundary() const ;
0210     SQUAD_METHOD bool is_boundary_miss() const ;
0211 
0212 
0213 
0214 #if defined(__CUDACC__) || defined(__CUDABE__)
0215 #else
0216     std::string desc() const ;
0217     static quad2 make_eprd();
0218     void eprd() ;
0219 #endif
0220 };
0221 
0222 void quad2::zero()
0223 {
0224     q0.u.x = 0 ; q0.u.y = 0 ; q0.u.z = 0 ; q0.u.w = 0 ;
0225     q1.u.x = 0 ; q1.u.y = 0 ; q1.u.z = 0 ; q1.u.w = 0 ;
0226 }
0227 
0228 
0229 SQUAD_METHOD float*         quad2::data() {           return &q0.f.x ; }
0230 SQUAD_METHOD const float*   quad2::cdata() const  {   return &q0.f.x ; }
0231 
0232 SQUAD_METHOD float3*        quad2::normal() {           return (float3*)&q0.f.x ; }
0233 SQUAD_METHOD const float3*  quad2::normal() const {     return (float3*)&q0.f.x ; }
0234 SQUAD_METHOD float          quad2::distance() const {   return q0.f.w ; }
0235 SQUAD_METHOD void           quad2::distance_add( float delta ){ q0.f.w += delta ; }
0236 
0237 
0238 SQUAD_METHOD float          quad2::lposcost() const {   return q1.f.x ; }
0239 SQUAD_METHOD float          quad2::lposfphi() const {   return q1.f.y ; }
0240 SQUAD_METHOD void           quad2::set_lpos(float lposcost, float lposfphi){ q1.f.x = lposcost ; q1.f.y = lposfphi ;   }
0241 
0242 SQUAD_METHOD void           quad2::set_iindex_identity_(unsigned ii_id) {           q1.u.z = ii_id ;  }
0243 SQUAD_METHOD void           quad2::set_iindex_identity( unsigned ii, unsigned id) { q1.u.z = (( ii & 0xffffu ) << 16 ) | ( id & 0xffffu ) ;  }
0244 SQUAD_METHOD unsigned       quad2::iindex_identity() const { return q1.u.z ; }
0245 SQUAD_METHOD unsigned       quad2::iindex() const {          return q1.u.z >> 16 ; }
0246 SQUAD_METHOD unsigned       quad2::identity() const {        return q1.u.z & 0xffffu ; }
0247 
0248 
0249 SQUAD_METHOD void           quad2::set_globalPrimIdx_boundary_(unsigned globalPrimIdx_boundary) {          q1.u.w = globalPrimIdx_boundary ;  }
0250 SQUAD_METHOD void           quad2::set_globalPrimIdx_boundary(unsigned globalPrimIdx, unsigned boundary) { q1.u.w = (( globalPrimIdx & 0xffffu ) << 16 ) | ( boundary & 0xffffu ) ; }
0251 SQUAD_METHOD unsigned       quad2::globalPrimIdx_boundary() const {              return   q1.u.w ; }
0252 SQUAD_METHOD unsigned       quad2::globalPrimIdx() const {                       return   q1.u.w >> 16 ; }
0253 SQUAD_METHOD unsigned       quad2::boundary() const {                            return   q1.u.w & 0xffffu ; }
0254 
0255 SQUAD_METHOD bool           quad2::is_boundary_miss() const {                    return ( q1.u.w & 0xffffu ) == 0xffffu ; }
0256 
0257 
0258 struct quad4
0259 {
0260     quad q0 ;
0261     quad q1 ;
0262     quad q2 ;
0263     quad q3 ;
0264 
0265     SQUAD_METHOD void zero();
0266     SQUAD_METHOD float* data() ;
0267     SQUAD_METHOD const float* cdata() const ;
0268 
0269     SQUAD_METHOD float3* v0() const ;
0270     SQUAD_METHOD float3* v1() const ;
0271     SQUAD_METHOD float3* v2() const ;
0272     SQUAD_METHOD float3* v3() const ;
0273 
0274 
0275 
0276     // hmm actually *set_flags* doing all at once makes little sense,
0277     // as idx does not need to be reset and only determine the flag later
0278 
0279     SQUAD_METHOD void set_flags(unsigned  boundary, unsigned  identity, unsigned  idx, unsigned  flag, float  orient );
0280     SQUAD_METHOD void set_idx( unsigned  idx);
0281     SQUAD_METHOD void set_orient( float orient );
0282     SQUAD_METHOD void set_prd( unsigned  boundary, unsigned  identity, float  orient );
0283     SQUAD_METHOD void set_flag(unsigned  flag);
0284 
0285     SQUAD_METHOD void get_flags(unsigned& boundary, unsigned& identity, unsigned& idx, unsigned& flag, float& orient ) const ;
0286     SQUAD_METHOD void get_idx( unsigned& idx);
0287     SQUAD_METHOD void get_orient( float& orient );
0288     SQUAD_METHOD void get_prd( unsigned& boundary, unsigned& identity, float&  orient );
0289     SQUAD_METHOD void get_flag(unsigned& flag) const ;
0290     SQUAD_METHOD unsigned flagmask() const ;
0291 
0292     SQUAD_METHOD void set_wavelength( float wl );
0293     SQUAD_METHOD float wavelength() const ;
0294     SQUAD_METHOD unsigned simtrace_globalPrimIdx() const ;
0295 
0296 #if defined(__CUDACC__) || defined(__CUDABE__)
0297 #else
0298     std::string desc() const ;
0299     static NP* zeros(size_t num);
0300     static NP* select_prim( const NP* simtrace, unsigned globalPrimIdx );
0301 
0302     static quad4 make_ephoton();
0303     void ephoton() ;
0304     void normalize_mom_pol();
0305     void transverse_mom_pol();
0306     float* get_v(int i) const ;
0307     void set_v(int i, const double* src, int n);  // narrowing
0308     void zero_v(int i, int n);
0309     void load(const std::array<double,16>& a );   // narrowing
0310     void load(const double* src, int n);          // narrowing
0311 #endif
0312 };
0313 
0314 
0315 SQUAD_METHOD void quad4::zero()
0316 {
0317     q0.u.x = 0 ; q0.u.y = 0 ; q0.u.z = 0 ; q0.u.w = 0 ;
0318     q1.u.x = 0 ; q1.u.y = 0 ; q1.u.z = 0 ; q1.u.w = 0 ;
0319     q2.u.x = 0 ; q2.u.y = 0 ; q2.u.z = 0 ; q2.u.w = 0 ;
0320     q3.u.x = 0 ; q3.u.y = 0 ; q3.u.z = 0 ; q3.u.w = 0 ;
0321 }
0322 
0323 SQUAD_METHOD float*       quad4::data() {         return &q0.f.x ;  }
0324 SQUAD_METHOD const float* quad4::cdata() const  { return &q0.f.x ;  }
0325 
0326 SQUAD_METHOD float3*      quad4::v0() const { return (float3*)&q0.f.x ; }
0327 SQUAD_METHOD float3*      quad4::v1() const { return (float3*)&q1.f.x ; }
0328 SQUAD_METHOD float3*      quad4::v2() const { return (float3*)&q2.f.x ; }
0329 SQUAD_METHOD float3*      quad4::v3() const { return (float3*)&q3.f.x ; }
0330 
0331 
0332 
0333 
0334 
0335 // q3.u.z : orient_idx
0336 
0337 SQUAD_METHOD void quad4::set_idx( unsigned  idx)
0338 {
0339     q3.u.z = ( q3.u.z & 0x80000000u ) | ( 0x7fffffffu & idx ) ;   // retain bit 31 asis
0340 }
0341 SQUAD_METHOD void quad4::get_idx( unsigned& idx)
0342 {
0343     idx =  q3.u.z & 0x7fffffffu  ;
0344 }
0345 SQUAD_METHOD void quad4::set_orient( float orient )  // not typically used as set_prd more convenient, but useful for debug
0346 {
0347     q3.u.z =  ( q3.u.z & 0x7fffffffu ) | (( orient < 0.f ? 0x1 : 0x0 ) << 31 ) ;
0348 }
0349 SQUAD_METHOD void quad4::get_orient( float& orient )
0350 {
0351     orient = ( q3.u.z & 0x80000000u ) ? -1.f : 1.f ;
0352 }
0353 
0354 SQUAD_METHOD void quad4::set_prd( unsigned  boundary, unsigned  identity, float  orient )
0355 {
0356     q3.u.x = ( q3.u.x & 0x0000ffffu ) | (( boundary & 0xffffu ) << 16 ) ;  // clear boundary bits then set them
0357     q3.u.y = identity ;
0358     q3.u.z = ( q3.u.z & 0x7fffffffu ) | (( orient < 0.f ? 0x1 : 0x0 ) << 31 ) ;
0359 
0360     // HMM: can identity spare a bit for orient : as that would be cleaner ?
0361 }
0362 SQUAD_METHOD void quad4::get_prd( unsigned& boundary, unsigned& identity, float& orient )
0363 {
0364     boundary = q3.u.x >> 16 ;
0365     identity = q3.u.y ;
0366     orient = ( q3.u.z & 0x80000000u ) ? -1.f : 1.f ;
0367 }
0368 
0369 SQUAD_METHOD void quad4::set_flag( unsigned flag )
0370 {
0371     q3.u.x = ( q3.u.x & 0xffff0000u ) | ( flag & 0xffffu ) ;   // clear flag bits then set them
0372     q3.u.w |= flag ;    // bitwise-OR into flagmask
0373 }
0374 SQUAD_METHOD void quad4::get_flag( unsigned& flag ) const
0375 {
0376     flag = q3.u.x & 0xffff ;
0377 }
0378 
0379 SQUAD_METHOD unsigned quad4::flagmask() const
0380 {
0381     return q3.u.w ;
0382 }
0383 
0384 
0385 
0386 SQUAD_METHOD void quad4::set_wavelength( float wl ){ q2.f.w = wl ; }
0387 SQUAD_METHOD float quad4::wavelength() const { return q2.f.w ; }
0388 
0389 
0390 SQUAD_METHOD void quad4::set_flags(unsigned boundary, unsigned identity, unsigned idx, unsigned flag, float orient )
0391 {
0392     q3.u.x = ((boundary & 0xffffu ) << 16) | (flag & 0xffffu ) ;    // hmm boundary only needs 8bits 0xff really
0393     q3.u.y = identity ;      // identity needs 32bit as already bit packed primIdx and instanceIdx
0394     q3.u.z = ( orient < 0.f ? 0x80000000u : 0u ) | ( idx & 0x7fffffffu ) ;   // ~30bit gets up to a billion, ~2 bits spare
0395     q3.u.w |= flag ;         // OpticksPhoton.h flags up to  0x1 << 15 : bitwise combination needs 16 bits,  16 bits spare
0396     // hmm: could swap the general purpose identity for sensor index when this is a hit ?
0397 }
0398 
0399 SQUAD_METHOD void quad4::get_flags(unsigned& boundary, unsigned& identity, unsigned& idx, unsigned& flag, float& orient ) const
0400 {
0401     boundary = q3.u.x >> 16 ;
0402     flag = q3.u.x & 0xffffu ;
0403     identity = q3.u.y ;
0404     idx = ( q3.u.z & 0x7fffffffu ) ;
0405     orient = ( q3.u.z & 0x80000000u ) ? -1.f : 1.f ;
0406 }
0407 
0408 
0409 /**
0410 quad4::simtrace_globalPrimIdx
0411 ------------------------------
0412 
0413 NB simtrace layout is different from standard, see sevent::add_simtrace
0414 
0415 **/
0416 
0417 SQUAD_METHOD unsigned quad4::simtrace_globalPrimIdx() const
0418 {
0419     return q2.u.w >> 16 ;
0420 }
0421 
0422 
0423 
0424 
0425 
0426 
0427 
0428 
0429 
0430 
0431 
0432 
0433 
0434 
0435 
0436 
0437 
0438 struct quad4_selector
0439 {
0440     unsigned hitmask ;
0441     quad4_selector(unsigned hitmask_) : hitmask(hitmask_) {};
0442     SQUAD_METHOD bool operator() (const quad4& p) const { return ( p.flagmask() & hitmask ) == hitmask  ; }   // require all bits of the mask to be set
0443 };
0444 
0445 template <typename T>
0446 struct qselector
0447 {
0448     unsigned hitmask ;
0449     qselector(unsigned hitmask_) : hitmask(hitmask_) {};
0450     SQUAD_METHOD bool operator() (const T& p) const { return ( p.flagmask() & hitmask ) == hitmask  ; }   // require all bits of the mask to be set
0451 };
0452 
0453 
0454 
0455 
0456 
0457 
0458 /**
0459 Full example of numpy reporting of photon and record flags and boundaries in QUDARap/tests/mock_propagate.py::
0460 
0461     from opticks.ana.hismask import HisMask
0462     hm = HisMask()
0463 
0464     boundary_ = lambda p:p.view(np.uint32)[3,0] >> 16
0465     flag_     = lambda p:p.view(np.uint32)[3,0] & 0xffff
0466     identity_ = lambda p:p.view(np.uint32)[3,1]
0467     idx_      = lambda p:p.view(np.uint32)[3,2] & 0x7fffffff
0468     orient_   = lambda p:p.view(np.uint32)[3,2] >> 31
0469     flagmask_ = lambda p:p.view(np.uint32)[3,3]
0470     flagdesc_ = lambda p:" %4d %10s id:%6d ori:%d idx:%6d %10s " % ( boundary_(p), hm.label(flag_(p)), identity_(p), orient_(p), idx_(p), hm.label( flagmask_(p) ))
0471 
0472     from opticks.CSG.CSGFoundry import CSGFoundry
0473     cf = CSGFoundry.Load()
0474     bflagdesc_ = lambda p:"%s : %s " % ( flagdesc_(p), cf.bndnamedict[boundary_(p)] )
0475 
0476 **/
0477 
0478 
0479 struct quad6
0480 {
0481     quad q0 ;
0482     quad q1 ;
0483     quad q2 ;
0484     quad q3 ;
0485     quad q4 ;
0486     quad q5 ;
0487 
0488 
0489 #if defined(__CUDACC__) || defined(__CUDABE__)
0490 #else
0491     SQUAD_METHOD void zero();
0492     SQUAD_METHOD const float* cdata() const ;
0493     SQUAD_METHOD std::string desc() const ;
0494 
0495     SQUAD_METHOD unsigned gentype() const {   return q0.u.x ; }
0496     SQUAD_METHOD unsigned trackid() const {   return q0.u.y ; }
0497     SQUAD_METHOD unsigned matline() const {   return q0.u.z ; }
0498     SQUAD_METHOD unsigned numphoton() const { return q0.u.w ; }
0499 
0500     SQUAD_METHOD void set_gentype(  unsigned gt) { q0.u.x = gt ; }
0501     SQUAD_METHOD void set_trackid(  unsigned tk) { q0.u.y = tk ; }
0502     SQUAD_METHOD void set_matline(  unsigned ml) { q0.u.z = ml ; }
0503     SQUAD_METHOD void set_numphoton(unsigned np) { q0.u.w = np ; }
0504 
0505     template<typename T>
0506     SQUAD_METHOD void read_transform( const T* v16, bool skip_col3 );
0507 
0508 #endif
0509 
0510 };
0511 
0512 
0513 
0514 struct quad8
0515 {
0516     quad q0 ;
0517     quad q1 ;
0518     quad q2 ;
0519     quad q3 ;
0520     quad q4 ;
0521     quad q5 ;
0522     quad q6 ;
0523     quad q7 ;
0524 };
0525 
0526 
0527 
0528 
0529 #if defined(__CUDACC__) || defined(__CUDABE__)
0530 #else
0531 
0532 inline void quad6::zero()
0533 {
0534     q0.u.x = 0 ; q0.u.y = 0 ; q0.u.z = 0 ; q0.u.w = 0 ;
0535     q1.u.x = 0 ; q1.u.y = 0 ; q1.u.z = 0 ; q1.u.w = 0 ;
0536     q2.u.x = 0 ; q2.u.y = 0 ; q2.u.z = 0 ; q2.u.w = 0 ;
0537     q3.u.x = 0 ; q3.u.y = 0 ; q3.u.z = 0 ; q3.u.w = 0 ;
0538     q4.u.x = 0 ; q4.u.y = 0 ; q4.u.z = 0 ; q4.u.w = 0 ;
0539     q5.u.x = 0 ; q5.u.y = 0 ; q5.u.z = 0 ; q5.u.w = 0 ;
0540 }
0541 
0542 inline const float* quad6::cdata() const { return &q0.f.x ; }
0543 
0544 
0545 inline std::string quad6::desc() const
0546 {
0547     std::stringstream ss ;
0548     ss
0549         << " q0.i " << q0.i << std::endl
0550         << " q1.f " << q1.f << std::endl
0551         << " q2.f " << q2.f << std::endl
0552         << " q3.f " << q3.f << std::endl
0553         << " q4.f " << q4.f << std::endl
0554         << " q5.f " << q5.f << std::endl
0555         ;
0556     std::string str = ss.str();
0557     return str ;
0558 }
0559 
0560 
0561 template<typename T>
0562 inline void quad6::read_transform( const T* v, bool skip_col3  )
0563 {
0564     T zero(0);
0565     T one(1);
0566 
0567     T xx =  v[0] ; T xy =  v[1] ; T xz =  v[2] ; T xw = skip_col3 ? zero :  v[3] ;
0568     T yx =  v[4] ; T yy =  v[5] ; T yz =  v[6] ; T yw = skip_col3 ? zero :  v[7] ;
0569     T zx =  v[8] ; T zy =  v[9] ; T zz = v[10] ; T zw = skip_col3 ? zero : v[11] ;
0570     T wx = v[12] ; T wy = v[13] ; T wz = v[14] ; T ww = skip_col3 ?  one : v[15] ;
0571 
0572     q2.f.x = xx ; q2.f.y = xy ; q2.f.z = xz ; q2.f.w = xw ;
0573     q3.f.x = yx ; q3.f.y = yy ; q3.f.z = yz ; q3.f.w = yw ;
0574     q4.f.x = zx ; q4.f.y = zy ; q4.f.z = zz ; q4.f.w = zw ;
0575     q5.f.x = wx ; q5.f.y = wy ; q5.f.z = wz ; q5.f.w = ww ;
0576 }
0577 
0578 
0579 #endif
0580 
0581 
0582 
0583 #if defined(__CUDACC__) || defined(__CUDABE__)
0584 #else
0585 
0586 
0587 inline std::ostream& operator<<(std::ostream& os, const quad& q)
0588 {
0589     os
0590        << "f " << q.f
0591        << "i " << q.i
0592        << "u " << q.u
0593        ;
0594     return os;
0595 }
0596 
0597 
0598 
0599 
0600 inline std::ostream& operator<<(std::ostream& os, const quad4& v)
0601 {
0602     os
0603        << v.q0.f
0604        << v.q1.f
0605        << v.q2.f
0606        << v.q3.f
0607        ;
0608     return os;
0609 }
0610 
0611 
0612 
0613 
0614 inline std::ostream& operator<<(std::ostream& os, const quad6& v)
0615 {
0616     os
0617        << v.q0.f
0618        << v.q1.f
0619        << v.q2.f
0620        << v.q3.f
0621        << v.q4.f
0622        << v.q5.f
0623        ;
0624     return os;
0625 }
0626 
0627 
0628 /**
0629 qvals
0630 ------
0631 
0632 qvals extracts a sequence of floats from an environment string without
0633 specifying a delimiter between the floats instead simply the numerical
0634 digits and + - . are used to find the floats.
0635 
0636 **/
0637 
0638 inline void qvals( std::vector<float>& vals, const char* key, const char* fallback, int num_expect )
0639 {
0640     char* val = getenv(key);
0641     char* p = const_cast<char*>( ( val && strlen(val) > 0)  ? val : fallback );
0642     while (*p)
0643     {
0644         if( (*p >= '0' && *p <= '9') || *p == '+' || *p == '-' || *p == '.') vals.push_back(strtof(p, &p)) ;
0645         else p++ ;
0646     }
0647     if( num_expect > 0 ) assert( vals.size() == unsigned(num_expect) );
0648 }
0649 
0650 inline void qvals( std::vector<long>& vals, const char* key, const char* fallback, int num_expect )
0651 {
0652     char* val = getenv(key);
0653     char* p = const_cast<char*>( ( val && strlen(val) > 0) ? val : fallback );
0654     while (*p)
0655     {
0656         if( (*p >= '0' && *p <= '9') || *p == '+' || *p == '-' ) vals.push_back(strtol(p, &p, 10)) ;
0657         else p++ ;
0658     }
0659     if( num_expect > 0 ) assert( vals.size() == unsigned(num_expect) );
0660 }
0661 
0662 
0663 inline void qvals( float& v,   const char* key, const char* fallback )
0664 {
0665     std::vector<float> vals ;
0666     qvals( vals, key, fallback, 1 );
0667     v = vals[0] ;
0668 }
0669 
0670 inline float qenvfloat( const char* key, const char* fallback )
0671 {
0672     float v ;
0673     qvals(v, key, fallback);
0674     return v ;
0675 }
0676 
0677 inline const char* qenv(const char* key, const char* fallback )
0678 {
0679     char* val = getenv(key);
0680     char* p = const_cast<char*>( val ? val : fallback );
0681     return p ;
0682 }
0683 
0684 inline int qenvint( const char* key, const char* fallback )
0685 {
0686     const char* p = qenv(key, fallback);
0687     return std::atoi(p);
0688 }
0689 
0690 inline void qvals( float2& v,  const char* key, const char* fallback )
0691 {
0692     std::vector<float> vals ;
0693     qvals( vals, key, fallback, 2 );
0694     v.x = vals[0] ;
0695     v.y = vals[1] ;
0696 }
0697 inline void qvals( float3& v,  const char* key, const char* fallback )
0698 {
0699     std::vector<float> vals ;
0700     qvals( vals, key, fallback, 3 );
0701     v.x = vals[0] ;
0702     v.y = vals[1] ;
0703     v.z = vals[2] ;
0704 }
0705 
0706 inline void qvals( float3& v0,  float3& v1, const char* key, const char* fallback )
0707 {
0708     std::vector<float> vals ;
0709     qvals( vals, key, fallback, 6 );
0710 
0711     v0.x = vals[0] ;
0712     v0.y = vals[1] ;
0713     v0.z = vals[2] ;
0714 
0715     v1.x = vals[3] ;
0716     v1.y = vals[4] ;
0717     v1.z = vals[5] ;
0718 }
0719 
0720 inline void qvals( float4& v0,  float4& v1, const char* key, const char* fallback )
0721 {
0722     std::vector<float> vals ;
0723     qvals( vals, key, fallback, 8 );
0724 
0725     v0.x = vals[0] ;
0726     v0.y = vals[1] ;
0727     v0.z = vals[2] ;
0728     v0.w = vals[3] ;
0729 
0730     v1.x = vals[4] ;
0731     v1.y = vals[5] ;
0732     v1.z = vals[6] ;
0733     v1.w = vals[7] ;
0734 }
0735 
0736 inline void qvals( std::vector<float4>& v, const char* key, const char* fallback, bool normalize_ )
0737 {
0738     std::vector<float> vals ;
0739     qvals( vals, key, fallback, -1 );
0740 
0741     unsigned num_values = vals.size() ;
0742     assert( num_values % 4 == 0 );
0743 
0744     unsigned num_v = num_values/4 ;
0745     v.resize( num_v );
0746 
0747     for(unsigned i=0 ; i < num_v ; i++)
0748     {
0749         float4 vec = make_float4( vals[i*4+0], vals[i*4+1], vals[i*4+2], vals[i*4+3] );
0750         float3* v3 = (float3*)&vec.x ;
0751         if(normalize_) *v3 = normalize(*v3);
0752 
0753         v[i].x = vec.x ;
0754         v[i].y = vec.y ;
0755         v[i].z = vec.z ;
0756         v[i].w = vec.w ;
0757     }
0758 }
0759 
0760 inline void qvals( std::vector<float3>& v, const char* key, const char* fallback, bool normalize_ )
0761 {
0762     std::vector<float> vals ;
0763     qvals( vals, key, fallback, -1 );
0764 
0765     unsigned num_values = vals.size() ;
0766     assert( num_values % 3 == 0 );
0767 
0768     unsigned num_v = num_values/3 ;
0769     v.resize( num_v );
0770 
0771     for(unsigned i=0 ; i < num_v ; i++)
0772     {
0773         float3 vec = make_float3( vals[i*3+0], vals[i*3+1], vals[i*3+2]) ;
0774         if(normalize_) vec = normalize(vec);
0775 
0776         v[i].x = vec.x ;
0777         v[i].y = vec.y ;
0778         v[i].z = vec.z ;
0779     }
0780 }
0781 
0782 
0783 inline void qvals( float4& v,  const char* key, const char* fallback )
0784 {
0785     std::vector<float> vals ;
0786     qvals( vals, key, fallback, 4 );
0787     v.x = vals[0] ;
0788     v.y = vals[1] ;
0789     v.z = vals[2] ;
0790     v.w = vals[3] ;
0791 }
0792 
0793 inline int qvals4( float& x, float& y, float& z, float& w,  const char* key, const char* fallback )
0794 {
0795     std::vector<float> vals ;
0796     qvals( vals, key, fallback, 4 );
0797 
0798     x = vals[0] ;
0799     y = vals[1] ;
0800     z = vals[2] ;
0801     w = vals[3] ;
0802 
0803     return 0 ;
0804 }
0805 
0806 inline int qvals3( float& x, float& y, float& z, const char* key, const char* fallback )
0807 {
0808     std::vector<float> vals ;
0809     qvals( vals, key, fallback, 3 );
0810 
0811     x = vals[0] ;
0812     y = vals[1] ;
0813     z = vals[2] ;
0814 
0815     return 0 ;
0816 }
0817 
0818 
0819 
0820 inline void qvals( int& v,   const char* key, const char* fallback )
0821 {
0822     std::vector<long> vals ;
0823     qvals( vals, key, fallback, 1 );
0824     v = vals[0] ;
0825 }
0826 inline void qvals( int2& v,  const char* key, const char* fallback )
0827 {
0828     std::vector<long> vals ;
0829     qvals( vals, key, fallback, 2 );
0830     v.x = vals[0] ;
0831     v.y = vals[1] ;
0832 }
0833 inline void qvals( int3& v,  const char* key, const char* fallback )
0834 {
0835     std::vector<long> vals ;
0836     qvals( vals, key, fallback, 3 );
0837     v.x = vals[0] ;
0838     v.y = vals[1] ;
0839     v.z = vals[2] ;
0840 }
0841 inline void qvals( int4& v,  const char* key, const char* fallback )
0842 {
0843     std::vector<long> vals ;
0844     qvals( vals, key, fallback, 4 );
0845     v.x = vals[0] ;
0846     v.y = vals[1] ;
0847     v.z = vals[2] ;
0848     v.w = vals[3] ;
0849 }
0850 
0851 
0852 inline void qvals( uint3& v,  const char* key, const char* fallback, int num_expect )
0853 {
0854     std::vector<long> vals ;
0855     qvals( vals, key, fallback, num_expect );
0856     v.x = vals.size() > 0 ? vals[0] : -1 ;
0857     v.y = vals.size() > 1 ? vals[1] : -1 ;
0858     v.z = vals.size() > 2 ? vals[2] : -1 ;
0859 }
0860 
0861 
0862 
0863 
0864 inline std::string quad4::desc() const
0865 {
0866     std::stringstream ss ;
0867     ss
0868         << " post " << q0.f << std::endl
0869         << " momw " << q1.f << std::endl
0870         << " polw " << q2.f << std::endl
0871         << " flag " << q3.i << std::endl
0872         ;
0873     std::string s = ss.str();
0874     return s ;
0875 }
0876 
0877 inline NP* quad4::zeros(size_t num) // static
0878 {
0879     NP* qq = NP::Make<float>(num, 4, 4 );
0880     return qq ;
0881 }
0882 
0883 /**
0884 quad4::select_prim
0885 -------------------
0886 
0887 Select subset of the input simtrace array
0888 
0889 **/
0890 
0891 inline NP* quad4::select_prim( const NP* _simtrace, unsigned q_globalPrimIdx ) // static
0892 {
0893     quad4* simtrace = (quad4*)(_simtrace->bytes()) ;
0894     size_t num_simtrace = _simtrace->shape[0] ;
0895 
0896     size_t count0 = 0 ;
0897     for(size_t i=0 ; i < num_simtrace ; i++) if( simtrace[i].simtrace_globalPrimIdx() == q_globalPrimIdx ) count0 += 1 ;
0898 
0899     NP* _qq = zeros(count0);
0900     quad4* qq = (quad4*)(_qq->bytes());
0901 
0902     size_t count1 = 0 ;
0903     for(size_t i=0 ; i < num_simtrace ; i++)
0904     {
0905         const quad4& q = simtrace[i];
0906         if( q.simtrace_globalPrimIdx() != q_globalPrimIdx ) continue ;
0907         qq[count1] = q ;
0908         count1 += 1 ;
0909     }
0910     assert( count0 == count1 );
0911     return _qq ;
0912 }
0913 
0914 
0915 
0916 
0917 
0918 
0919 
0920 /**
0921 quad4::ephoton
0922 ---------------
0923 
0924 *ephoton* is used from qudarap/tests/QSimTest generate tests as the initial photon,
0925 which gets persisted to p0.npy
0926 The script qudarap/tests/ephoton.sh sets the envvars defining the photon
0927 depending on the TEST envvar.
0928 
0929 **/
0930 
0931 inline void quad4::ephoton()
0932 {
0933     qvals( q0.f ,      "EPHOTON_POST" , "0,0,0,0" );                      // position, time
0934     qvals( q1.f, q2.f, "EPHOTON_MOMW_POLW", "1,0,0,1,0,1,0,500" );  // direction, weight,  polarization, wavelength
0935     qvals( q3.i ,      "EPHOTON_FLAG", "0,0,0,0" );
0936     normalize_mom_pol();
0937     transverse_mom_pol();
0938 }
0939 
0940 inline void quad4::normalize_mom_pol()
0941 {
0942     float3* mom = (float3*)&q1.f.x ;
0943     float3* pol = (float3*)&q2.f.x ;
0944 
0945     *mom = normalize(*mom);
0946     *pol = normalize(*pol);
0947 }
0948 
0949 inline void quad4::transverse_mom_pol()
0950 {
0951     float3* mom = (float3*)&q1.f.x ;
0952     float3* pol = (float3*)&q2.f.x ;
0953     float mom_pol = fabsf( dot(*mom, *pol)) ;
0954     float eps = 1e-5 ;
0955     bool is_transverse = mom_pol < eps ;
0956 
0957     if(!is_transverse )
0958     {
0959         std::cout
0960              << " quad4::transverse_mom_pol "
0961              << " FAIL "
0962              << " mom " << *mom
0963              << " pol " << *pol
0964              << " mom_pol " << mom_pol
0965              << " eps " << eps
0966              << " is_transverse " << is_transverse
0967              << std::endl
0968              ;
0969     }
0970     assert(is_transverse);
0971 }
0972 
0973 
0974 inline float* quad4::get_v(int i) const
0975 {
0976     float* v = nullptr ;
0977     switch(i)
0978     {
0979        case 0: v = (float*)&q0.f.x ; break ;
0980        case 1: v = (float*)&q1.f.x ; break ;
0981        case 2: v = (float*)&q2.f.x ; break ;
0982        case 3: v = (float*)&q3.f.x ; break ;
0983     }
0984     return v ;
0985 }
0986 
0987 inline void quad4::set_v(int i, const double* src, int n) // narrowing
0988 {
0989     if(src == nullptr) return ;
0990     assert( n > -1 && n <= 4 );
0991     assert( i > -1 && i < 4 );
0992     float* v = get_v(i);
0993     if(v == nullptr) return ;
0994     for(int j=0 ; j < n ; j++)  v[j] = float(src[j]) ;
0995 }
0996 
0997 inline void quad4::zero_v(int i, int n)
0998 {
0999     assert( n > -1 && n <= 4 );
1000     assert( i > -1 && i < 4 );
1001     float* v = get_v(i);
1002     for(int j=0 ; j < n ; j++)  v[j] = 0.f ;
1003 }
1004 
1005 
1006 inline void quad4::load(const std::array<double,16>& a )
1007 {
1008     for(int i=0 ; i < 4 ; i++) set_v(i, &a[4*i], 4) ;
1009 }
1010 inline void quad4::load(const double* src, int n )
1011 {
1012     if(src == nullptr) return ;
1013     for(int s=0 ; s < 16 ; s++)
1014     {
1015         int i = s/4 ;
1016         int j = s%4 ;
1017         float* v = get_v(i) ;
1018         *(v+j) = s < n ? float(src[s]) : 0. ;
1019     }
1020 }
1021 
1022 
1023 
1024 
1025 
1026 inline quad4 quad4::make_ephoton()  // static
1027 {
1028     quad4 q ;
1029     q.ephoton();
1030     return q ;
1031 }
1032 
1033 
1034 inline void quad2::eprd()
1035 {
1036    qvals( q0.f, "EPRD_NRMT", "-1,0,0,100" );
1037    qvals( q1.i, "EPRD_FLAG", "101,0,0,10" );
1038    float3* nrm = normal();
1039    *nrm = normalize( *nrm );
1040 }
1041 
1042 inline quad2 quad2::make_eprd()  // static
1043 {
1044     quad2 prd ;
1045     prd.eprd();
1046     return prd ;
1047 }
1048 
1049 
1050 inline std::string quad2::desc() const
1051 {
1052     std::stringstream ss ;
1053     ss
1054         << " nrmt " << q0.f
1055         << " lpct " << q1.f.x
1056         << " flag ("
1057         << std::setw(5) << q1.i.y << " "
1058         << std::setw(5) << q1.i.z << " "
1059         << std::setw(5) << q1.i.w << " "
1060         << ")"
1061         ;
1062     std::string s = ss.str();
1063     return s ;
1064 }
1065 
1066 
1067 #endif
1068 
1069 
1070 
1071