// NOTE(review): the original #include targets were lost before this file was
// reviewed. The two headers below are reconstructed from the names the code
// uses (CUDA built-in index variables and glm::uvec2 / glm::uvec3) -- confirm
// against the build.
#include <cuda_runtime.h>
#include <glm/glm.hpp>

// ---------------------------------------------------------------------------
// Thread / block index shorthands.
// All of these expand to expressions over the CUDA built-ins threadIdx,
// blockIdx and blockDim, so they are only meaningful inside device code.
// ---------------------------------------------------------------------------

// Index of the calling thread within its block (1D / 2D / 3D).
#define T_IDX threadIdx.x
#define T_IDX2 glm::uvec2(threadIdx.x, threadIdx.y)
#define T_IDX3 glm::uvec3(threadIdx.x, threadIdx.y, threadIdx.z)

// Index of the calling thread's block within the grid (1D / 2D / 3D).
#define B_IDX blockIdx.x
#define B_IDX2 glm::uvec2(blockIdx.x, blockIdx.y)
#define B_IDX3 glm::uvec3(blockIdx.x, blockIdx.y, blockIdx.z)

// Global index of the calling thread (1D / 2D / 3D).
// IDX is wrapped in parentheses so that it stays a single operand when used
// inside a larger expression such as `IDX * 2` or `IDX % n`.
#define IDX (blockIdx.x * blockDim.x + threadIdx.x)
#define IDX2                                           \
    glm::uvec2(blockIdx.x * blockDim.x + threadIdx.x,  \
               blockIdx.y * blockDim.y + threadIdx.y)
#define IDX3                                           \
    glm::uvec3(blockIdx.x * blockDim.x + threadIdx.x,  \
               blockIdx.y * blockDim.y + threadIdx.y,  \
               blockIdx.z * blockDim.z + threadIdx.z)

// Global 3D thread index flattened to a single uint (see flattenIdx below).
// The name is rooted at the global namespace so the macro resolves the same
// way regardless of the namespace it is expanded in.
#define FLAT_INDEX ::utils::cuda::flattenIdx(IDX3)

// ---------------------------------------------------------------------------
// Declaration helpers: each one declares `uint` local variable(s) holding the
// global thread index. The expansions already end with ';', so a call site
// may be written with or without its own trailing semicolon.
// ---------------------------------------------------------------------------
#define DEFINE_IDX(var) uint var = blockIdx.x * blockDim.x + threadIdx.x;

#define DEFINE_IDX2(varX, varY)                          \
    uint varX = blockIdx.x * blockDim.x + threadIdx.x;   \
    uint varY = blockIdx.y * blockDim.y + threadIdx.y;

#define DEFINE_IDX3(varX, varY, varZ)                    \
    uint varX = blockIdx.x * blockDim.x + threadIdx.x;   \
    uint varY = blockIdx.y * blockDim.y + threadIdx.y;   \
    uint varZ = blockIdx.z * blockDim.z + threadIdx.z;

#define DEFINE_FLAT_INDEX(var) uint var = FLAT_INDEX;

namespace utils::cuda {

/**
 * Flattens a 3D index into a single linear index, using the extent of the
 * current launch as the layout.
 *
 * The row pitch is blockDim.x * gridDim.x and the slice pitch is that row
 * pitch times blockDim.y * gridDim.y, i.e.
 *   idx3.x + idx3.y * rowPitch + idx3.z * slicePitch.
 *
 * All arithmetic is done in unsigned integers and the result is returned as
 * `uint`, so it wraps silently if the product of the launch extents exceeds
 * the range of that type.
 *
 * @param idx3  3D index to flatten (typically IDX3).
 * @return      Linear index corresponding to idx3.
 */
__device__ __forceinline__ uint flattenIdx(glm::uvec3 idx3) {
    const uint rowPitch = blockDim.x * gridDim.x;
    const uint slicePitch = rowPitch * blockDim.y * gridDim.y;
    return idx3.x + idx3.y * rowPitch + idx3.z * slicePitch;
}

/**
 * Flattens a 2D index into a single linear index, using the x extent of the
 * current launch (blockDim.x * gridDim.x) as the row pitch:
 *   idx2.x + idx2.y * rowPitch.
 *
 * Arithmetic is unsigned and wraps silently on overflow of `uint`.
 *
 * @param idx2  2D index to flatten (typically IDX2).
 * @return      Linear index corresponding to idx2.
 */
__device__ __forceinline__ uint flattenIdx(glm::uvec2 idx2) {
    const uint rowPitch = blockDim.x * gridDim.x;
    return idx2.x + idx2.y * rowPitch;
}

}  // namespace utils::cuda