Commit c10f614f authored by Nianchen Deng's avatar Nianchen Deng
Browse files

sync

parent dcba5844
......@@ -9,6 +9,8 @@ public:
Net *fcNet1;
Net *fcNet2;
Net *catNet;
uint batchSize;
uint samples;
Nmsl2(int batchSize, int samples);
......@@ -18,6 +20,6 @@ public:
virtual bool infer();
virtual bool dispose();
virtual void dispose();
};
#include "Renderer.h"
#include "thread_index.h"
#include "../utils/cuda.h"
/// Dispatch (n, 1, 1)
__global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint n)
{
/// Dispatch (n_rays, -)
__global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint nRays) {
glm::uvec3 idx3 = IDX3;
if (idx3.x >= n)
uint rayIdx = idx3.x;
if (rayIdx >= nRays)
return;
glm::vec4 outColor;
for (int i = samples - 1; i >= 0; --i)
{
glm::vec4 c = layeredColors[idx3.x * samples + i];
for (int si = samples - 1; si >= 0; --si) {
glm::vec4 c = layeredColors[rayIdx * samples + si];
outColor = outColor * (1 - c.a) + c * c.a;
}
outColor.a = 1.0f;
o_colors[idx3.x] = outColor;
}
Renderer::Renderer()
{
}
Renderer::Renderer() {}
void Renderer::render(sptr<CudaArray<glm::vec4>> o_colors,
sptr<CudaArray<glm::vec4>> layeredColors)
{
dim3 blockSize(1024);
dim3 gridSize((uint)ceil(o_colors->n() / (float)blockSize.x));
cu_render<<<gridSize, blockSize>>>(*o_colors, *layeredColors, layeredColors->n() / o_colors->n(),
o_colors->n());
sptr<CudaArray<glm::vec4>> layeredColors) {
dim3 blkSize(1024);
dim3 grdSize(ceilDiv(o_colors->n(), blkSize.x));
CU_INVOKE(cu_render)
(*o_colors, *layeredColors, layeredColors->n() / o_colors->n(), o_colors->n());
CHECK_EX(cudaGetLastError());
}
\ No newline at end of file
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Renderer {
public:
Renderer();
/**
* @brief
*
* @param o_colors
* @param layeredColors
*/
void render(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec4>> layeredColors);
};
\ No newline at end of file
#pragma once
#include <map>
#include <vector>
class Resource
{
public:
virtual ~Resource() {}
virtual void *getBuffer() const = 0;
virtual size_t size() const = 0;
};
class CudaBuffer : public Resource
{
public:
CudaBuffer(void *buffer = nullptr, size_t size = 0) : _buffer(buffer), _ownBuffer(false), _size(size) {}
CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size)
{
CHECK_EX(cudaMalloc(&_buffer, size));
}
CudaBuffer(const CudaBuffer &rhs) = delete;
virtual ~CudaBuffer()
{
if (!_ownBuffer || _buffer == nullptr)
return;
try
{
CHECK_EX(cudaFree(_buffer));
}
catch (std::exception &ex)
{
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_buffer = nullptr;
_ownBuffer = false;
}
virtual void *getBuffer() const { return _buffer; }
virtual size_t size() const { return _size; }
private:
void *_buffer;
bool _ownBuffer;
size_t _size;
};
template <typename T>
class CudaArray : public CudaBuffer
{
public:
CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {}
CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {}
CudaArray(const CudaArray<T> &rhs) = delete;
size_t n() const { return size() / sizeof(T); }
operator T *() { return (T *)getBuffer(); }
};
class GraphicsResource : public Resource
{
public:
cudaGraphicsResource_t getHandler() { return _res; }
virtual ~GraphicsResource()
{
if (_res == nullptr)
return;
try
{
CHECK_EX(cudaGraphicsUnregisterResource(_res));
}
catch (std::exception &ex)
{
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_res = nullptr;
}
virtual size_t size() const { return _size; }
protected:
cudaGraphicsResource_t _res;
size_t _size;
GraphicsResource() : _res(nullptr), _size(0) {}
};
template<typename T>
class GlTextureResource : public GraphicsResource
{
public:
GlTextureResource(GLuint textureID, glm::uvec2 textureSize)
{
CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsWriteDiscard));
_size = textureSize.x * textureSize.y * sizeof(T);
_textureSize = textureSize;
}
virtual ~GlTextureResource()
{
cudaGraphicsUnmapResources(1, &_res, 0);
}
virtual void *getBuffer() const {
cudaArray_t buffer;
try{
CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
} catch (...) {
return nullptr;
}
return buffer;
}
operator T *() { return (T *)getBuffer(); }
glm::uvec2 textureSize () { return _textureSize; }
private:
glm::uvec2 _textureSize;
};
class Resources
{
public:
std::map<std::string, Resource *> resources;
std::vector<cudaGraphicsResource_t> graphicsResources;
void addResource(const std::string &name, Resource *res)
{
auto gres = dynamic_cast<GraphicsResource *>(res);
if (gres != nullptr)
graphicsResources.push_back(gres->getHandler());
resources[name] = res;
}
void clear()
{
resources.clear();
graphicsResources.clear();
}
};
template <typename T>
void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
size_t elemsPerRow = 1)
{
T *hostArr = new T[arr.n()];
cudaMemcpy(hostArr, arr.getBuffer(), arr.n() * sizeof(T), cudaMemcpyDeviceToHost);
dumpHostBuffer<float>(so, hostArr, arr.n() * sizeof(T),
sizeof(T) / sizeof(float) * elemsPerRow, maxDumpRows);
delete[] hostArr;
}
\ No newline at end of file
#include "Sampler.h"
#include "thread_index.h"
#include "../utils/cuda.h"
__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth)
{
__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth) {
float pp = glm::dot(p, p);
float vv = glm::dot(v, v);
float pv = glm::dot(p, v);
......@@ -10,36 +9,36 @@ __device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, floa
return p + o_depth * v;
}
__device__ float _getAngle(float x, float y)
{
__device__ float _getAngle(float x, float y) {
return -atan(x / y) + (y < 0) * (float)M_PI + 0.5f * (float)M_PI;
}
/**
* Dispatch with block_size=(n_samples, 1024)
* Dispatch with block_size=(n_samples, *), grid_size=(1, nRays/*)
* Index with (sample_idx, ray_idx)
*/
__global__ void cu_sampleOnRays(glm::vec3 *o_sphericalCoords, float *o_depths, glm::vec3 *rays,
glm::vec3 rayCenter, float range0, float rangeStep, uint n)
{
__global__ void cu_sampleOnRays(float *o_coords, float *o_depths, glm::vec3 *rays, uint nRays,
glm::vec3 origin, Range range, bool outputRadius) {
glm::uvec3 idx3 = IDX3;
uint rayIdx = flattenIdx({idx3.y, idx3.z, 0});
if (rayIdx >= n)
return;
uint idx = flattenIdx(idx3);
float r_reciprocal = rangeStep * idx3.x + range0;
glm::vec3 p = _raySphereIntersect(rayCenter, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]);
o_sphericalCoords[idx] = glm::vec3(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal));
uint sampleIdx = idx3.x;
uint rayIdx = idx3.y;
if (rayIdx >= nRays)
return;
float r_reciprocal = range.get(sampleIdx);
glm::vec3 p = _raySphereIntersect(origin, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]);
glm::vec3 sp(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal));
if (outputRadius)
((glm::vec3 *)o_coords)[idx] = sp;
else
((glm::vec2 *)o_coords)[idx] = {sp.y, sp.z};
}
void Sampler::sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords, sptr<CudaArray<float>> o_depths,
sptr<CudaArray<glm::vec3>> rays,
glm::vec3 rayCenter)
{
dim3 blockSize(_samples, 1024 / _samples);
dim3 gridSize(1, (uint)ceil(rays->n() / (float)blockSize.y));
cu_sampleOnRays<<<gridSize, blockSize>>>(*o_sphericalCoords, *o_depths, *rays, rayCenter,
_dispRange.x,
(_dispRange.y - _dispRange.x) / (_samples - 1),
rays->n());
void Sampler::sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter) {
dim3 blkSize(_dispRange.steps(), 1024 / _dispRange.steps());
dim3 grdSize(1, (uint)ceil(rays->n() / (float)blkSize.y));
CU_INVOKE(cu_sampleOnRays)
(*o_coords, *o_depths, *rays, rays->n(), rayCenter, _dispRange, _outputRadius);
CHECK_EX(cudaGetLastError());
}
\ No newline at end of file
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Sampler
{
class Sampler {
public:
Sampler(glm::vec2 depthRange, uint samples) : _dispRange(1.0f / depthRange.x, 1.0f / depthRange.y),
_samples(samples) {}
Sampler(glm::vec2 depthRange, uint samples, bool outputRadius)
: _dispRange(1.0f / depthRange, samples), _outputRadius(outputRadius) {}
void sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords,
sptr<CudaArray<float>> o_depths,
sptr<CudaArray<glm::vec3>> rays,
glm::vec3 rayCenter);
void sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter);
private:
glm::vec2 _dispRange;
uint _samples;
Range _dispRange;
bool _outputRadius;
};
\ No newline at end of file
#include "SynthesisPipeline.h"
SynthesisPipeline::SynthesisPipeline(
const std::string &netDir, bool isNmsl, uint batchSize,
uint samples) : _batchSize(batchSize),
_samples(samples),
_inferPipeline(new InferPipeline(netDir, isNmsl, batchSize, samples)),
_rays(new CudaArray<glm::vec3>(batchSize)),
_colors(new CudaArray<glm::vec4>(batchSize))
{
_glResultBuffer = _createGlResultBuffer(_batchSize);
SynthesisPipeline::SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples,
glm::vec2 depthRange, int encodeDim, int coordChns,
float enhanceSigma, float enhanceFe)
: _nRays(cam->res().x * cam->res().y),
_nSamples(nSamples),
_enhanceSigma(enhanceSigma),
_enhanceFe(enhanceFe),
_cam(cam),
_inferPipeline(new InferPipeline(net, _nRays, nSamples, depthRange, encodeDim, coordChns)),
_enhancement(new Enhancement(cam->res())),
_rays(new CudaArray<glm::vec3>(_nRays)),
_colors(new CudaArray<glm::vec4>(_nRays)) {
_glResultBuffer = _createGlResultBuffer(_nRays);
_glResultTextures.push_back(_createGlResultTexture(_cam->res()));
}
void SynthesisPipeline::run(View &view)
{
void SynthesisPipeline::run(View &view) {
CudaEvent eStart, eGenRays, eInferred, eEnhanced;
cudaEventRecord(eStart);
......@@ -38,8 +42,9 @@ void SynthesisPipeline::run(View &view)
cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
{
std::ostringstream sout;
sout << typeid(*this).name() << " => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
<< "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)";
sout << typeid(*this).name() << " => Total: " << timeTotal
<< "ms (Gen rays: " << timeGenRays << "ms, Infer: " << timeInfer
<< "ms, Enhance: " << timeEnhance << "ms)";
Logger::instance.info(sout.str());
}
......@@ -53,13 +58,21 @@ void SynthesisPipeline::run(View &view)
_uploadResultToTextures();
}
GLuint SynthesisPipeline::getGlResultTexture(int index)
{
return _glResultTextures[index];
GLuint SynthesisPipeline::getGlResultTexture(int index) { return _glResultTextures[index]; }
void SynthesisPipeline::_genRays(View &view) { view.transVectors(_rays, _cam->localRays()); }
void SynthesisPipeline::_enhance() { _enhancement->run(_colors, _enhanceSigma, _enhanceFe); }
void SynthesisPipeline::_uploadResultToTextures() {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _cam->res().x, _cam->res().y, GL_RGBA, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res)
{
GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res) {
GLuint textureID;
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &textureID);
......@@ -72,91 +85,11 @@ GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res)
return textureID;
}
GLuint SynthesisPipeline::_createGlResultBuffer(uint elements)
{
GLuint SynthesisPipeline::_createGlResultBuffer(uint elements) {
GLuint glBuffer;
glGenBuffers(1, &glBuffer);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glBuffer);
glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4),
nullptr, GL_STREAM_DRAW);
glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4), nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
return glBuffer;
}
FoveaSynthesisPipeline::FoveaSynthesisPipeline(
glm::uvec2 res, float fov,
uint samples) : SynthesisPipeline("../nets/fovea_mono/", false, res.x * res.y, samples),
_foveaCamera(fov, res / 2u, res),
_enhancement(new Enhancement(res))
{
_glResultTextures.push_back(_createGlResultTexture(res));
}
void FoveaSynthesisPipeline::_genRays(View &view)
{
view.transVectors(_rays, _foveaCamera.localRays());
}
void FoveaSynthesisPipeline::_enhance()
{
_enhancement->run(_colors, 3.0f, 0.2f);
}
void FoveaSynthesisPipeline::_uploadResultToTextures()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _foveaCamera.res().x, _foveaCamera.res().y,
GL_RGBA, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
PeriphSynthesisPipeline::PeriphSynthesisPipeline(
glm::uvec2 midRes, float midFov, glm::uvec2 periphRes, float periphFov,
uint samples) : SynthesisPipeline("../nets/periph/", false,
midRes.x * midRes.y + periphRes.x * periphRes.y,
samples),
_midCamera(midFov, midRes / 2u, midRes),
_periphCamera(periphFov, periphRes / 2u, periphRes),
_midEnhancement(new Enhancement(midRes)),
_periphEnhancement(new Enhancement(periphRes))
{
uint midPixels = midRes.x * midRes.y;
uint periphPixels = periphRes.x * periphRes.y;
_midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*_rays, midPixels));
_periphRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(
(glm::vec3 *)*_rays + midPixels, periphPixels));
_glResultTextures.push_back(_createGlResultTexture(midRes));
_glResultTextures.push_back(_createGlResultTexture(periphRes));
_midColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(*_colors, midPixels));
_periphColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(
(glm::vec4 *)*_colors + midPixels, periphPixels));
}
void PeriphSynthesisPipeline::_genRays(View &view)
{
view.transVectors(_midRays, _midCamera.localRays());
view.transVectors(_periphRays, _periphCamera.localRays());
}
void PeriphSynthesisPipeline::_enhance()
{
_midEnhancement->run(_midColors, 5.0f, 0.2f);
_periphEnhancement->run(_periphColors, 5.0f, 0.2f);
}
void PeriphSynthesisPipeline::_uploadResultToTextures()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _midCamera.res().x, _midCamera.res().y,
GL_RGBA, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[1]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
_periphCamera.res().x, _periphCamera.res().y, GL_RGBA, GL_FLOAT,
(void *)(_midCamera.res().x * _midCamera.res().y * sizeof(glm::vec4)));
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
}
\ No newline at end of file
#pragma once
#include "Common.h"
#include "../utils/common.h"
#include "InferPipeline.h"
#include "View.h"
#include "Enhancement.h"
class SynthesisPipeline
{
class SynthesisPipeline {
public:
SynthesisPipeline(const std::string &netDir, bool isNmsl,
uint batchSize, uint samples);
SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples, glm::vec2 depthRange,
int encodeDim, int coordChns, float enhanceSigma, float enhanceFe);
void run(View& view);
void run(View &view);
GLuint getGlResultTexture(int index);
protected:
uint _batchSize;
uint _samples;
std::vector<GLuint> _glResultTextures;
GLuint _glResultBuffer;
uint _nRays;
uint _nSamples;
float _enhanceSigma;
float _enhanceFe;
sptr<Camera> _cam;
sptr<InferPipeline> _inferPipeline;
sptr<Enhancement> _enhancement;
sptr<CudaArray<glm::vec3>> _rays;
sptr<CudaArray<glm::vec4>> _colors;
std::vector<GLuint> _glResultTextures;
GLuint _glResultBuffer;
virtual void _genRays(View& view) = 0;
virtual void _enhance() = 0;
virtual void _uploadResultToTextures() = 0;
void _genRays(View &view);
void _enhance();
void _uploadResultToTextures();
GLuint _createGlResultTexture(glm::uvec2 res);
GLuint _createGlResultBuffer(uint elements);
};
class FoveaSynthesisPipeline : public SynthesisPipeline
{
public:
FoveaSynthesisPipeline(glm::uvec2 res, float fov, uint samples);
protected:
virtual void _genRays(View& view);
virtual void _enhance();
virtual void _uploadResultToTextures();
private:
Camera _foveaCamera;
sptr<Enhancement> _enhancement;
};
class PeriphSynthesisPipeline : public SynthesisPipeline
{
public:
PeriphSynthesisPipeline(glm::uvec2 midRes, float midFov,
glm::uvec2 periphRes, float periphFov,
uint samples);
protected:
virtual void _genRays(View& view);
virtual void _enhance();
virtual void _uploadResultToTextures();
private:
Camera _midCamera;
Camera _periphCamera;
sptr<CudaArray<glm::vec3>> _midRays;
sptr<CudaArray<glm::vec3>> _periphRays;
sptr<CudaArray<glm::vec4>> _midColors;
sptr<CudaArray<glm::vec4>> _periphColors;
sptr<Enhancement> _midEnhancement;
sptr<Enhancement> _periphEnhancement;
};
\ No newline at end of file
#include "View.h"
#include <cuda_runtime.h>
#include "thread_index.h"
#include "../utils/cuda.h"
__global__ void cu_genLocalRays(glm::vec3 *o_rays, glm::vec2 f, glm::vec2 c, glm::uvec2 res)
{
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Camera {
......
OUTNAME_RELEASE = msl_infer_test
OUTNAME_DEBUG = msl_infer_test_debug
EXTRA_DIRECTORIES = ../msl_infer
EXTRA_DIRECTORIES = ../msl_infer ../utils
.NOTPARALLEL:
MAKEFILE ?= ../Makefile.config
include $(MAKEFILE)
This diff is collapsed.
......@@ -39,7 +39,7 @@ public:
return false;
}
virtual void log(nv::ILogger::Severity severity, const char* msg) override {
virtual void log(nv::ILogger::Severity severity, const char* msg) noexcept {
if ((int)severity > logLevel)
return;
if (externalLogFunc == nullptr) {
......
#pragma once
#include <map>
#include <vector>
class Resource {
public:
virtual ~Resource() {}
virtual void *getBuffer() const = 0;
virtual size_t size() const = 0;
};
class CudaBuffer : public Resource {
public:
CudaBuffer(void *buffer = nullptr, size_t size = 0)
: _buffer(buffer), _ownBuffer(false), _size(size) {}
CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size) {
CHECK_EX(cudaMalloc(&_buffer, size));
}
CudaBuffer(const CudaBuffer &rhs) = delete;
virtual ~CudaBuffer() {
if (!_ownBuffer || _buffer == nullptr)
return;
try {
CHECK_EX(cudaFree(_buffer));
} catch (std::exception &ex) {
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_buffer = nullptr;
_ownBuffer = false;
}
virtual void *getBuffer() const { return _buffer; }
template <class T> T *getBuffer() const { return (T *)getBuffer(); }
virtual size_t size() const { return _size; }
private:
void *_buffer;
bool _ownBuffer;
size_t _size;
};
template <typename T> class CudaArray : public CudaBuffer {
public:
CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {}
CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {}
CudaArray(const CudaArray<T> &rhs) = delete;
size_t n() const { return size() / sizeof(T); }
operator T *() { return (T *)getBuffer(); }
};
class GraphicsResource : public Resource {
public:
cudaGraphicsResource_t getHandler() { return _res; }
virtual ~GraphicsResource() {
if (_res == nullptr)
return;
try {
CHECK_EX(cudaGraphicsUnregisterResource(_res));
} catch (std::exception &ex) {
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_res = nullptr;
}
virtual size_t size() const { return _size; }
protected:
cudaGraphicsResource_t _res;
size_t _size;
GraphicsResource() : _res(nullptr), _size(0) {}
};
template <typename T> class GlTextureResource : public GraphicsResource {
public:
GlTextureResource(GLuint textureID, glm::uvec2 textureSize) {
CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsWriteDiscard));
_size = textureSize.x * textureSize.y * sizeof(T);
_textureSize = textureSize;
}
virtual ~GlTextureResource() { cudaGraphicsUnmapResources(1, &_res, 0); }
virtual void *getBuffer() const {
cudaArray_t buffer;
try {
CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
} catch (...) {
return nullptr;
}
return buffer;
}
operator T *() { return (T *)getBuffer(); }
glm::uvec2 textureSize() { return _textureSize; }
private:
glm::uvec2 _textureSize;
};
class Resources {
public:
std::map<std::string, Resource *> resources;
std::vector<cudaGraphicsResource_t> graphicsResources;
void addResource(const std::string &name, Resource *res) {
auto gres = dynamic_cast<GraphicsResource *>(res);
if (gres != nullptr)
graphicsResources.push_back(gres->getHandler());
resources[name] = res;
}
void clear() {
resources.clear();
graphicsResources.clear();
}
};
template <typename T>
void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
size_t elemsPerRow = 1) {
T *hostArr = new T[arr.n()];
cudaMemcpy(hostArr, arr.getBuffer(), arr.n() * sizeof(T), cudaMemcpyDeviceToHost);
dumpHostBuffer<float>(so, hostArr, arr.n() * sizeof(T), sizeof(T) / sizeof(float) * elemsPerRow,
maxDumpRows);
delete[] hostArr;
}
\ No newline at end of file
#pragma once
#include <memory>
#include <stdexcept>
#include <vector>
#include <string>
#include <sstream>
#include <GL/glew.h>
#include <cuda_gl_interop.h>
#include "../glm/glm.hpp"
#include "Logger.h"
inline unsigned int getElementSize(nv::DataType t) {
switch (t) {
case nv::DataType::kINT32:
return 4;
case nv::DataType::kFLOAT:
return 4;
case nv::DataType::kHALF:
return 2;
case nv::DataType::kBOOL:
case nv::DataType::kINT8:
return 1;
}
throw std::runtime_error("Invalid DataType.");
return 0;
}
template <typename T> void dumpRow(std::ostream &os, T *buf, size_t n) {
os << buf[0];
for (size_t i = 1; i < n; ++i) {
os << " " << buf[i];
}
os << std::endl;
}
template <typename T>
void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount,
size_t maxDumpRows = 0) {
T *typedBuf = static_cast<T *>(buf);
size_t numItems = bufSize / sizeof(T);
size_t nInLastRow = numItems % rowCount;
size_t rows;
if (nInLastRow == 0) {
rows = numItems / rowCount;
nInLastRow = rowCount;
} else {
rows = numItems / rowCount + 1;
}
if (maxDumpRows == 0) {
for (size_t i = 0; i < rows - 1; ++i) {
dumpRow(os, typedBuf, rowCount);
typedBuf += rowCount;
}
dumpRow(os, typedBuf, nInLastRow);
} else {
for (size_t i = 0; i < maxDumpRows / 2; ++i)
dumpRow(os, typedBuf + i * rowCount, rowCount);
os << "..." << std::endl;
for (size_t i = rows - maxDumpRows + maxDumpRows / 2; i < rows - 1; ++i)
dumpRow(os, typedBuf + i * rowCount, rowCount);
dumpRow(os, typedBuf + (rows - 1) * rowCount, nInLastRow);
}
}
class CudaStream {
public:
CudaStream() { cudaStreamCreate(&stream); }
operator cudaStream_t() { return stream; }
virtual ~CudaStream() { cudaStreamDestroy(stream); }
private:
cudaStream_t stream;
};
class CudaEvent {
public:
CudaEvent() { cudaEventCreate(&mEvent); }
operator cudaEvent_t() { return mEvent; }
virtual ~CudaEvent() { cudaEventDestroy(mEvent); }
private:
cudaEvent_t mEvent;
};
struct CudaMapScope {
std::vector<cudaGraphicsResource_t> resources_;
cudaStream_t stream_;
CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
cudaStream_t stream = nullptr)
: resources_(resources), stream_(stream) {}
~CudaMapScope() {
if (!resources_.empty())
cudaGraphicsUnmapResources(resources_.size(), resources_.data(), stream_);
}
cudaError_t map() {
if (!resources_.empty())
return cudaGraphicsMapResources(resources_.size(), resources_.data(), stream_);
return cudaSuccess;
}
};
template <typename T> struct Destroy {
void operator()(T *t) {
if (t != nullptr)
t->destroy();
}
};
class Range {
public:
Range(glm::vec2 bound, uint steps)
: _start(bound.x), _step((bound.y - bound.x) / (steps - 1)), _steps(steps) {}
__host__ __device__ float get(uint i) { return _start + i * _step; }
__host__ __device__ float start() { return _start; }
__host__ __device__ float stop() { return _start + _step * _steps; }
__host__ __device__ float steps() { return _steps; }
private:
float _start;
float _step;
uint _steps;
};
template <class T> using uptr = std::unique_ptr<T, ::Destroy<T>>;
template <class T> using sptr = std::shared_ptr<T>;
#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
#include "Resource.h"
#include "Formatter.h"
\ No newline at end of file
#include "thread_index.h"
#ifdef __INTELLISENSE__
#define CU_INVOKE(__func__) __func__
#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__
#else
#define CU_INVOKE(__func__) __func__<<<grdSize, blkSize>>>
#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__<<<__grdSize__, __blkSize__>>>
#endif
inline uint ceilDiv(uint a, uint b) { return (uint)ceil(a / (float)b); }
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment