Commit c10f614f authored by Nianchen Deng's avatar Nianchen Deng
Browse files

sync

parent dcba5844
...@@ -9,6 +9,8 @@ public: ...@@ -9,6 +9,8 @@ public:
Net *fcNet1; Net *fcNet1;
Net *fcNet2; Net *fcNet2;
Net *catNet; Net *catNet;
uint batchSize;
uint samples;
Nmsl2(int batchSize, int samples); Nmsl2(int batchSize, int samples);
...@@ -18,6 +20,6 @@ public: ...@@ -18,6 +20,6 @@ public:
virtual bool infer(); virtual bool infer();
virtual bool dispose(); virtual void dispose();
}; };
#include "Renderer.h" #include "Renderer.h"
#include "thread_index.h" #include "../utils/cuda.h"
/// Dispatch (n, 1, 1) /// Dispatch (n_rays, -)
__global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint n) __global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint nRays) {
{
glm::uvec3 idx3 = IDX3; glm::uvec3 idx3 = IDX3;
if (idx3.x >= n) uint rayIdx = idx3.x;
if (rayIdx >= nRays)
return; return;
glm::vec4 outColor; glm::vec4 outColor;
for (int i = samples - 1; i >= 0; --i) for (int si = samples - 1; si >= 0; --si) {
{ glm::vec4 c = layeredColors[rayIdx * samples + si];
glm::vec4 c = layeredColors[idx3.x * samples + i];
outColor = outColor * (1 - c.a) + c * c.a; outColor = outColor * (1 - c.a) + c * c.a;
} }
outColor.a = 1.0f; outColor.a = 1.0f;
o_colors[idx3.x] = outColor; o_colors[idx3.x] = outColor;
} }
Renderer::Renderer() Renderer::Renderer() {}
{
}
void Renderer::render(sptr<CudaArray<glm::vec4>> o_colors, void Renderer::render(sptr<CudaArray<glm::vec4>> o_colors,
sptr<CudaArray<glm::vec4>> layeredColors) sptr<CudaArray<glm::vec4>> layeredColors) {
{ dim3 blkSize(1024);
dim3 blockSize(1024); dim3 grdSize(ceilDiv(o_colors->n(), blkSize.x));
dim3 gridSize((uint)ceil(o_colors->n() / (float)blockSize.x)); CU_INVOKE(cu_render)
cu_render<<<gridSize, blockSize>>>(*o_colors, *layeredColors, layeredColors->n() / o_colors->n(), (*o_colors, *layeredColors, layeredColors->n() / o_colors->n(), o_colors->n());
o_colors->n());
CHECK_EX(cudaGetLastError()); CHECK_EX(cudaGetLastError());
} }
\ No newline at end of file
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
class Renderer { class Renderer {
public: public:
Renderer(); Renderer();
/**
* @brief
*
* @param o_colors
* @param layeredColors
*/
void render(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec4>> layeredColors); void render(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec4>> layeredColors);
}; };
\ No newline at end of file
#pragma once
#include <map>
#include <vector>
class Resource
{
public:
virtual ~Resource() {}
virtual void *getBuffer() const = 0;
virtual size_t size() const = 0;
};
class CudaBuffer : public Resource
{
public:
CudaBuffer(void *buffer = nullptr, size_t size = 0) : _buffer(buffer), _ownBuffer(false), _size(size) {}
CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size)
{
CHECK_EX(cudaMalloc(&_buffer, size));
}
CudaBuffer(const CudaBuffer &rhs) = delete;
virtual ~CudaBuffer()
{
if (!_ownBuffer || _buffer == nullptr)
return;
try
{
CHECK_EX(cudaFree(_buffer));
}
catch (std::exception &ex)
{
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_buffer = nullptr;
_ownBuffer = false;
}
virtual void *getBuffer() const { return _buffer; }
virtual size_t size() const { return _size; }
private:
void *_buffer;
bool _ownBuffer;
size_t _size;
};
template <typename T>
class CudaArray : public CudaBuffer
{
public:
CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {}
CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {}
CudaArray(const CudaArray<T> &rhs) = delete;
size_t n() const { return size() / sizeof(T); }
operator T *() { return (T *)getBuffer(); }
};
class GraphicsResource : public Resource
{
public:
cudaGraphicsResource_t getHandler() { return _res; }
virtual ~GraphicsResource()
{
if (_res == nullptr)
return;
try
{
CHECK_EX(cudaGraphicsUnregisterResource(_res));
}
catch (std::exception &ex)
{
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_res = nullptr;
}
virtual size_t size() const { return _size; }
protected:
cudaGraphicsResource_t _res;
size_t _size;
GraphicsResource() : _res(nullptr), _size(0) {}
};
template<typename T>
class GlTextureResource : public GraphicsResource
{
public:
GlTextureResource(GLuint textureID, glm::uvec2 textureSize)
{
CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsWriteDiscard));
_size = textureSize.x * textureSize.y * sizeof(T);
_textureSize = textureSize;
}
virtual ~GlTextureResource()
{
cudaGraphicsUnmapResources(1, &_res, 0);
}
virtual void *getBuffer() const {
cudaArray_t buffer;
try{
CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
} catch (...) {
return nullptr;
}
return buffer;
}
operator T *() { return (T *)getBuffer(); }
glm::uvec2 textureSize () { return _textureSize; }
private:
glm::uvec2 _textureSize;
};
class Resources
{
public:
std::map<std::string, Resource *> resources;
std::vector<cudaGraphicsResource_t> graphicsResources;
void addResource(const std::string &name, Resource *res)
{
auto gres = dynamic_cast<GraphicsResource *>(res);
if (gres != nullptr)
graphicsResources.push_back(gres->getHandler());
resources[name] = res;
}
void clear()
{
resources.clear();
graphicsResources.clear();
}
};
template <typename T>
void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
size_t elemsPerRow = 1)
{
T *hostArr = new T[arr.n()];
cudaMemcpy(hostArr, arr.getBuffer(), arr.n() * sizeof(T), cudaMemcpyDeviceToHost);
dumpHostBuffer<float>(so, hostArr, arr.n() * sizeof(T),
sizeof(T) / sizeof(float) * elemsPerRow, maxDumpRows);
delete[] hostArr;
}
\ No newline at end of file
#include "Sampler.h" #include "Sampler.h"
#include "thread_index.h" #include "../utils/cuda.h"
__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth) __device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth) {
{
float pp = glm::dot(p, p); float pp = glm::dot(p, p);
float vv = glm::dot(v, v); float vv = glm::dot(v, v);
float pv = glm::dot(p, v); float pv = glm::dot(p, v);
...@@ -10,36 +9,36 @@ __device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, floa ...@@ -10,36 +9,36 @@ __device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, floa
return p + o_depth * v; return p + o_depth * v;
} }
__device__ float _getAngle(float x, float y) __device__ float _getAngle(float x, float y) {
{
return -atan(x / y) + (y < 0) * (float)M_PI + 0.5f * (float)M_PI; return -atan(x / y) + (y < 0) * (float)M_PI + 0.5f * (float)M_PI;
} }
/** /**
* Dispatch with block_size=(n_samples, 1024) * Dispatch with block_size=(n_samples, *), grid_size=(1, nRays/*)
* Index with (sample_idx, ray_idx)
*/ */
__global__ void cu_sampleOnRays(glm::vec3 *o_sphericalCoords, float *o_depths, glm::vec3 *rays, __global__ void cu_sampleOnRays(float *o_coords, float *o_depths, glm::vec3 *rays, uint nRays,
glm::vec3 rayCenter, float range0, float rangeStep, uint n) glm::vec3 origin, Range range, bool outputRadius) {
{
glm::uvec3 idx3 = IDX3; glm::uvec3 idx3 = IDX3;
uint rayIdx = flattenIdx({idx3.y, idx3.z, 0});
if (rayIdx >= n)
return;
uint idx = flattenIdx(idx3); uint idx = flattenIdx(idx3);
float r_reciprocal = rangeStep * idx3.x + range0; uint sampleIdx = idx3.x;
glm::vec3 p = _raySphereIntersect(rayCenter, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]); uint rayIdx = idx3.y;
o_sphericalCoords[idx] = glm::vec3(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal)); if (rayIdx >= nRays)
return;
float r_reciprocal = range.get(sampleIdx);
glm::vec3 p = _raySphereIntersect(origin, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]);
glm::vec3 sp(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal));
if (outputRadius)
((glm::vec3 *)o_coords)[idx] = sp;
else
((glm::vec2 *)o_coords)[idx] = {sp.y, sp.z};
} }
void Sampler::sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords, sptr<CudaArray<float>> o_depths, void Sampler::sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
sptr<CudaArray<glm::vec3>> rays, sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter) {
glm::vec3 rayCenter) dim3 blkSize(_dispRange.steps(), 1024 / _dispRange.steps());
{ dim3 grdSize(1, (uint)ceil(rays->n() / (float)blkSize.y));
dim3 blockSize(_samples, 1024 / _samples); CU_INVOKE(cu_sampleOnRays)
dim3 gridSize(1, (uint)ceil(rays->n() / (float)blockSize.y)); (*o_coords, *o_depths, *rays, rays->n(), rayCenter, _dispRange, _outputRadius);
cu_sampleOnRays<<<gridSize, blockSize>>>(*o_sphericalCoords, *o_depths, *rays, rayCenter,
_dispRange.x,
(_dispRange.y - _dispRange.x) / (_samples - 1),
rays->n());
CHECK_EX(cudaGetLastError()); CHECK_EX(cudaGetLastError());
} }
\ No newline at end of file
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
class Sampler class Sampler {
{
public: public:
Sampler(glm::vec2 depthRange, uint samples) : _dispRange(1.0f / depthRange.x, 1.0f / depthRange.y), Sampler(glm::vec2 depthRange, uint samples, bool outputRadius)
_samples(samples) {} : _dispRange(1.0f / depthRange, samples), _outputRadius(outputRadius) {}
void sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords, void sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
sptr<CudaArray<float>> o_depths, sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter);
sptr<CudaArray<glm::vec3>> rays,
glm::vec3 rayCenter);
private: private:
glm::vec2 _dispRange; Range _dispRange;
uint _samples; bool _outputRadius;
}; };
\ No newline at end of file
#include "SynthesisPipeline.h" #include "SynthesisPipeline.h"
SynthesisPipeline::SynthesisPipeline( SynthesisPipeline::SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples,
const std::string &netDir, bool isNmsl, uint batchSize, glm::vec2 depthRange, int encodeDim, int coordChns,
uint samples) : _batchSize(batchSize), float enhanceSigma, float enhanceFe)
_samples(samples), : _nRays(cam->res().x * cam->res().y),
_inferPipeline(new InferPipeline(netDir, isNmsl, batchSize, samples)), _nSamples(nSamples),
_rays(new CudaArray<glm::vec3>(batchSize)), _enhanceSigma(enhanceSigma),
_colors(new CudaArray<glm::vec4>(batchSize)) _enhanceFe(enhanceFe),
{ _cam(cam),
_glResultBuffer = _createGlResultBuffer(_batchSize); _inferPipeline(new InferPipeline(net, _nRays, nSamples, depthRange, encodeDim, coordChns)),
_enhancement(new Enhancement(cam->res())),
_rays(new CudaArray<glm::vec3>(_nRays)),
_colors(new CudaArray<glm::vec4>(_nRays)) {
_glResultBuffer = _createGlResultBuffer(_nRays);
_glResultTextures.push_back(_createGlResultTexture(_cam->res()));
} }
void SynthesisPipeline::run(View &view) void SynthesisPipeline::run(View &view) {
{
CudaEvent eStart, eGenRays, eInferred, eEnhanced; CudaEvent eStart, eGenRays, eInferred, eEnhanced;
cudaEventRecord(eStart); cudaEventRecord(eStart);
...@@ -38,8 +42,9 @@ void SynthesisPipeline::run(View &view) ...@@ -38,8 +42,9 @@ void SynthesisPipeline::run(View &view)
cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced); cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
{ {
std::ostringstream sout; std::ostringstream sout;
sout << typeid(*this).name() << " => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays sout << typeid(*this).name() << " => Total: " << timeTotal
<< "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)"; << "ms (Gen rays: " << timeGenRays << "ms, Infer: " << timeInfer
<< "ms, Enhance: " << timeEnhance << "ms)";
Logger::instance.info(sout.str()); Logger::instance.info(sout.str());
} }
...@@ -53,13 +58,21 @@ void SynthesisPipeline::run(View &view) ...@@ -53,13 +58,21 @@ void SynthesisPipeline::run(View &view)
_uploadResultToTextures(); _uploadResultToTextures();
} }
GLuint SynthesisPipeline::getGlResultTexture(int index) GLuint SynthesisPipeline::getGlResultTexture(int index) { return _glResultTextures[index]; }
{
return _glResultTextures[index]; void SynthesisPipeline::_genRays(View &view) { view.transVectors(_rays, _cam->localRays()); }
void SynthesisPipeline::_enhance() { _enhancement->run(_colors, _enhanceSigma, _enhanceFe); }
void SynthesisPipeline::_uploadResultToTextures() {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _cam->res().x, _cam->res().y, GL_RGBA, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
} }
GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res) GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res) {
{
GLuint textureID; GLuint textureID;
glEnable(GL_TEXTURE_2D); glEnable(GL_TEXTURE_2D);
glGenTextures(1, &textureID); glGenTextures(1, &textureID);
...@@ -72,91 +85,11 @@ GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res) ...@@ -72,91 +85,11 @@ GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res)
return textureID; return textureID;
} }
GLuint SynthesisPipeline::_createGlResultBuffer(uint elements) GLuint SynthesisPipeline::_createGlResultBuffer(uint elements) {
{
GLuint glBuffer; GLuint glBuffer;
glGenBuffers(1, &glBuffer); glGenBuffers(1, &glBuffer);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glBuffer); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glBuffer);
glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4), glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4), nullptr, GL_STREAM_DRAW);
nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
return glBuffer; return glBuffer;
} }
\ No newline at end of file
FoveaSynthesisPipeline::FoveaSynthesisPipeline(
glm::uvec2 res, float fov,
uint samples) : SynthesisPipeline("../nets/fovea_mono/", false, res.x * res.y, samples),
_foveaCamera(fov, res / 2u, res),
_enhancement(new Enhancement(res))
{
_glResultTextures.push_back(_createGlResultTexture(res));
}
void FoveaSynthesisPipeline::_genRays(View &view)
{
view.transVectors(_rays, _foveaCamera.localRays());
}
void FoveaSynthesisPipeline::_enhance()
{
_enhancement->run(_colors, 3.0f, 0.2f);
}
void FoveaSynthesisPipeline::_uploadResultToTextures()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _foveaCamera.res().x, _foveaCamera.res().y,
GL_RGBA, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
PeriphSynthesisPipeline::PeriphSynthesisPipeline(
glm::uvec2 midRes, float midFov, glm::uvec2 periphRes, float periphFov,
uint samples) : SynthesisPipeline("../nets/periph/", false,
midRes.x * midRes.y + periphRes.x * periphRes.y,
samples),
_midCamera(midFov, midRes / 2u, midRes),
_periphCamera(periphFov, periphRes / 2u, periphRes),
_midEnhancement(new Enhancement(midRes)),
_periphEnhancement(new Enhancement(periphRes))
{
uint midPixels = midRes.x * midRes.y;
uint periphPixels = periphRes.x * periphRes.y;
_midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*_rays, midPixels));
_periphRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(
(glm::vec3 *)*_rays + midPixels, periphPixels));
_glResultTextures.push_back(_createGlResultTexture(midRes));
_glResultTextures.push_back(_createGlResultTexture(periphRes));
_midColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(*_colors, midPixels));
_periphColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(
(glm::vec4 *)*_colors + midPixels, periphPixels));
}
void PeriphSynthesisPipeline::_genRays(View &view)
{
view.transVectors(_midRays, _midCamera.localRays());
view.transVectors(_periphRays, _periphCamera.localRays());
}
void PeriphSynthesisPipeline::_enhance()
{
_midEnhancement->run(_midColors, 5.0f, 0.2f);
_periphEnhancement->run(_periphColors, 5.0f, 0.2f);
}
void PeriphSynthesisPipeline::_uploadResultToTextures()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _midCamera.res().x, _midCamera.res().y,
GL_RGBA, GL_FLOAT, 0);
glBindTexture(GL_TEXTURE_2D, _glResultTextures[1]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
_periphCamera.res().x, _periphCamera.res().y, GL_RGBA, GL_FLOAT,
(void *)(_midCamera.res().x * _midCamera.res().y * sizeof(glm::vec4)));
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
#include "InferPipeline.h" #include "InferPipeline.h"
#include "View.h" #include "View.h"
#include "Enhancement.h" #include "Enhancement.h"
class SynthesisPipeline class SynthesisPipeline {
{
public: public:
SynthesisPipeline(const std::string &netDir, bool isNmsl, SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples, glm::vec2 depthRange,
uint batchSize, uint samples); int encodeDim, int coordChns, float enhanceSigma, float enhanceFe);
void run(View& view); void run(View &view);
GLuint getGlResultTexture(int index); GLuint getGlResultTexture(int index);
protected: protected:
uint _batchSize; uint _nRays;
uint _samples; uint _nSamples;
std::vector<GLuint> _glResultTextures; float _enhanceSigma;
GLuint _glResultBuffer; float _enhanceFe;
sptr<Camera> _cam;
sptr<InferPipeline> _inferPipeline; sptr<InferPipeline> _inferPipeline;
sptr<Enhancement> _enhancement;
sptr<CudaArray<glm::vec3>> _rays; sptr<CudaArray<glm::vec3>> _rays;
sptr<CudaArray<glm::vec4>> _colors; sptr<CudaArray<glm::vec4>> _colors;
std::vector<GLuint> _glResultTextures;
GLuint _glResultBuffer;
virtual void _genRays(View& view) = 0; void _genRays(View &view);
virtual void _enhance() = 0; void _enhance();
virtual void _uploadResultToTextures() = 0; void _uploadResultToTextures();
GLuint _createGlResultTexture(glm::uvec2 res); GLuint _createGlResultTexture(glm::uvec2 res);
GLuint _createGlResultBuffer(uint elements); GLuint _createGlResultBuffer(uint elements);
};
class FoveaSynthesisPipeline : public SynthesisPipeline
{
public:
FoveaSynthesisPipeline(glm::uvec2 res, float fov, uint samples);
protected:
virtual void _genRays(View& view);
virtual void _enhance();
virtual void _uploadResultToTextures();
private:
Camera _foveaCamera;
sptr<Enhancement> _enhancement;
};
class PeriphSynthesisPipeline : public SynthesisPipeline
{
public:
PeriphSynthesisPipeline(glm::uvec2 midRes, float midFov,
glm::uvec2 periphRes, float periphFov,
uint samples);
protected:
virtual void _genRays(View& view);
virtual void _enhance();
virtual void _uploadResultToTextures();
private:
Camera _midCamera;
Camera _periphCamera;
sptr<CudaArray<glm::vec3>> _midRays;
sptr<CudaArray<glm::vec3>> _periphRays;
sptr<CudaArray<glm::vec4>> _midColors;
sptr<CudaArray<glm::vec4>> _periphColors;
sptr<Enhancement> _midEnhancement;
sptr<Enhancement> _periphEnhancement;
}; };
\ No newline at end of file
#include "View.h" #include "View.h"
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include "thread_index.h" #include "../utils/cuda.h"
__global__ void cu_genLocalRays(glm::vec3 *o_rays, glm::vec2 f, glm::vec2 c, glm::uvec2 res) __global__ void cu_genLocalRays(glm::vec3 *o_rays, glm::vec2 f, glm::vec2 c, glm::uvec2 res)
{ {
......
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
class Camera { class Camera {
......
OUTNAME_RELEASE = msl_infer_test OUTNAME_RELEASE = msl_infer_test
OUTNAME_DEBUG = msl_infer_test_debug OUTNAME_DEBUG = msl_infer_test_debug
EXTRA_DIRECTORIES = ../msl_infer EXTRA_DIRECTORIES = ../msl_infer ../utils
.NOTPARALLEL: .NOTPARALLEL:
MAKEFILE ?= ../Makefile.config MAKEFILE ?= ../Makefile.config
include $(MAKEFILE) include $(MAKEFILE)
...@@ -7,18 +7,15 @@ ...@@ -7,18 +7,15 @@
#include "../msl_infer/View.h" #include "../msl_infer/View.h"
#include "../glm/gtx/transform.hpp" #include "../glm/gtx/transform.hpp"
static const struct static const struct {
{
float x, y; float x, y;
float u, v; float u, v;
} vertices[4] = { } vertices[4] = {{-1.0f, -1.0f, 0.f, 1.f},
{-1.0f, -1.0f, 0.f, 1.f},
{1.0f, -1.0f, 1.f, 1.f}, {1.0f, -1.0f, 1.f, 1.f},
{1.0f, 1.0f, 1.f, 0.f}, {1.0f, 1.0f, 1.f, 0.f},
{-1.0f, 1.0f, 0.f, 0.f}}; {-1.0f, 1.0f, 0.f, 0.f}};
static const char *vertex_shader_text = static const char *vertex_shader_text = "#version 300 es\n"
"#version 300 es\n"
"uniform mat4 MVP;\n" "uniform mat4 MVP;\n"
"in vec2 vUV;\n" "in vec2 vUV;\n"
"in vec2 vPos;\n" "in vec2 vPos;\n"
...@@ -60,8 +57,7 @@ static const char *fragment_shader_text = ...@@ -60,8 +57,7 @@ static const char *fragment_shader_text =
" FragColor = c;\n" " FragColor = c;\n"
"}\n"; "}\n";
void inferFovea(void *o_imageData, View &view) /*void inferFovea(void *o_imageData, View &view) {
{
glm::uvec2 foveaRes(128, 128); glm::uvec2 foveaRes(128, 128);
size_t foveaPixels = foveaRes.x * foveaRes.y; size_t foveaPixels = foveaRes.x * foveaRes.y;
size_t totalPixels = foveaPixels; size_t totalPixels = foveaPixels;
...@@ -106,8 +102,7 @@ void inferFovea(void *o_imageData, View &view) ...@@ -106,8 +102,7 @@ void inferFovea(void *o_imageData, View &view)
cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost); cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
} }
void inferOther(void *o_imageData, View &view) void inferOther(void *o_imageData, View &view) {
{
glm::uvec2 midRes(256, 256); glm::uvec2 midRes(256, 256);
glm::uvec2 periphRes(230, 256); glm::uvec2 periphRes(230, 256);
size_t midPixels = midRes.x * midRes.y; size_t midPixels = midRes.x * midRes.y;
...@@ -123,7 +118,8 @@ void inferOther(void *o_imageData, View &view) ...@@ -123,7 +118,8 @@ void inferOther(void *o_imageData, View &view)
auto periphLocalRays = periphCam.localRays(); auto periphLocalRays = periphCam.localRays();
auto rays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(totalPixels)); auto rays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(totalPixels));
auto midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*rays, midPixels)); auto midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*rays, midPixels));
auto periphRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>((glm::vec3 *)*rays + midPixels, periphPixels)); auto periphRays = sptr<CudaArray<glm::vec3>>(
new CudaArray<glm::vec3>((glm::vec3 *)*rays + midPixels, periphPixels));
auto colors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(totalPixels)); auto colors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(totalPixels));
CudaEvent eStart, eGenRays, eInferred, eEnhanced; CudaEvent eStart, eGenRays, eInferred, eEnhanced;
...@@ -157,28 +153,25 @@ void inferOther(void *o_imageData, View &view) ...@@ -157,28 +153,25 @@ void inferOther(void *o_imageData, View &view)
Logger::instance.info(sout.str()); Logger::instance.info(sout.str());
} }
cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost); cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
} }*/
static void error_callback(int error, const char *description) static void error_callback(int error, const char *description) {
{
fprintf(stderr, "Error: %s\n", description); fprintf(stderr, "Error: %s\n", description);
} }
static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) {
{
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
glfwSetWindowShouldClose(window, GLFW_TRUE); glfwSetWindowShouldClose(window, GLFW_TRUE);
} }
GLFWwindow *initGl(uint windowWidth, uint windowHeight) GLFWwindow *initGl(uint windowWidth, uint windowHeight) {
{
glfwSetErrorCallback(error_callback); glfwSetErrorCallback(error_callback);
if (!glfwInit()) if (!glfwInit())
return nullptr; return nullptr;
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2); glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0); glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
//glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); // glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
//glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
/*glfwWindowHint(GLFW_DEPTH_BITS, 0); /*glfwWindowHint(GLFW_DEPTH_BITS, 0);
glfwWindowHint(GLFW_STENCIL_BITS, 0); glfwWindowHint(GLFW_STENCIL_BITS, 0);
...@@ -190,8 +183,7 @@ GLFWwindow *initGl(uint windowWidth, uint windowHeight) ...@@ -190,8 +183,7 @@ GLFWwindow *initGl(uint windowWidth, uint windowHeight)
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
*/ */
GLFWwindow *window = glfwCreateWindow(windowWidth, windowHeight, "LearnOpenGL", NULL, NULL); GLFWwindow *window = glfwCreateWindow(windowWidth, windowHeight, "LearnOpenGL", NULL, NULL);
if (!window) if (!window) {
{
glfwTerminate(); glfwTerminate();
return nullptr; return nullptr;
} }
...@@ -214,8 +206,7 @@ GLFWwindow *initGl(uint windowWidth, uint windowHeight) ...@@ -214,8 +206,7 @@ GLFWwindow *initGl(uint windowWidth, uint windowHeight)
return window; return window;
} }
GLuint createGlTexture(uint width, uint height) GLuint createGlTexture(uint width, uint height) {
{
GLuint textureID; GLuint textureID;
glEnable(GL_TEXTURE_2D); glEnable(GL_TEXTURE_2D);
glGenTextures(1, &textureID); glGenTextures(1, &textureID);
...@@ -228,34 +219,29 @@ GLuint createGlTexture(uint width, uint height) ...@@ -228,34 +219,29 @@ GLuint createGlTexture(uint width, uint height)
return textureID; return textureID;
} }
void checkCompileErrors(unsigned int shader, std::string type) void checkCompileErrors(unsigned int shader, std::string type) {
{
int success; int success;
char infoLog[1024]; char infoLog[1024];
if (type != "PROGRAM") if (type != "PROGRAM") {
{
glGetShaderiv(shader, GL_COMPILE_STATUS, &success); glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
if (!success) if (!success) {
{
glGetShaderInfoLog(shader, 1024, NULL, infoLog); glGetShaderInfoLog(shader, 1024, NULL, infoLog);
std::cout << "ERROR::SHADER_COMPILATION_ERROR of type: " << type << "\n" std::cout << "ERROR::SHADER_COMPILATION_ERROR of type: " << type << "\n"
<< infoLog << "\n -- --------------------------------------------------- -- " << std::endl; << infoLog << "\n -- --------------------------------------------------- -- "
<< std::endl;
} }
} } else {
else
{
glGetProgramiv(shader, GL_LINK_STATUS, &success); glGetProgramiv(shader, GL_LINK_STATUS, &success);
if (!success) if (!success) {
{
glGetProgramInfoLog(shader, 1024, NULL, infoLog); glGetProgramInfoLog(shader, 1024, NULL, infoLog);
std::cout << "ERROR::PROGRAM_LINKING_ERROR of type: " << type << "\n" std::cout << "ERROR::PROGRAM_LINKING_ERROR of type: " << type << "\n"
<< infoLog << "\n -- --------------------------------------------------- -- " << std::endl; << infoLog << "\n -- --------------------------------------------------- -- "
<< std::endl;
} }
} }
} }
GLuint loadShaderProgram() GLuint loadShaderProgram() {
{
GLuint vertex_shader, fragment_shader, program; GLuint vertex_shader, fragment_shader, program;
vertex_shader = glCreateShader(GL_VERTEX_SHADER); vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, &vertex_shader_text, NULL); glShaderSource(vertex_shader, 1, &vertex_shader_text, NULL);
...@@ -277,8 +263,7 @@ GLuint loadShaderProgram() ...@@ -277,8 +263,7 @@ GLuint loadShaderProgram()
return program; return program;
} }
int main(void) int main(void) {
{
Logger::instance.logLevel = 3; Logger::instance.logLevel = 3;
GLFWwindow *window; GLFWwindow *window;
...@@ -302,20 +287,34 @@ int main(void) ...@@ -302,20 +287,34 @@ int main(void)
vcol_location = glGetAttribLocation(program, "vUV"); vcol_location = glGetAttribLocation(program, "vUV");
glEnableVertexAttribArray(vpos_location); glEnableVertexAttribArray(vpos_location);
glVertexAttribPointer(vpos_location, 2, GL_FLOAT, GL_FALSE, glVertexAttribPointer(vpos_location, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]), (void *)0);
sizeof(vertices[0]), (void *)0);
glEnableVertexAttribArray(vcol_location); glEnableVertexAttribArray(vcol_location);
glVertexAttribPointer(vcol_location, 2, GL_FLOAT, GL_FALSE, glVertexAttribPointer(vcol_location, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]),
sizeof(vertices[0]), (void *)(sizeof(float) * 2)); (void *)(sizeof(float) * 2));
sptr<FoveaSynthesisPipeline> foveaSynthesisPipeline( sptr<Msl> foveaNet(new Msl());
new FoveaSynthesisPipeline({128, 128}, 20, 32)); foveaNet->load("");
sptr<PeriphSynthesisPipeline> periphSynthesisPipeline( sptr<Msl> periphNet(new Msl());
new PeriphSynthesisPipeline({256, 256}, 45, {230, 256}, 110, 16)); periphNet->load("");
sptr<Camera> foveaCam(new Camera(20, {128, 128}, {256, 256}));
sptr<Camera> midCam(new Camera(45, {128, 128}, {256, 256}));
sptr<Camera> periphCam(new Camera(110, {115, 128}, {230, 256}));
uint nSamples = 64;
uint encodeDim = 6;
uint coordChns = 2;
glm::vec2 depthRange(1.0f, 7.0f);
sptr<SynthesisPipeline> synthesisPipelines[] = {
sptr<SynthesisPipeline>(new SynthesisPipeline(foveaNet, foveaCam, nSamples, depthRange,
encodeDim, coordChns, 3.0f, 0.2f)),
sptr<SynthesisPipeline>(new SynthesisPipeline(periphNet, midCam, nSamples, depthRange,
encodeDim, coordChns, 5.0f, 0.2f)),
sptr<SynthesisPipeline>(new SynthesisPipeline(periphNet, periphCam, nSamples, depthRange,
encodeDim, coordChns, 5.0f, 0.2f)),
};
View view({}, {}); View view({}, {});
auto glFoveaTex = foveaSynthesisPipeline->getGlResultTexture(0); auto glFoveaTex = synthesisPipelines[0]->getGlResultTexture(0);
auto glMidTex = periphSynthesisPipeline->getGlResultTexture(0); auto glMidTex = synthesisPipelines[1]->getGlResultTexture(0);
auto glPeriphTex = periphSynthesisPipeline->getGlResultTexture(1); auto glPeriphTex = synthesisPipelines[2]->getGlResultTexture(0);
Logger::instance.info("Start main loop"); Logger::instance.info("Start main loop");
...@@ -328,10 +327,9 @@ int main(void) ...@@ -328,10 +327,9 @@ int main(void)
GLuint queries[1]; GLuint queries[1];
glGenQueries(1, queries); glGenQueries(1, queries);
while (!glfwWindowShouldClose(window)) while (!glfwWindowShouldClose(window)) {
{ for (int i = 0; i < 3; ++i)
foveaSynthesisPipeline->run(view); synthesisPipelines[i]->run(view);
periphSynthesisPipeline->run(view);
glClear(GL_COLOR_BUFFER_BIT); glClear(GL_COLOR_BUFFER_BIT);
...@@ -384,8 +382,8 @@ int main(void) ...@@ -384,8 +382,8 @@ int main(void)
glfwPollEvents(); glfwPollEvents();
} }
foveaSynthesisPipeline = nullptr; for (int i = 0; i < 3; ++i)
periphSynthesisPipeline = nullptr; synthesisPipelines[i] = nullptr;
glfwDestroyWindow(window); glfwDestroyWindow(window);
......
...@@ -39,7 +39,7 @@ public: ...@@ -39,7 +39,7 @@ public:
return false; return false;
} }
virtual void log(nv::ILogger::Severity severity, const char* msg) override { virtual void log(nv::ILogger::Severity severity, const char* msg) noexcept {
if ((int)severity > logLevel) if ((int)severity > logLevel)
return; return;
if (externalLogFunc == nullptr) { if (externalLogFunc == nullptr) {
......
#pragma once
#include <map>
#include <vector>
class Resource {
public:
virtual ~Resource() {}
virtual void *getBuffer() const = 0;
virtual size_t size() const = 0;
};
class CudaBuffer : public Resource {
public:
CudaBuffer(void *buffer = nullptr, size_t size = 0)
: _buffer(buffer), _ownBuffer(false), _size(size) {}
CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size) {
CHECK_EX(cudaMalloc(&_buffer, size));
}
CudaBuffer(const CudaBuffer &rhs) = delete;
virtual ~CudaBuffer() {
if (!_ownBuffer || _buffer == nullptr)
return;
try {
CHECK_EX(cudaFree(_buffer));
} catch (std::exception &ex) {
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_buffer = nullptr;
_ownBuffer = false;
}
virtual void *getBuffer() const { return _buffer; }
template <class T> T *getBuffer() const { return (T *)getBuffer(); }
virtual size_t size() const { return _size; }
private:
void *_buffer;
bool _ownBuffer;
size_t _size;
};
template <typename T> class CudaArray : public CudaBuffer {
public:
CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {}
CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {}
CudaArray(const CudaArray<T> &rhs) = delete;
size_t n() const { return size() / sizeof(T); }
operator T *() { return (T *)getBuffer(); }
};
class GraphicsResource : public Resource {
public:
cudaGraphicsResource_t getHandler() { return _res; }
virtual ~GraphicsResource() {
if (_res == nullptr)
return;
try {
CHECK_EX(cudaGraphicsUnregisterResource(_res));
} catch (std::exception &ex) {
Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
}
_res = nullptr;
}
virtual size_t size() const { return _size; }
protected:
cudaGraphicsResource_t _res;
size_t _size;
GraphicsResource() : _res(nullptr), _size(0) {}
};
template <typename T> class GlTextureResource : public GraphicsResource {
public:
GlTextureResource(GLuint textureID, glm::uvec2 textureSize) {
CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
cudaGraphicsRegisterFlagsWriteDiscard));
_size = textureSize.x * textureSize.y * sizeof(T);
_textureSize = textureSize;
}
virtual ~GlTextureResource() { cudaGraphicsUnmapResources(1, &_res, 0); }
virtual void *getBuffer() const {
cudaArray_t buffer;
try {
CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
} catch (...) {
return nullptr;
}
return buffer;
}
operator T *() { return (T *)getBuffer(); }
glm::uvec2 textureSize() { return _textureSize; }
private:
glm::uvec2 _textureSize;
};
class Resources {
public:
std::map<std::string, Resource *> resources;
std::vector<cudaGraphicsResource_t> graphicsResources;
void addResource(const std::string &name, Resource *res) {
auto gres = dynamic_cast<GraphicsResource *>(res);
if (gres != nullptr)
graphicsResources.push_back(gres->getHandler());
resources[name] = res;
}
void clear() {
resources.clear();
graphicsResources.clear();
}
};
template <typename T>
void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
size_t elemsPerRow = 1) {
T *hostArr = new T[arr.n()];
cudaMemcpy(hostArr, arr.getBuffer(), arr.n() * sizeof(T), cudaMemcpyDeviceToHost);
dumpHostBuffer<float>(so, hostArr, arr.n() * sizeof(T), sizeof(T) / sizeof(float) * elemsPerRow,
maxDumpRows);
delete[] hostArr;
}
\ No newline at end of file
#pragma once
#include <memory>
#include <stdexcept>
#include <vector>
#include <string>
#include <sstream>
#include <GL/glew.h>
#include <cuda_gl_interop.h>
#include "../glm/glm.hpp"
#include "Logger.h"
inline unsigned int getElementSize(nv::DataType t) {
switch (t) {
case nv::DataType::kINT32:
return 4;
case nv::DataType::kFLOAT:
return 4;
case nv::DataType::kHALF:
return 2;
case nv::DataType::kBOOL:
case nv::DataType::kINT8:
return 1;
}
throw std::runtime_error("Invalid DataType.");
return 0;
}
template <typename T> void dumpRow(std::ostream &os, T *buf, size_t n) {
os << buf[0];
for (size_t i = 1; i < n; ++i) {
os << " " << buf[i];
}
os << std::endl;
}
template <typename T>
void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount,
size_t maxDumpRows = 0) {
T *typedBuf = static_cast<T *>(buf);
size_t numItems = bufSize / sizeof(T);
size_t nInLastRow = numItems % rowCount;
size_t rows;
if (nInLastRow == 0) {
rows = numItems / rowCount;
nInLastRow = rowCount;
} else {
rows = numItems / rowCount + 1;
}
if (maxDumpRows == 0) {
for (size_t i = 0; i < rows - 1; ++i) {
dumpRow(os, typedBuf, rowCount);
typedBuf += rowCount;
}
dumpRow(os, typedBuf, nInLastRow);
} else {
for (size_t i = 0; i < maxDumpRows / 2; ++i)
dumpRow(os, typedBuf + i * rowCount, rowCount);
os << "..." << std::endl;
for (size_t i = rows - maxDumpRows + maxDumpRows / 2; i < rows - 1; ++i)
dumpRow(os, typedBuf + i * rowCount, rowCount);
dumpRow(os, typedBuf + (rows - 1) * rowCount, nInLastRow);
}
}
class CudaStream {
public:
CudaStream() { cudaStreamCreate(&stream); }
operator cudaStream_t() { return stream; }
virtual ~CudaStream() { cudaStreamDestroy(stream); }
private:
cudaStream_t stream;
};
class CudaEvent {
public:
CudaEvent() { cudaEventCreate(&mEvent); }
operator cudaEvent_t() { return mEvent; }
virtual ~CudaEvent() { cudaEventDestroy(mEvent); }
private:
cudaEvent_t mEvent;
};
struct CudaMapScope {
std::vector<cudaGraphicsResource_t> resources_;
cudaStream_t stream_;
CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
cudaStream_t stream = nullptr)
: resources_(resources), stream_(stream) {}
~CudaMapScope() {
if (!resources_.empty())
cudaGraphicsUnmapResources(resources_.size(), resources_.data(), stream_);
}
cudaError_t map() {
if (!resources_.empty())
return cudaGraphicsMapResources(resources_.size(), resources_.data(), stream_);
return cudaSuccess;
}
};
template <typename T> struct Destroy {
void operator()(T *t) {
if (t != nullptr)
t->destroy();
}
};
class Range {
public:
Range(glm::vec2 bound, uint steps)
: _start(bound.x), _step((bound.y - bound.x) / (steps - 1)), _steps(steps) {}
__host__ __device__ float get(uint i) { return _start + i * _step; }
__host__ __device__ float start() { return _start; }
__host__ __device__ float stop() { return _start + _step * _steps; }
__host__ __device__ float steps() { return _steps; }
private:
float _start;
float _step;
uint _steps;
};
template <class T> using uptr = std::unique_ptr<T, ::Destroy<T>>;
template <class T> using sptr = std::shared_ptr<T>;
#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
#include "Resource.h"
#include "Formatter.h"
\ No newline at end of file
#include "thread_index.h"
#ifdef __INTELLISENSE__
#define CU_INVOKE(__func__) __func__
#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__
#else
#define CU_INVOKE(__func__) __func__<<<grdSize, blkSize>>>
#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__<<<__grdSize__, __blkSize__>>>
#endif
inline uint ceilDiv(uint a, uint b) { return (uint)ceil(a / (float)b); }
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment