sync

c10f614f · Nianchen Deng · dcba5844 · c10f614f · c10f614f · c10f614f
Commit c10f614f authored May 08, 2021 by Nianchen Deng
--- a/cpp/msl_infer/Nmsl2.h
+++ b/cpp/msl_infer/Nmsl2.h
@@ -9,6 +9,8 @@ public:
 	Net *fcNet1;
 	Net *fcNet2;
 	Net *catNet;
+	uint batchSize;
+	uint samples;
 	Nmsl2(int batchSize, int samples);
@@ -18,6 +20,6 @@ public:
 	virtual bool infer();
-	virtual bool dispose();
+	virtual void dispose();
 };
--- a/cpp/msl_infer/Renderer.cu
+++ b/cpp/msl_infer/Renderer.cu
 #include "Renderer.h"
-#include "thread_index.h"
+#include "../utils/cuda.h"
-/// Dispatch (n, 1, 1)
+/// Dispatch (n_rays, -)
-__global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint n)
+__global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint nRays) {
-{
    glm::uvec3 idx3 = IDX3;
-    if (idx3.x >= n)
+    uint rayIdx = idx3.x;
+    if (rayIdx >= nRays)
        return;
    glm::vec4 outColor;
-    for (int i = samples - 1; i >= 0; --i)
+    for (int si = samples - 1; si >= 0; --si) {
-    {
+        glm::vec4 c = layeredColors[rayIdx * samples + si];
-        glm::vec4 c = layeredColors[idx3.x * samples + i];
        outColor = outColor * (1 - c.a) + c * c.a;
    }
    outColor.a = 1.0f;
    o_colors[idx3.x] = outColor;
 }
-Renderer::Renderer()
+Renderer::Renderer() {}
-{
-}
 void Renderer::render(sptr<CudaArray<glm::vec4>> o_colors,
-                      sptr<CudaArray<glm::vec4>> layeredColors)
+                      sptr<CudaArray<glm::vec4>> layeredColors) {
-{
+    dim3 blkSize(1024);
-    dim3 blockSize(1024);
+    dim3 grdSize(ceilDiv(o_colors->n(), blkSize.x));
-    dim3 gridSize((uint)ceil(o_colors->n() / (float)blockSize.x));
+    CU_INVOKE(cu_render)
-    cu_render<<<gridSize, blockSize>>>(*o_colors, *layeredColors, layeredColors->n() / o_colors->n(),
+    (*o_colors, *layeredColors, layeredColors->n() / o_colors->n(), o_colors->n());
-                                       o_colors->n());
    CHECK_EX(cudaGetLastError());
 }
\ No newline at end of file
--- a/cpp/msl_infer/Renderer.h
+++ b/cpp/msl_infer/Renderer.h
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 class Renderer {
 public:
    Renderer();
+    /**
+     * @brief
+     *
+     * @param o_colors
+     * @param layeredColors
+     */
    void render(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec4>> layeredColors);
 };
\ No newline at end of file
--- a/cpp/msl_infer/Resource.h
+++ b/cpp/msl_infer/Resource.h
-#pragma once
-#include <map>
-#include <vector>
-class Resource
-{
-public:
-	virtual ~Resource() {}
-	virtual void *getBuffer() const = 0;
-	virtual size_t size() const = 0;
-};
-class CudaBuffer : public Resource
-{
-public:
-	CudaBuffer(void *buffer = nullptr, size_t size = 0) : _buffer(buffer), _ownBuffer(false), _size(size) {}
-	CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size)
-	{
-		CHECK_EX(cudaMalloc(&_buffer, size));
-	}
-	CudaBuffer(const CudaBuffer &rhs) = delete;
-	virtual ~CudaBuffer()
-	{
-		if (!_ownBuffer || _buffer == nullptr)
-			return;
-		try
-		{
-			CHECK_EX(cudaFree(_buffer));
-		}
-		catch (std::exception &ex)
-		{
-			Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
-		}
-		_buffer = nullptr;
-		_ownBuffer = false;
-	}
-	virtual void *getBuffer() const { return _buffer; }
-	virtual size_t size() const { return _size; }
-private:
-	void *_buffer;
-	bool _ownBuffer;
-	size_t _size;
-};
-template <typename T>
-class CudaArray : public CudaBuffer
-{
-public:
-	CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {}
-	CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {}
-	CudaArray(const CudaArray<T> &rhs) = delete;
-	size_t n() const { return size() / sizeof(T); }
-	operator T *() { return (T *)getBuffer(); }
-};
-class GraphicsResource : public Resource
-{
-public:
-	cudaGraphicsResource_t getHandler() { return _res; }
-	virtual ~GraphicsResource()
-	{
-		if (_res == nullptr)
-			return;
-		try
-		{
-			CHECK_EX(cudaGraphicsUnregisterResource(_res));
-		}
-		catch (std::exception &ex)
-		{
-			Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
-		}
-		_res = nullptr;
-	}
-	virtual size_t size() const { return _size; }
-protected:
-	cudaGraphicsResource_t _res;
-	size_t _size;
-	GraphicsResource() : _res(nullptr), _size(0) {}
-};
-template<typename T>
-class GlTextureResource : public GraphicsResource
-{
-public:
-	GlTextureResource(GLuint textureID, glm::uvec2 textureSize)
-	{
-		CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
-											 cudaGraphicsRegisterFlagsWriteDiscard));
-		_size = textureSize.x * textureSize.y * sizeof(T);
-		_textureSize = textureSize;
-	}
-	virtual ~GlTextureResource()
-	{
-		cudaGraphicsUnmapResources(1, &_res, 0);
-	}
-	virtual void *getBuffer() const {
-		cudaArray_t buffer;
-		try{
-			CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
-		} catch (...) {
-			return nullptr;
-		}
-		return buffer;
-	}
-	operator T *() { return (T *)getBuffer(); }
-	glm::uvec2 textureSize () { return _textureSize; }
-private:
-	glm::uvec2 _textureSize;
-};
-class Resources
-{
-public:
-	std::map<std::string, Resource *> resources;
-	std::vector<cudaGraphicsResource_t> graphicsResources;
-	void addResource(const std::string &name, Resource *res)
-	{
-		auto gres = dynamic_cast<GraphicsResource *>(res);
-		if (gres != nullptr)
-			graphicsResources.push_back(gres->getHandler());
-		resources[name] = res;
-	}
-	void clear()
-	{
-		resources.clear();
-		graphicsResources.clear();
-	}
-};
-template <typename T>
-void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
-					size_t elemsPerRow = 1)
-{
-	T *hostArr = new T[arr.n()];
-	cudaMemcpy(hostArr, arr.getBuffer(), arr.n() * sizeof(T), cudaMemcpyDeviceToHost);
-	dumpHostBuffer<float>(so, hostArr, arr.n() * sizeof(T),
-						  sizeof(T) / sizeof(float) * elemsPerRow, maxDumpRows);
-	delete[] hostArr;
-}
\ No newline at end of file
--- a/cpp/msl_infer/Sampler.cu
+++ b/cpp/msl_infer/Sampler.cu
 #include "Sampler.h"
-#include "thread_index.h"
+#include "../utils/cuda.h"
-__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth)
+__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth) {
-{
    float pp = glm::dot(p, p);
    float vv = glm::dot(v, v);
    float pv = glm::dot(p, v);
@@ -10,36 +9,36 @@ __device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, floa
    return p + o_depth * v;
 }
-__device__ float _getAngle(float x, float y)
+__device__ float _getAngle(float x, float y) {
-{
    return -atan(x / y) + (y < 0) * (float)M_PI + 0.5f * (float)M_PI;
 }
 /**
- * Dispatch with block_size=(n_samples, 1024)
+ * Dispatch with block_size=(n_samples, *), grid_size=(1, nRays/*)
+ * Index with (sample_idx, ray_idx)
 */
-__global__ void cu_sampleOnRays(glm::vec3 *o_sphericalCoords, float *o_depths, glm::vec3 *rays,
+__global__ void cu_sampleOnRays(float *o_coords, float *o_depths, glm::vec3 *rays, uint nRays,
-                                glm::vec3 rayCenter, float range0, float rangeStep, uint n)
+                                glm::vec3 origin, Range range, bool outputRadius) {
-{
    glm::uvec3 idx3 = IDX3;
-    uint rayIdx = flattenIdx({idx3.y, idx3.z, 0});
-    if (rayIdx >= n)
-        return;
    uint idx = flattenIdx(idx3);
-    float r_reciprocal = rangeStep * idx3.x + range0;
+    uint sampleIdx = idx3.x;
-    glm::vec3 p = _raySphereIntersect(rayCenter, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]);
+    uint rayIdx = idx3.y;
-    o_sphericalCoords[idx] = glm::vec3(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal));
+    if (rayIdx >= nRays)
+        return;
+    float r_reciprocal = range.get(sampleIdx);
+    glm::vec3 p = _raySphereIntersect(origin, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]);
+    glm::vec3 sp(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal));
+    if (outputRadius)
+        ((glm::vec3 *)o_coords)[idx] = sp;
+    else
+        ((glm::vec2 *)o_coords)[idx] = {sp.y, sp.z};
 }
-void Sampler::sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords, sptr<CudaArray<float>> o_depths,
+void Sampler::sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
-                           sptr<CudaArray<glm::vec3>> rays,
+                           sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter) {
-                           glm::vec3 rayCenter)
+    dim3 blkSize(_dispRange.steps(), 1024 / _dispRange.steps());
-{
+    dim3 grdSize(1, (uint)ceil(rays->n() / (float)blkSize.y));
-    dim3 blockSize(_samples, 1024 / _samples);
+    CU_INVOKE(cu_sampleOnRays)
-    dim3 gridSize(1, (uint)ceil(rays->n() / (float)blockSize.y));
+    (*o_coords, *o_depths, *rays, rays->n(), rayCenter, _dispRange, _outputRadius);
-    cu_sampleOnRays<<<gridSize, blockSize>>>(*o_sphericalCoords, *o_depths, *rays, rayCenter,
-                                             _dispRange.x,
-                                             (_dispRange.y - _dispRange.x) / (_samples - 1),
-                                             rays->n());
    CHECK_EX(cudaGetLastError());
 }
\ No newline at end of file
--- a/cpp/msl_infer/Sampler.h
+++ b/cpp/msl_infer/Sampler.h
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
-class Sampler
+class Sampler {
-{
 public:
-    Sampler(glm::vec2 depthRange, uint samples) : _dispRange(1.0f / depthRange.x, 1.0f / depthRange.y),
+    Sampler(glm::vec2 depthRange, uint samples, bool outputRadius)
-                                                  _samples(samples) {}
+        : _dispRange(1.0f / depthRange, samples), _outputRadius(outputRadius) {}
-    void sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords,
+    void sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
-                      sptr<CudaArray<float>> o_depths,
+                      sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter);
-                      sptr<CudaArray<glm::vec3>> rays,
-                      glm::vec3 rayCenter);
 private:
-    glm::vec2 _dispRange;
+    Range _dispRange;
-    uint _samples;
+    bool _outputRadius;
 };
\ No newline at end of file
--- a/cpp/msl_infer/SynthesisPipeline.cpp
+++ b/cpp/msl_infer/SynthesisPipeline.cpp
 #include "SynthesisPipeline.h"
-SynthesisPipeline::SynthesisPipeline(
+SynthesisPipeline::SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples,
-    const std::string &netDir, bool isNmsl, uint batchSize,
+                                     glm::vec2 depthRange, int encodeDim, int coordChns,
-    uint samples) : _batchSize(batchSize),
+                                     float enhanceSigma, float enhanceFe)
-                    _samples(samples),
+    : _nRays(cam->res().x * cam->res().y),
-                    _inferPipeline(new InferPipeline(netDir, isNmsl, batchSize, samples)),
+      _nSamples(nSamples),
-                    _rays(new CudaArray<glm::vec3>(batchSize)),
+      _enhanceSigma(enhanceSigma),
-                    _colors(new CudaArray<glm::vec4>(batchSize))
+      _enhanceFe(enhanceFe),
-{
+      _cam(cam),
-    _glResultBuffer = _createGlResultBuffer(_batchSize);
+      _inferPipeline(new InferPipeline(net, _nRays, nSamples, depthRange, encodeDim, coordChns)),
+      _enhancement(new Enhancement(cam->res())),
+      _rays(new CudaArray<glm::vec3>(_nRays)),
+      _colors(new CudaArray<glm::vec4>(_nRays)) {
+    _glResultBuffer = _createGlResultBuffer(_nRays);
+    _glResultTextures.push_back(_createGlResultTexture(_cam->res()));
 }
-void SynthesisPipeline::run(View &view)
+void SynthesisPipeline::run(View &view) {
-{
    CudaEvent eStart, eGenRays, eInferred, eEnhanced;
    cudaEventRecord(eStart);
@@ -38,8 +42,9 @@ void SynthesisPipeline::run(View &view)
    cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
    {
        std::ostringstream sout;
-        sout << typeid(*this).name() << " => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
+        sout << typeid(*this).name() << " => Total: " << timeTotal
-             << "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)";
+             << "ms (Gen rays: " << timeGenRays << "ms, Infer: " << timeInfer
+             << "ms, Enhance: " << timeEnhance << "ms)";
        Logger::instance.info(sout.str());
    }
@@ -53,13 +58,21 @@ void SynthesisPipeline::run(View &view)
    _uploadResultToTextures();
 }
-GLuint SynthesisPipeline::getGlResultTexture(int index)
+GLuint SynthesisPipeline::getGlResultTexture(int index) { return _glResultTextures[index]; }
-{
-    return _glResultTextures[index];
+void SynthesisPipeline::_genRays(View &view) { view.transVectors(_rays, _cam->localRays()); }
+void SynthesisPipeline::_enhance() { _enhancement->run(_colors, _enhanceSigma, _enhanceFe); }
+void SynthesisPipeline::_uploadResultToTextures() {
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
+    glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _cam->res().x, _cam->res().y, GL_RGBA, GL_FLOAT, 0);
+    glBindTexture(GL_TEXTURE_2D, 0);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 }
-GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res)
+GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res) {
-{
    GLuint textureID;
    glEnable(GL_TEXTURE_2D);
    glGenTextures(1, &textureID);
@@ -72,91 +85,11 @@ GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res)
    return textureID;
 }
-GLuint SynthesisPipeline::_createGlResultBuffer(uint elements)
+GLuint SynthesisPipeline::_createGlResultBuffer(uint elements) {
-{
    GLuint glBuffer;
    glGenBuffers(1, &glBuffer);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glBuffer);
-    glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4),
+    glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4), nullptr, GL_STREAM_DRAW);
-                 nullptr, GL_STREAM_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    return glBuffer;
 }
\ No newline at end of file
-FoveaSynthesisPipeline::FoveaSynthesisPipeline(
-    glm::uvec2 res, float fov,
-    uint samples) : SynthesisPipeline("../nets/fovea_mono/", false, res.x * res.y, samples),
-                    _foveaCamera(fov, res / 2u, res),
-                    _enhancement(new Enhancement(res))
-{
-    _glResultTextures.push_back(_createGlResultTexture(res));
-}
-void FoveaSynthesisPipeline::_genRays(View &view)
-{
-    view.transVectors(_rays, _foveaCamera.localRays());
-}
-void FoveaSynthesisPipeline::_enhance()
-{
-    _enhancement->run(_colors, 3.0f, 0.2f);
-}
-void FoveaSynthesisPipeline::_uploadResultToTextures()
-{
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
-    glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _foveaCamera.res().x, _foveaCamera.res().y,
-                    GL_RGBA, GL_FLOAT, 0);
-    glBindTexture(GL_TEXTURE_2D, 0);
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-}
-PeriphSynthesisPipeline::PeriphSynthesisPipeline(
-    glm::uvec2 midRes, float midFov, glm::uvec2 periphRes, float periphFov,
-    uint samples) : SynthesisPipeline("../nets/periph/", false,
-                                      midRes.x * midRes.y + periphRes.x * periphRes.y,
-                                      samples),
-                    _midCamera(midFov, midRes / 2u, midRes),
-                    _periphCamera(periphFov, periphRes / 2u, periphRes),
-                    _midEnhancement(new Enhancement(midRes)),
-                    _periphEnhancement(new Enhancement(periphRes))
-{
-    uint midPixels = midRes.x * midRes.y;
-    uint periphPixels = periphRes.x * periphRes.y;
-    _midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*_rays, midPixels));
-    _periphRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(
-        (glm::vec3 *)*_rays + midPixels, periphPixels));
-    _glResultTextures.push_back(_createGlResultTexture(midRes));
-    _glResultTextures.push_back(_createGlResultTexture(periphRes));
-    _midColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(*_colors, midPixels));
-    _periphColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(
-        (glm::vec4 *)*_colors + midPixels, periphPixels));
-}
-void PeriphSynthesisPipeline::_genRays(View &view)
-{
-    view.transVectors(_midRays, _midCamera.localRays());
-    view.transVectors(_periphRays, _periphCamera.localRays());
-}
-void PeriphSynthesisPipeline::_enhance()
-{
-    _midEnhancement->run(_midColors, 5.0f, 0.2f);
-    _periphEnhancement->run(_periphColors, 5.0f, 0.2f);
-}
-void PeriphSynthesisPipeline::_uploadResultToTextures()
-{
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
-    glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _midCamera.res().x, _midCamera.res().y,
-                    GL_RGBA, GL_FLOAT, 0);
-    glBindTexture(GL_TEXTURE_2D, _glResultTextures[1]);
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
-                    _periphCamera.res().x, _periphCamera.res().y, GL_RGBA, GL_FLOAT,
-                    (void *)(_midCamera.res().x * _midCamera.res().y * sizeof(glm::vec4)));
-    glBindTexture(GL_TEXTURE_2D, 0);
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-}
--- a/cpp/msl_infer/SynthesisPipeline.h
+++ b/cpp/msl_infer/SynthesisPipeline.h
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 #include "InferPipeline.h"
 #include "View.h"
 #include "Enhancement.h"
-class SynthesisPipeline
+class SynthesisPipeline {
-{
 public:
-    SynthesisPipeline(const std::string &netDir, bool isNmsl,
+    SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples, glm::vec2 depthRange,
-                      uint batchSize, uint samples);
+                      int encodeDim, int coordChns, float enhanceSigma, float enhanceFe);
-    void run(View& view);
+    void run(View &view);
    GLuint getGlResultTexture(int index);
 protected:
-    uint _batchSize;
+    uint _nRays;
-    uint _samples;
+    uint _nSamples;
-    std::vector<GLuint> _glResultTextures;
+    float _enhanceSigma;
-    GLuint _glResultBuffer;
+    float _enhanceFe;
+    sptr<Camera> _cam;
    sptr<InferPipeline> _inferPipeline;
+    sptr<Enhancement> _enhancement;
    sptr<CudaArray<glm::vec3>> _rays;
    sptr<CudaArray<glm::vec4>> _colors;
+    std::vector<GLuint> _glResultTextures;
+    GLuint _glResultBuffer;
-    virtual void _genRays(View& view) = 0;
+    void _genRays(View &view);
-    virtual void _enhance() = 0;
+    void _enhance();
-    virtual void _uploadResultToTextures() = 0;
+    void _uploadResultToTextures();
    GLuint _createGlResultTexture(glm::uvec2 res);
    GLuint _createGlResultBuffer(uint elements);
-};
-class FoveaSynthesisPipeline : public SynthesisPipeline
-{
-public:
-    FoveaSynthesisPipeline(glm::uvec2 res, float fov, uint samples);
-protected:
-    virtual void _genRays(View& view);
-    virtual void _enhance();
-    virtual void _uploadResultToTextures();
-private:
-    Camera _foveaCamera;
-    sptr<Enhancement> _enhancement;
-};
-class PeriphSynthesisPipeline : public SynthesisPipeline
-{
-public:
-    PeriphSynthesisPipeline(glm::uvec2 midRes, float midFov,
-                            glm::uvec2 periphRes, float periphFov,
-                            uint samples);
-protected:
-    virtual void _genRays(View& view);
-    virtual void _enhance();
-    virtual void _uploadResultToTextures();
-private:
-    Camera _midCamera;
-    Camera _periphCamera;
-    sptr<CudaArray<glm::vec3>> _midRays;
-    sptr<CudaArray<glm::vec3>> _periphRays;
-    sptr<CudaArray<glm::vec4>> _midColors;
-    sptr<CudaArray<glm::vec4>> _periphColors;
-    sptr<Enhancement> _midEnhancement;
-    sptr<Enhancement> _periphEnhancement;
 };
\ No newline at end of file
--- a/cpp/msl_infer/View.cu
+++ b/cpp/msl_infer/View.cu
 #include "View.h"
 #include <cuda_runtime.h>
-#include "thread_index.h"
+#include "../utils/cuda.h"
 __global__ void cu_genLocalRays(glm::vec3 *o_rays, glm::vec2 f, glm::vec2 c, glm::uvec2 res)
 {

--- a/cpp/msl_infer/View.h
+++ b/cpp/msl_infer/View.h
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 class Camera {

--- a/cpp/msl_infer_test/Makefile
+++ b/cpp/msl_infer_test/Makefile
 OUTNAME_RELEASE = msl_infer_test
 OUTNAME_DEBUG   = msl_infer_test_debug
-EXTRA_DIRECTORIES = ../msl_infer
+EXTRA_DIRECTORIES = ../msl_infer ../utils
 .NOTPARALLEL:
 MAKEFILE ?= ../Makefile.config
 include $(MAKEFILE)
--- a/cpp/msl_infer_test/main.cpp
+++ b/cpp/msl_infer_test/main.cpp
@@ -7,18 +7,15 @@
 #include "../msl_infer/View.h"
 #include "../glm/gtx/transform.hpp"
-static const struct
+static const struct {
-{
    float x, y;
    float u, v;
-} vertices[4] = {
+} vertices[4] = {{-1.0f, -1.0f, 0.f, 1.f},
-	{-1.0f, -1.0f, 0.f, 1.f},
                 {1.0f, -1.0f, 1.f, 1.f},
                 {1.0f, 1.0f, 1.f, 0.f},
                 {-1.0f, 1.0f, 0.f, 0.f}};
-static const char *vertex_shader_text =
+static const char *vertex_shader_text = "#version 300 es\n"
-	"#version 300 es\n"
                                        "uniform mat4 MVP;\n"
                                        "in vec2 vUV;\n"
                                        "in vec2 vPos;\n"
@@ -60,8 +57,7 @@ static const char *fragment_shader_text =
    "    FragColor = c;\n"
    "}\n";
-void inferFovea(void *o_imageData, View &view)
+/*void inferFovea(void *o_imageData, View &view) {
-{
    glm::uvec2 foveaRes(128, 128);
    size_t foveaPixels = foveaRes.x * foveaRes.y;
    size_t totalPixels = foveaPixels;
@@ -106,8 +102,7 @@ void inferFovea(void *o_imageData, View &view)
    cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
 }
-void inferOther(void *o_imageData, View &view)
+void inferOther(void *o_imageData, View &view) {
-{
    glm::uvec2 midRes(256, 256);
    glm::uvec2 periphRes(230, 256);
    size_t midPixels = midRes.x * midRes.y;
@@ -123,7 +118,8 @@ void inferOther(void *o_imageData, View &view)
    auto periphLocalRays = periphCam.localRays();
    auto rays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(totalPixels));
    auto midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*rays, midPixels));
-	auto periphRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>((glm::vec3 *)*rays + midPixels, periphPixels));
+    auto periphRays = sptr<CudaArray<glm::vec3>>(
+        new CudaArray<glm::vec3>((glm::vec3 *)*rays + midPixels, periphPixels));
    auto colors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(totalPixels));
    CudaEvent eStart, eGenRays, eInferred, eEnhanced;
@@ -157,28 +153,25 @@ void inferOther(void *o_imageData, View &view)
        Logger::instance.info(sout.str());
    }
    cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
-}
+}*/
-static void error_callback(int error, const char *description)
+static void error_callback(int error, const char *description) {
-{
    fprintf(stderr, "Error: %s\n", description);
 }
-static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods)
+static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) {
-{
    if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
        glfwSetWindowShouldClose(window, GLFW_TRUE);
 }
-GLFWwindow *initGl(uint windowWidth, uint windowHeight)
+GLFWwindow *initGl(uint windowWidth, uint windowHeight) {
-{
    glfwSetErrorCallback(error_callback);
    if (!glfwInit())
        return nullptr;
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
-	//glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+    // glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
-	//glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
+    // glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
    /*glfwWindowHint(GLFW_DEPTH_BITS, 0);
    glfwWindowHint(GLFW_STENCIL_BITS, 0);
@@ -190,8 +183,7 @@ GLFWwindow *initGl(uint windowWidth, uint windowHeight)
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
    */
    GLFWwindow *window = glfwCreateWindow(windowWidth, windowHeight, "LearnOpenGL", NULL, NULL);
-	if (!window)
+    if (!window) {
-	{
        glfwTerminate();
        return nullptr;
    }
@@ -214,8 +206,7 @@ GLFWwindow *initGl(uint windowWidth, uint windowHeight)
    return window;
 }
-GLuint createGlTexture(uint width, uint height)
+GLuint createGlTexture(uint width, uint height) {
-{
    GLuint textureID;
    glEnable(GL_TEXTURE_2D);
    glGenTextures(1, &textureID);
@@ -228,34 +219,29 @@ GLuint createGlTexture(uint width, uint height)
    return textureID;
 }
-void checkCompileErrors(unsigned int shader, std::string type)
+void checkCompileErrors(unsigned int shader, std::string type) {
-{
    int success;
    char infoLog[1024];
-	if (type != "PROGRAM")
+    if (type != "PROGRAM") {
-	{
        glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
-		if (!success)
+        if (!success) {
-		{
            glGetShaderInfoLog(shader, 1024, NULL, infoLog);
            std::cout << "ERROR::SHADER_COMPILATION_ERROR of type: " << type << "\n"
-					  << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
+                      << infoLog << "\n -- --------------------------------------------------- -- "
+                      << std::endl;
        }
-	}
+    } else {
-	else
-	{
        glGetProgramiv(shader, GL_LINK_STATUS, &success);
-		if (!success)
+        if (!success) {
-		{
            glGetProgramInfoLog(shader, 1024, NULL, infoLog);
            std::cout << "ERROR::PROGRAM_LINKING_ERROR of type: " << type << "\n"
-					  << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
+                      << infoLog << "\n -- --------------------------------------------------- -- "
+                      << std::endl;
        }
    }
 }
-GLuint loadShaderProgram()
+GLuint loadShaderProgram() {
-{
    GLuint vertex_shader, fragment_shader, program;
    vertex_shader = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vertex_shader, 1, &vertex_shader_text, NULL);
@@ -277,8 +263,7 @@ GLuint loadShaderProgram()
    return program;
 }
-int main(void)
+int main(void) {
-{
    Logger::instance.logLevel = 3;
    GLFWwindow *window;
@@ -302,20 +287,34 @@ int main(void)
    vcol_location = glGetAttribLocation(program, "vUV");
    glEnableVertexAttribArray(vpos_location);
-	glVertexAttribPointer(vpos_location, 2, GL_FLOAT, GL_FALSE,
+    glVertexAttribPointer(vpos_location, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]), (void *)0);
-						  sizeof(vertices[0]), (void *)0);
    glEnableVertexAttribArray(vcol_location);
-	glVertexAttribPointer(vcol_location, 2, GL_FLOAT, GL_FALSE,
+    glVertexAttribPointer(vcol_location, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]),
-						  sizeof(vertices[0]), (void *)(sizeof(float) * 2));
+                          (void *)(sizeof(float) * 2));
-	sptr<FoveaSynthesisPipeline> foveaSynthesisPipeline(
+    sptr<Msl> foveaNet(new Msl());
-		new FoveaSynthesisPipeline({128, 128}, 20, 32));
+    foveaNet->load("");
-	sptr<PeriphSynthesisPipeline> periphSynthesisPipeline(
+    sptr<Msl> periphNet(new Msl());
-		new PeriphSynthesisPipeline({256, 256}, 45, {230, 256}, 110, 16));
+    periphNet->load("");
+    sptr<Camera> foveaCam(new Camera(20, {128, 128}, {256, 256}));
+    sptr<Camera> midCam(new Camera(45, {128, 128}, {256, 256}));
+    sptr<Camera> periphCam(new Camera(110, {115, 128}, {230, 256}));
+    uint nSamples = 64;
+    uint encodeDim = 6;
+    uint coordChns = 2;
+    glm::vec2 depthRange(1.0f, 7.0f);
+    sptr<SynthesisPipeline> synthesisPipelines[] = {
+        sptr<SynthesisPipeline>(new SynthesisPipeline(foveaNet, foveaCam, nSamples, depthRange,
+                                                      encodeDim, coordChns, 3.0f, 0.2f)),
+        sptr<SynthesisPipeline>(new SynthesisPipeline(periphNet, midCam, nSamples, depthRange,
+                                                      encodeDim, coordChns, 5.0f, 0.2f)),
+        sptr<SynthesisPipeline>(new SynthesisPipeline(periphNet, periphCam, nSamples, depthRange,
+                                                      encodeDim, coordChns, 5.0f, 0.2f)),
+    };
    View view({}, {});
-	auto glFoveaTex = foveaSynthesisPipeline->getGlResultTexture(0);
+    auto glFoveaTex = synthesisPipelines[0]->getGlResultTexture(0);
-	auto glMidTex = periphSynthesisPipeline->getGlResultTexture(0);
+    auto glMidTex = synthesisPipelines[1]->getGlResultTexture(0);
-	auto glPeriphTex = periphSynthesisPipeline->getGlResultTexture(1);
+    auto glPeriphTex = synthesisPipelines[2]->getGlResultTexture(0);
    Logger::instance.info("Start main loop");
@@ -328,10 +327,9 @@ int main(void)
    GLuint queries[1];
    glGenQueries(1, queries);
-	while (!glfwWindowShouldClose(window))
+    while (!glfwWindowShouldClose(window)) {
-	{
+		for (int i = 0; i < 3; ++i)
-		foveaSynthesisPipeline->run(view);
+			synthesisPipelines[i]->run(view);
-		periphSynthesisPipeline->run(view);
        glClear(GL_COLOR_BUFFER_BIT);
@@ -384,8 +382,8 @@ int main(void)
        glfwPollEvents();
    }
-	foveaSynthesisPipeline = nullptr;
+	for (int i = 0; i < 3; ++i)
-	periphSynthesisPipeline = nullptr;
+		synthesisPipelines[i] = nullptr;
    glfwDestroyWindow(window);

--- a/cpp/msl_infer/Formatter.h
+++ b/cpp/msl_infer/Formatter.h
--- a/cpp/msl_infer/Logger.cpp
+++ b/cpp/msl_infer/Logger.cpp
--- a/cpp/msl_infer/Logger.h
+++ b/cpp/msl_infer/Logger.h
@@ -39,7 +39,7 @@ public:
 		return false;
 	}
-	virtual void log(nv::ILogger::Severity severity, const char* msg) override {
+	virtual void log(nv::ILogger::Severity severity, const char* msg) noexcept {
 		if ((int)severity > logLevel)
 			return;
 		if (externalLogFunc == nullptr) {

--- a/cpp/utils/Resource.h
+++ b/cpp/utils/Resource.h
+#pragma once
+#include <map>
+#include <vector>
+class Resource {
+  public:
+    virtual ~Resource() {}
+    virtual void *getBuffer() const = 0;
+    virtual size_t size() const = 0;
+};
+class CudaBuffer : public Resource {
+  public:
+    CudaBuffer(void *buffer = nullptr, size_t size = 0)
+        : _buffer(buffer), _ownBuffer(false), _size(size) {}
+    CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size) {
+        CHECK_EX(cudaMalloc(&_buffer, size));
+    }
+    CudaBuffer(const CudaBuffer &rhs) = delete;
+    virtual ~CudaBuffer() {
+        if (!_ownBuffer || _buffer == nullptr)
+            return;
+        try {
+            CHECK_EX(cudaFree(_buffer));
+        } catch (std::exception &ex) {
+            Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
+        }
+        _buffer = nullptr;
+        _ownBuffer = false;
+    }
+    virtual void *getBuffer() const { return _buffer; }
+    template <class T> T *getBuffer() const { return (T *)getBuffer(); }
+    virtual size_t size() const { return _size; }
+  private:
+    void *_buffer;
+    bool _ownBuffer;
+    size_t _size;
+};
+template <typename T> class CudaArray : public CudaBuffer {
+  public:
+    CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {}
+    CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {}
+    CudaArray(const CudaArray<T> &rhs) = delete;
+    size_t n() const { return size() / sizeof(T); }
+    operator T *() { return (T *)getBuffer(); }
+};
+class GraphicsResource : public Resource {
+  public:
+    cudaGraphicsResource_t getHandler() { return _res; }
+    virtual ~GraphicsResource() {
+        if (_res == nullptr)
+            return;
+        try {
+            CHECK_EX(cudaGraphicsUnregisterResource(_res));
+        } catch (std::exception &ex) {
+            Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
+        }
+        _res = nullptr;
+    }
+    virtual size_t size() const { return _size; }
+  protected:
+    cudaGraphicsResource_t _res;
+    size_t _size;
+    GraphicsResource() : _res(nullptr), _size(0) {}
+};
+template <typename T> class GlTextureResource : public GraphicsResource {
+  public:
+    GlTextureResource(GLuint textureID, glm::uvec2 textureSize) {
+        CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
+                                             cudaGraphicsRegisterFlagsWriteDiscard));
+        _size = textureSize.x * textureSize.y * sizeof(T);
+        _textureSize = textureSize;
+    }
+    virtual ~GlTextureResource() { cudaGraphicsUnmapResources(1, &_res, 0); }
+    virtual void *getBuffer() const {
+        cudaArray_t buffer;
+        try {
+            CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
+        } catch (...) {
+            return nullptr;
+        }
+        return buffer;
+    }
+    operator T *() { return (T *)getBuffer(); }
+    glm::uvec2 textureSize() { return _textureSize; }
+  private:
+    glm::uvec2 _textureSize;
+};
+class Resources {
+  public:
+    std::map<std::string, Resource *> resources;
+    std::vector<cudaGraphicsResource_t> graphicsResources;
+    void addResource(const std::string &name, Resource *res) {
+        auto gres = dynamic_cast<GraphicsResource *>(res);
+        if (gres != nullptr)
+            graphicsResources.push_back(gres->getHandler());
+        resources[name] = res;
+    }
+    void clear() {
+        resources.clear();
+        graphicsResources.clear();
+    }
+};
+template <typename T>
+void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
+                    size_t elemsPerRow = 1) {
+    T *hostArr = new T[arr.n()];
+    cudaMemcpy(hostArr, arr.getBuffer(), arr.n() * sizeof(T), cudaMemcpyDeviceToHost);
+    dumpHostBuffer<float>(so, hostArr, arr.n() * sizeof(T), sizeof(T) / sizeof(float) * elemsPerRow,
+                          maxDumpRows);
+    delete[] hostArr;
+}
\ No newline at end of file
--- a/cpp/utils/common.h
+++ b/cpp/utils/common.h
+#pragma once
+#include <memory>
+#include <stdexcept>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <GL/glew.h>
+#include <cuda_gl_interop.h>
+#include "../glm/glm.hpp"
+#include "Logger.h"
+inline unsigned int getElementSize(nv::DataType t) {
+    switch (t) {
+    case nv::DataType::kINT32:
+        return 4;
+    case nv::DataType::kFLOAT:
+        return 4;
+    case nv::DataType::kHALF:
+        return 2;
+    case nv::DataType::kBOOL:
+    case nv::DataType::kINT8:
+        return 1;
+    }
+    throw std::runtime_error("Invalid DataType.");
+    return 0;
+}
+template <typename T> void dumpRow(std::ostream &os, T *buf, size_t n) {
+    os << buf[0];
+    for (size_t i = 1; i < n; ++i) {
+        os << " " << buf[i];
+    }
+    os << std::endl;
+}
+template <typename T>
+void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount,
+                    size_t maxDumpRows = 0) {
+    T *typedBuf = static_cast<T *>(buf);
+    size_t numItems = bufSize / sizeof(T);
+    size_t nInLastRow = numItems % rowCount;
+    size_t rows;
+    if (nInLastRow == 0) {
+        rows = numItems / rowCount;
+        nInLastRow = rowCount;
+    } else {
+        rows = numItems / rowCount + 1;
+    }
+    if (maxDumpRows == 0) {
+        for (size_t i = 0; i < rows - 1; ++i) {
+            dumpRow(os, typedBuf, rowCount);
+            typedBuf += rowCount;
+        }
+        dumpRow(os, typedBuf, nInLastRow);
+    } else {
+        for (size_t i = 0; i < maxDumpRows / 2; ++i)
+            dumpRow(os, typedBuf + i * rowCount, rowCount);
+        os << "..." << std::endl;
+        for (size_t i = rows - maxDumpRows + maxDumpRows / 2; i < rows - 1; ++i)
+            dumpRow(os, typedBuf + i * rowCount, rowCount);
+        dumpRow(os, typedBuf + (rows - 1) * rowCount, nInLastRow);
+    }
+}
+class CudaStream {
+public:
+    CudaStream() { cudaStreamCreate(&stream); }
+    operator cudaStream_t() { return stream; }
+    virtual ~CudaStream() { cudaStreamDestroy(stream); }
+private:
+    cudaStream_t stream;
+};
+class CudaEvent {
+public:
+    CudaEvent() { cudaEventCreate(&mEvent); }
+    operator cudaEvent_t() { return mEvent; }
+    virtual ~CudaEvent() { cudaEventDestroy(mEvent); }
+private:
+    cudaEvent_t mEvent;
+};
+struct CudaMapScope {
+    std::vector<cudaGraphicsResource_t> resources_;
+    cudaStream_t stream_;
+    CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
+                 cudaStream_t stream = nullptr)
+        : resources_(resources), stream_(stream) {}
+    ~CudaMapScope() {
+        if (!resources_.empty())
+            cudaGraphicsUnmapResources(resources_.size(), resources_.data(), stream_);
+    }
+    cudaError_t map() {
+        if (!resources_.empty())
+            return cudaGraphicsMapResources(resources_.size(), resources_.data(), stream_);
+        return cudaSuccess;
+    }
+};
+template <typename T> struct Destroy {
+    void operator()(T *t) {
+        if (t != nullptr)
+            t->destroy();
+    }
+};
+class Range {
+public:
+    Range(glm::vec2 bound, uint steps)
+        : _start(bound.x), _step((bound.y - bound.x) / (steps - 1)), _steps(steps) {}
+    __host__ __device__ float get(uint i) { return _start + i * _step; }
+    __host__ __device__ float start() { return _start; }
+    __host__ __device__ float stop() { return _start + _step * _steps; }
+    __host__ __device__ float steps() { return _steps; }
+private:
+    float _start;
+    float _step;
+    uint _steps;
+};
+template <class T> using uptr = std::unique_ptr<T, ::Destroy<T>>;
+template <class T> using sptr = std::shared_ptr<T>;
+#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
+#include "Resource.h"
+#include "Formatter.h"
\ No newline at end of file
--- a/cpp/utils/cuda.h
+++ b/cpp/utils/cuda.h
+#include "thread_index.h"
+#ifdef __INTELLISENSE__
+#define CU_INVOKE(__func__) __func__
+#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__
+#else
+#define CU_INVOKE(__func__) __func__<<<grdSize, blkSize>>>
+#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__<<<__grdSize__, __blkSize__>>>
+#endif
+inline uint ceilDiv(uint a, uint b) { return (uint)ceil(a / (float)b); }
\ No newline at end of file
--- a/cpp/msl_infer/half.h
+++ b/cpp/msl_infer/half.h
--- a/cpp/msl_infer/thread_index.h
+++ b/cpp/msl_infer/thread_index.h