Commit 1bc644a1 authored by Nianchen Deng's avatar Nianchen Deng
Browse files

sync

parent 6294701e
...@@ -64,8 +64,4 @@ public: ...@@ -64,8 +64,4 @@ public:
} }
externalLogFunc((int)severity, msg); externalLogFunc((int)severity, msg);
} }
}; };
\ No newline at end of file
// Passes an error code to Logger::instance.checkErr() together with the call site's file
// and line. When checkErr() returns false, the enclosing function executes `return false`,
// so CHECK may only be used inside functions whose return type accepts `false`.
#define CHECK(errCode_)                                                     \
    do {                                                                    \
        if (!Logger::instance.checkErr((errCode_), __FILE__, __LINE__))     \
            return false;                                                   \
    } while (0)

// Same check as CHECK, but a failed check throws std::exception instead of returning.
#define CHECK_EX(errCode_)                                                  \
    do {                                                                    \
        if (!Logger::instance.checkErr((errCode_), __FILE__, __LINE__))     \
            throw std::exception();                                         \
    } while (0)
#include "FsNeRF.h"
namespace fields
{
    /// Creates the underlying Net and loads it from `netPath`.
    /// @param netPath path handed to Net::load()
    /// @throws std::runtime_error if Net::load() returns false; any exception thrown by
    ///         Net::load() itself is propagated after the net has been released.
    FsNeRF::FsNeRF(const std::string &netPath) : _net(nullptr)
    {
        _net = new Net();
        bool loaded = false;
        try
        {
            loaded = _net->load(netPath);
        }
        catch (...)
        {
            // Do not leak the half-initialised net when load() throws.
            dispose();
            throw;
        }
        if (!loaded)
        {
            dispose();
            throw std::runtime_error("Failed to load net: " + netPath);
        }
    }

    /// Binds the three resources to the net under the names "Encoded", "Depths" and "Colors".
    /// @throws std::runtime_error if called after dispose()
    void FsNeRF::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors)
    {
        // After dispose() there is no net left; fail loudly instead of dereferencing nullptr.
        if (_net == nullptr)
            throw std::runtime_error("FsNeRF::bindResources called on a disposed net");
        _net->bindResource("Encoded", resEncoded);
        _net->bindResource("Depths", resDepths);
        _net->bindResource("Colors", resColors);
    }

    /// Runs Net::infer() and returns its result; returns false if the net has been disposed.
    bool FsNeRF::infer()
    {
        return _net != nullptr && _net->infer();
    }

    /// Disposes and deletes the owned net. Safe to call more than once.
    void FsNeRF::dispose()
    {
        if (_net == nullptr)
            return;
        _net->dispose();
        delete _net;
        _net = nullptr;
    }
}
\ No newline at end of file
#pragma once
#include "../utils/common.h"
#include "Net.h"
namespace fields
{
    /// Owns a Net loaded from a file and forwards resource binding and inference to it.
    class FsNeRF
    {
    public:
        /// Loads the net from `netPath`; the definition throws std::runtime_error on failure.
        FsNeRF(const std::string &netPath);

        /// Releases the owned net. The class has virtual members, so the destructor is
        /// virtual too; dispose() is idempotent, so an earlier explicit dispose() is fine.
        /// Note: inside a destructor this resolves to FsNeRF::dispose, not to an override.
        virtual ~FsNeRF() { dispose(); }

        // The raw _net pointer is uniquely owned: copying would lead to a double delete.
        FsNeRF(const FsNeRF &) = delete;
        FsNeRF &operator=(const FsNeRF &) = delete;

        /// Binds the encoded-input, depth and color resources to the net.
        virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors);

        /// Runs inference; returns the net's success flag.
        virtual bool infer();

        /// Disposes and deletes the owned net.
        virtual void dispose();

    private:
        Net *_net; // owned; nullptr once disposed
    };
}
\ No newline at end of file
#include "Encoder.h"
#include <vector>
#include "../utils/cuda.h"
/// idx3.z = 0: x, y, z, sin(x), sin(y), sin(z), cos(x), cos(y), cos(z)
/// idx3.z = 1: sin(2x), sin(2y), sin(2z), cos(2x), cos(2y), cos(2z)
/// ...
/// idx3.z = n_freq-1: sin(2^(n_freq-1)x), sin(2^(n_freq-1)y), sin(2^(n_freq-1)z),
/// cos(2^(n_freq-1)x), cos(2^(n_freq-1)y), cos(2^(n_freq-1)z)
/// Dispatch (n, in_chns, n_freqs)
/// Copies each sample's raw input channels into the first `inChns` slots of its output row.
/// The x index selects the sample and the y index the channel; blockDim.y is used as the
/// input channel count, and an output row holds inChns * (2 * nFreqs + 1) floats.
__global__ void cu_encode0(float *output, float *input, uint n, uint nFreqs) {
    glm::uvec3 tid = IDX3;
    const uint sample = tid.x;
    const uint channel = tid.y;
    if (sample < n) {
        const uint nIn = blockDim.y;
        const uint rowLen = nIn * (nFreqs * 2 + 1);
        output[sample * rowLen + channel] = input[sample * nIn + channel];
    }
}
/// Writes sin/cos of `freqs[freq] * input` for one (sample, channel, frequency) triple.
/// Index mapping: x = sample, y = input channel, z = frequency; blockDim.y and blockDim.z
/// are read as the channel count and the frequency count. When `catInput` is set, the raw
/// input occupies the first `inChns` slots of each output row and is written by the
/// thread handling frequency 0.
__global__ void cu_encode(float *output, float *input, float *freqs, uint n, bool catInput) {
    glm::uvec3 tid = IDX3;
    if (tid.x >= n)
        return;

    const uint nIn = blockDim.y;
    const uint nBands = blockDim.z;
    const uint skip = catInput ? 1u : 0u; // leading slot group reserved for the raw input
    const uint src = tid.x * nIn + tid.y;
    const uint rowLen = nIn * (nBands * 2 + skip);
    const uint dst = tid.x * rowLen + tid.y;

    if (catInput && tid.z == 0)
        output[dst] = input[src];

    float sinVal, cosVal;
    const float phase = freqs[tid.z] * input[src];
    __sincosf(phase, &sinVal, &cosVal);

    // The cos group follows the sin group of the same frequency, one channel-stride later.
    const uint sinSlot = dst + nIn * (tid.z * 2 + skip);
    output[sinSlot] = sinVal;
    output[sinSlot + nIn] = cosVal;
}
/// Two-channel variant of the encoder that processes both components as one glm::vec2.
/// Index mapping: x = sample, y = frequency; blockDim.y is read as the frequency count.
/// Each output row holds 2 * nFreqs + 1 vec2 values: the raw input followed by the
/// sin/cos pair of every frequency.
__global__ void cu_encode2(glm::vec2 *output, glm::vec2 *input, float *freqs, uint n) {
    glm::uvec3 tid = IDX3;
    const uint sample = tid.x;
    const uint band = tid.y;
    if (sample >= n)
        return;

    const uint rowLen = blockDim.y * 2 + 1;
    const uint rowStart = sample * rowLen;
    if (band == 0)
        output[rowStart] = input[sample];

    const glm::vec2 phase = freqs[band] * input[sample];
    glm::vec2 sinPart, cosPart;
    __sincosf(phase.x, &sinPart.x, &cosPart.x);
    __sincosf(phase.y, &sinPart.y, &cosPart.y);

    const uint slot = rowStart + (band * 2 + 1);
    output[slot] = sinPart;
    output[slot + 1] = cosPart;
}
/**
 * @brief Launches cu_encode to encode `input` into `output`.
 *
 * The block shape is (samples per block, _chns, _multires), sized for at most 1024
 * threads. Empty input is a no-op. A configuration that cannot be launched (for example
 * _chns * _multires above 1024, or _multires == 0) is left to fail at launch, where
 * CHECK_EX reports it.
 *
 * @param output encoded data, n x out_chns
 * @param input coord data, n x in_chns
 */
void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input) {
    if (_chns == 0)
        return; // no channels: nothing to encode, and the divisions below would be by zero
    uint n = input->n() / _chns;
    if (n == 0)
        return; // a zero-sized grid is an invalid launch configuration

    // Samples per block. Clamp to 1 so ceilDiv never divides by zero; an oversized or
    // empty block shape then makes the launch itself fail and CHECK_EX throws.
    uint samplesPerBlock = _multires > 0 ? 1024 / _chns / _multires : 0;
    if (samplesPerBlock == 0)
        samplesPerBlock = 1;

    // CU_INVOKE presumably picks up the local names blkSize/grdSize — keep them.
    dim3 blkSize(samplesPerBlock, _chns, _multires);
    dim3 grdSize(ceilDiv(n, blkSize.x), 1, 1);
    CU_INVOKE(cu_encode)(*output, *input, *_freqs, n, _catInput);
    CHECK_EX(cudaGetLastError());
}
/// Builds the frequency table 1, 2, 4, ... (one entry per `_multires`) and uploads it to
/// the device array `_freqs`.
/// A failed upload is reported through CHECK_EX (throws) instead of being ignored.
void Encoder::_genFreqArray() {
    // std::vector frees the staging buffer even if an allocation or the copy throws,
    // and an empty table (_multires == 0) no longer writes past a zero-length array.
    std::vector<float> hostFreqs(_multires);
    float freq = 1.0f;
    for (auto &entry : hostFreqs) {
        entry = freq;
        freq *= 2.0f;
    }

    _freqs = sptr<CudaArray<float>>(new CudaArray<float>(_multires));
    if (!hostFreqs.empty()) {
        CHECK_EX(cudaMemcpy(_freqs->getBuffer(), hostFreqs.data(),
                            hostFreqs.size() * sizeof(float), cudaMemcpyHostToDevice));
    }
}
#pragma once
#include "../utils/common.h"
/// Encodes coordinate channels with sin/cos terms at `multires` frequencies, optionally
/// keeping the raw input as the leading channels of every output row.
class Encoder {
public:
    /// @param multires number of frequencies
    /// @param chns     number of input channels per sample
    /// @param catInput whether the raw input is part of the encoded output
    Encoder(unsigned int multires, unsigned int chns, bool catInput)
        : _multires(multires), _chns(chns), _catInput(catInput) {
        _genFreqArray();
    }

    /// Number of output channels per sample: chns * (2 * multires [+ 1 if catInput]).
    unsigned int outDim() const {
        const unsigned int termsPerChannel = (_catInput ? 1u : 0u) + _multires * 2;
        return _chns * termsPerChannel;
    }

    /// Encodes `input` (n x chns) into `output` (n x outDim()).
    void encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input);

private:
    unsigned int _multires;
    unsigned int _chns;
    bool _catInput;
    sptr<CudaArray<float>> _freqs; // device-side frequency table filled by _genFreqArray()

    /// Fills `_freqs`; called once from the constructor.
    void _genFreqArray();
};
\ No newline at end of file
#include "InferPipeline.h"
#include "Nmsl2.h"
/// Builds the sample -> encode -> infer -> render pipeline for `nRays` rays with
/// `nSamplesPerRay` samples each, allocates the intermediate device buffers and binds
/// them to the net.
/// NOTE(review): Encoder is constructed with two arguments here — confirm this matches
/// the Encoder constructor of the revision in use.
InferPipeline::InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay, glm::vec2 depthRange,
                             uint encodeDim, uint coordChns)
    : _nRays(nRays),
      _nSamplesPerRay(nSamplesPerRay),
      _coordChns(coordChns),
      _net(net),
      _sampler(new Sampler(depthRange, nSamplesPerRay, coordChns == 3)),
      _encoder(new Encoder(encodeDim, coordChns)),
      _renderer(new Renderer())
{
    // One slot per (ray, sample) pair.
    const uint totalSamples = _nRays * _nSamplesPerRay;

    _coords.reset(new CudaArray<float>(totalSamples * coordChns));
    _depths.reset(new CudaArray<float>(totalSamples));
    _encoded.reset(new CudaArray<float>(totalSamples * _encoder->outDim()));
    _layeredColors.reset(new CudaArray<glm::vec4>(totalSamples));

    _net->bindResources(_encoded.get(), _depths.get(), _layeredColors.get());
}
/// Runs the four pipeline stages (sample, encode, infer, render) on the first `_nRays`
/// entries of `rays` and writes the result into the first `_nRays` entries of `o_colors`.
/// The device is synchronized after each of the first three stages; when `showPerf` is
/// set, it is synchronized once more and the per-stage timings are logged.
void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
                        glm::vec3 origin, bool showPerf) {
    // Narrow both buffers to the range this pipeline is responsible for.
    rays.reset(rays->subArray(0, _nRays));
    o_colors.reset(o_colors->subArray(0, _nRays));

    CudaEvent evBegin, evSampled, evEncoded, evInferred, evRendered;
    cudaEventRecord(evBegin);

    _sampler->sampleOnRays(_coords, _depths, rays, origin);
    CHECK_EX(cudaDeviceSynchronize());
    cudaEventRecord(evSampled);

    _encoder->encode(_encoded, _coords);
    CHECK_EX(cudaDeviceSynchronize());
    cudaEventRecord(evEncoded);

    _net->infer();
    CHECK_EX(cudaDeviceSynchronize());
    cudaEventRecord(evInferred);

    _renderer->render(o_colors, _layeredColors);
    cudaEventRecord(evRendered);

    if (!showPerf)
        return;

    // All events must have completed before their timestamps can be compared.
    CHECK_EX(cudaDeviceSynchronize());
    float msTotal, msSample, msEncode, msInfer, msRender;
    cudaEventElapsedTime(&msTotal, evBegin, evRendered);
    cudaEventElapsedTime(&msSample, evBegin, evSampled);
    cudaEventElapsedTime(&msEncode, evSampled, evEncoded);
    cudaEventElapsedTime(&msInfer, evEncoded, evInferred);
    cudaEventElapsedTime(&msRender, evInferred, evRendered);

    std::ostringstream report;
    report << "Infer pipeline: " << msTotal << "ms (Sample: " << msSample
           << "ms, Encode: " << msEncode << "ms, Infer: " << msInfer
           << "ms, Render: " << msRender << "ms)";
    Logger::instance.info(report.str().c_str());

    // Disabled debug dumps of the intermediate buffers, kept for reference:
    /*
    {
    std::ostringstream sout;
    sout << "Rays:" << std::endl;
    dumpArray<glm::vec3, float>(sout, *rays, 10);
    Logger::instance.info(sout.str());
    }
    {
    std::ostringstream sout;
    sout << "Spherical coords:" << std::endl;
    dumpArray(sout, *_coords, 10, _coordChns * _nSamplesPerRay);
    Logger::instance.info(sout.str());
    }
    {
    std::ostringstream sout;
    sout << "Depths:" << std::endl;
    dumpArray(sout, *_depths, 10, _nSamplesPerRay);
    Logger::instance.info(sout.str());
    }
    {
    std::ostringstream sout;
    sout << "Encoded:" << std::endl;
    dumpArray(sout, *_encoded, 10, _encoder->outDim() * _nSamplesPerRay);
    Logger::instance.info(sout.str());
    }
    {
    std::ostringstream sout;
    sout << "Color:" << std::endl;
    dumpArray<glm::vec4, float>(sout, *o_colors, 10);
    Logger::instance.info(sout.str());
    }
    */
}
\ No newline at end of file
#pragma once
#include "../utils/common.h"
#include "Sampler.h"
#include "Encoder.h"
#include "Renderer.h"
#include "Msl.h"
// Chains a Sampler, an Encoder, a network (Msl) and a Renderer, together with the
// intermediate device buffers that connect them.
class InferPipeline {
public:
// net: network used for inference; nRays: rays handled per run();
// nSamplesPerRay: samples taken on every ray; depthRange: forwarded to the Sampler;
// encodeDim: first Encoder constructor argument; coordChns: channels per sampled
// coordinate (the Sampler's third argument is set when this equals 3).
InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay,
glm::vec2 depthRange, uint encodeDim, uint coordChns);
// Processes the first nRays() entries of `rays` and writes into the first nRays()
// entries of `o_colors`. `origin` is passed to the sampler. With showPerf set, the
// stage timings are logged.
void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays, glm::vec3 origin,
bool showPerf = false);
// Number of rays this pipeline handles per run().
uint nRays() const { return _nRays; }
private:
// NOTE: the constructor's initializer list relies on this declaration order.
uint _nRays;
uint _nSamplesPerRay;
uint _coordChns;
sptr<Msl> _net;
sptr<Sampler> _sampler;
sptr<Encoder> _encoder;
sptr<Renderer> _renderer;
// Sampled coordinates: nRays * nSamplesPerRay * coordChns floats.
sptr<CudaArray<float>> _coords;
// One depth per sample.
sptr<CudaArray<float>> _depths;
// Encoded coordinates: nRays * nSamplesPerRay * encoder outDim() floats.
sptr<CudaArray<float>> _encoded;
// One vec4 per sample; bound to the net as its color resource and read by the renderer.
sptr<CudaArray<glm::vec4>> _layeredColors;
};
\ No newline at end of file
#include "NeuralSynthesis.h"
#include "InferPipeline.h"
#include "Enhancement.h"
#include "ImageGen.h"
// Number of image layers; per-layer arrays (cameras, enhancements, images) use this size.
constexpr auto NUM_LAYERS = 3u;
// Extra slot placed after the regular layers; in stereo mode it holds the second (right)
// fovea image, which is why the image arrays are sized NUM_LAYERS + 1.
constexpr auto STEREO_FOVEA_R = NUM_LAYERS;
// Number of inference pipelines/networks (loaded as "fovea" and "periph").
constexpr auto NUM_NETS = 2u;
// Private implementation behind NeuralSynthesis (held there through an sptr).
// NOTE(review): the member-function definitions that accompany this declaration use a
// different constructor/run() signature and members not declared here (_fullCam, _stereo,
// _nets, _cams) — confirm which revision of the class is current.
class NeuralSynthesis_Impl {
public:
NeuralSynthesis_Impl(models::Model& model, Camera& cam);
void run(View& view);
// Returns the GL texture of the image generator stored at `index`.
GLuint getGlResultTexture(uint index);
private:
models::Model& model;
Camera& _cam;
uint _nRays;
// One inference pipeline per network.
sptr<InferPipeline> _infers[NUM_NETS];
// One enhancement pass per layer.
sptr<Enhancement> _enhancements[NUM_LAYERS];
// One image generator per layer plus the STEREO_FOVEA_R slot.
sptr<ImageGen> _imageGens[NUM_LAYERS + 1];
// Ray buffer shared by all layers (sub-ranges are handed to each camera/pipeline).
sptr<CudaArray<glm::vec3>> _rays;
// Color buffer laid out like _rays.
sptr<CudaArray<glm::vec4>> _clrs;
// Per-layer image buffers plus the STEREO_FOVEA_R slot.
sptr<CudaArray<glm::vec4>> _imageData[NUM_LAYERS + 1];
};
/// Loads the fovea/periphery nets from `dataDir` and creates the per-layer cameras,
/// inference pipelines, image generators, enhancement passes and device buffers.
///
/// @param dataDir    directory containing "fovea.trt" and "periph.trt"
/// @param depthRange forwarded to both inference pipelines
/// @param nSamples   samples per ray; element 0 is used for the fovea pipeline and
///                   element 1 for the periphery pipeline
/// @param encodeDim  forwarded to both inference pipelines
/// @param coordChns  forwarded to both inference pipelines
/// @param cam        full-view camera
/// @param layerCams  one camera per layer; must hold at least NUM_LAYERS entries
/// @param stereo     when set, an extra fovea image is allocated for the second eye
/// @throws std::out_of_range if `layerCams` has fewer than NUM_LAYERS entries
NeuralSynthesis_Impl::NeuralSynthesis_Impl(const std::string& dataDir, glm::vec2 depthRange,
                                           uint nSamples[], uint encodeDim, uint coordChns, sptr<Camera> cam,
                                           const std::vector<sptr<Camera>>& layerCams, bool stereo) :
    _fullCam(cam), _stereo(stereo) {
    // Load nets
    // NOTE(review): the result of load() is not inspected here — confirm how Msl::load
    // reports failure.
    for (uint i = 0; i < NUM_NETS; ++i)
        _nets[i].reset(new Msl());
    _nets[0]->load(dataDir + "/fovea.trt");
    _nets[1]->load(dataDir + "/periph.trt");

    // Init cams. at() turns a too-short layerCams into an exception instead of an
    // out-of-bounds read.
    for (uint i = 0; i < NUM_LAYERS; ++i)
        _cams[i] = layerCams.at(i);

    uint nRays[NUM_LAYERS];
    uint nTotRays = 0;
    for (uint i = 0; i < NUM_LAYERS; ++i)
        nTotRays += nRays[i] = _cams[i]->nRays();
    if (_stereo)
        nTotRays += nRays[0]; // second eye re-renders the fovea layer

    // Init infers: the fovea net handles layer 0, the periphery net layers 1 and 2.
    _infers[0].reset(new InferPipeline(_nets[0], nRays[0], nSamples[0],
                                       depthRange, encodeDim, coordChns));
    _infers[1].reset(new InferPipeline(_nets[1], nRays[1] + nRays[2], nSamples[1],
                                       depthRange, encodeDim, coordChns));

    // Init image gens
    for (uint i = 0; i < NUM_LAYERS; ++i)
        _imageGens[i].reset(new ImageGen(_cams[i]->res()));
    if (_stereo)
        _imageGens[STEREO_FOVEA_R].reset(new ImageGen(_cams[0]->res()));

    // Init enhancements
    glm::vec2 enhancementParams[] = {
        {3.0f, 0.2f}, {5.0f, 0.2f}, {5.0f, 0.2f}
    };
    static_assert(sizeof(enhancementParams) / sizeof(enhancementParams[0]) == NUM_LAYERS,
                  "enhancementParams must provide one entry per layer");
    for (uint i = 0; i < NUM_LAYERS; ++i)
        _enhancements[i].reset(new Enhancement(_cams[i]->res(), enhancementParams[i]));

    // Create buffers
    _rays.reset(new CudaArray<glm::vec3>(nTotRays));
    _clrs.reset(new CudaArray<glm::vec4>(nTotRays));
    for (uint i = 0; i < NUM_LAYERS; ++i)
        _imageData[i].reset(new CudaArray<glm::vec4>(_cams[i]->nPixels()));
    if (_stereo)
        _imageData[STEREO_FOVEA_R].reset(new CudaArray<glm::vec4>(_cams[0]->nPixels()));
}
/// Renders one frame: generates rays for every layer, runs the inference pipelines,
/// restores the per-layer images, enhances them and hands them to the image generators.
///
/// @param view      current view; in stereo mode the eye views are derived from it
/// @param foveaPos  fovea position in full-camera pixels (left eye in stereo mode)
/// @param showPerf  log stage timings
/// @param foveaPosR fovea position of the right eye (only used in stereo mode)
void NeuralSynthesis_Impl::run(View& view, glm::vec2 foveaPos, bool showPerf, glm::vec2 foveaPosR) {
    CudaEvent eStart, eGenRays, eInferred, eGenImage, eEnhance;
    uint offset;

    cudaEventRecord(eStart);

    // Fovea offsets relative to the image centre, divided by the camera's f().
    glm::vec2 foveaOffset(foveaPos - (glm::vec2)_fullCam->res() / 2.0f);
    foveaOffset /= _fullCam->f();
    glm::vec3 foveaOffset3(foveaOffset.x, foveaOffset.y, 0.0f);

    glm::vec2 foveaOffsetR(foveaPosR - (glm::vec2)_fullCam->res() / 2.0f);
    foveaOffsetR /= _fullCam->f();
    glm::vec3 foveaOffset3R(foveaOffsetR.x, foveaOffsetR.y, 0.0f);

    auto viewL = view.getStereoEye(0.06f, Eye_Left);
    auto viewR = view.getStereoEye(0.06f, Eye_Right);

    // --- Ray generation. Buffer layout: layer 0, layer 1, layer 2 [, right fovea] ---
    if (_stereo) {
        offset = 0;
        _cams[0]->getRays(sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), viewL, foveaOffset3);
        offset += _cams[0]->nRays();
        // The shared middle layer uses the centre view and the mean of both fovea offsets.
        _cams[1]->getRays(sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), view, (foveaOffset3 + foveaOffset3R) / 2.0f);
        offset += _cams[1]->nRays();
        _cams[2]->getRays(sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), view, {});
        offset += _cams[2]->nRays();
        _cams[0]->getRays(sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), viewR, foveaOffset3R);
    } else {
        offset = 0;
        for (uint i = 0; i < NUM_LAYERS; ++i) {
            // The last layer is not shifted by the fovea offset.
            _cams[i]->getRays(sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)),
                              view, i == NUM_LAYERS - 1 ? glm::vec3() : foveaOffset3);
            offset += _cams[i]->nRays();
        }
    }

    cudaEventRecord(eGenRays);

    // --- Inference ---
    if (_stereo) {
        offset = 0;
        _infers[0]->run(sptr<CudaArray<glm::vec4>>(_clrs->subArray(offset)),
                        sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), viewL.t(), showPerf);
        offset += _infers[0]->nRays();
        _infers[1]->run(sptr<CudaArray<glm::vec4>>(_clrs->subArray(offset)),
                        sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), view.t(), showPerf);
        offset += _infers[1]->nRays();
        // The fovea pipeline is reused for the right-eye rays at the end of the buffer.
        _infers[0]->run(sptr<CudaArray<glm::vec4>>(_clrs->subArray(offset)),
                        sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), viewR.t(), showPerf);
    } else {
        offset = 0;
        for (uint i = 0; i < NUM_NETS; ++i) {
            _infers[i]->run(sptr<CudaArray<glm::vec4>>(_clrs->subArray(offset)),
                            sptr<CudaArray<glm::vec3>>(_rays->subArray(offset)), view.t(), showPerf);
            offset += _infers[i]->nRays();
        }
    }

    cudaEventRecord(eInferred);

    // --- Image restoration ---
    offset = 0;
    for (uint i = 0; i < NUM_LAYERS; ++i) {
        _cams[i]->restoreImage(_imageData[i], sptr<CudaArray<glm::vec4>>(_clrs->subArray(offset)));
        offset += _cams[i]->nRays();
    }
    if (_stereo)
        _cams[0]->restoreImage(_imageData[STEREO_FOVEA_R], sptr<CudaArray<glm::vec4>>(_clrs->subArray(offset)));

    cudaEventRecord(eGenImage);

    // --- Enhancement ---
    for (uint i = 0; i < NUM_LAYERS; ++i)
        _enhancements[i]->run(_imageData[i]);
    if (_stereo)
        _enhancements[0]->run(_imageData[STEREO_FOVEA_R]);

    cudaEventRecord(eEnhance);
    CHECK_EX(cudaDeviceSynchronize());

    for (uint i = 0; i < NUM_LAYERS; ++i)
        _imageGens[i]->run(_imageData[i]);
    if (_stereo)
        _imageGens[STEREO_FOVEA_R]->run(_imageData[STEREO_FOVEA_R]);

    if (showPerf) {
        // Timings are only queried when they are reported. The total spans up to
        // eEnhance so that it covers every stage listed in the breakdown.
        float timeTotal, timeGenRays, timeInfer, timeGenImage, timeEnhance;
        cudaEventElapsedTime(&timeTotal, eStart, eEnhance);
        cudaEventElapsedTime(&timeGenRays, eStart, eGenRays);
        cudaEventElapsedTime(&timeInfer, eGenRays, eInferred);
        cudaEventElapsedTime(&timeGenImage, eInferred, eGenImage);
        cudaEventElapsedTime(&timeEnhance, eGenImage, eEnhance);

        std::ostringstream sout;
        sout << "Synthesis => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
             << "ms, Infer: " << timeInfer << "ms, Gen image: " << timeGenImage
             << "ms, Enhance: " << timeEnhance << "ms)";
        Logger::instance.info(sout.str().c_str());
    }
}
/// Returns the GL texture produced by the image generator in slot `index`
/// (0 .. NUM_LAYERS, where NUM_LAYERS is the STEREO_FOVEA_R slot).
/// Returns 0 when `index` is out of range or the slot has no generator (e.g. the
/// STEREO_FOVEA_R slot when stereo rendering is off).
GLuint NeuralSynthesis_Impl::getGlResultTexture(uint index) {
    if (index > NUM_LAYERS || _imageGens[index].get() == nullptr)
        return 0;
    return _imageGens[index]->getGlResultTexture();
}
/// Creates the private implementation object, forwarding every argument unchanged.
NeuralSynthesis::NeuralSynthesis(const std::string& dataDir, glm::vec2 depthRange,
                                 uint nSamples[], uint encodeDim, uint coordChns, sptr<Camera> cam,
                                 const std::vector<sptr<Camera>>& layerCams, bool stereo) {
    _impl.reset(new NeuralSynthesis_Impl(dataDir, depthRange, nSamples, encodeDim, coordChns,
                                         cam, layerCams, stereo));
}
/// Renders one frame by delegating to the implementation object.
void NeuralSynthesis::run(View& view, glm::vec2 foveaPos, bool showPerf, glm::vec2 foveaPosR) {
    NeuralSynthesis_Impl& impl = *_impl;
    impl.run(view, foveaPos, showPerf, foveaPosR);
}
/// Returns the implementation's GL result texture for slot `index`.
GLuint NeuralSynthesis::getGlResultTexture(uint index) {
    const GLuint texture = _impl->getGlResultTexture(index);
    return texture;
}
#pragma once
#include "../utils/common.h"
#include "View.h"
#include "../models/Model.h"
// Defined in the implementation file; only referenced through a pointer here.
class NeuralSynthesis_Impl;
// Public facade of the neural synthesis renderer; all work is done by NeuralSynthesis_Impl.
// NOTE(review): the out-of-line definitions that accompany this header take a different
// constructor argument list and define run() rather than operator() — confirm which
// revision is current.
class NeuralSynthesis {
public:
NeuralSynthesis(models::Model& model, Camera& cam);
void operator()(View& view);
// Returns the GL texture holding the result stored at `index`.
GLuint getGlResultTexture(uint index);
private:
sptr<NeuralSynthesis_Impl> _impl;
};
\ No newline at end of file
#include "Sampler.h"
#define _USE_MATH_DEFINES
#include <math.h>
#include "../utils/cuda.h"
/// Intersects the ray p + t * v with the origin-centred sphere of radius r.
/// Solves (v.v) t^2 + 2 (p.v) t + (p.p - r^2) = 0 and takes the root with the positive
/// square-root term. The chosen t is stored in `o_depth`; the hit point is returned.
/// No check is made for a negative discriminant (sqrtf then yields NaN).
__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth) {
    const float a = glm::dot(v, v);
    const float halfB = glm::dot(p, v);
    const float c = glm::dot(p, p) - r * r;
    const float disc = halfB * halfB - a * c;
    o_depth = (sqrtf(disc) - halfB) / a;
    return p + o_depth * v;
}
/// Returns -atan(x / y) + pi/2, with an additional +pi when y is negative.
__device__ float _getAngle(float x, float y) {
    const float halfTurn = (y < 0) ? (float)M_PI : 0.0f;
    const float quarterTurn = 0.5f * (float)M_PI;
    return -atan(x / y) + halfTurn + quarterTurn;
}
/**
 * Computes one sample per (sample index, ray index) pair: intersects the ray with the
 * sphere of radius 1 / range.get(sample), stores the hit depth in o_depths and the
 * coordinate (reciprocal radius, angle, angle) in o_coords, either as three floats or,
 * when outputRadius is false, as the two angles only.
 *
 * Dispatch with block_size = (n_samples, k) and grid_size = (1, ceil(nRays / k)).
 * Threads are indexed as (sample_idx, ray_idx).
 */
__global__ void cu_sampleOnRays(float *o_coords, float *o_depths, glm::vec3 *rays, uint nRays,
                                glm::vec3 origin, Range range, bool outputRadius) {
    glm::uvec3 tid = IDX3;
    const uint flat = flattenIdx(tid);
    const uint sample = tid.x;
    const uint ray = tid.y;
    if (ray >= nRays)
        return;

    const float invRadius = range.get(sample);
    const glm::vec3 hit = _raySphereIntersect(origin, rays[ray], 1.0f / invRadius, o_depths[flat]);
    const glm::vec3 coord(invRadius, _getAngle(hit.x, hit.z), acos(hit.y * invRadius));

    if (outputRadius) {
        reinterpret_cast<glm::vec3 *>(o_coords)[flat] = coord;
    } else {
        reinterpret_cast<glm::vec2 *>(o_coords)[flat] = glm::vec2(coord.y, coord.z);
    }
}
/// Launches cu_sampleOnRays for every ray in `rays`.
///
/// Block shape is (steps, rays per block) with at most 1024 threads; the grid covers all
/// rays along y. An empty ray array is a no-op. A step count that cannot be launched
/// (0 or above 1024) is left to fail at launch, where CHECK_EX reports it.
///
/// @param o_coords  receives the sampled coordinates
/// @param o_depths  receives one depth per sample
/// @param rays      ray directions
/// @param rayCenter common origin of all rays
void Sampler::sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
                           sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter) {
    const uint nRays = (uint)rays->n();
    if (nRays == 0)
        return; // a zero-sized grid is an invalid launch configuration

    const uint steps = (uint)_dispRange.steps();
    // Clamp to 1 so the grid computation never divides by zero; an unlaunchable block
    // shape then fails in the launch itself.
    uint raysPerBlock = steps > 0 ? 1024 / steps : 0;
    if (raysPerBlock == 0)
        raysPerBlock = 1;

    // Integer ceil-division: the previous float-based ceil() loses precision once the
    // ray count exceeds 2^24 and could under-size the grid.
    const uint nBlocks = nRays / raysPerBlock + (nRays % raysPerBlock != 0 ? 1u : 0u);

    // CU_INVOKE presumably picks up the local names blkSize/grdSize — keep them.
    dim3 blkSize(steps, raysPerBlock);
    dim3 grdSize(1, nBlocks);
    CU_INVOKE(cu_sampleOnRays)
    (*o_coords, *o_depths, *rays, nRays, rayCenter, _dispRange, _outputRadius);
    CHECK_EX(cudaGetLastError());
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment