Skip to content
Snippets Groups Projects
Commit c10f614f authored by Nianchen Deng's avatar Nianchen Deng
Browse files

sync

parent dcba5844
Branches
No related merge requests found
Showing
with 367 additions and 393 deletions
---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: true
AfterControlStatement: false
AfterEnum: true
AfterFunction: true
AfterNamespace: true
AfterObjCDeclaration: true
AfterStruct: true
AfterUnion: true
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
Priority: 3
- Regex: '.*'
Priority: 1
IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 4
UseTab: Never
...
......@@ -11,5 +11,4 @@
"__nullptr": "cpp"
},
"python.pythonPath": "/home/dengnc/miniconda3/bin/python",
"jupyter.jupyterServerType": "local"
}
\ No newline at end of file
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "echo",
"type": "shell",
"command": "echo Hello",
"problemMatcher": [],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}
\ No newline at end of file
#!/usr/bin/env bash
# Export trained snerf-fast evaluation checkpoints to ONNX and then to TensorRT.
#
# For every (n_nets, n_layers, nf) combination listed below, the script looks for
# "$datadir/<configid>/model-epoch_$epochs.pth". When the checkpoint exists it
#   1. exports an ONNX model with tools/export_snerf_fast.py (skipped if already exported), and
#   2. builds a TensorRT engine with trtexec, which also writes a timing JSON
#      (skipped if the engine file already exists).
# Combinations without a checkpoint are silently skipped.

datadir='data/__new/classroom_fovea_r360x80_t0.6'
onnxdir="$datadir/eval_onnx"
trtdir="$datadir/eval_trt"
epochs=50

if [ ! -d "$onnxdir" ]; then
    echo "make directory for ONNX"
    mkdir -p "$onnxdir"
fi

# Test the nested "time" directory rather than its parent, so it is also created
# when "$trtdir" already exists from an earlier, partial run.
if [ ! -d "$trtdir/time" ]; then
    echo "make directory for TensorRT"
    mkdir -p "$trtdir/time"
fi

# nets: 1, 2, 4, 8
# layers: 2, 4, 8
# channels: 64 128 256 512 1024
for n_nets in 1 2 4 8; do
    for n_layers in 2 4 8; do
        for nf in 64 128 256 512 1024; do
            configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
            exportname="eval_${n_nets}x${nf}x${n_layers}"
            pth_path="$datadir/$configid/model-epoch_$epochs.pth"
            onnx_path="$onnxdir/$exportname.onnx"
            trt_path="$trtdir/$exportname.trt"
            time_perf_path="$trtdir/time/$exportname.json"

            if [ -f "$pth_path" ]; then
                if [ ! -f "$onnx_path" ]; then
                    # Export ONNX model
                    python tools/export_snerf_fast.py "$pth_path" -b 65536 -o "$onnx_path"
                fi
                if [ ! -f "$trt_path" ]; then
                    # Export TensorRT engine
                    trtexec --onnx="$onnx_path" --fp16 --saveEngine="$trt_path" --workspace=4096 --exportTimes="$time_perf_path" --noDataTransfers
                fi
            fi
        done
    done
done
\ No newline at end of file
......@@ -8,7 +8,7 @@ epochs=50
# nets: 1, 2, 4, 8
# layers: 2, 4, 8
# channels: 128 256 512
# channels: 64 128 256 512 1024
n_nets_arr=(1 2 4 8 1 2 4 8 1 2 4 8)
n_layers_arr=(2 2 2 2 4 4 4 4 8 8 8 8)
n_nets=${n_nets_arr[$testcase]}
......@@ -16,9 +16,7 @@ n_layers=${n_layers_arr[$testcase]}
for nf in 64 128 256 512 1024; do
configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
if [ -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
continue
fi
if [ ! -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
cont_epoch=0
for ((i=$epochs-1;i>0;i--)) do
if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then
......@@ -31,6 +29,9 @@ for nf in 64 128 256 512 1024; do
else
python run_spherical_view_syn.py $trainset -i $configid -e $epochs
fi
fi
if ! ls $datadir/$configid/output_$epochs/perf_r120x80* >/dev/null 2>&1; then
python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf
python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf
fi
done
\ No newline at end of file
......@@ -14,7 +14,6 @@ class FoveatedNeuralRenderer(object):
layers_res: List[Tuple[int, int]],
layers_net: nn.ModuleList,
output_res: Tuple[int, int], *,
using_mask=True,
device: torch.device = None):
super().__init__()
self.layers_net = layers_net.to(device=device)
......@@ -34,7 +33,6 @@ class FoveatedNeuralRenderer(object):
'normalized': True
}, output_res, device=device)
self.foveation = Foveation(layers_fov, layers_res, output_res, device=device)
self.layers_mask = self.foveation.get_layers_mask() if using_mask else None
self.device = device
def to(self, device: torch.device):
......@@ -43,8 +41,6 @@ class FoveatedNeuralRenderer(object):
self.cam.to(device)
for cam in self.layers_cam:
cam.to(device)
if self.layers_mask is not None:
self.layers_mask = self.layers_mask.to(device)
self.device = device
return self
......@@ -52,32 +48,46 @@ class FoveatedNeuralRenderer(object):
return self.render(*args, **kwds)
def render(self, view: Trans, gaze, right_gaze=None, *,
stereo_disparity=0, ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]:
stereo_disparity=0,
using_mask=True,
ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]:
if stereo_disparity > TINY_FLOAT:
left_view = Trans(
view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=view.device())),
view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=self.device)),
view.r)
right_view = Trans(
view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=view.device())),
view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=self.device)),
view.r)
left_gaze = gaze
right_gaze = gaze if right_gaze is None else right_gaze
left_layers_mask = self.foveation.get_layers_mask(left_gaze) \
if using_mask else [None] * 3
right_layers_mask = self.foveation.get_layers_mask(right_gaze) \
if using_mask else [None] * 3
res_raw_left = [
self._render(i, left_view, left_gaze if i < 2 else None)['color']
self._render(self.layers_net[i], self.layers_cam[i], left_view,
left_gaze if i < 2 else None,
layer_mask=left_layers_mask[i])['color']
for i in range(3)
]
res_raw_right = [
self._render(i, right_view, right_gaze if i < 2 else None)['color']
self._render(self.layers_net[i], self.layers_cam[i], right_view,
right_gaze if i < 2 else None,
layer_mask=right_layers_mask[i])['color']
for i in range(3)
]
return self._gen_output(res_raw_left, left_gaze, ret_raw), \
self._gen_output(res_raw_right, right_gaze, ret_raw)
else:
layers_mask = self.foveation.get_layers_mask(gaze) if using_mask else None
res_raw = [
self._render(i, view, gaze if i < 2 else None)['color']
self._render(self.layers_net[i], self.layers_cam[i], view, gaze if i < 2 else None,
layer_mask=layers_mask[i] if layers_mask is not None else None)['color']
for i in range(3)
]
return self._gen_output(res_raw, gaze, ret_raw)
'''
if mono_trans != None and shift == 0: # do warp
fovea_depth[torch.isnan(fovea_depth)] = 50
......@@ -105,25 +115,25 @@ class FoveatedNeuralRenderer(object):
], (gaze[0], gaze[1]), [0, shift, shift] if shift != 0 else None)
'''
def _render(self, layer: int, view: Trans, gaze=None, ret_depth=False) -> Mapping[str, torch.Tensor]:
net = self.layers_net[layer]
cam = self.layers_cam[layer]
def _render(self, net, cam: CameraParam, view: Trans, gaze=None, *,
ret_depth=False,
layer_mask=None) -> Mapping[str, torch.Tensor]:
if gaze is not None:
cam = self._adjust_cam(cam, gaze)
rays_o, rays_d = cam.get_global_rays(view, True) # (1, N, 3)
if self.layers_mask is not None and layer < len(self.layers_mask):
mask = self.layers_mask[layer] >= 0
rays_o = rays_o[:, mask]
rays_d = rays_d[:, mask]
rays_o, rays_d = cam.get_global_rays(view, False) # (1, H, W, 3)
if layer_mask is not None:
infer_mask = layer_mask >= 0
rays_o = rays_o[:, infer_mask]
rays_d = rays_d[:, infer_mask]
net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
ret = {
'color': torch.zeros(1, cam.res[0], cam.res[1], 3)
'color': torch.zeros(1, cam.res[0], cam.res[1], 3, device=self.device)
}
ret['color'][:, mask] = net_output['color']
ret['color'][:, infer_mask] = net_output['color']
ret['color'] = ret['color'].permute(0, 3, 1, 2)
if ret_depth:
ret['depth'] = torch.zeros(1, cam.res[0], cam.res[1])
ret['depth'][:, mask] = net_output['depth']
ret['depth'][:, infer_mask] = net_output['depth']
return ret
else:
net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
......@@ -140,7 +150,7 @@ class FoveatedNeuralRenderer(object):
'blended': blended
}
if ret_raw:
ret['layers_raw'] = layers_img,
ret['layers_raw'] = layers_img
ret['blended_raw'] = self.foveation.synthesis(layers_img, gaze)
return ret
......
......@@ -31,7 +31,7 @@ class Foveation(object):
def synthesis(self, layers: List[torch.Tensor],
fovea_center: Tuple[float, float],
shifts: List[int] = None) -> torch.Tensor:
shifts: List[int] = None, do_blend=True) -> torch.Tensor:
"""
Generate foveated retinal image by blending fovea layers
**Note: current implementation only support two fovea layers**
......@@ -55,8 +55,12 @@ class Foveation(object):
if shifts != None:
grid = img.horizontal_shift(grid, shifts[i], -2)
# (1, 1, H:out, W:out)
blend = nn_f.grid_sample(self.eye_fovea_blend[i][None, None, ...], grid)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid) * blend)
if do_blend:
blend = nn_f.grid_sample(self.eye_fovea_blend[i][None, None], grid, align_corners=False)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid, align_corners=False) * blend)
else:
blend = nn_f.grid_sample(torch.ones_like(self.eye_fovea_blend[i][None, None]), grid, align_corners=False)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid, align_corners=False) * blend)
return output
def get_layer_size_in_final_image(self, i: int) -> int:
......@@ -94,7 +98,7 @@ class Foveation(object):
r = torch.norm(p - R, dim=2) # (size, size, 2)
return misc.smooth_step(R, R * self.blend, r)
def get_layers_mask(self) -> List[torch.Tensor]:
def get_layers_mask(self, gaze) -> List[torch.Tensor]:
"""
Generate mask images for layers[:-1]
the meaning of values in mask images:
......@@ -106,12 +110,23 @@ class Foveation(object):
:return: Mask images for layers except outermost
"""
layers_mask = []
for i in range(self.n_layers - 1):
for i in range(self.n_layers):
layers_mask.append(torch.ones(*self.layers_res[i], device=self.device) * -1)
r = torch.norm(misc.meshgrid(*self.layers_res[i], normalize=True).to(device=self.device) * 2 - 1, dim=-1)
if i == self.n_layers - 1:
c = torch.tensor([
(gaze[0] + 0.5 * self.out_res[1]) / self.out_res[0],
(gaze[1] + 0.5 * self.out_res[0]) / self.out_res[0]
], device=self.device)
else:
c = torch.tensor([0.5, 0.5], device=self.device)
coord = misc.meshgrid(*self.layers_res[i]).to(device=self.device) / self.layers_res[i][0]
r = 2 * torch.norm(coord - c, dim=-1)
inner_radius = self.get_source_layer_cover_size_in_target_layer(
self.layers_fov[i - 1], self.layers_fov[i],
self.layers_res[i][0]) / self.layers_res[i][0] if i > 0 else 0
self.layers_fov[i - 1], self.layers_fov[i], self.layers_res[i][0]) / self.layers_res[i][0] \
if i > 0 else 0
if i == self.n_layers - 1:
bounds = [inner_radius * (1 - self.blend), inner_radius, 100, 100]
else:
bounds = [inner_radius * (1 - self.blend), inner_radius, self.blend, 1]
for bi in range(len(bounds) - 1):
region = torch.logical_and(r > bounds[bi], r <= bounds[bi + 1])
......
......@@ -128,7 +128,7 @@ endif
#########################
INCPATHS=
LIBPATHS=
COMMON_LIBS= -lGLEW -lglfw3 -lGL -lX11 -lpthread -lXrandr -lXinerama -lXcursor -lXi -ldl
COMMON_LIBS= -lGLEW -lglfw -lGL -lX11 -lpthread -lXrandr #-lXinerama -lXcursor -lXi -ldl
# Add extra libraries if TRT_STATIC is enabled
ifeq ($(TRT_STATIC), 1)
......@@ -207,7 +207,7 @@ else ifeq ($(TARGET), aarch64)
endif
endif
ifeq ($(ENABLE_MYELIN), 1)
COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
#COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
endif
.SUFFIXES:
......
#pragma once
#include <memory>
#include <stdexcept>
#include <vector>
#include <string>
#include <sstream>
#include <GL/glew.h>
#include <cuda_gl_interop.h>
#include "../glm/glm.hpp"
#include "Logger.h"
/// Returns the per-element size for a tensor data type
/// (the values look like byte counts: 4 for kINT32/kFLOAT, 2 for kHALF, 1 for kBOOL/kINT8).
///
/// @param t  data type to look up
/// @return   element size for @p t
/// @throws std::runtime_error if @p t is none of the enumerators handled below
inline unsigned int getElementSize(nv::DataType t)
{
    switch (t) {
    case nv::DataType::kINT32:
    case nv::DataType::kFLOAT:
        return 4;
    case nv::DataType::kHALF:
        return 2;
    case nv::DataType::kBOOL:
    case nv::DataType::kINT8:
        return 1;
    }
    // Only reached when t holds a value outside the handled enumerators.
    throw std::runtime_error("Invalid DataType.");
}
/// Writes @p n elements of @p buf to @p os on one line, separated by single
/// spaces, followed by std::endl. With n == 0 only the line terminator is
/// written (buf is not dereferenced).
template <typename T>
void dumpRow(std::ostream &os, T *buf, size_t n)
{
    for (size_t i = 0; i < n; ++i) {
        if (i != 0)
            os << " ";
        os << buf[i];
    }
    os << std::endl;
}

/// Dumps a host buffer as rows of @p rowCount elements of type T.
///
/// @param os           destination stream
/// @param buf          start of the buffer, reinterpreted as T[]
/// @param bufSize      buffer size in bytes; a trailing partial element is ignored
/// @param rowCount     number of elements per row (the last row may be shorter)
/// @param maxDumpRows  0 dumps every row. Otherwise, when the buffer has more
///                     rows than this, only the first maxDumpRows / 2 rows and
///                     the last maxDumpRows - maxDumpRows / 2 rows are written,
///                     separated by a "..." line.
///
/// Nothing is written when the buffer is null, holds no complete element, or
/// @p rowCount is 0.
template <typename T>
void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount, size_t maxDumpRows = 0)
{
    T *typedBuf = static_cast<T *>(buf);
    const size_t numItems = bufSize / sizeof(T);
    // Guard the degenerate inputs: rowCount == 0 would divide by zero and an
    // empty buffer would make the unsigned "rows - 1" wrap around.
    if (typedBuf == nullptr || numItems == 0 || rowCount == 0)
        return;

    const size_t rows = (numItems + rowCount - 1) / rowCount;
    const size_t nInLastRow = numItems - (rows - 1) * rowCount;

    // Dumps rows [first, last); only the final row of the buffer may be short.
    auto dumpRows = [&](size_t first, size_t last) {
        for (size_t r = first; r < last; ++r)
            dumpRow(os, typedBuf + r * rowCount, r == rows - 1 ? nInLastRow : rowCount);
    };

    // Eliding only makes sense when there are more rows than the limit;
    // otherwise the head/tail ranges would overlap or run past the buffer.
    if (maxDumpRows == 0 || rows <= maxDumpRows) {
        dumpRows(0, rows);
        return;
    }

    const size_t headRows = maxDumpRows / 2;
    const size_t tailRows = maxDumpRows - headRows;
    dumpRows(0, headRows);
    os << "..." << std::endl;
    dumpRows(rows - tailRows, rows);
}
class CudaStream
{
public:
CudaStream()
{
cudaStreamCreate(&stream);
}
operator cudaStream_t()
{
return stream;
}
virtual ~CudaStream()
{
cudaStreamDestroy(stream);
}
private:
cudaStream_t stream;
};
class CudaEvent
{
public:
CudaEvent()
{
cudaEventCreate(&mEvent);
}
operator cudaEvent_t()
{
return mEvent;
}
virtual ~CudaEvent()
{
cudaEventDestroy(mEvent);
}
private:
cudaEvent_t mEvent;
};
/// Scope guard for mapping graphics resources into CUDA.
///
/// Construct it with the resources (copied into the guard) and an optional
/// stream, then call map(). The destructor unmaps the resources again, but
/// only if a map() call on this guard succeeded, so a guard that was never
/// mapped, or whose map() failed, does not issue a spurious unmap.
struct CudaMapScope
{
    std::vector<cudaGraphicsResource_t> resources_;
    cudaStream_t stream_;
    bool mapped_ = false; // true once map() has succeeded on this guard

    CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
                 cudaStream_t stream = nullptr) : resources_(resources), stream_(stream) {}

    // Copying would make two guards unmap the same resources.
    CudaMapScope(const CudaMapScope &) = delete;
    CudaMapScope &operator=(const CudaMapScope &) = delete;

    ~CudaMapScope()
    {
        if (mapped_)
            cudaGraphicsUnmapResources(static_cast<int>(resources_.size()),
                                       resources_.data(), stream_);
    }

    /// Maps all resources on stream_.
    /// @return cudaSuccess when there is nothing to map, otherwise the result
    ///         of cudaGraphicsMapResources.
    cudaError_t map()
    {
        if (resources_.empty())
            return cudaSuccess;
        const cudaError_t err = cudaGraphicsMapResources(static_cast<int>(resources_.size()),
                                                         resources_.data(), stream_);
        // Only ever set the flag: a failed repeat call must not cancel the
        // unmap owed for an earlier successful one.
        if (err == cudaSuccess)
            mapped_ = true;
        return err;
    }
};
/// Deleter functor for types that are released through a destroy() member
/// instead of operator delete. A null pointer is ignored.
template <typename T>
struct Destroy
{
    void operator()(T *t)
    {
        if (t == nullptr)
            return;
        t->destroy();
    }
};
/// std::unique_ptr whose deleter is ::Destroy<T>, i.e. the managed object is
/// released by calling its destroy() member rather than by delete.
template <class T>
using uptr = std::unique_ptr<T, ::Destroy<T>>;
/// Shorthand for std::shared_ptr<T>.
template <class T>
using sptr = std::shared_ptr<T>;
/// Converts the difference (__end__ - __start__) from CLOCKS_PER_SEC units to
/// milliseconds, evaluated in float. Presumably the arguments are clock()
/// readings; confirm at the call sites.
#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
#include "Resource.h"
#include "Formatter.h"
\ No newline at end of file
#include "Encoder.h"
#include "thread_index.h"
#include "../utils/cuda.h"
/// idx3.z = 0: x, y, z, sin(x), sin(y), sin(z), cos(x), cos(y), cos(z)
/// idx3.z = 1: sin(2x), sin(2y), sin(2z), cos(2x), cos(2y), cos(2z)
......@@ -7,12 +7,11 @@
/// idx3.z = n_freq-1: sin(2^(n_freq-1)x), sin(2^(n_freq-1)y), sin(2^(n_freq-1)z),
/// cos(2^(n_freq-1)x), cos(2^(n_freq-1)y), cos(2^(n_freq-1)z)
/// Dispatch (n_batch, n_chns, n_freqs)
__global__ void cu_encode(float *output, float *input, float *freqs, uint n)
{
__global__ void cu_encode(float *output, float *input, float *freqs, uint n) {
glm::uvec3 idx3 = IDX3;
if (idx3.x >= n)
return;
uint n = blockDim.x, inChns = blockDim.y, nFreqs = blockDim.z;
uint inChns = blockDim.y, nFreqs = blockDim.z;
uint i = idx3.x, chn = idx3.y, freq = idx3.z;
uint elem = i * inChns + chn;
uint outChns = inChns * (nFreqs * 2 + 1);
......@@ -26,16 +25,14 @@ __global__ void cu_encode(float *output, float *input, float *freqs, uint n)
output[base + inChns * (freq * 2 + 2)] = c;
}
void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input)
{
void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input) {
dim3 blkSize(1024 / _chns / _multires, _chns, _multires);
dim3 grdSize((uint)ceil(input->n() / (float)blkSize.x), 1, 1);
cu_encode<<<grdSize, blkSize>>>(output->getBuffer(), *input, *_freqs, input->n());
CU_INVOKE(cu_encode)(output->getBuffer<float>(), *input, *_freqs, input->n());
CHECK_EX(cudaGetLastError());
}
void Encoder::_genFreqArray()
{
void Encoder::_genFreqArray() {
float *arr = new float[_multires];
arr[0] = 1.0f;
for (auto i = 1; i < _multires; ++i)
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Encoder {
public:
......@@ -14,5 +14,4 @@ private:
sptr<CudaArray<float>> _freqs;
void _genFreqArray();
};
\ No newline at end of file
#include "Enhancement.h"
#include "thread_index.h"
#include "../utils/cuda.h"
#define max(__a__, __b__) (__a__ > __b__ ? __a__ : __b__)
#define min(__a__, __b__) (__a__ < __b__ ? __a__ : __b__)
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Enhancement
{
......
#include "InferPipeline.h"
#include "Nmsl2.h"
InferPipeline::InferPipeline(
const std::string &netDir, bool isNmsl, uint batchSize,
uint samples) : _batchSize(batchSize),
_samples(samples),
_sampler(new Sampler({1.0f, 50.0f}, samples)),
_encoder(new Encoder(10, 3)),
_renderer(new Renderer()),
_net(isNmsl ? new Nmsl2(batchSize, samples) : new Msl(batchSize, samples))
{
uint batchSizeForNet = _batchSize * _samples;
_sphericalCoords = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(batchSizeForNet));
_depths = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet));
_encoded = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet * _encoder->outDim()));
_layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(batchSizeForNet));
_net->load(netDir);
InferPipeline::InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay, glm::vec2 depthRange,
int encodeDim, int coordChns)
: _nRays(nRays),
_nSamplesPerRay(nSamplesPerRay),
_net(net),
_sampler(new Sampler(depthRange, nSamplesPerRay, coordChns == 3)),
_encoder(new Encoder(encodeDim, coordChns)),
_renderer(new Renderer()) {
uint nSamples = _nRays * _nSamplesPerRay;
_coords = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * coordChns));
_depths = sptr<CudaArray<float>>(new CudaArray<float>(nSamples));
_encoded = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * _encoder->outDim()));
_layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(nSamples));
_net->bindResources(_encoded.get(), _depths.get(), _layeredColors.get());
}
void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
sptr<CudaArray<glm::vec3>> rays,
glm::vec3 rayOrigin, bool showPerf)
{
void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
glm::vec3 origin, bool showPerf) {
CudaEvent eStart, eSampled, eEncoded, eInferred, eRendered;
cudaEventRecord(eStart);
_sampler->sampleOnRays(_sphericalCoords, _depths, rays, rayOrigin);
_sampler->sampleOnRays(_coords, _depths, rays, origin);
cudaEventRecord(eSampled);
sptr<CudaArray<float>> coords(new CudaArray<float>((float *)_sphericalCoords->getBuffer(),
_sphericalCoords->n() * 3));
_encoder->encode(_encoded, coords);
_encoder->encode(_encoded, _coords);
cudaEventRecord(eEncoded);
......@@ -46,8 +40,7 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
cudaEventRecord(eRendered);
if (showPerf)
{
if (showPerf) {
CHECK_EX(cudaDeviceSynchronize());
float timeTotal, timeSample, timeEncode, timeInfer, timeRender;
......@@ -59,8 +52,8 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
std::ostringstream sout;
sout << "Infer pipeline: " << timeTotal << "ms (Sample: " << timeSample
<< "ms, Encode: " << timeEncode << "ms, Infer: "
<< timeInfer << "ms, Render: " << timeRender << "ms)";
<< "ms, Encode: " << timeEncode << "ms, Infer: " << timeInfer
<< "ms, Render: " << timeRender << "ms)";
Logger::instance.info(sout.str());
}
/*
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
#include "../msl_infer/Sampler.h"
#include "../msl_infer/Encoder.h"
#include "../msl_infer/Renderer.h"
#include "../msl_infer/Msl.h"
class InferPipeline
{
class InferPipeline {
public:
InferPipeline(const std::string &netDir, bool isNmsl, uint batchSize, uint samples);
InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay,
glm::vec2 depthRange, int encodeDim, int coordChns);
void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
glm::vec3 rayOrigin, bool showPerf = false);
void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays, glm::vec3 origin,
bool showPerf = false);
private:
uint _batchSize;
uint _samples;
uint _nRays;
uint _nSamplesPerRay;
sptr<Msl> _net;
sptr<Sampler> _sampler;
sptr<Encoder> _encoder;
sptr<Renderer> _renderer;
sptr<Msl> _net;
sptr<CudaArray<glm::vec3>> _sphericalCoords;
sptr<CudaArray<float>> _coords;
sptr<CudaArray<float>> _depths;
sptr<CudaArray<float>> _encoded;
sptr<CudaArray<glm::vec4>> _layeredColors;
......
#include "Msl.h"
#include <time.h>
Msl::Msl(int batchSize, int samples) : batchSize(batchSize), samples(samples), net(nullptr) {}
Msl::Msl() : net(nullptr) {}
bool Msl::load(const std::string &netDir)
{
bool Msl::load(const std::string &netPath) {
net = new Net();
if (!net->load(netDir + "msl.trt"))
return false;
if (net->load(netPath))
return true;
dispose();
return false;
}
void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors)
{
void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
net->bindResource("Encoded", resEncoded);
net->bindResource("Depths", resDepths);
net->bindResource("Colors", resColors);
}
bool Msl::infer()
{
if (!net->infer())
return false;
return true;
}
bool Msl::infer() { return net->infer(); }
bool Msl::dispose()
{
if (net != nullptr)
{
void Msl::dispose() {
if (net != nullptr) {
net->dispose();
delete net;
net = nullptr;
}
return true;
}
#pragma once
#include "Common.h"
#include "../utils/common.h"
#include "Net.h"
class Msl
{
class Msl {
public:
int batchSize;
int samples;
Net *net;
Msl(int batchSize, int samples);
Msl();
virtual bool load(const std::string &netDir);
virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors);
virtual bool infer();
virtual bool dispose();
virtual void dispose();
};
#include "half.h"
#include "../utils/half.h"
#include "Net.h"
#include <fstream>
#include <numeric>
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Net {
......
#include "Nmsl2.h"
#include <time.h>
Nmsl2::Nmsl2(int batchSize, int samples) : Msl(batchSize, samples),
resRaw1(nullptr), resRaw2(nullptr),
fcNet1(nullptr), fcNet2(nullptr), catNet(nullptr) {}
Nmsl2::Nmsl2(int batchSize, int samples)
: batchSize(batchSize),
samples(samples),
resRaw1(nullptr),
resRaw2(nullptr),
fcNet1(nullptr),
fcNet2(nullptr),
catNet(nullptr) {}
bool Nmsl2::load(const std::string &netDir)
{
bool Nmsl2::load(const std::string &netDir) {
fcNet1 = new Net();
fcNet2 = new Net();
catNet = new Net();
......@@ -18,8 +22,7 @@ bool Nmsl2::load(const std::string &netDir)
return true;
}
void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors)
{
void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
fcNet1->bindResource("Encoded", resEncoded);
fcNet1->bindResource("Raw", resRaw1.get());
fcNet2->bindResource("Encoded", resEncoded);
......@@ -30,8 +33,7 @@ void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *r
catNet->bindResource("Colors", resColors);
}
bool Nmsl2::infer()
{
bool Nmsl2::infer() {
// CudaStream stream1, stream2;
if (!fcNet1->infer())
return false;
......@@ -42,27 +44,22 @@ bool Nmsl2::infer()
return true;
}
bool Nmsl2::dispose()
{
if (fcNet1 != nullptr)
{
void Nmsl2::dispose() {
if (fcNet1 != nullptr) {
fcNet1->dispose();
delete fcNet1;
fcNet1 = nullptr;
}
if (fcNet2 != nullptr)
{
if (fcNet2 != nullptr) {
fcNet2->dispose();
delete fcNet2;
fcNet2 = nullptr;
}
if (catNet != nullptr)
{
if (catNet != nullptr) {
catNet->dispose();
delete catNet;
catNet = nullptr;
}
resRaw1 = nullptr;
resRaw2 = nullptr;
return true;
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment