Commit c10f614f authored by Nianchen Deng's avatar Nianchen Deng
Browse files

sync

parent dcba5844
---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterClass: true
  AfterControlStatement: false
  AfterEnum: true
  AfterFunction: true
  AfterNamespace: true
  AfterObjCDeclaration: true
  AfterStruct: true
  AfterUnion: true
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeCategories:
  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
    Priority: 2
  - Regex: '^(<|"(gtest|gmock|isl|json)/)'
    Priority: 3
  - Regex: '.*'
    Priority: 1
IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 4
UseTab: Never
...
......@@ -11,5 +11,4 @@
"__nullptr": "cpp"
},
"python.pythonPath": "/home/dengnc/miniconda3/bin/python",
"jupyter.jupyterServerType": "local"
}
\ No newline at end of file
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "echo",
"type": "shell",
"command": "echo Hello",
"problemMatcher": [],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}
\ No newline at end of file
#!/usr/bin/bash
# Export trained snerf-fast evaluation checkpoints to ONNX and then build a
# TensorRT engine from each ONNX model.
#
# For every (n_nets, n_layers, nf) combination whose final-epoch checkpoint
# exists under $datadir, the script writes:
#   $onnxdir/eval_<nets>x<nf>x<layers>.onnx
#   $trtdir/eval_<nets>x<nf>x<layers>.trt
#   $trtdir/time/eval_<nets>x<nf>x<layers>.json   (trtexec timing export)
# Outputs that already exist are left untouched, so the script can be re-run.

datadir='data/__new/classroom_fovea_r360x80_t0.6'
onnxdir="$datadir/eval_onnx"
trtdir="$datadir/eval_trt"
epochs=50

if [ ! -d "$onnxdir" ]; then
    echo "make directory for ONNX"
    mkdir -p "$onnxdir"
fi
if [ ! -d "$trtdir" ]; then
    echo "make directory for TensorRT"
    mkdir -p "$trtdir"
fi
# Created unconditionally: $trtdir may already exist without its time/ subdirectory.
mkdir -p "$trtdir/time"

# nets: 1, 2, 4, 8
# layers: 2, 4, 8
# channels: 64 128 256 512 1024
for n_nets in 1 2 4 8; do
    for n_layers in 2 4 8; do
        for nf in 64 128 256 512 1024; do
            configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
            exportname="eval_${n_nets}x${nf}x${n_layers}"
            pth_path="$datadir/$configid/model-epoch_$epochs.pth"
            onnx_path="$onnxdir/$exportname.onnx"
            trt_path="$trtdir/$exportname.trt"
            time_perf_path="$trtdir/time/$exportname.json"

            # Only configurations that finished training are exported.
            if [ ! -f "$pth_path" ]; then
                continue
            fi

            if [ ! -f "$onnx_path" ]; then
                # Export ONNX model
                python tools/export_snerf_fast.py "$pth_path" -b 65536 -o "$onnx_path"
            fi
            # Without an ONNX file there is nothing for trtexec to build from.
            if [ ! -f "$onnx_path" ]; then
                echo "ONNX export failed for $exportname, skipping TensorRT build" >&2
                continue
            fi

            if [ ! -f "$trt_path" ]; then
                # Export TensorRT engine
                trtexec --onnx="$onnx_path" --fp16 --saveEngine="$trt_path" --workspace=4096 --exportTimes="$time_perf_path" --noDataTransfers
            fi
        done
    done
done
\ No newline at end of file
......@@ -8,7 +8,7 @@ epochs=50
# nets: 1, 2, 4, 8
# layers: 2, 4, 8
# channels: 128 256 512
# channels: 64 128 256 512 1024
n_nets_arr=(1 2 4 8 1 2 4 8 1 2 4 8)
n_layers_arr=(2 2 2 2 4 4 4 4 8 8 8 8)
n_nets=${n_nets_arr[$testcase]}
......@@ -16,21 +16,22 @@ n_layers=${n_layers_arr[$testcase]}
for nf in 64 128 256 512 1024; do
configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
if [ -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
continue
fi
cont_epoch=0
for ((i=$epochs-1;i>0;i--)) do
if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then
cont_epoch=$i
break
if [ ! -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
cont_epoch=0
for ((i=$epochs-1;i>0;i--)) do
if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then
cont_epoch=$i
break
fi
done
if [ ${cont_epoch} -gt 0 ]; then
python run_spherical_view_syn.py $trainset -e $epochs -m $configid/model-epoch_${cont_epoch}.pth
else
python run_spherical_view_syn.py $trainset -i $configid -e $epochs
fi
done
if [ ${cont_epoch} -gt 0 ]; then
python run_spherical_view_syn.py $trainset -e $epochs -m $configid/model-epoch_${cont_epoch}.pth
else
python run_spherical_view_syn.py $trainset -i $configid -e $epochs
fi
python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf
python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf
done
if ! ls $datadir/$configid/output_$epochs/perf_r120x80* >/dev/null 2>&1; then
python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf
python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf
fi
done
\ No newline at end of file
......@@ -14,7 +14,6 @@ class FoveatedNeuralRenderer(object):
layers_res: List[Tuple[int, int]],
layers_net: nn.ModuleList,
output_res: Tuple[int, int], *,
using_mask=True,
device: torch.device = None):
super().__init__()
self.layers_net = layers_net.to(device=device)
......@@ -34,7 +33,6 @@ class FoveatedNeuralRenderer(object):
'normalized': True
}, output_res, device=device)
self.foveation = Foveation(layers_fov, layers_res, output_res, device=device)
self.layers_mask = self.foveation.get_layers_mask() if using_mask else None
self.device = device
def to(self, device: torch.device):
......@@ -43,8 +41,6 @@ class FoveatedNeuralRenderer(object):
self.cam.to(device)
for cam in self.layers_cam:
cam.to(device)
if self.layers_mask is not None:
self.layers_mask = self.layers_mask.to(device)
self.device = device
return self
......@@ -52,32 +48,46 @@ class FoveatedNeuralRenderer(object):
return self.render(*args, **kwds)
def render(self, view: Trans, gaze, right_gaze=None, *,
stereo_disparity=0, ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]:
stereo_disparity=0,
using_mask=True,
ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]:
if stereo_disparity > TINY_FLOAT:
left_view = Trans(
view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=view.device())),
view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=self.device)),
view.r)
right_view = Trans(
view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=view.device())),
view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=self.device)),
view.r)
left_gaze = gaze
right_gaze = gaze if right_gaze is None else right_gaze
left_layers_mask = self.foveation.get_layers_mask(left_gaze) \
if using_mask else [None] * 3
right_layers_mask = self.foveation.get_layers_mask(right_gaze) \
if using_mask else [None] * 3
res_raw_left = [
self._render(i, left_view, left_gaze if i < 2 else None)['color']
self._render(self.layers_net[i], self.layers_cam[i], left_view,
left_gaze if i < 2 else None,
layer_mask=left_layers_mask[i])['color']
for i in range(3)
]
res_raw_right = [
self._render(i, right_view, right_gaze if i < 2 else None)['color']
self._render(self.layers_net[i], self.layers_cam[i], right_view,
right_gaze if i < 2 else None,
layer_mask=right_layers_mask[i])['color']
for i in range(3)
]
return self._gen_output(res_raw_left, left_gaze, ret_raw), \
self._gen_output(res_raw_right, right_gaze, ret_raw)
else:
layers_mask = self.foveation.get_layers_mask(gaze) if using_mask else None
res_raw = [
self._render(i, view, gaze if i < 2 else None)['color']
self._render(self.layers_net[i], self.layers_cam[i], view, gaze if i < 2 else None,
layer_mask=layers_mask[i] if layers_mask is not None else None)['color']
for i in range(3)
]
return self._gen_output(res_raw, gaze, ret_raw)
'''
if mono_trans != None and shift == 0: # do warp
fovea_depth[torch.isnan(fovea_depth)] = 50
......@@ -105,25 +115,25 @@ class FoveatedNeuralRenderer(object):
], (gaze[0], gaze[1]), [0, shift, shift] if shift != 0 else None)
'''
def _render(self, layer: int, view: Trans, gaze=None, ret_depth=False) -> Mapping[str, torch.Tensor]:
net = self.layers_net[layer]
cam = self.layers_cam[layer]
def _render(self, net, cam: CameraParam, view: Trans, gaze=None, *,
ret_depth=False,
layer_mask=None) -> Mapping[str, torch.Tensor]:
if gaze is not None:
cam = self._adjust_cam(cam, gaze)
rays_o, rays_d = cam.get_global_rays(view, True) # (1, N, 3)
if self.layers_mask is not None and layer < len(self.layers_mask):
mask = self.layers_mask[layer] >= 0
rays_o = rays_o[:, mask]
rays_d = rays_d[:, mask]
rays_o, rays_d = cam.get_global_rays(view, False) # (1, H, W, 3)
if layer_mask is not None:
infer_mask = layer_mask >= 0
rays_o = rays_o[:, infer_mask]
rays_d = rays_d[:, infer_mask]
net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
ret = {
'color': torch.zeros(1, cam.res[0], cam.res[1], 3)
'color': torch.zeros(1, cam.res[0], cam.res[1], 3, device=self.device)
}
ret['color'][:, mask] = net_output['color']
ret['color'][:, infer_mask] = net_output['color']
ret['color'] = ret['color'].permute(0, 3, 1, 2)
if ret_depth:
ret['depth'] = torch.zeros(1, cam.res[0], cam.res[1])
ret['depth'][:, mask] = net_output['depth']
ret['depth'][:, infer_mask] = net_output['depth']
return ret
else:
net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
......@@ -140,7 +150,7 @@ class FoveatedNeuralRenderer(object):
'blended': blended
}
if ret_raw:
ret['layers_raw'] = layers_img,
ret['layers_raw'] = layers_img
ret['blended_raw'] = self.foveation.synthesis(layers_img, gaze)
return ret
......
......@@ -31,7 +31,7 @@ class Foveation(object):
def synthesis(self, layers: List[torch.Tensor],
fovea_center: Tuple[float, float],
shifts: List[int] = None) -> torch.Tensor:
shifts: List[int] = None, do_blend=True) -> torch.Tensor:
"""
Generate foveated retinal image by blending fovea layers
**Note: current implementation only support two fovea layers**
......@@ -55,8 +55,12 @@ class Foveation(object):
if shifts != None:
grid = img.horizontal_shift(grid, shifts[i], -2)
# (1, 1, H:out, W:out)
blend = nn_f.grid_sample(self.eye_fovea_blend[i][None, None, ...], grid)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid) * blend)
if do_blend:
blend = nn_f.grid_sample(self.eye_fovea_blend[i][None, None], grid, align_corners=False)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid, align_corners=False) * blend)
else:
blend = nn_f.grid_sample(torch.ones_like(self.eye_fovea_blend[i][None, None]), grid, align_corners=False)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid, align_corners=False) * blend)
return output
def get_layer_size_in_final_image(self, i: int) -> int:
......@@ -94,7 +98,7 @@ class Foveation(object):
r = torch.norm(p - R, dim=2) # (size, size, 2)
return misc.smooth_step(R, R * self.blend, r)
def get_layers_mask(self) -> List[torch.Tensor]:
def get_layers_mask(self, gaze) -> List[torch.Tensor]:
"""
Generate mask images for layers[:-1]
the meaning of values in mask images:
......@@ -106,15 +110,26 @@ class Foveation(object):
:return: Mask images for layers except outermost
"""
layers_mask = []
for i in range(self.n_layers - 1):
for i in range(self.n_layers):
layers_mask.append(torch.ones(*self.layers_res[i], device=self.device) * -1)
r = torch.norm(misc.meshgrid(*self.layers_res[i], normalize=True).to(device=self.device) * 2 - 1, dim=-1)
if i == self.n_layers - 1:
c = torch.tensor([
(gaze[0] + 0.5 * self.out_res[1]) / self.out_res[0],
(gaze[1] + 0.5 * self.out_res[0]) / self.out_res[0]
], device=self.device)
else:
c = torch.tensor([0.5, 0.5], device=self.device)
coord = misc.meshgrid(*self.layers_res[i]).to(device=self.device) / self.layers_res[i][0]
r = 2 * torch.norm(coord - c, dim=-1)
inner_radius = self.get_source_layer_cover_size_in_target_layer(
self.layers_fov[i - 1], self.layers_fov[i],
self.layers_res[i][0]) / self.layers_res[i][0] if i > 0 else 0
bounds = [inner_radius * (1 - self.blend), inner_radius, self.blend, 1]
self.layers_fov[i - 1], self.layers_fov[i], self.layers_res[i][0]) / self.layers_res[i][0] \
if i > 0 else 0
if i == self.n_layers - 1:
bounds = [inner_radius * (1 - self.blend), inner_radius, 100, 100]
else:
bounds = [inner_radius * (1 - self.blend), inner_radius, self.blend, 1]
for bi in range(len(bounds) - 1):
region = torch.logical_and(r > bounds[bi], r <= bounds[bi + 1])
layers_mask[i][region] = bi + \
(r[region] - bounds[bi]) / (bounds[bi + 1] - bounds[bi])
return layers_mask
\ No newline at end of file
return layers_mask
......@@ -128,7 +128,7 @@ endif
#########################
INCPATHS=
LIBPATHS=
COMMON_LIBS= -lGLEW -lglfw3 -lGL -lX11 -lpthread -lXrandr -lXinerama -lXcursor -lXi -ldl
COMMON_LIBS= -lGLEW -lglfw -lGL -lX11 -lpthread -lXrandr #-lXinerama -lXcursor -lXi -ldl
# Add extra libraries if TRT_STATIC is enabled
ifeq ($(TRT_STATIC), 1)
......@@ -207,7 +207,7 @@ else ifeq ($(TARGET), aarch64)
endif
endif
ifeq ($(ENABLE_MYELIN), 1)
COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
#COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
endif
.SUFFIXES:
......
#pragma once
#include <memory>
#include <stdexcept>
#include <vector>
#include <string>
#include <sstream>
#include <GL/glew.h>
#include <cuda_gl_interop.h>
#include "../glm/glm.hpp"
#include "Logger.h"
/// Returns the size in bytes of a single element of the given data type.
///
/// @param t Element data type.
/// @return 4 for kINT32 and kFLOAT, 2 for kHALF, 1 for kBOOL and kINT8.
/// @throws std::runtime_error if `t` is not one of the enumerators handled above.
inline unsigned int getElementSize(nv::DataType t)
{
    switch (t)
    {
    case nv::DataType::kINT32:
    case nv::DataType::kFLOAT:
        return 4;
    case nv::DataType::kHALF:
        return 2;
    case nv::DataType::kBOOL:
    case nv::DataType::kINT8:
        return 1;
    }
    // Only reached when `t` matched none of the cases; no return is needed after the throw.
    throw std::runtime_error("Invalid DataType.");
}
/// Writes `n` elements of `buf` to `os` on a single line, separated by one
/// space, and ends the line with std::endl.
///
/// @tparam T  Element type; must be streamable with operator<<.
/// @param os  Destination stream.
/// @param buf Pointer to at least `n` readable elements. It is not
///            dereferenced when `n` is 0.
/// @param n   Number of elements to write; 0 produces an empty line.
template <typename T>
void dumpRow(std::ostream &os, T* buf, size_t n)
{
    for (size_t i = 0; i < n; ++i) {
        // Separator goes between elements only, never before the first one.
        if (i != 0)
            os << " ";
        os << buf[i];
    }
    os << std::endl;
}
/// Prints a host buffer as rows of `rowCount` elements of type T.
///
/// The buffer is interpreted as `bufSize / sizeof(T)` elements. The last row
/// may be shorter when the element count is not a multiple of `rowCount`.
///
/// @tparam T          Element type the raw buffer is reinterpreted as.
/// @param os          Destination stream.
/// @param buf         Start of the buffer.
/// @param bufSize     Size of the buffer in bytes.
/// @param rowCount    Number of elements per printed row.
/// @param maxDumpRows 0 prints every row. Otherwise, when the buffer has more
///                    rows than this, only the first `maxDumpRows / 2` rows
///                    and the last `maxDumpRows - maxDumpRows / 2` rows are
///                    printed, with a "..." line in between.
///
/// Nothing is printed when `buf` is null, the buffer holds no complete
/// element, or `rowCount` is 0.
template <typename T>
void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount, size_t maxDumpRows = 0)
{
    const size_t numItems = bufSize / sizeof(T);
    // Guards the modulo/division by rowCount below and the unsigned
    // wrap-around that an empty buffer would otherwise cause in the row math.
    if (buf == nullptr || numItems == 0 || rowCount == 0)
        return;

    T *typedBuf = static_cast<T *>(buf);
    const size_t remainder = numItems % rowCount;
    const size_t rows = numItems / rowCount + (remainder != 0 ? 1 : 0);
    const size_t nInLastRow = remainder != 0 ? remainder : rowCount;

    // Prints rows [first, last); only the final row of the buffer can be short.
    auto dumpRows = [&](size_t first, size_t last) {
        for (size_t i = first; i < last; ++i)
            dumpRow(os, typedBuf + i * rowCount, i + 1 == rows ? nInLastRow : rowCount);
    };

    // Everything fits within the limit: no elision, and no head/tail index
    // arithmetic that could run past the end of the buffer.
    if (maxDumpRows == 0 || rows <= maxDumpRows) {
        dumpRows(0, rows);
        return;
    }

    const size_t headRows = maxDumpRows / 2;
    const size_t tailRows = maxDumpRows - headRows;
    dumpRows(0, headRows);
    os << "..." << std::endl;
    dumpRows(rows - tailRows, rows);
}
/// Owning wrapper for a CUDA stream handle: the stream is created in the
/// constructor and destroyed in the destructor.
class CudaStream
{
public:
    /// Creates the stream. The status returned by cudaStreamCreate is not checked.
    CudaStream()
    {
        cudaStreamCreate(&stream);
    }

    // Non-copyable: a copy would hold the same handle and destroy it a second time.
    CudaStream(const CudaStream &) = delete;
    CudaStream &operator=(const CudaStream &) = delete;

    /// Implicit conversion so the wrapper can be passed wherever a cudaStream_t is expected.
    operator cudaStream_t()
    {
        return stream;
    }

    virtual ~CudaStream()
    {
        if (stream != nullptr)
            cudaStreamDestroy(stream);
    }

private:
    // Null-initialised so the destructor never hands an indeterminate value to cudaStreamDestroy.
    cudaStream_t stream = nullptr;
};
/// Owning wrapper for a CUDA event handle: the event is created in the
/// constructor and destroyed in the destructor.
class CudaEvent
{
public:
    /// Creates the event. The status returned by cudaEventCreate is not checked.
    CudaEvent()
    {
        cudaEventCreate(&mEvent);
    }

    // Non-copyable: a copy would hold the same handle and destroy it a second time.
    CudaEvent(const CudaEvent &) = delete;
    CudaEvent &operator=(const CudaEvent &) = delete;

    /// Implicit conversion so the wrapper can be passed wherever a cudaEvent_t is expected.
    operator cudaEvent_t()
    {
        return mEvent;
    }

    virtual ~CudaEvent()
    {
        if (mEvent != nullptr)
            cudaEventDestroy(mEvent);
    }

private:
    // Null-initialised so the destructor never hands an indeterminate value to cudaEventDestroy.
    cudaEvent_t mEvent = nullptr;
};
/// Scope guard for a set of CUDA graphics resources: map() maps them and the
/// destructor unmaps them on the same stream.
///
/// The destructor unmaps whenever the resource list is non-empty, whether or
/// not map() was called or succeeded; the unmap status is ignored.
struct CudaMapScope
{
    std::vector<cudaGraphicsResource_t> resources_; ///< Resources handled by this guard (copied from the caller).
    cudaStream_t stream_;                           ///< Stream passed to the map/unmap calls.

    /// @param resources Resources to map/unmap; the vector is copied.
    /// @param stream    Stream used for both calls (defaults to nullptr).
    CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
                 cudaStream_t stream = nullptr) : resources_(resources), stream_(stream) {}

    // Non-copyable: a copy would unmap the same resources a second time on destruction.
    CudaMapScope(const CudaMapScope &) = delete;
    CudaMapScope &operator=(const CudaMapScope &) = delete;

    ~CudaMapScope()
    {
        if (!resources_.empty())
            cudaGraphicsUnmapResources(static_cast<int>(resources_.size()),
                                       resources_.data(), stream_);
    }

    /// Maps all resources.
    /// @return The status of cudaGraphicsMapResources, or cudaSuccess when the list is empty.
    cudaError_t map()
    {
        if (resources_.empty())
            return cudaSuccess;
        return cudaGraphicsMapResources(static_cast<int>(resources_.size()),
                                        resources_.data(), stream_);
    }
};
/// Deleter functor that releases an object through its destroy() member
/// instead of `delete`. Null pointers are ignored.
template <typename T>
struct Destroy
{
    void operator()(T *t)
    {
        if (t == nullptr)
            return;
        t->destroy();
    }
};
// Unique owner whose deleter is ::Destroy<T>, i.e. the object is released by
// calling its destroy() member rather than by `delete`.
template <class T>
using uptr = std::unique_ptr<T, ::Destroy<T>>;
// Shorthand for std::shared_ptr<T>.
template <class T>
using sptr = std::shared_ptr<T>;
// Difference (__end__ - __start__) divided by CLOCKS_PER_SEC and scaled by
// 1000, as a float. Presumably both arguments are clock() tick values, making
// the result milliseconds -- confirm against callers.
#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
#include "Resource.h"
#include "Formatter.h"
\ No newline at end of file
#include "Encoder.h"
#include "thread_index.h"
#include "../utils/cuda.h"
/// idx3.z = 0: x, y, z, sin(x), sin(y), sin(z), cos(x), cos(y), cos(z)
/// idx3.z = 1: sin(2x), sin(2y), sin(2z), cos(2x), cos(2y), cos(2z)
......@@ -7,12 +7,11 @@
/// idx3.z = n_freq-1: sin(2^(n_freq-1)x), sin(2^(n_freq-1)y), sin(2^(n_freq-1)z),
/// cos(2^(n_freq-1)x), cos(2^(n_freq-1)y), cos(2^(n_freq-1)z)
/// Dispatch (n_batch, n_chns, n_freqs)
__global__ void cu_encode(float *output, float *input, float *freqs, uint n)
{
__global__ void cu_encode(float *output, float *input, float *freqs, uint n) {
glm::uvec3 idx3 = IDX3;
if (idx3.x >= n)
return;
uint n = blockDim.x, inChns = blockDim.y, nFreqs = blockDim.z;
uint inChns = blockDim.y, nFreqs = blockDim.z;
uint i = idx3.x, chn = idx3.y, freq = idx3.z;
uint elem = i * inChns + chn;
uint outChns = inChns * (nFreqs * 2 + 1);
......@@ -26,16 +25,14 @@ __global__ void cu_encode(float *output, float *input, float *freqs, uint n)
output[base + inChns * (freq * 2 + 2)] = c;
}
void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input)
{
void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input) {
dim3 blkSize(1024 / _chns / _multires, _chns, _multires);
dim3 grdSize((uint)ceil(input->n() / (float)blkSize.x), 1, 1);
cu_encode<<<grdSize, blkSize>>>(output->getBuffer(), *input, *_freqs, input->n());
CU_INVOKE(cu_encode)(output->getBuffer<float>(), *input, *_freqs, input->n());
CHECK_EX(cudaGetLastError());
}
void Encoder::_genFreqArray()
{
void Encoder::_genFreqArray() {
float *arr = new float[_multires];
arr[0] = 1.0f;
for (auto i = 1; i < _multires; ++i)
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Encoder {
public:
......@@ -14,5 +14,4 @@ private:
sptr<CudaArray<float>> _freqs;
void _genFreqArray();
};
\ No newline at end of file
#include "Enhancement.h"
#include "thread_index.h"
#include "../utils/cuda.h"
#define max(__a__, __b__) (__a__ > __b__ ? __a__ : __b__)
#define min(__a__, __b__) (__a__ < __b__ ? __a__ : __b__)
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Enhancement
{
......
#include "InferPipeline.h"
#include "Nmsl2.h"
InferPipeline::InferPipeline(
const std::string &netDir, bool isNmsl, uint batchSize,
uint samples) : _batchSize(batchSize),
_samples(samples),
_sampler(new Sampler({1.0f, 50.0f}, samples)),
_encoder(new Encoder(10, 3)),
_renderer(new Renderer()),
_net(isNmsl ? new Nmsl2(batchSize, samples) : new Msl(batchSize, samples))
{
uint batchSizeForNet = _batchSize * _samples;
_sphericalCoords = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(batchSizeForNet));
_depths = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet));
_encoded = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet * _encoder->outDim()));
_layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(batchSizeForNet));
_net->load(netDir);
InferPipeline::InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay, glm::vec2 depthRange,
int encodeDim, int coordChns)
: _nRays(nRays),
_nSamplesPerRay(nSamplesPerRay),
_net(net),
_sampler(new Sampler(depthRange, nSamplesPerRay, coordChns == 3)),
_encoder(new Encoder(encodeDim, coordChns)),
_renderer(new Renderer()) {
uint nSamples = _nRays * _nSamplesPerRay;
_coords = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * coordChns));
_depths = sptr<CudaArray<float>>(new CudaArray<float>(nSamples));
_encoded = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * _encoder->outDim()));
_layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(nSamples));
_net->bindResources(_encoded.get(), _depths.get(), _layeredColors.get());
}
void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
sptr<CudaArray<glm::vec3>> rays,
glm::vec3 rayOrigin, bool showPerf)
{
void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
glm::vec3 origin, bool showPerf) {
CudaEvent eStart, eSampled, eEncoded, eInferred, eRendered;
cudaEventRecord(eStart);
_sampler->sampleOnRays(_sphericalCoords, _depths, rays, rayOrigin);
_sampler->sampleOnRays(_coords, _depths, rays, origin);
cudaEventRecord(eSampled);
sptr<CudaArray<float>> coords(new CudaArray<float>((float *)_sphericalCoords->getBuffer(),
_sphericalCoords->n() * 3));
_encoder->encode(_encoded, coords);
_encoder->encode(_encoded, _coords);
cudaEventRecord(eEncoded);
......@@ -46,8 +40,7 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
cudaEventRecord(eRendered);
if (showPerf)
{
if (showPerf) {
CHECK_EX(cudaDeviceSynchronize());
float timeTotal, timeSample, timeEncode, timeInfer, timeRender;
......@@ -59,34 +52,34 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
std::ostringstream sout;
sout << "Infer pipeline: " << timeTotal << "ms (Sample: " << timeSample
<< "ms, Encode: " << timeEncode << "ms, Infer: "
<< timeInfer << "ms, Render: " << timeRender << "ms)";
<< "ms, Encode: " << timeEncode << "ms, Infer: " << timeInfer
<< "ms, Render: " << timeRender << "ms)";
Logger::instance.info(sout.str());
}
/*
{
std::ostringstream sout;
sout << "Rays:" << std::endl;
dumpFloatArray(sout, *rays, 10);
Logger::instance.info(sout.str());
}
{
std::ostringstream sout;
sout << "Spherical coords:" << std::endl;
dumpFloatArray(sout, *sphericalCoords, 10);
Logger::instance.info(sout.str());
}
{
std::ostringstream sout;
sout << "Depths:" << std::endl;
dumpFloatArray(sout, *depths, 10);
Logger::instance.info(sout.str());
}
{
std::ostringstream sout;
sout << "Encoded:" << std::endl;
dumpFloatArray(sout, *encoded, 10, encoder.outDim());
Logger::instance.info(sout.str());
}
*/
{
std::ostringstream sout;
sout << "Rays:" << std::endl;
dumpFloatArray(sout, *rays, 10);
Logger::instance.info(sout.str());
}
{
std::ostringstream sout;
sout << "Spherical coords:" << std::endl;
dumpFloatArray(sout, *sphericalCoords, 10);
Logger::instance.info(sout.str());
}
{
std::ostringstream sout;
sout << "Depths:" << std::endl;
dumpFloatArray(sout, *depths, 10);
Logger::instance.info(sout.str());
}
{
std::ostringstream sout;
sout << "Encoded:" << std::endl;
dumpFloatArray(sout, *encoded, 10, encoder.outDim());
Logger::instance.info(sout.str());
}
*/
}
\ No newline at end of file
#pragma once
#include "Common.h"
#include "../utils/common.h"
#include "../msl_infer/Sampler.h"
#include "../msl_infer/Encoder.h"
#include "../msl_infer/Renderer.h"
#include "../msl_infer/Msl.h"
class InferPipeline
{
public:
InferPipeline(const std::string &netDir, bool isNmsl, uint batchSize, uint samples);
class InferPipeline {
public:
InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay,
glm::vec2 depthRange, int encodeDim, int coordChns);
void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
glm::vec3 rayOrigin, bool showPerf = false);
void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays, glm::vec3 origin,
bool showPerf = false);
private:
uint _batchSize;
uint _samples;
private:
uint _nRays;
uint _nSamplesPerRay;
sptr<Msl> _net;
sptr<Sampler> _sampler;
sptr<Encoder> _encoder;
sptr<Renderer> _renderer;
sptr<Msl> _net;
sptr<CudaArray<glm::vec3>> _sphericalCoords;
sptr<CudaArray<float>> _coords;
sptr<CudaArray<float>> _depths;
sptr<CudaArray<float>> _encoded;
sptr<CudaArray<glm::vec4>> _layeredColors;
......
#include "Msl.h"
#include <time.h>
Msl::Msl(int batchSize, int samples) : batchSize(batchSize), samples(samples), net(nullptr) {}
Msl::Msl() : net(nullptr) {}
bool Msl::load(const std::string &netDir)
{
bool Msl::load(const std::string &netPath) {
net = new Net();
if (!net->load(netDir + "msl.trt"))
return false;
return true;
if (net->load(netPath))
return true;
dispose();
return false;
}
void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors)
{
void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
net->bindResource("Encoded", resEncoded);
net->bindResource("Depths", resDepths);
net->bindResource("Colors", resColors);
}
bool Msl::infer()
{
if (!net->infer())
return false;
return true;
}
bool Msl::infer() { return net->infer(); }
bool Msl::dispose()
{
if (net != nullptr)
{
void Msl::dispose() {
if (net != nullptr) {
net->dispose();
delete net;
net = nullptr;
}
return true;
}
#pragma once
#include "Common.h"
#include "../utils/common.h"
#include "Net.h"
class Msl
{
class Msl {
public:
int batchSize;
int samples;
Net *net;
Msl(int batchSize, int samples);
Msl();
virtual bool load(const std::string &netDir);
virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors);
virtual bool infer();
virtual bool dispose();
virtual bool load(const std::string &netDir);
virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors);
virtual bool infer();
virtual void dispose();
};
#include "half.h"
#include "../utils/half.h"
#include "Net.h"
#include <fstream>
#include <numeric>
......
#pragma once
#include "Common.h"
#include "../utils/common.h"
class Net {
......
#include "Nmsl2.h"
#include <time.h>
Nmsl2::Nmsl2(int batchSize, int samples) : Msl(batchSize, samples),
resRaw1(nullptr), resRaw2(nullptr),
fcNet1(nullptr), fcNet2(nullptr), catNet(nullptr) {}
Nmsl2::Nmsl2(int batchSize, int samples)
: batchSize(batchSize),
samples(samples),
resRaw1(nullptr),
resRaw2(nullptr),
fcNet1(nullptr),
fcNet2(nullptr),
catNet(nullptr) {}
bool Nmsl2::load(const std::string &netDir)
{
fcNet1 = new Net();
fcNet2 = new Net();
catNet = new Net();
if (!fcNet1->load(netDir + "fc1.trt") || !fcNet2->load(netDir + "fc2.trt") ||
!catNet->load(netDir + "cat.trt"))
return false;
resRaw1 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
resRaw2 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
return true;
bool Nmsl2::load(const std::string &netDir) {
fcNet1 = new Net();
fcNet2 = new Net();
catNet = new Net();
if (!fcNet1->load(netDir + "fc1.trt") || !fcNet2->load(netDir + "fc2.trt") ||
!catNet->load(netDir + "cat.trt"))
return false;
resRaw1 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
resRaw2 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
return true;
}
void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors)
{
fcNet1->bindResource("Encoded", resEncoded);
fcNet1->bindResource("Raw", resRaw1.get());
fcNet2->bindResource("Encoded", resEncoded);
fcNet2->bindResource("Raw", resRaw2.get());
catNet->bindResource("Raw1", resRaw1.get());
catNet->bindResource("Raw2", resRaw2.get());
catNet->bindResource("Depths", resDepths);
catNet->bindResource("Colors", resColors);
void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
fcNet1->bindResource("Encoded", resEncoded);
fcNet1->bindResource("Raw", resRaw1.get());
fcNet2->bindResource("Encoded", resEncoded);
fcNet2->bindResource("Raw", resRaw2.get());
catNet->bindResource("Raw1", resRaw1.get());
catNet->bindResource("Raw2", resRaw2.get());
catNet->bindResource("Depths", resDepths);
catNet->bindResource("Colors", resColors);
}
bool Nmsl2::infer()
{
//CudaStream stream1, stream2;
if (!fcNet1->infer())
return false;
if (!fcNet2->infer())
return false;
if (!catNet->infer())
return false;
return true;
bool Nmsl2::infer() {
// CudaStream stream1, stream2;
if (!fcNet1->infer())
return false;
if (!fcNet2->infer())
return false;
if (!catNet->infer())
return false;
return true;
}
bool Nmsl2::dispose()
{
if (fcNet1 != nullptr)
{
fcNet1->dispose();
delete fcNet1;
fcNet1 = nullptr;
}
if (fcNet2 != nullptr)
{
fcNet2->dispose();
delete fcNet2;
fcNet2 = nullptr;
}
if (catNet != nullptr)
{
catNet->dispose();
delete catNet;
catNet = nullptr;
}
resRaw1 = nullptr;
resRaw2 = nullptr;
return true;
void Nmsl2::dispose() {
if (fcNet1 != nullptr) {
fcNet1->dispose();
delete fcNet1;
fcNet1 = nullptr;
}
if (fcNet2 != nullptr) {
fcNet2->dispose();
delete fcNet2;
fcNet2 = nullptr;
}
if (catNet != nullptr) {
catNet->dispose();
delete catNet;
catNet = nullptr;
}
resRaw1 = nullptr;
resRaw2 = nullptr;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment