Commit c10f614f authored by Nianchen Deng's avatar Nianchen Deng
Browse files

sync

parent dcba5844
---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
AfterClass: true
AfterControlStatement: false
AfterEnum: true
AfterFunction: true
AfterNamespace: true
AfterObjCDeclaration: true
AfterStruct: true
AfterUnion: true
BeforeCatch: false
BeforeElse: false
IndentBraces: false
SplitEmptyFunction: true
SplitEmptyRecord: true
SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 100
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeCategories:
- Regex: '^"(llvm|llvm-c|clang|clang-c)/'
Priority: 2
- Regex: '^(<|"(gtest|gmock|isl|json)/)'
Priority: 3
- Regex: '.*'
Priority: 1
IncludeIsMainRegex: '(Test)?$'
IndentCaseLabels: false
IndentWidth: 4
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 4
UseTab: Never
...
...@@ -11,5 +11,4 @@ ...@@ -11,5 +11,4 @@
"__nullptr": "cpp" "__nullptr": "cpp"
}, },
"python.pythonPath": "/home/dengnc/miniconda3/bin/python", "python.pythonPath": "/home/dengnc/miniconda3/bin/python",
"jupyter.jupyterServerType": "local"
} }
\ No newline at end of file
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
"label": "echo",
"type": "shell",
"command": "echo Hello",
"problemMatcher": [],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}
\ No newline at end of file
#!/usr/bin/env bash
# Export every trained snerffast evaluation model found under $datadir to ONNX
# and build a TensorRT engine from each ONNX file. Artifacts that already exist
# are left alone, so the script can be re-run after a partial failure.

datadir='data/__new/classroom_fovea_r360x80_t0.6'
onnxdir="$datadir/eval_onnx"
trtdir="$datadir/eval_trt"
epochs=50

if [ ! -d "$onnxdir" ]; then
    echo "make directory for ONNX"
    mkdir -p "$onnxdir"
fi

# Test for the nested "time" directory rather than $trtdir itself: trtexec
# writes its timing report there, and it may be missing even when $trtdir exists.
if [ ! -d "$trtdir/time" ]; then
    echo "make directory for TensorRT"
    mkdir -p "$trtdir/time"
fi

# nets: 1, 2, 4, 8
# layers: 2, 4, 8
# channels: 64 128 256 512 1024
for n_nets in 1 2 4 8; do
    for n_layers in 2 4 8; do
        for nf in 64 128 256 512 1024; do
            configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
            exportname="eval_${n_nets}x${nf}x${n_layers}"
            pth_path="$datadir/$configid/model-epoch_$epochs.pth"
            onnx_path="$onnxdir/$exportname.onnx"
            trt_path="$trtdir/$exportname.trt"
            time_perf_path="$trtdir/time/$exportname.json"

            # Only configurations that finished training have a final checkpoint.
            if [ -f "$pth_path" ]; then
                if [ ! -f "$onnx_path" ]; then
                    # Export ONNX model
                    python tools/export_snerf_fast.py "$pth_path" -b 65536 -o "$onnx_path"
                fi
                if [ ! -f "$trt_path" ]; then
                    # Export TensorRT engine
                    trtexec --onnx="$onnx_path" --fp16 --saveEngine="$trt_path" --workspace=4096 --exportTimes="$time_perf_path" --noDataTransfers
                fi
            fi
        done
    done
done
\ No newline at end of file
...@@ -8,7 +8,7 @@ epochs=50 ...@@ -8,7 +8,7 @@ epochs=50
# nets: 1, 2, 4, 8 # nets: 1, 2, 4, 8
# layers: 2, 4, 8 # layers: 2, 4, 8
# channels: 128 256 512 # channels: 64 128 256 512 1024
n_nets_arr=(1 2 4 8 1 2 4 8 1 2 4 8) n_nets_arr=(1 2 4 8 1 2 4 8 1 2 4 8)
n_layers_arr=(2 2 2 2 4 4 4 4 8 8 8 8) n_layers_arr=(2 2 2 2 4 4 4 4 8 8 8 8)
n_nets=${n_nets_arr[$testcase]} n_nets=${n_nets_arr[$testcase]}
...@@ -16,21 +16,22 @@ n_layers=${n_layers_arr[$testcase]} ...@@ -16,21 +16,22 @@ n_layers=${n_layers_arr[$testcase]}
for nf in 64 128 256 512 1024; do for nf in 64 128 256 512 1024; do
configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p" configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
if [ -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then if [ ! -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
continue cont_epoch=0
fi for ((i=$epochs-1;i>0;i--)) do
cont_epoch=0 if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then
for ((i=$epochs-1;i>0;i--)) do cont_epoch=$i
if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then break
cont_epoch=$i fi
break done
if [ ${cont_epoch} -gt 0 ]; then
python run_spherical_view_syn.py $trainset -e $epochs -m $configid/model-epoch_${cont_epoch}.pth
else
python run_spherical_view_syn.py $trainset -i $configid -e $epochs
fi fi
done
if [ ${cont_epoch} -gt 0 ]; then
python run_spherical_view_syn.py $trainset -e $epochs -m $configid/model-epoch_${cont_epoch}.pth
else
python run_spherical_view_syn.py $trainset -i $configid -e $epochs
fi fi
python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf if ! ls $datadir/$configid/output_$epochs/perf_r120x80* >/dev/null 2>&1; then
python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf
done python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf
fi
done
\ No newline at end of file
...@@ -14,7 +14,6 @@ class FoveatedNeuralRenderer(object): ...@@ -14,7 +14,6 @@ class FoveatedNeuralRenderer(object):
layers_res: List[Tuple[int, int]], layers_res: List[Tuple[int, int]],
layers_net: nn.ModuleList, layers_net: nn.ModuleList,
output_res: Tuple[int, int], *, output_res: Tuple[int, int], *,
using_mask=True,
device: torch.device = None): device: torch.device = None):
super().__init__() super().__init__()
self.layers_net = layers_net.to(device=device) self.layers_net = layers_net.to(device=device)
...@@ -34,7 +33,6 @@ class FoveatedNeuralRenderer(object): ...@@ -34,7 +33,6 @@ class FoveatedNeuralRenderer(object):
'normalized': True 'normalized': True
}, output_res, device=device) }, output_res, device=device)
self.foveation = Foveation(layers_fov, layers_res, output_res, device=device) self.foveation = Foveation(layers_fov, layers_res, output_res, device=device)
self.layers_mask = self.foveation.get_layers_mask() if using_mask else None
self.device = device self.device = device
def to(self, device: torch.device): def to(self, device: torch.device):
...@@ -43,8 +41,6 @@ class FoveatedNeuralRenderer(object): ...@@ -43,8 +41,6 @@ class FoveatedNeuralRenderer(object):
self.cam.to(device) self.cam.to(device)
for cam in self.layers_cam: for cam in self.layers_cam:
cam.to(device) cam.to(device)
if self.layers_mask is not None:
self.layers_mask = self.layers_mask.to(device)
self.device = device self.device = device
return self return self
...@@ -52,32 +48,46 @@ class FoveatedNeuralRenderer(object): ...@@ -52,32 +48,46 @@ class FoveatedNeuralRenderer(object):
return self.render(*args, **kwds) return self.render(*args, **kwds)
def render(self, view: Trans, gaze, right_gaze=None, *, def render(self, view: Trans, gaze, right_gaze=None, *,
stereo_disparity=0, ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]: stereo_disparity=0,
using_mask=True,
ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]:
if stereo_disparity > TINY_FLOAT: if stereo_disparity > TINY_FLOAT:
left_view = Trans( left_view = Trans(
view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=view.device())), view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=self.device)),
view.r) view.r)
right_view = Trans( right_view = Trans(
view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=view.device())), view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=self.device)),
view.r) view.r)
left_gaze = gaze left_gaze = gaze
right_gaze = gaze if right_gaze is None else right_gaze right_gaze = gaze if right_gaze is None else right_gaze
left_layers_mask = self.foveation.get_layers_mask(left_gaze) \
if using_mask else [None] * 3
right_layers_mask = self.foveation.get_layers_mask(right_gaze) \
if using_mask else [None] * 3
res_raw_left = [ res_raw_left = [
self._render(i, left_view, left_gaze if i < 2 else None)['color'] self._render(self.layers_net[i], self.layers_cam[i], left_view,
left_gaze if i < 2 else None,
layer_mask=left_layers_mask[i])['color']
for i in range(3) for i in range(3)
] ]
res_raw_right = [ res_raw_right = [
self._render(i, right_view, right_gaze if i < 2 else None)['color'] self._render(self.layers_net[i], self.layers_cam[i], right_view,
right_gaze if i < 2 else None,
layer_mask=right_layers_mask[i])['color']
for i in range(3) for i in range(3)
] ]
return self._gen_output(res_raw_left, left_gaze, ret_raw), \ return self._gen_output(res_raw_left, left_gaze, ret_raw), \
self._gen_output(res_raw_right, right_gaze, ret_raw) self._gen_output(res_raw_right, right_gaze, ret_raw)
else: else:
layers_mask = self.foveation.get_layers_mask(gaze) if using_mask else None
res_raw = [ res_raw = [
self._render(i, view, gaze if i < 2 else None)['color'] self._render(self.layers_net[i], self.layers_cam[i], view, gaze if i < 2 else None,
layer_mask=layers_mask[i] if layers_mask is not None else None)['color']
for i in range(3) for i in range(3)
] ]
return self._gen_output(res_raw, gaze, ret_raw) return self._gen_output(res_raw, gaze, ret_raw)
''' '''
if mono_trans != None and shift == 0: # do warp if mono_trans != None and shift == 0: # do warp
fovea_depth[torch.isnan(fovea_depth)] = 50 fovea_depth[torch.isnan(fovea_depth)] = 50
...@@ -105,25 +115,25 @@ class FoveatedNeuralRenderer(object): ...@@ -105,25 +115,25 @@ class FoveatedNeuralRenderer(object):
], (gaze[0], gaze[1]), [0, shift, shift] if shift != 0 else None) ], (gaze[0], gaze[1]), [0, shift, shift] if shift != 0 else None)
''' '''
def _render(self, layer: int, view: Trans, gaze=None, ret_depth=False) -> Mapping[str, torch.Tensor]: def _render(self, net, cam: CameraParam, view: Trans, gaze=None, *,
net = self.layers_net[layer] ret_depth=False,
cam = self.layers_cam[layer] layer_mask=None) -> Mapping[str, torch.Tensor]:
if gaze is not None: if gaze is not None:
cam = self._adjust_cam(cam, gaze) cam = self._adjust_cam(cam, gaze)
rays_o, rays_d = cam.get_global_rays(view, True) # (1, N, 3) rays_o, rays_d = cam.get_global_rays(view, False) # (1, H, W, 3)
if self.layers_mask is not None and layer < len(self.layers_mask): if layer_mask is not None:
mask = self.layers_mask[layer] >= 0 infer_mask = layer_mask >= 0
rays_o = rays_o[:, mask] rays_o = rays_o[:, infer_mask]
rays_d = rays_d[:, mask] rays_d = rays_d[:, infer_mask]
net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth) net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
ret = { ret = {
'color': torch.zeros(1, cam.res[0], cam.res[1], 3) 'color': torch.zeros(1, cam.res[0], cam.res[1], 3, device=self.device)
} }
ret['color'][:, mask] = net_output['color'] ret['color'][:, infer_mask] = net_output['color']
ret['color'] = ret['color'].permute(0, 3, 1, 2) ret['color'] = ret['color'].permute(0, 3, 1, 2)
if ret_depth: if ret_depth:
ret['depth'] = torch.zeros(1, cam.res[0], cam.res[1]) ret['depth'] = torch.zeros(1, cam.res[0], cam.res[1])
ret['depth'][:, mask] = net_output['depth'] ret['depth'][:, infer_mask] = net_output['depth']
return ret return ret
else: else:
net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth) net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
...@@ -140,7 +150,7 @@ class FoveatedNeuralRenderer(object): ...@@ -140,7 +150,7 @@ class FoveatedNeuralRenderer(object):
'blended': blended 'blended': blended
} }
if ret_raw: if ret_raw:
ret['layers_raw'] = layers_img, ret['layers_raw'] = layers_img
ret['blended_raw'] = self.foveation.synthesis(layers_img, gaze) ret['blended_raw'] = self.foveation.synthesis(layers_img, gaze)
return ret return ret
......
...@@ -31,7 +31,7 @@ class Foveation(object): ...@@ -31,7 +31,7 @@ class Foveation(object):
def synthesis(self, layers: List[torch.Tensor], def synthesis(self, layers: List[torch.Tensor],
fovea_center: Tuple[float, float], fovea_center: Tuple[float, float],
shifts: List[int] = None) -> torch.Tensor: shifts: List[int] = None, do_blend=True) -> torch.Tensor:
""" """
Generate foveated retinal image by blending fovea layers Generate foveated retinal image by blending fovea layers
**Note: current implementation only support two fovea layers** **Note: current implementation only support two fovea layers**
...@@ -55,8 +55,12 @@ class Foveation(object): ...@@ -55,8 +55,12 @@ class Foveation(object):
if shifts != None: if shifts != None:
grid = img.horizontal_shift(grid, shifts[i], -2) grid = img.horizontal_shift(grid, shifts[i], -2)
# (1, 1, H:out, W:out) # (1, 1, H:out, W:out)
blend = nn_f.grid_sample(self.eye_fovea_blend[i][None, None, ...], grid) if do_blend:
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid) * blend) blend = nn_f.grid_sample(self.eye_fovea_blend[i][None, None], grid, align_corners=False)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid, align_corners=False) * blend)
else:
blend = nn_f.grid_sample(torch.ones_like(self.eye_fovea_blend[i][None, None]), grid, align_corners=False)
output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid, align_corners=False) * blend)
return output return output
def get_layer_size_in_final_image(self, i: int) -> int: def get_layer_size_in_final_image(self, i: int) -> int:
...@@ -94,7 +98,7 @@ class Foveation(object): ...@@ -94,7 +98,7 @@ class Foveation(object):
r = torch.norm(p - R, dim=2) # (size, size, 2) r = torch.norm(p - R, dim=2) # (size, size, 2)
return misc.smooth_step(R, R * self.blend, r) return misc.smooth_step(R, R * self.blend, r)
def get_layers_mask(self) -> List[torch.Tensor]: def get_layers_mask(self, gaze) -> List[torch.Tensor]:
""" """
Generate mask images for layers[:-1] Generate mask images for layers[:-1]
the meaning of values in mask images: the meaning of values in mask images:
...@@ -106,15 +110,26 @@ class Foveation(object): ...@@ -106,15 +110,26 @@ class Foveation(object):
:return: Mask images for layers except outermost :return: Mask images for layers except outermost
""" """
layers_mask = [] layers_mask = []
for i in range(self.n_layers - 1): for i in range(self.n_layers):
layers_mask.append(torch.ones(*self.layers_res[i], device=self.device) * -1) layers_mask.append(torch.ones(*self.layers_res[i], device=self.device) * -1)
r = torch.norm(misc.meshgrid(*self.layers_res[i], normalize=True).to(device=self.device) * 2 - 1, dim=-1) if i == self.n_layers - 1:
c = torch.tensor([
(gaze[0] + 0.5 * self.out_res[1]) / self.out_res[0],
(gaze[1] + 0.5 * self.out_res[0]) / self.out_res[0]
], device=self.device)
else:
c = torch.tensor([0.5, 0.5], device=self.device)
coord = misc.meshgrid(*self.layers_res[i]).to(device=self.device) / self.layers_res[i][0]
r = 2 * torch.norm(coord - c, dim=-1)
inner_radius = self.get_source_layer_cover_size_in_target_layer( inner_radius = self.get_source_layer_cover_size_in_target_layer(
self.layers_fov[i - 1], self.layers_fov[i], self.layers_fov[i - 1], self.layers_fov[i], self.layers_res[i][0]) / self.layers_res[i][0] \
self.layers_res[i][0]) / self.layers_res[i][0] if i > 0 else 0 if i > 0 else 0
bounds = [inner_radius * (1 - self.blend), inner_radius, self.blend, 1] if i == self.n_layers - 1:
bounds = [inner_radius * (1 - self.blend), inner_radius, 100, 100]
else:
bounds = [inner_radius * (1 - self.blend), inner_radius, self.blend, 1]
for bi in range(len(bounds) - 1): for bi in range(len(bounds) - 1):
region = torch.logical_and(r > bounds[bi], r <= bounds[bi + 1]) region = torch.logical_and(r > bounds[bi], r <= bounds[bi + 1])
layers_mask[i][region] = bi + \ layers_mask[i][region] = bi + \
(r[region] - bounds[bi]) / (bounds[bi + 1] - bounds[bi]) (r[region] - bounds[bi]) / (bounds[bi + 1] - bounds[bi])
return layers_mask return layers_mask
\ No newline at end of file
...@@ -128,7 +128,7 @@ endif ...@@ -128,7 +128,7 @@ endif
######################### #########################
INCPATHS= INCPATHS=
LIBPATHS= LIBPATHS=
COMMON_LIBS= -lGLEW -lglfw3 -lGL -lX11 -lpthread -lXrandr -lXinerama -lXcursor -lXi -ldl COMMON_LIBS= -lGLEW -lglfw -lGL -lX11 -lpthread -lXrandr #-lXinerama -lXcursor -lXi -ldl
# Add extra libraries if TRT_STATIC is enabled # Add extra libraries if TRT_STATIC is enabled
ifeq ($(TRT_STATIC), 1) ifeq ($(TRT_STATIC), 1)
...@@ -207,7 +207,7 @@ else ifeq ($(TARGET), aarch64) ...@@ -207,7 +207,7 @@ else ifeq ($(TARGET), aarch64)
endif endif
endif endif
ifeq ($(ENABLE_MYELIN), 1) ifeq ($(ENABLE_MYELIN), 1)
COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB) #COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
endif endif
.SUFFIXES: .SUFFIXES:
......
#pragma once
#include <memory>
#include <stdexcept>
#include <vector>
#include <string>
#include <sstream>
#include <GL/glew.h>
#include <cuda_gl_interop.h>
#include "../glm/glm.hpp"
#include "Logger.h"
/// Maps a data type to its per-element size.
///
/// @param t  Data type to look up.
/// @return 4 for kINT32 and kFLOAT, 2 for kHALF, 1 for kBOOL and kINT8
///         (these match the byte widths suggested by the enumerator names).
/// @throws std::runtime_error if `t` is none of the enumerators listed above.
inline unsigned int getElementSize(nv::DataType t)
{
    // 0 is never a valid size, so it doubles as the "no case matched" marker.
    unsigned int elemSize = 0;
    switch (t) {
    case nv::DataType::kINT32:
    case nv::DataType::kFLOAT:
        elemSize = 4;
        break;
    case nv::DataType::kHALF:
        elemSize = 2;
        break;
    case nv::DataType::kBOOL:
    case nv::DataType::kINT8:
        elemSize = 1;
        break;
    }
    if (elemSize == 0)
        throw std::runtime_error("Invalid DataType.");
    return elemSize;
}
/// Writes `n` elements of `buf` to `os` on a single line.
///
/// Elements are separated by one space and the line is terminated with
/// `std::endl`. When `n` is 0 only the line terminator is written and `buf`
/// is never dereferenced.
///
/// @param os   Destination stream.
/// @param buf  Pointer to the first element of the row.
/// @param n    Number of elements to write.
template <typename T>
void dumpRow(std::ostream &os, T* buf, size_t n)
{
    for (size_t i = 0; i < n; ++i) {
        // The separator goes before every element except the first.
        if (i != 0)
            os << " ";
        os << buf[i];
    }
    os << std::endl;
}
/// Prints a raw buffer as rows of `T` values using `dumpRow`.
///
/// The buffer is interpreted as `bufSize / sizeof(T)` items laid out in rows of
/// `rowCount` items; the final row may be shorter. Nothing is written when the
/// buffer is null, holds no complete item, or `rowCount` is 0.
///
/// @param os           Destination stream.
/// @param buf          Start of the buffer; it is read directly through a `T*`.
/// @param bufSize      Size of the buffer in bytes.
/// @param rowCount     Number of items per row.
/// @param maxDumpRows  0 prints every row. Otherwise, when there are more rows
///                     than this, the first `maxDumpRows / 2` rows are printed,
///                     then a "..." line, then the trailing rows, so that
///                     `maxDumpRows` rows appear in total. When the buffer has
///                     no more than `maxDumpRows` rows, all of them are printed.
template <typename T>
void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount, size_t maxDumpRows = 0)
{
    T *typedBuf = static_cast<T *>(buf);
    const size_t numItems = bufSize / sizeof(T);

    // Without these guards rowCount == 0 divides by zero, and an empty buffer
    // makes `rows - 1` wrap around to a huge unsigned value.
    if (typedBuf == nullptr || rowCount == 0 || numItems == 0)
        return;

    const size_t fullRows = numItems / rowCount;
    const size_t remainder = numItems % rowCount;
    const size_t rows = remainder == 0 ? fullRows : fullRows + 1;
    const size_t nInLastRow = remainder == 0 ? rowCount : remainder;

    // Eliding is only meaningful when rows are actually left out; otherwise the
    // head loop would run past the data and the tail start index would wrap.
    if (maxDumpRows == 0 || maxDumpRows >= rows) {
        for (size_t i = 0; i < rows - 1; ++i)
            dumpRow(os, typedBuf + i * rowCount, rowCount);
        dumpRow(os, typedBuf + (rows - 1) * rowCount, nInLastRow);
        return;
    }

    const size_t headRows = maxDumpRows / 2;
    for (size_t i = 0; i < headRows; ++i)
        dumpRow(os, typedBuf + i * rowCount, rowCount);
    os << "..." << std::endl;
    // The tail holds the remaining (maxDumpRows - headRows) rows, the last of
    // which may be partial and is therefore printed separately.
    for (size_t i = rows - maxDumpRows + headRows; i < rows - 1; ++i)
        dumpRow(os, typedBuf + i * rowCount, rowCount);
    dumpRow(os, typedBuf + (rows - 1) * rowCount, nInLastRow);
}
class CudaStream
{
public:
CudaStream()
{
cudaStreamCreate(&stream);
}
operator cudaStream_t()
{
return stream;
}
virtual ~CudaStream()
{
cudaStreamDestroy(stream);
}
private:
cudaStream_t stream;
};
class CudaEvent
{
public:
CudaEvent()
{
cudaEventCreate(&mEvent);
}
operator cudaEvent_t()
{
return mEvent;
}
virtual ~CudaEvent()
{
cudaEventDestroy(mEvent);
}
private:
cudaEvent_t mEvent;
};
/// Scope guard that unmaps a set of CUDA graphics resources on destruction.
///
/// Call `map()` after construction. If that call succeeds, the destructor
/// issues the matching `cudaGraphicsUnmapResources` on the same stream.
/// Resources that were never mapped through this object (because `map()` was
/// not called, failed, or the list is empty) are left untouched.
///
/// Copying is disabled so that the same resources are not unmapped twice.
struct CudaMapScope
{
    std::vector<cudaGraphicsResource_t> resources_;
    cudaStream_t stream_;
    /// True only while this object holds a successful mapping.
    bool mapped_ = false;

    /// @param resources  Resources to map/unmap together; the list is copied.
    /// @param stream     Stream passed to both the map and unmap calls.
    CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
                 cudaStream_t stream = nullptr) : resources_(resources), stream_(stream) {}

    CudaMapScope(const CudaMapScope &) = delete;
    CudaMapScope &operator=(const CudaMapScope &) = delete;

    ~CudaMapScope()
    {
        // Only undo a mapping this object actually established.
        if (mapped_)
            cudaGraphicsUnmapResources(static_cast<int>(resources_.size()),
                                       resources_.data(), stream_);
    }

    /// Maps all resources.
    ///
    /// @return `cudaSuccess` when the list is empty, when the resources are
    ///         already mapped by this object, or when the mapping succeeds;
    ///         otherwise the error reported by `cudaGraphicsMapResources`.
    cudaError_t map()
    {
        if (resources_.empty() || mapped_)
            return cudaSuccess;
        const cudaError_t status = cudaGraphicsMapResources(
            static_cast<int>(resources_.size()), resources_.data(), stream_);
        mapped_ = (status == cudaSuccess);
        return status;
    }
};
/// Deleter functor that releases an object by calling its `destroy()` member
/// instead of `delete`. A null pointer is ignored.
template <typename T>
struct Destroy
{
    void operator()(T *t)
    {
        if (t == nullptr)
            return;
        t->destroy();
    }
};
/// Unique-ownership pointer whose deleter calls `T::destroy()` (via `::Destroy<T>`)
/// rather than `delete`.
template <class T>
using uptr = std::unique_ptr<T, ::Destroy<T>>;
/// Shorthand for `std::shared_ptr<T>`.
template <class T>
using sptr = std::shared_ptr<T>;
/// Converts the difference between two tick counts into milliseconds by dividing
/// by `CLOCKS_PER_SEC` and multiplying by 1000; the result is a `float`.
/// NOTE(review): presumably the arguments are `clock()` values — confirm at call
/// sites. `CLOCKS_PER_SEC` comes from <ctime>/<time.h>, which is not among the
/// includes visible at the top of this header, so users of the macro must include it.
#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
#include "Resource.h"
#include "Formatter.h"
\ No newline at end of file
#include "Encoder.h" #include "Encoder.h"
#include "thread_index.h" #include "../utils/cuda.h"
/// idx3.z = 0: x, y, z, sin(x), sin(y), sin(z), cos(x), cos(y), cos(z) /// idx3.z = 0: x, y, z, sin(x), sin(y), sin(z), cos(x), cos(y), cos(z)
/// idx3.z = 1: sin(2x), sin(2y), sin(2z), cos(2x), cos(2y), cos(2z) /// idx3.z = 1: sin(2x), sin(2y), sin(2z), cos(2x), cos(2y), cos(2z)
...@@ -7,12 +7,11 @@ ...@@ -7,12 +7,11 @@
/// idx3.z = n_freq-1: sin(2^(n_freq-1)x), sin(2^(n_freq-1)y), sin(2^(n_freq-1)z), /// idx3.z = n_freq-1: sin(2^(n_freq-1)x), sin(2^(n_freq-1)y), sin(2^(n_freq-1)z),
/// cos(2^(n_freq-1)x), cos(2^(n_freq-1)y), cos(2^(n_freq-1)z) /// cos(2^(n_freq-1)x), cos(2^(n_freq-1)y), cos(2^(n_freq-1)z)
/// Dispatch (n_batch, n_chns, n_freqs) /// Dispatch (n_batch, n_chns, n_freqs)
__global__ void cu_encode(float *output, float *input, float *freqs, uint n) __global__ void cu_encode(float *output, float *input, float *freqs, uint n) {
{
glm::uvec3 idx3 = IDX3; glm::uvec3 idx3 = IDX3;
if (idx3.x >= n) if (idx3.x >= n)
return; return;
uint n = blockDim.x, inChns = blockDim.y, nFreqs = blockDim.z; uint inChns = blockDim.y, nFreqs = blockDim.z;
uint i = idx3.x, chn = idx3.y, freq = idx3.z; uint i = idx3.x, chn = idx3.y, freq = idx3.z;
uint elem = i * inChns + chn; uint elem = i * inChns + chn;
uint outChns = inChns * (nFreqs * 2 + 1); uint outChns = inChns * (nFreqs * 2 + 1);
...@@ -26,16 +25,14 @@ __global__ void cu_encode(float *output, float *input, float *freqs, uint n) ...@@ -26,16 +25,14 @@ __global__ void cu_encode(float *output, float *input, float *freqs, uint n)
output[base + inChns * (freq * 2 + 2)] = c; output[base + inChns * (freq * 2 + 2)] = c;
} }
void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input) void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input) {
{
dim3 blkSize(1024 / _chns / _multires, _chns, _multires); dim3 blkSize(1024 / _chns / _multires, _chns, _multires);
dim3 grdSize((uint)ceil(input->n() / (float)blkSize.x), 1, 1); dim3 grdSize((uint)ceil(input->n() / (float)blkSize.x), 1, 1);
cu_encode<<<grdSize, blkSize>>>(output->getBuffer(), *input, *_freqs, input->n()); CU_INVOKE(cu_encode)(output->getBuffer<float>(), *input, *_freqs, input->n());
CHECK_EX(cudaGetLastError()); CHECK_EX(cudaGetLastError());
} }
void Encoder::_genFreqArray() void Encoder::_genFreqArray() {
{
float *arr = new float[_multires]; float *arr = new float[_multires];
arr[0] = 1.0f; arr[0] = 1.0f;
for (auto i = 1; i < _multires; ++i) for (auto i = 1; i < _multires; ++i)
......
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
class Encoder { class Encoder {
public: public:
...@@ -14,5 +14,4 @@ private: ...@@ -14,5 +14,4 @@ private:
sptr<CudaArray<float>> _freqs; sptr<CudaArray<float>> _freqs;
void _genFreqArray(); void _genFreqArray();
}; };
\ No newline at end of file
#include "Enhancement.h" #include "Enhancement.h"
#include "thread_index.h" #include "../utils/cuda.h"
#define max(__a__, __b__) (__a__ > __b__ ? __a__ : __b__) #define max(__a__, __b__) (__a__ > __b__ ? __a__ : __b__)
#define min(__a__, __b__) (__a__ < __b__ ? __a__ : __b__) #define min(__a__, __b__) (__a__ < __b__ ? __a__ : __b__)
......
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
class Enhancement class Enhancement
{ {
......
#include "InferPipeline.h" #include "InferPipeline.h"
#include "Nmsl2.h" #include "Nmsl2.h"
InferPipeline::InferPipeline( InferPipeline::InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay, glm::vec2 depthRange,
const std::string &netDir, bool isNmsl, uint batchSize, int encodeDim, int coordChns)
uint samples) : _batchSize(batchSize), : _nRays(nRays),
_samples(samples), _nSamplesPerRay(nSamplesPerRay),
_sampler(new Sampler({1.0f, 50.0f}, samples)), _net(net),
_encoder(new Encoder(10, 3)), _sampler(new Sampler(depthRange, nSamplesPerRay, coordChns == 3)),
_renderer(new Renderer()), _encoder(new Encoder(encodeDim, coordChns)),
_net(isNmsl ? new Nmsl2(batchSize, samples) : new Msl(batchSize, samples)) _renderer(new Renderer()) {
{ uint nSamples = _nRays * _nSamplesPerRay;
uint batchSizeForNet = _batchSize * _samples; _coords = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * coordChns));
_sphericalCoords = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(batchSizeForNet)); _depths = sptr<CudaArray<float>>(new CudaArray<float>(nSamples));
_depths = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet)); _encoded = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * _encoder->outDim()));
_encoded = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet * _encoder->outDim())); _layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(nSamples));
_layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(batchSizeForNet));
_net->load(netDir);
_net->bindResources(_encoded.get(), _depths.get(), _layeredColors.get()); _net->bindResources(_encoded.get(), _depths.get(), _layeredColors.get());
} }
void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
sptr<CudaArray<glm::vec3>> rays, glm::vec3 origin, bool showPerf) {
glm::vec3 rayOrigin, bool showPerf)
{
CudaEvent eStart, eSampled, eEncoded, eInferred, eRendered; CudaEvent eStart, eSampled, eEncoded, eInferred, eRendered;
cudaEventRecord(eStart); cudaEventRecord(eStart);
_sampler->sampleOnRays(_sphericalCoords, _depths, rays, rayOrigin); _sampler->sampleOnRays(_coords, _depths, rays, origin);
cudaEventRecord(eSampled); cudaEventRecord(eSampled);
sptr<CudaArray<float>> coords(new CudaArray<float>((float *)_sphericalCoords->getBuffer(), _encoder->encode(_encoded, _coords);
_sphericalCoords->n() * 3));
_encoder->encode(_encoded, coords);
cudaEventRecord(eEncoded); cudaEventRecord(eEncoded);
...@@ -46,8 +40,7 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, ...@@ -46,8 +40,7 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
cudaEventRecord(eRendered); cudaEventRecord(eRendered);
if (showPerf) if (showPerf) {
{
CHECK_EX(cudaDeviceSynchronize()); CHECK_EX(cudaDeviceSynchronize());
float timeTotal, timeSample, timeEncode, timeInfer, timeRender; float timeTotal, timeSample, timeEncode, timeInfer, timeRender;
...@@ -59,34 +52,34 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, ...@@ -59,34 +52,34 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
std::ostringstream sout; std::ostringstream sout;
sout << "Infer pipeline: " << timeTotal << "ms (Sample: " << timeSample sout << "Infer pipeline: " << timeTotal << "ms (Sample: " << timeSample
<< "ms, Encode: " << timeEncode << "ms, Infer: " << "ms, Encode: " << timeEncode << "ms, Infer: " << timeInfer
<< timeInfer << "ms, Render: " << timeRender << "ms)"; << "ms, Render: " << timeRender << "ms)";
Logger::instance.info(sout.str()); Logger::instance.info(sout.str());
} }
/* /*
{ {
std::ostringstream sout; std::ostringstream sout;
sout << "Rays:" << std::endl; sout << "Rays:" << std::endl;
dumpFloatArray(sout, *rays, 10); dumpFloatArray(sout, *rays, 10);
Logger::instance.info(sout.str()); Logger::instance.info(sout.str());
} }
{ {
std::ostringstream sout; std::ostringstream sout;
sout << "Spherical coords:" << std::endl; sout << "Spherical coords:" << std::endl;
dumpFloatArray(sout, *sphericalCoords, 10); dumpFloatArray(sout, *sphericalCoords, 10);
Logger::instance.info(sout.str()); Logger::instance.info(sout.str());
} }
{ {
std::ostringstream sout; std::ostringstream sout;
sout << "Depths:" << std::endl; sout << "Depths:" << std::endl;
dumpFloatArray(sout, *depths, 10); dumpFloatArray(sout, *depths, 10);
Logger::instance.info(sout.str()); Logger::instance.info(sout.str());
} }
{ {
std::ostringstream sout; std::ostringstream sout;
sout << "Encoded:" << std::endl; sout << "Encoded:" << std::endl;
dumpFloatArray(sout, *encoded, 10, encoder.outDim()); dumpFloatArray(sout, *encoded, 10, encoder.outDim());
Logger::instance.info(sout.str()); Logger::instance.info(sout.str());
} }
*/ */
} }
\ No newline at end of file
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
#include "../msl_infer/Sampler.h" #include "../msl_infer/Sampler.h"
#include "../msl_infer/Encoder.h" #include "../msl_infer/Encoder.h"
#include "../msl_infer/Renderer.h" #include "../msl_infer/Renderer.h"
#include "../msl_infer/Msl.h" #include "../msl_infer/Msl.h"
class InferPipeline class InferPipeline {
{ public:
public: InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay,
InferPipeline(const std::string &netDir, bool isNmsl, uint batchSize, uint samples); glm::vec2 depthRange, int encodeDim, int coordChns);
void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays, void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays, glm::vec3 origin,
glm::vec3 rayOrigin, bool showPerf = false); bool showPerf = false);
private: private:
uint _batchSize; uint _nRays;
uint _samples; uint _nSamplesPerRay;
sptr<Msl> _net;
sptr<Sampler> _sampler; sptr<Sampler> _sampler;
sptr<Encoder> _encoder; sptr<Encoder> _encoder;
sptr<Renderer> _renderer; sptr<Renderer> _renderer;
sptr<Msl> _net; sptr<CudaArray<float>> _coords;
sptr<CudaArray<glm::vec3>> _sphericalCoords;
sptr<CudaArray<float>> _depths; sptr<CudaArray<float>> _depths;
sptr<CudaArray<float>> _encoded; sptr<CudaArray<float>> _encoded;
sptr<CudaArray<glm::vec4>> _layeredColors; sptr<CudaArray<glm::vec4>> _layeredColors;
......
#include "Msl.h" #include "Msl.h"
#include <time.h> #include <time.h>
Msl::Msl(int batchSize, int samples) : batchSize(batchSize), samples(samples), net(nullptr) {} Msl::Msl() : net(nullptr) {}
bool Msl::load(const std::string &netDir) bool Msl::load(const std::string &netPath) {
{
net = new Net(); net = new Net();
if (!net->load(netDir + "msl.trt")) if (net->load(netPath))
return false; return true;
return true; dispose();
return false;
} }
void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
{
net->bindResource("Encoded", resEncoded); net->bindResource("Encoded", resEncoded);
net->bindResource("Depths", resDepths); net->bindResource("Depths", resDepths);
net->bindResource("Colors", resColors); net->bindResource("Colors", resColors);
} }
bool Msl::infer() bool Msl::infer() { return net->infer(); }
{
if (!net->infer())
return false;
return true;
}
bool Msl::dispose() void Msl::dispose() {
{ if (net != nullptr) {
if (net != nullptr)
{
net->dispose(); net->dispose();
delete net; delete net;
net = nullptr; net = nullptr;
} }
return true;
} }
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
#include "Net.h" #include "Net.h"
class Msl class Msl {
{
public: public:
int batchSize;
int samples;
Net *net; Net *net;
Msl(int batchSize, int samples); Msl();
virtual bool load(const std::string &netDir); virtual bool load(const std::string &netDir);
virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors);
virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors); virtual bool infer();
virtual void dispose();
virtual bool infer();
virtual bool dispose();
}; };
#include "half.h" #include "../utils/half.h"
#include "Net.h" #include "Net.h"
#include <fstream> #include <fstream>
#include <numeric> #include <numeric>
......
#pragma once #pragma once
#include "Common.h" #include "../utils/common.h"
class Net { class Net {
......
#include "Nmsl2.h" #include "Nmsl2.h"
#include <time.h> #include <time.h>
Nmsl2::Nmsl2(int batchSize, int samples) : Msl(batchSize, samples), Nmsl2::Nmsl2(int batchSize, int samples)
resRaw1(nullptr), resRaw2(nullptr), : batchSize(batchSize),
fcNet1(nullptr), fcNet2(nullptr), catNet(nullptr) {} samples(samples),
resRaw1(nullptr),
resRaw2(nullptr),
fcNet1(nullptr),
fcNet2(nullptr),
catNet(nullptr) {}
bool Nmsl2::load(const std::string &netDir) bool Nmsl2::load(const std::string &netDir) {
{ fcNet1 = new Net();
fcNet1 = new Net(); fcNet2 = new Net();
fcNet2 = new Net(); catNet = new Net();
catNet = new Net(); if (!fcNet1->load(netDir + "fc1.trt") || !fcNet2->load(netDir + "fc2.trt") ||
if (!fcNet1->load(netDir + "fc1.trt") || !fcNet2->load(netDir + "fc2.trt") || !catNet->load(netDir + "cat.trt"))
!catNet->load(netDir + "cat.trt")) return false;
return false; resRaw1 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
resRaw1 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4))); resRaw2 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
resRaw2 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4))); return true;
return true;
} }
void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
{ fcNet1->bindResource("Encoded", resEncoded);
fcNet1->bindResource("Encoded", resEncoded); fcNet1->bindResource("Raw", resRaw1.get());
fcNet1->bindResource("Raw", resRaw1.get()); fcNet2->bindResource("Encoded", resEncoded);
fcNet2->bindResource("Encoded", resEncoded); fcNet2->bindResource("Raw", resRaw2.get());
fcNet2->bindResource("Raw", resRaw2.get()); catNet->bindResource("Raw1", resRaw1.get());
catNet->bindResource("Raw1", resRaw1.get()); catNet->bindResource("Raw2", resRaw2.get());
catNet->bindResource("Raw2", resRaw2.get()); catNet->bindResource("Depths", resDepths);
catNet->bindResource("Depths", resDepths); catNet->bindResource("Colors", resColors);
catNet->bindResource("Colors", resColors);
} }
bool Nmsl2::infer() bool Nmsl2::infer() {
{ // CudaStream stream1, stream2;
//CudaStream stream1, stream2; if (!fcNet1->infer())
if (!fcNet1->infer()) return false;
return false; if (!fcNet2->infer())
if (!fcNet2->infer()) return false;
return false; if (!catNet->infer())
if (!catNet->infer()) return false;
return false; return true;
return true;
} }
bool Nmsl2::dispose() void Nmsl2::dispose() {
{ if (fcNet1 != nullptr) {
if (fcNet1 != nullptr) fcNet1->dispose();
{ delete fcNet1;
fcNet1->dispose(); fcNet1 = nullptr;
delete fcNet1; }
fcNet1 = nullptr; if (fcNet2 != nullptr) {
} fcNet2->dispose();
if (fcNet2 != nullptr) delete fcNet2;
{ fcNet2 = nullptr;
fcNet2->dispose(); }
delete fcNet2; if (catNet != nullptr) {
fcNet2 = nullptr; catNet->dispose();
} delete catNet;
if (catNet != nullptr) catNet = nullptr;
{ }
catNet->dispose(); resRaw1 = nullptr;
delete catNet; resRaw2 = nullptr;
catNet = nullptr;
}
resRaw1 = nullptr;
resRaw2 = nullptr;
return true;
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment