From c10f614f57249c37b482d334e06e6dd0e16a933e Mon Sep 17 00:00:00 2001
From: Nianchen Deng <dengnianchen@sjtu.edu.cn>
Date: Sat, 8 May 2021 21:35:20 +0800
Subject: [PATCH] sync

---
 .clang-format                           | 108 ++++
 .vscode/settings.json                   |   1 -
 .vscode/tasks.json                      |  17 -
 batch_export_net.sh                     |  42 ++
 batch_infer.sh                          |  35 +-
 components/fnr.py                       |  54 +-
 components/foveation.py                 |  35 +-
 cpp/Makefile.config                     |   4 +-
 cpp/msl_infer/Common.h                  | 155 ------
 cpp/msl_infer/Encoder.cu                |  15 +-
 cpp/msl_infer/Encoder.h                 |   3 +-
 cpp/msl_infer/Enhancement.cu            |   2 +-
 cpp/msl_infer/Enhancement.h             |   2 +-
 cpp/msl_infer/InferPipeline.cpp         |  99 ++--
 cpp/msl_infer/InferPipeline.h           |  24 +-
 cpp/msl_infer/Msl.cpp                   |  29 +-
 cpp/msl_infer/Msl.h                     |  20 +-
 cpp/msl_infer/Net.cpp                   |   2 +-
 cpp/msl_infer/Net.h                     |   2 +-
 cpp/msl_infer/Nmsl2.cpp                 | 111 ++--
 cpp/msl_infer/Nmsl2.h                   |   4 +-
 cpp/msl_infer/Renderer.cu               |  30 +-
 cpp/msl_infer/Renderer.h                |   9 +-
 cpp/msl_infer/Resource.h                | 159 ------
 cpp/msl_infer/Sampler.cu                |  49 +-
 cpp/msl_infer/Sampler.h                 |  19 +-
 cpp/msl_infer/SynthesisPipeline.cpp     | 135 ++---
 cpp/msl_infer/SynthesisPipeline.h       |  68 +--
 cpp/msl_infer/View.cu                   |   2 +-
 cpp/msl_infer/View.h                    |   2 +-
 cpp/msl_infer_test/Makefile             |   2 +-
 cpp/msl_infer_test/main.cpp             | 648 ++++++++++++------------
 cpp/{msl_infer => utils}/Formatter.h    |   0
 cpp/{msl_infer => utils}/Logger.cpp     |   0
 cpp/{msl_infer => utils}/Logger.h       |   2 +-
 cpp/utils/Resource.h                    | 136 +++++
 cpp/utils/common.h                      | 138 +++++
 cpp/utils/cuda.h                        |  11 +
 cpp/{msl_infer => utils}/half.h         |   0
 cpp/{msl_infer => utils}/thread_index.h |   0
 data/spherical_view_syn.py              |  18 +-
 notebook/gen_demo_mono.ipynb            |  38 +-
 notebook/gen_demo_stereo.ipynb          | 194 +++----
 notebook/test_foveation.ipynb           |  28 +-
 notebook/test_spherical_view_syn.ipynb  |  65 +--
 single_infer.sh                         |  30 ++
 tools/convert_data_desc.py              |   2 +-
 tools/export_snerf_fast.py              |  62 ++-
 tools/gen_eval_table.py                 |  91 ++++
 49 files changed, 1413 insertions(+), 1289 deletions(-)
 create mode 100644 .clang-format
 delete mode 100644 .vscode/tasks.json
 create mode 100755 batch_export_net.sh
 delete mode 100644 cpp/msl_infer/Common.h
 delete mode 100644 cpp/msl_infer/Resource.h
 rename cpp/{msl_infer => utils}/Formatter.h (100%)
 rename cpp/{msl_infer => utils}/Logger.cpp (100%)
 rename cpp/{msl_infer => utils}/Logger.h (95%)
 create mode 100644 cpp/utils/Resource.h
 create mode 100644 cpp/utils/common.h
 create mode 100644 cpp/utils/cuda.h
 rename cpp/{msl_infer => utils}/half.h (100%)
 rename cpp/{msl_infer => utils}/thread_index.h (100%)
 create mode 100755 single_infer.sh
 create mode 100644 tools/gen_eval_table.py

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..edc803a
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,108 @@
+---
+Language:        Cpp
+# BasedOnStyle:  LLVM
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands:   true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:   
+  AfterClass:      true
+  AfterControlStatement: false
+  AfterEnum:       true
+  AfterFunction:   true
+  AfterNamespace:  true
+  AfterObjCDeclaration: true
+  AfterStruct:     true
+  AfterUnion:      true
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     100
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:   
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IncludeCategories: 
+  - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
+    Priority:        2
+  - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
+    Priority:        3
+  - Regex:           '.*'
+    Priority:        1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 4
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+ReflowComments:  true
+SortIncludes:    false
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Cpp11
+TabWidth:        4
+UseTab:          Never
+...
+
diff --git a/.vscode/settings.json b/.vscode/settings.json
index d6a0452..920979e 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -11,5 +11,4 @@
         "__nullptr": "cpp"
     },
     "python.pythonPath": "/home/dengnc/miniconda3/bin/python",
-    "jupyter.jupyterServerType": "local"
 }
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
deleted file mode 100644
index 12388cd..0000000
--- a/.vscode/tasks.json
+++ /dev/null
@@ -1,17 +0,0 @@
-{
-    // See https://go.microsoft.com/fwlink/?LinkId=733558
-    // for the documentation about the tasks.json format
-    "version": "2.0.0",
-    "tasks": [
-        {
-            "label": "echo",
-            "type": "shell",
-            "command": "echo Hello",
-            "problemMatcher": [],
-            "group": {
-                "kind": "build",
-                "isDefault": true
-            }
-        }
-    ]
-}
\ No newline at end of file
diff --git a/batch_export_net.sh b/batch_export_net.sh
new file mode 100755
index 0000000..3a1f8f6
--- /dev/null
+++ b/batch_export_net.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/bash
+# Export every trained eval network to ONNX, then build a TensorRT engine from each ONNX file.
+datadir='data/__new/classroom_fovea_r360x80_t0.6'
+onnxdir="$datadir/eval_onnx"
+trtdir="$datadir/eval_trt"
+epochs=50
+
+if [ ! -d "$onnxdir" ]; then
+    echo "make directory for ONNX"
+    mkdir -p "$onnxdir"
+fi
+if [ ! -d "$trtdir/time" ]; then
+    echo "make directory for TensorRT"
+    # -p also creates $trtdir itself and repairs a tree whose time/ subdirectory is missing
+    mkdir -p "$trtdir/time"
+fi
+
+# nets: 1, 2, 4, 8
+# layers: 2, 4, 8
+# channels: 64 128 256 512 1024
+for n_nets in 1 2 4 8; do
+    for n_layers in 2 4 8; do
+        for nf in 64 128 256 512 1024; do
+            configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
+            exportname="eval_${n_nets}x${nf}x${n_layers}"
+            pth_path="$datadir/$configid/model-epoch_$epochs.pth"
+            onnx_path="$onnxdir/$exportname.onnx"
+            trt_path="$trtdir/$exportname.trt"
+            time_perf_path="$trtdir/time/$exportname.json"
+            if [ -f "$pth_path" ]; then
+                if [ ! -f "$onnx_path" ]; then
+                    # Export ONNX model
+                    python tools/export_snerf_fast.py "$pth_path" -b 65536 -o "$onnx_path"
+                fi
+                if [ -f "$onnx_path" ] && [ ! -f "$trt_path" ]; then
+                    # Export TensorRT engine (skipped when the ONNX export above produced no file)
+                    trtexec --onnx="$onnx_path" --fp16 --saveEngine="$trt_path" --workspace=4096 --exportTimes="$time_perf_path" --noDataTransfers
+                fi
+            fi
+        done
+    done
+done
\ No newline at end of file
diff --git a/batch_infer.sh b/batch_infer.sh
index 3ebd340..cbeae90 100755
--- a/batch_infer.sh
+++ b/batch_infer.sh
@@ -8,7 +8,7 @@ epochs=50
 
 # nets: 1, 2, 4, 8
 # layers: 2, 4, 8
-# channels: 128 256 512
+# channels: 64 128 256 512 1024
 n_nets_arr=(1 2 4 8 1 2 4 8 1 2 4 8)
 n_layers_arr=(2 2 2 2 4 4 4 4 8 8 8 8)
 n_nets=${n_nets_arr[$testcase]}
@@ -16,21 +16,22 @@ n_layers=${n_layers_arr[$testcase]}
 
 for nf in 64 128 256 512 1024; do
     configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.00-7.00_s64_~p"
-    if [ -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
-        continue
-    fi
-    cont_epoch=0
-    for ((i=$epochs-1;i>0;i--)) do
-        if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then
-            cont_epoch=$i
-            break
+    if [ ! -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
+        cont_epoch=0
+        for ((i=$epochs-1;i>0;i--)) do
+            if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then
+                cont_epoch=$i
+                break
+            fi
+        done
+        if [ ${cont_epoch} -gt 0 ]; then
+            python run_spherical_view_syn.py $trainset -e $epochs -m $configid/model-epoch_${cont_epoch}.pth
+        else
+            python run_spherical_view_syn.py $trainset -i $configid -e $epochs
         fi
-    done
-    if [ ${cont_epoch} -gt 0 ]; then
-        python run_spherical_view_syn.py $trainset -e $epochs -m $configid/model-epoch_${cont_epoch}.pth
-    else
-        python run_spherical_view_syn.py $trainset -i $configid -e $epochs
     fi
-    python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf
-    python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf
-done
+    if ! ls $datadir/$configid/output_$epochs/perf_r120x80* >/dev/null 2>&1; then
+        python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf
+        python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf
+    fi
+done
\ No newline at end of file
diff --git a/components/fnr.py b/components/fnr.py
index 8cf165c..cdad505 100644
--- a/components/fnr.py
+++ b/components/fnr.py
@@ -14,7 +14,6 @@ class FoveatedNeuralRenderer(object):
                  layers_res: List[Tuple[int, int]],
                  layers_net: nn.ModuleList,
                  output_res: Tuple[int, int], *,
-                 using_mask=True,
                  device: torch.device = None):
         super().__init__()
         self.layers_net = layers_net.to(device=device)
@@ -34,7 +33,6 @@ class FoveatedNeuralRenderer(object):
             'normalized': True
         }, output_res, device=device)
         self.foveation = Foveation(layers_fov, layers_res, output_res, device=device)
-        self.layers_mask = self.foveation.get_layers_mask() if using_mask else None
         self.device = device
 
     def to(self, device: torch.device):
@@ -43,8 +41,6 @@ class FoveatedNeuralRenderer(object):
         self.cam.to(device)
         for cam in self.layers_cam:
             cam.to(device)
-        if self.layers_mask is not None:
-            self.layers_mask = self.layers_mask.to(device)
         self.device = device
         return self
 
@@ -52,32 +48,46 @@ class FoveatedNeuralRenderer(object):
         return self.render(*args, **kwds)
 
     def render(self, view: Trans, gaze, right_gaze=None, *,
-               stereo_disparity=0, ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]:
+               stereo_disparity=0,
+               using_mask=True,
+               ret_raw=False) -> Union[Mapping[str, torch.Tensor], Tuple[Mapping[str, torch.Tensor]]]:
         if stereo_disparity > TINY_FLOAT:
             left_view = Trans(
-                view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=view.device())),
+                view.trans_point(torch.tensor([-stereo_disparity / 2, 0, 0], device=self.device)),
                 view.r)
             right_view = Trans(
-                view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=view.device())),
+                view.trans_point(torch.tensor([stereo_disparity / 2, 0, 0], device=self.device)),
                 view.r)
             left_gaze = gaze
             right_gaze = gaze if right_gaze is None else right_gaze
+
+            left_layers_mask = self.foveation.get_layers_mask(left_gaze) \
+                if using_mask else [None] * 3
+            right_layers_mask = self.foveation.get_layers_mask(right_gaze) \
+                if using_mask else [None] * 3
             res_raw_left = [
-                self._render(i, left_view, left_gaze if i < 2 else None)['color']
+                self._render(self.layers_net[i], self.layers_cam[i], left_view,
+                             left_gaze if i < 2 else None,
+                             layer_mask=left_layers_mask[i])['color']
                 for i in range(3)
             ]
             res_raw_right = [
-                self._render(i, right_view, right_gaze if i < 2 else None)['color']
+                self._render(self.layers_net[i], self.layers_cam[i], right_view,
+                             right_gaze if i < 2 else None,
+                             layer_mask=right_layers_mask[i])['color']
                 for i in range(3)
             ]
             return self._gen_output(res_raw_left, left_gaze, ret_raw), \
                 self._gen_output(res_raw_right, right_gaze, ret_raw)
         else:
+            layers_mask = self.foveation.get_layers_mask(gaze) if using_mask else None
             res_raw = [
-                self._render(i, view, gaze if i < 2 else None)['color']
+                self._render(self.layers_net[i], self.layers_cam[i], view, gaze if i < 2 else None,
+                             layer_mask=layers_mask[i] if layers_mask is not None else None)['color']
                 for i in range(3)
             ]
             return self._gen_output(res_raw, gaze, ret_raw)
+
         '''
         if mono_trans != None and shift == 0:  # do warp
             fovea_depth[torch.isnan(fovea_depth)] = 50
@@ -105,25 +115,25 @@ class FoveatedNeuralRenderer(object):
         ], (gaze[0], gaze[1]), [0, shift, shift] if shift != 0 else None)
         '''
 
-    def _render(self, layer: int, view: Trans, gaze=None, ret_depth=False) -> Mapping[str, torch.Tensor]:
-        net = self.layers_net[layer]
-        cam = self.layers_cam[layer]
+    def _render(self, net, cam: CameraParam, view: Trans, gaze=None, *,
+                ret_depth=False,
+                layer_mask=None) -> Mapping[str, torch.Tensor]:
         if gaze is not None:
             cam = self._adjust_cam(cam, gaze)
-        rays_o, rays_d = cam.get_global_rays(view, True)  # (1, N, 3)
-        if self.layers_mask is not None and layer < len(self.layers_mask):
-            mask = self.layers_mask[layer] >= 0
-            rays_o = rays_o[:, mask]
-            rays_d = rays_d[:, mask]
+        rays_o, rays_d = cam.get_global_rays(view, False)  # (1, H, W, 3)
+        if layer_mask is not None:
+            infer_mask = layer_mask >= 0
+            rays_o = rays_o[:, infer_mask]
+            rays_d = rays_d[:, infer_mask]
             net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
             ret = {
-                'color': torch.zeros(1, cam.res[0], cam.res[1], 3)
+                'color': torch.zeros(1, cam.res[0], cam.res[1], 3, device=self.device)
             }
-            ret['color'][:, mask] = net_output['color']
+            ret['color'][:, infer_mask] = net_output['color']
             ret['color'] = ret['color'].permute(0, 3, 1, 2)
             if ret_depth:
                 ret['depth'] = torch.zeros(1, cam.res[0], cam.res[1])
-                ret['depth'][:, mask] = net_output['depth']
+                ret['depth'][:, infer_mask] = net_output['depth']
             return ret
         else:
             net_output = net(rays_o.view(-1, 3), rays_d.view(-1, 3), ret_depth=ret_depth)
@@ -140,7 +150,7 @@ class FoveatedNeuralRenderer(object):
             'blended': blended
         }
         if ret_raw:
-            ret['layers_raw'] = layers_img,
+            ret['layers_raw'] = layers_img
             ret['blended_raw'] = self.foveation.synthesis(layers_img, gaze)
         return ret
 
diff --git a/components/foveation.py b/components/foveation.py
index 32ef769..00e4a20 100644
--- a/components/foveation.py
+++ b/components/foveation.py
@@ -31,7 +31,7 @@ class Foveation(object):
 
     def synthesis(self, layers: List[torch.Tensor],
                   fovea_center: Tuple[float, float],
-                  shifts: List[int] = None) -> torch.Tensor:
+                  shifts: List[int] = None, do_blend=True) -> torch.Tensor:
         """
         Generate foveated retinal image by blending fovea layers
         **Note: current implementation only support two fovea layers**
@@ -55,8 +55,12 @@ class Foveation(object):
             if shifts != None:
                 grid = img.horizontal_shift(grid, shifts[i], -2)
             # (1, 1, H:out, W:out)
-            blend = nn_f.grid_sample(self.eye_fovea_blend[i][None, None, ...], grid)
-            output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid) * blend)
+            weight = self.eye_fovea_blend[i][None, None]
+            if not do_blend:
+                # Sample an all-ones weight map instead of the precomputed blend map
+                weight = torch.ones_like(weight)
+            blend = nn_f.grid_sample(weight, grid, align_corners=False)
+            output.mul_(1 - blend).add_(nn_f.grid_sample(layers[i], grid, align_corners=False) * blend)
         return output
 
     def get_layer_size_in_final_image(self, i: int) -> int:
@@ -94,7 +98,7 @@ class Foveation(object):
         r = torch.norm(p - R, dim=2)  # (size, size, 2)
         return misc.smooth_step(R, R * self.blend, r)
 
-    def get_layers_mask(self) -> List[torch.Tensor]:
+    def get_layers_mask(self, gaze) -> List[torch.Tensor]:
         """
-        Generate mask images for layers[:-1]
+        Generate mask images for all layers
         the meaning of values in mask images:
@@ -106,15 +110,26 @@ class Foveation(object):
-        :return: Mask images for layers except outermost
+        :return: Mask images for every layer, including the outermost
         """
         layers_mask = []
-        for i in range(self.n_layers - 1):
+        for i in range(self.n_layers):
             layers_mask.append(torch.ones(*self.layers_res[i], device=self.device) * -1)
-            r = torch.norm(misc.meshgrid(*self.layers_res[i], normalize=True).to(device=self.device) * 2 - 1, dim=-1)
+            if i == self.n_layers - 1:
+                c = torch.tensor([
+                    (gaze[0] + 0.5 * self.out_res[1]) / self.out_res[0],
+                    (gaze[1] + 0.5 * self.out_res[0]) / self.out_res[0]
+                ], device=self.device)
+            else:
+                c = torch.tensor([0.5, 0.5], device=self.device)
+            coord = misc.meshgrid(*self.layers_res[i]).to(device=self.device) / self.layers_res[i][0]
+            r = 2 * torch.norm(coord - c, dim=-1)
             inner_radius = self.get_source_layer_cover_size_in_target_layer(
-                self.layers_fov[i - 1], self.layers_fov[i],
-                self.layers_res[i][0]) / self.layers_res[i][0] if i > 0 else 0
-            bounds = [inner_radius * (1 - self.blend), inner_radius, self.blend, 1]
+                self.layers_fov[i - 1], self.layers_fov[i], self.layers_res[i][0]) / self.layers_res[i][0] \
+                if i > 0 else 0
+            if i == self.n_layers - 1:
+                bounds = [inner_radius * (1 - self.blend), inner_radius, 100, 100]
+            else:
+                bounds = [inner_radius * (1 - self.blend), inner_radius, self.blend, 1]
             for bi in range(len(bounds) - 1):
                 region = torch.logical_and(r > bounds[bi], r <= bounds[bi + 1])
                 layers_mask[i][region] = bi + \
                     (r[region] - bounds[bi]) / (bounds[bi + 1] - bounds[bi])
-        return layers_mask
\ No newline at end of file
+        return layers_mask
diff --git a/cpp/Makefile.config b/cpp/Makefile.config
index 908140b..6c677db 100644
--- a/cpp/Makefile.config
+++ b/cpp/Makefile.config
@@ -128,7 +128,7 @@ endif
 #########################
 INCPATHS=
 LIBPATHS=
-COMMON_LIBS= -lGLEW -lglfw3 -lGL -lX11 -lpthread -lXrandr -lXinerama -lXcursor -lXi -ldl
+COMMON_LIBS= -lGLEW -lglfw -lGL -lX11 -lpthread -lXrandr #-lXinerama -lXcursor -lXi -ldl
 
 # Add extra libraries if TRT_STATIC is enabled
 ifeq ($(TRT_STATIC), 1)
@@ -207,7 +207,7 @@ else ifeq ($(TARGET), aarch64)
   endif
 endif
 ifeq ($(ENABLE_MYELIN), 1)
-  COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
+  #COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
 endif
 
 .SUFFIXES:
diff --git a/cpp/msl_infer/Common.h b/cpp/msl_infer/Common.h
deleted file mode 100644
index e9898ac..0000000
--- a/cpp/msl_infer/Common.h
+++ /dev/null
@@ -1,155 +0,0 @@
-#pragma once
-#include <memory>
-#include <stdexcept>
-#include <vector>
-#include <string>
-#include <sstream>
-#include <GL/glew.h>
-#include <cuda_gl_interop.h>
-#include "../glm/glm.hpp"
-#include "Logger.h"
-
-inline unsigned int getElementSize(nv::DataType t)
-{
-	switch (t)
-	{
-	case nv::DataType::kINT32:
-		return 4;
-	case nv::DataType::kFLOAT:
-		return 4;
-	case nv::DataType::kHALF:
-		return 2;
-	case nv::DataType::kBOOL:
-	case nv::DataType::kINT8:
-		return 1;
-	}
-	throw std::runtime_error("Invalid DataType.");
-	return 0;
-}
-
-template <typename T>
-void dumpRow(std::ostream &os, T* buf, size_t n)
-{
-	os << buf[0];
-	for (size_t i = 1; i < n; ++i) {
-		os << " " << buf[i];
-	}
-	os << std::endl;
-}
-
-template <typename T>
-void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount, size_t maxDumpRows = 0)
-{
-	T *typedBuf = static_cast<T *>(buf);
-	size_t numItems = bufSize / sizeof(T);
-	size_t nInLastRow = numItems % rowCount;
-	size_t rows;
-	if (nInLastRow == 0) {
-		rows = numItems / rowCount;
-		nInLastRow = rowCount;
-	} else {
-		rows = numItems / rowCount + 1;
-	}
-	if (maxDumpRows == 0) {
-		for (size_t i = 0; i < rows - 1; ++i) {
-			dumpRow(os, typedBuf, rowCount);
-			typedBuf += rowCount;
-		}
-		dumpRow(os, typedBuf, nInLastRow);
-	} else {
-		for (size_t i = 0; i < maxDumpRows / 2; ++i)
-			dumpRow(os, typedBuf + i * rowCount, rowCount);
-		os << "..." << std::endl;
-		for (size_t i = rows - maxDumpRows + maxDumpRows / 2; i < rows - 1; ++i)
-			dumpRow(os, typedBuf + i * rowCount, rowCount);
-		dumpRow(os, typedBuf + (rows - 1) * rowCount, nInLastRow);
-	}
-}
-
-class CudaStream
-{
-public:
-	CudaStream()
-	{
-		cudaStreamCreate(&stream);
-	}
-
-	operator cudaStream_t()
-	{
-		return stream;
-	}
-
-	virtual ~CudaStream()
-	{
-		cudaStreamDestroy(stream);
-	}
-
-private:
-	cudaStream_t stream;
-};
-
-class CudaEvent
-{
-public:
-	CudaEvent()
-	{
-		cudaEventCreate(&mEvent);
-	}
-
-	operator cudaEvent_t()
-	{
-		return mEvent;
-	}
-
-	virtual ~CudaEvent()
-	{
-		cudaEventDestroy(mEvent);
-	}
-
-private:
-	cudaEvent_t mEvent;
-};
-
-struct CudaMapScope
-{
-	std::vector<cudaGraphicsResource_t> resources_;
-	cudaStream_t stream_;
-
-	CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
-				 cudaStream_t stream = nullptr) : resources_(resources), stream_(stream) {}
-
-	~CudaMapScope()
-	{
-		if (!resources_.empty())
-			cudaGraphicsUnmapResources(resources_.size(),
-									   resources_.data(), stream_);
-	}
-
-	cudaError_t map()
-	{
-		if (!resources_.empty())
-			return cudaGraphicsMapResources(resources_.size(),
-											resources_.data(), stream_);
-		return cudaSuccess;
-	}
-};
-
-template <typename T>
-struct Destroy
-{
-	void operator()(T *t)
-	{
-		if (t != nullptr)
-			t->destroy();
-	}
-};
-
-template <class T>
-using uptr = std::unique_ptr<T, ::Destroy<T>>;
-template <class T>
-using sptr = std::shared_ptr<T>;
-
-#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
-
-#include "Resource.h"
-#include "Formatter.h"
\ No newline at end of file
diff --git a/cpp/msl_infer/Encoder.cu b/cpp/msl_infer/Encoder.cu
index 51e0d16..589f6c7 100644
--- a/cpp/msl_infer/Encoder.cu
+++ b/cpp/msl_infer/Encoder.cu
@@ -1,5 +1,5 @@
 #include "Encoder.h"
-#include "thread_index.h"
+#include "../utils/cuda.h"
 
 /// idx3.z = 0: x, y, z, sin(x), sin(y), sin(z), cos(x), cos(y), cos(z)
 /// idx3.z = 1: sin(2x), sin(2y), sin(2z), cos(2x), cos(2y), cos(2z)
@@ -7,12 +7,11 @@
 /// idx3.z = n_freq-1: sin(2^(n_freq-1)x), sin(2^(n_freq-1)y), sin(2^(n_freq-1)z),
 ///                    cos(2^(n_freq-1)x), cos(2^(n_freq-1)y), cos(2^(n_freq-1)z)
 /// Dispatch (n_batch, n_chns, n_freqs)
-__global__ void cu_encode(float *output, float *input, float *freqs, uint n)
-{
+__global__ void cu_encode(float *output, float *input, float *freqs, uint n) {
     glm::uvec3 idx3 = IDX3;
     if (idx3.x >= n)
         return;
-    uint n = blockDim.x, inChns = blockDim.y, nFreqs = blockDim.z;
+    uint inChns = blockDim.y, nFreqs = blockDim.z;
     uint i = idx3.x, chn = idx3.y, freq = idx3.z;
     uint elem = i * inChns + chn;
     uint outChns = inChns * (nFreqs * 2 + 1);
@@ -26,16 +25,14 @@ __global__ void cu_encode(float *output, float *input, float *freqs, uint n)
     output[base + inChns * (freq * 2 + 2)] = c;
 }
 
-void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input)
-{
+void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input) {
     dim3 blkSize(1024 / _chns / _multires, _chns, _multires);
     dim3 grdSize((uint)ceil(input->n() / (float)blkSize.x), 1, 1);
-    cu_encode<<<grdSize, blkSize>>>(output->getBuffer(), *input, *_freqs, input->n());
+    CU_INVOKE(cu_encode)(output->getBuffer<float>(), *input, *_freqs, input->n());
     CHECK_EX(cudaGetLastError());
 }
 
-void Encoder::_genFreqArray()
-{
+void Encoder::_genFreqArray() {
     float *arr = new float[_multires];
     arr[0] = 1.0f;
     for (auto i = 1; i < _multires; ++i)
diff --git a/cpp/msl_infer/Encoder.h b/cpp/msl_infer/Encoder.h
index 98e4a08..ef56566 100644
--- a/cpp/msl_infer/Encoder.h
+++ b/cpp/msl_infer/Encoder.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 
 class Encoder {
 public:
@@ -14,5 +14,4 @@ private:
     sptr<CudaArray<float>> _freqs;
 
     void _genFreqArray();
-
 };
\ No newline at end of file
diff --git a/cpp/msl_infer/Enhancement.cu b/cpp/msl_infer/Enhancement.cu
index c928ea5..9fb12ea 100644
--- a/cpp/msl_infer/Enhancement.cu
+++ b/cpp/msl_infer/Enhancement.cu
@@ -1,5 +1,5 @@
 #include "Enhancement.h"
-#include "thread_index.h"
+#include "../utils/cuda.h"
 
 #define max(__a__, __b__) (__a__ > __b__ ? __a__ : __b__)
 #define min(__a__, __b__) (__a__ < __b__ ? __a__ : __b__)
diff --git a/cpp/msl_infer/Enhancement.h b/cpp/msl_infer/Enhancement.h
index b7c6d90..0c99f7d 100644
--- a/cpp/msl_infer/Enhancement.h
+++ b/cpp/msl_infer/Enhancement.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 
 class Enhancement
 {
diff --git a/cpp/msl_infer/InferPipeline.cpp b/cpp/msl_infer/InferPipeline.cpp
index 5f3fbed..76a84af 100644
--- a/cpp/msl_infer/InferPipeline.cpp
+++ b/cpp/msl_infer/InferPipeline.cpp
@@ -1,40 +1,34 @@
 #include "InferPipeline.h"
 #include "Nmsl2.h"
 
-InferPipeline::InferPipeline(
-    const std::string &netDir, bool isNmsl, uint batchSize,
-    uint samples) : _batchSize(batchSize),
-                    _samples(samples),
-                    _sampler(new Sampler({1.0f, 50.0f}, samples)),
-                    _encoder(new Encoder(10, 3)),
-                    _renderer(new Renderer()),
-                    _net(isNmsl ? new Nmsl2(batchSize, samples) : new Msl(batchSize, samples))
-{
-    uint batchSizeForNet = _batchSize * _samples;
-    _sphericalCoords = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(batchSizeForNet));
-    _depths = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet));
-    _encoded = sptr<CudaArray<float>>(new CudaArray<float>(batchSizeForNet * _encoder->outDim()));
-    _layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(batchSizeForNet));
-    _net->load(netDir);
+InferPipeline::InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay, glm::vec2 depthRange,
+                             int encodeDim, int coordChns)
+    : _nRays(nRays),
+      _nSamplesPerRay(nSamplesPerRay),
+      _net(net),
+      _sampler(new Sampler(depthRange, nSamplesPerRay, coordChns == 3)),
+      _encoder(new Encoder(encodeDim, coordChns)),
+      _renderer(new Renderer()) {
+    uint nSamples = _nRays * _nSamplesPerRay;
+    _coords = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * coordChns));
+    _depths = sptr<CudaArray<float>>(new CudaArray<float>(nSamples));
+    _encoded = sptr<CudaArray<float>>(new CudaArray<float>(nSamples * _encoder->outDim()));
+    _layeredColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(nSamples));
     _net->bindResources(_encoded.get(), _depths.get(), _layeredColors.get());
 }
 
-void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
-                        sptr<CudaArray<glm::vec3>> rays,
-                        glm::vec3 rayOrigin, bool showPerf)
-{
+void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
+                        glm::vec3 origin, bool showPerf) {
 
     CudaEvent eStart, eSampled, eEncoded, eInferred, eRendered;
 
     cudaEventRecord(eStart);
 
-    _sampler->sampleOnRays(_sphericalCoords, _depths, rays, rayOrigin);
+    _sampler->sampleOnRays(_coords, _depths, rays, origin);
 
     cudaEventRecord(eSampled);
-    
-    sptr<CudaArray<float>> coords(new CudaArray<float>((float *)_sphericalCoords->getBuffer(),
-                                                       _sphericalCoords->n() * 3));
-    _encoder->encode(_encoded, coords);
+
+    _encoder->encode(_encoded, _coords);
 
     cudaEventRecord(eEncoded);
 
@@ -46,8 +40,7 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
 
     cudaEventRecord(eRendered);
 
-    if (showPerf)
-    {
+    if (showPerf) {
         CHECK_EX(cudaDeviceSynchronize());
 
         float timeTotal, timeSample, timeEncode, timeInfer, timeRender;
@@ -59,34 +52,34 @@ void InferPipeline::run(sptr<CudaArray<glm::vec4>> o_colors,
 
         std::ostringstream sout;
         sout << "Infer pipeline: " << timeTotal << "ms (Sample: " << timeSample
-             << "ms, Encode: " << timeEncode << "ms, Infer: "
-             << timeInfer << "ms, Render: " << timeRender << "ms)";
+             << "ms, Encode: " << timeEncode << "ms, Infer: " << timeInfer
+             << "ms, Render: " << timeRender << "ms)";
         Logger::instance.info(sout.str());
     }
     /*
-	{
-		std::ostringstream sout;
-		sout << "Rays:" << std::endl;
-		dumpFloatArray(sout, *rays, 10);
-		Logger::instance.info(sout.str());
-	}
-	{
-		std::ostringstream sout;
-		sout << "Spherical coords:" << std::endl;
-		dumpFloatArray(sout, *sphericalCoords, 10);
-		Logger::instance.info(sout.str());
-	}
-	{
-		std::ostringstream sout;
-		sout << "Depths:" << std::endl;
-		dumpFloatArray(sout, *depths, 10);
-		Logger::instance.info(sout.str());
-	}
-	{
-		std::ostringstream sout;
-		sout << "Encoded:" << std::endl;
-		dumpFloatArray(sout, *encoded, 10, encoder.outDim());
-		Logger::instance.info(sout.str());
-	}
-	*/
+    {
+        std::ostringstream sout;
+        sout << "Rays:" << std::endl;
+        dumpFloatArray(sout, *rays, 10);
+        Logger::instance.info(sout.str());
+    }
+    {
+        std::ostringstream sout;
+        sout << "Spherical coords:" << std::endl;
+        dumpFloatArray(sout, *sphericalCoords, 10);
+        Logger::instance.info(sout.str());
+    }
+    {
+        std::ostringstream sout;
+        sout << "Depths:" << std::endl;
+        dumpFloatArray(sout, *depths, 10);
+        Logger::instance.info(sout.str());
+    }
+    {
+        std::ostringstream sout;
+        sout << "Encoded:" << std::endl;
+        dumpFloatArray(sout, *encoded, 10, encoder.outDim());
+        Logger::instance.info(sout.str());
+    }
+    */
 }
\ No newline at end of file
diff --git a/cpp/msl_infer/InferPipeline.h b/cpp/msl_infer/InferPipeline.h
index aa27fc1..74e9dc1 100644
--- a/cpp/msl_infer/InferPipeline.h
+++ b/cpp/msl_infer/InferPipeline.h
@@ -1,26 +1,26 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 #include "../msl_infer/Sampler.h"
 #include "../msl_infer/Encoder.h"
 #include "../msl_infer/Renderer.h"
 #include "../msl_infer/Msl.h"
 
-class InferPipeline
-{
-public:
-    InferPipeline(const std::string &netDir, bool isNmsl, uint batchSize, uint samples);
+class InferPipeline {
+  public:
+    InferPipeline(sptr<Msl> net, uint nRays, uint nSamplesPerRay,
+                  glm::vec2 depthRange, int encodeDim, int coordChns);
 
-    void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays,
-             glm::vec3 rayOrigin, bool showPerf = false);
+    void run(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec3>> rays, glm::vec3 origin,
+             bool showPerf = false);
 
-private:
-    uint _batchSize;
-    uint _samples;
+  private:
+    uint _nRays;
+    uint _nSamplesPerRay;
+    sptr<Msl> _net;
     sptr<Sampler> _sampler;
     sptr<Encoder> _encoder;
     sptr<Renderer> _renderer;
-    sptr<Msl> _net;
-    sptr<CudaArray<glm::vec3>> _sphericalCoords;
+    sptr<CudaArray<float>> _coords;
     sptr<CudaArray<float>> _depths;
     sptr<CudaArray<float>> _encoded;
     sptr<CudaArray<glm::vec4>> _layeredColors;
diff --git a/cpp/msl_infer/Msl.cpp b/cpp/msl_infer/Msl.cpp
index b6ff71a..e966669 100644
--- a/cpp/msl_infer/Msl.cpp
+++ b/cpp/msl_infer/Msl.cpp
@@ -1,37 +1,28 @@
 #include "Msl.h"
 #include <time.h>
 
-Msl::Msl(int batchSize, int samples) : batchSize(batchSize), samples(samples), net(nullptr) {}
+Msl::Msl() : net(nullptr) {}
 
-bool Msl::load(const std::string &netDir)
-{
+bool Msl::load(const std::string &netPath) {
     net = new Net();
-    if (!net->load(netDir + "msl.trt"))
-        return false;
-    return true;
+    if (net->load(netPath))
+        return true;
+    dispose();
+    return false;
 }
 
-void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors)
-{
+void Msl::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
     net->bindResource("Encoded", resEncoded);
     net->bindResource("Depths", resDepths);
     net->bindResource("Colors", resColors);
 }
 
-bool Msl::infer()
-{
-    if (!net->infer())
-        return false;
-    return true;
-}
+bool Msl::infer() { return net != nullptr && net->infer(); } // false when no net is loaded
 
-bool Msl::dispose()
-{
-    if (net != nullptr)
-    {
+void Msl::dispose() {
+    if (net != nullptr) {
         net->dispose();
         delete net;
         net = nullptr;
     }
-    return true;
 }
diff --git a/cpp/msl_infer/Msl.h b/cpp/msl_infer/Msl.h
index c8fa96b..0a9e3e7 100644
--- a/cpp/msl_infer/Msl.h
+++ b/cpp/msl_infer/Msl.h
@@ -1,21 +1,15 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 #include "Net.h"
 
-class Msl
-{
+class Msl {
 public:
-	int batchSize;
-	int samples;
     Net *net;
 
-	Msl(int batchSize, int samples);
+    Msl();
 
-	virtual bool load(const std::string &netDir);
-
-	virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors);
-
-	virtual bool infer();
-
-	virtual bool dispose();
+    virtual bool load(const std::string &netPath); // engine file path, passed to Net::load
+    virtual void bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors);
+    virtual bool infer();
+    virtual void dispose();
 };
diff --git a/cpp/msl_infer/Net.cpp b/cpp/msl_infer/Net.cpp
index 8477569..ec54135 100644
--- a/cpp/msl_infer/Net.cpp
+++ b/cpp/msl_infer/Net.cpp
@@ -1,4 +1,4 @@
-#include "half.h"
+#include "../utils/half.h"
 #include "Net.h"
 #include <fstream>
 #include <numeric>
diff --git a/cpp/msl_infer/Net.h b/cpp/msl_infer/Net.h
index 28e25dc..425508e 100644
--- a/cpp/msl_infer/Net.h
+++ b/cpp/msl_infer/Net.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 
 
 class Net {
diff --git a/cpp/msl_infer/Nmsl2.cpp b/cpp/msl_infer/Nmsl2.cpp
index ecd0f19..5d41d0f 100644
--- a/cpp/msl_infer/Nmsl2.cpp
+++ b/cpp/msl_infer/Nmsl2.cpp
@@ -1,68 +1,65 @@
 #include "Nmsl2.h"
 #include <time.h>
 
-Nmsl2::Nmsl2(int batchSize, int samples) : Msl(batchSize, samples),
-										   resRaw1(nullptr), resRaw2(nullptr),
-										   fcNet1(nullptr), fcNet2(nullptr), catNet(nullptr) {}
+Nmsl2::Nmsl2(int batchSize, int samples)
+    : resRaw1(nullptr),
+      resRaw2(nullptr),
+      fcNet1(nullptr),
+      fcNet2(nullptr),
+      catNet(nullptr),
+      batchSize(batchSize), // batchSize/samples are declared after catNet in Nmsl2.h
+      samples(samples) {}
 
-bool Nmsl2::load(const std::string &netDir)
-{
-	fcNet1 = new Net();
-	fcNet2 = new Net();
-	catNet = new Net();
-	if (!fcNet1->load(netDir + "fc1.trt") || !fcNet2->load(netDir + "fc2.trt") ||
-		!catNet->load(netDir + "cat.trt"))
-		return false;
-	resRaw1 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
-	resRaw2 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
-	return true;
+bool Nmsl2::load(const std::string &netDir) { // netDir is a prefix: netDir + "fc1.trt" etc.
+    fcNet1 = new Net();
+    fcNet2 = new Net();
+    catNet = new Net();
+    bool loaded = fcNet1->load(netDir + "fc1.trt") && fcNet2->load(netDir + "fc2.trt") &&
+                  catNet->load(netDir + "cat.trt");
+    if (!loaded) { dispose(); return false; } // free the nets created above, like Msl::load
+    resRaw1 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
+    resRaw2 = sptr<Resource>(new CudaBuffer(batchSize * samples / 2 * sizeof(float4)));
+    return true;
+}
 
-void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors)
-{
-	fcNet1->bindResource("Encoded", resEncoded);
-	fcNet1->bindResource("Raw", resRaw1.get());
-	fcNet2->bindResource("Encoded", resEncoded);
-	fcNet2->bindResource("Raw", resRaw2.get());
-	catNet->bindResource("Raw1", resRaw1.get());
-	catNet->bindResource("Raw2", resRaw2.get());
-	catNet->bindResource("Depths", resDepths);
-	catNet->bindResource("Colors", resColors);
+void Nmsl2::bindResources(Resource *resEncoded, Resource *resDepths, Resource *resColors) {
+    fcNet1->bindResource("Encoded", resEncoded);
+    fcNet1->bindResource("Raw", resRaw1.get());
+    fcNet2->bindResource("Encoded", resEncoded);
+    fcNet2->bindResource("Raw", resRaw2.get());
+    catNet->bindResource("Raw1", resRaw1.get());
+    catNet->bindResource("Raw2", resRaw2.get());
+    catNet->bindResource("Depths", resDepths);
+    catNet->bindResource("Colors", resColors);
 }
 
-bool Nmsl2::infer()
-{
-	//CudaStream stream1, stream2;
-	if (!fcNet1->infer())
-		return false;
-	if (!fcNet2->infer())
-		return false;
-	if (!catNet->infer())
-		return false;
-	return true;
+bool Nmsl2::infer() {
+    // CudaStream stream1, stream2;
+    if (!fcNet1->infer())
+        return false;
+    if (!fcNet2->infer())
+        return false;
+    if (!catNet->infer())
+        return false;
+    return true;
 }
 
-bool Nmsl2::dispose()
-{
-	if (fcNet1 != nullptr)
-	{
-		fcNet1->dispose();
-		delete fcNet1;
-		fcNet1 = nullptr;
-	}
-	if (fcNet2 != nullptr)
-	{
-		fcNet2->dispose();
-		delete fcNet2;
-		fcNet2 = nullptr;
-	}
-	if (catNet != nullptr)
-	{
-		catNet->dispose();
-		delete catNet;
-		catNet = nullptr;
-	}
-	resRaw1 = nullptr;
-	resRaw2 = nullptr;
-	return true;
+void Nmsl2::dispose() {
+    if (fcNet1 != nullptr) {
+        fcNet1->dispose();
+        delete fcNet1;
+        fcNet1 = nullptr;
+    }
+    if (fcNet2 != nullptr) {
+        fcNet2->dispose();
+        delete fcNet2;
+        fcNet2 = nullptr;
+    }
+    if (catNet != nullptr) {
+        catNet->dispose();
+        delete catNet;
+        catNet = nullptr;
+    }
+    resRaw1 = nullptr;
+    resRaw2 = nullptr;
 }
diff --git a/cpp/msl_infer/Nmsl2.h b/cpp/msl_infer/Nmsl2.h
index e023e06..3f2e711 100644
--- a/cpp/msl_infer/Nmsl2.h
+++ b/cpp/msl_infer/Nmsl2.h
@@ -9,6 +9,8 @@ public:
 	Net *fcNet1;
 	Net *fcNet2;
 	Net *catNet;
+	uint batchSize;
+	uint samples;
 
 	Nmsl2(int batchSize, int samples);
 
@@ -18,6 +20,6 @@ public:
 
 	virtual bool infer();
 
-	virtual bool dispose();
+	virtual void dispose();
 
 };
diff --git a/cpp/msl_infer/Renderer.cu b/cpp/msl_infer/Renderer.cu
index b702144..29c35cc 100644
--- a/cpp/msl_infer/Renderer.cu
+++ b/cpp/msl_infer/Renderer.cu
@@ -1,32 +1,28 @@
 #include "Renderer.h"
-#include "thread_index.h"
+#include "../utils/cuda.h"
 
-/// Dispatch (n, 1, 1)
-__global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint n)
-{
+/// Dispatch (n_rays, -)
+__global__ void cu_render(glm::vec4 *o_colors, glm::vec4 *layeredColors, uint samples, uint nRays) {
     glm::uvec3 idx3 = IDX3;
-    if (idx3.x >= n)
+    uint rayIdx = idx3.x;
+    if (rayIdx >= nRays)
         return;
     glm::vec4 outColor;
-    for (int i = samples - 1; i >= 0; --i)
-    {
-        glm::vec4 c = layeredColors[idx3.x * samples + i];
+    for (int si = samples - 1; si >= 0; --si) {
+        glm::vec4 c = layeredColors[rayIdx * samples + si];
         outColor = outColor * (1 - c.a) + c * c.a;
     }
     outColor.a = 1.0f;
     o_colors[idx3.x] = outColor;
 }
 
-Renderer::Renderer()
-{
-}
+Renderer::Renderer() {}
 
 void Renderer::render(sptr<CudaArray<glm::vec4>> o_colors,
-                      sptr<CudaArray<glm::vec4>> layeredColors)
-{
-    dim3 blockSize(1024);
-    dim3 gridSize((uint)ceil(o_colors->n() / (float)blockSize.x));
-    cu_render<<<gridSize, blockSize>>>(*o_colors, *layeredColors, layeredColors->n() / o_colors->n(),
-                                       o_colors->n());
+                      sptr<CudaArray<glm::vec4>> layeredColors) {
+    dim3 blkSize(1024);
+    dim3 grdSize(ceilDiv(o_colors->n(), blkSize.x));
+    CU_INVOKE(cu_render)
+    (*o_colors, *layeredColors, layeredColors->n() / o_colors->n(), o_colors->n());
     CHECK_EX(cudaGetLastError());
 }
\ No newline at end of file
diff --git a/cpp/msl_infer/Renderer.h b/cpp/msl_infer/Renderer.h
index 20766cf..4e48a0e 100644
--- a/cpp/msl_infer/Renderer.h
+++ b/cpp/msl_infer/Renderer.h
@@ -1,10 +1,15 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 
 class Renderer {
 public:
     Renderer();
 
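+    /**
+     * @brief Blend the layered colors of every ray into a single output color.
+     *
+     * @param o_colors      output buffer, one color per ray (alpha is written as 1)
+     * @param layeredColors input buffer holding layeredColors->n() / o_colors->n() colors per ray
+     */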
     void render(sptr<CudaArray<glm::vec4>> o_colors, sptr<CudaArray<glm::vec4>> layeredColors);
-
 };
\ No newline at end of file
diff --git a/cpp/msl_infer/Resource.h b/cpp/msl_infer/Resource.h
deleted file mode 100644
index 7a53eca..0000000
--- a/cpp/msl_infer/Resource.h
+++ /dev/null
@@ -1,159 +0,0 @@
-#pragma once
-#include <map>
-#include <vector>
-
-class Resource
-{
-public:
-	virtual ~Resource() {}
-
-	virtual void *getBuffer() const = 0;
-
-	virtual size_t size() const = 0;
-};
-
-class CudaBuffer : public Resource
-{
-public:
-	CudaBuffer(void *buffer = nullptr, size_t size = 0) : _buffer(buffer), _ownBuffer(false), _size(size) {}
-	CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size)
-	{
-		CHECK_EX(cudaMalloc(&_buffer, size));
-	}
-	CudaBuffer(const CudaBuffer &rhs) = delete;
-
-	virtual ~CudaBuffer()
-	{
-		if (!_ownBuffer || _buffer == nullptr)
-			return;
-		try
-		{
-			CHECK_EX(cudaFree(_buffer));
-		}
-		catch (std::exception &ex)
-		{
-			Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
-		}
-		_buffer = nullptr;
-		_ownBuffer = false;
-	}
-
-	virtual void *getBuffer() const { return _buffer; }
-
-	virtual size_t size() const { return _size; }
-
-private:
-	void *_buffer;
-	bool _ownBuffer;
-	size_t _size;
-};
-
-template <typename T>
-class CudaArray : public CudaBuffer
-{
-public:
-	CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {}
-	CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {}
-	CudaArray(const CudaArray<T> &rhs) = delete;
-
-	size_t n() const { return size() / sizeof(T); }
-
-	operator T *() { return (T *)getBuffer(); }
-};
-
-class GraphicsResource : public Resource
-{
-public:
-	cudaGraphicsResource_t getHandler() { return _res; }
-
-	virtual ~GraphicsResource()
-	{
-		if (_res == nullptr)
-			return;
-		try
-		{
-			CHECK_EX(cudaGraphicsUnregisterResource(_res));
-		}
-		catch (std::exception &ex)
-		{
-			Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
-		}
-		_res = nullptr;
-	}
-
-	virtual size_t size() const { return _size; }
-
-protected:
-	cudaGraphicsResource_t _res;
-	size_t _size;
-
-	GraphicsResource() : _res(nullptr), _size(0) {}
-};
-
-template<typename T>
-class GlTextureResource : public GraphicsResource
-{
-public:
-	GlTextureResource(GLuint textureID, glm::uvec2 textureSize)
-	{
-		CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
-											 cudaGraphicsRegisterFlagsWriteDiscard));
-		_size = textureSize.x * textureSize.y * sizeof(T);
-		_textureSize = textureSize;
-	}
-
-	virtual ~GlTextureResource()
-	{
-		cudaGraphicsUnmapResources(1, &_res, 0);
-	}
-
-	virtual void *getBuffer() const {
-		cudaArray_t buffer;
-		try{
-			CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
-		} catch (...) {
-			return nullptr;
-		}
-		return buffer;
-	}
-
-	operator T *() { return (T *)getBuffer(); }
-	
-	glm::uvec2 textureSize () { return _textureSize; }
-
-private:
-	glm::uvec2 _textureSize;
-
-};
-
-class Resources
-{
-public:
-	std::map<std::string, Resource *> resources;
-	std::vector<cudaGraphicsResource_t> graphicsResources;
-
-	void addResource(const std::string &name, Resource *res)
-	{
-		auto gres = dynamic_cast<GraphicsResource *>(res);
-		if (gres != nullptr)
-			graphicsResources.push_back(gres->getHandler());
-		resources[name] = res;
-	}
-
-	void clear()
-	{
-		resources.clear();
-		graphicsResources.clear();
-	}
-};
-
-template <typename T>
-void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
-					size_t elemsPerRow = 1)
-{
-	T *hostArr = new T[arr.n()];
-	cudaMemcpy(hostArr, arr.getBuffer(), arr.n() * sizeof(T), cudaMemcpyDeviceToHost);
-	dumpHostBuffer<float>(so, hostArr, arr.n() * sizeof(T),
-						  sizeof(T) / sizeof(float) * elemsPerRow, maxDumpRows);
-	delete[] hostArr;
-}
\ No newline at end of file
diff --git a/cpp/msl_infer/Sampler.cu b/cpp/msl_infer/Sampler.cu
index 9b8534b..5d8dcdd 100644
--- a/cpp/msl_infer/Sampler.cu
+++ b/cpp/msl_infer/Sampler.cu
@@ -1,8 +1,7 @@
 #include "Sampler.h"
-#include "thread_index.h"
+#include "../utils/cuda.h"
 
-__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth)
-{
+__device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, float &o_depth) {
     float pp = glm::dot(p, p);
     float vv = glm::dot(v, v);
     float pv = glm::dot(p, v);
@@ -10,36 +9,36 @@ __device__ glm::vec3 _raySphereIntersect(glm::vec3 p, glm::vec3 v, float r, floa
     return p + o_depth * v;
 }
 
-__device__ float _getAngle(float x, float y)
-{
+__device__ float _getAngle(float x, float y) {
     return -atan(x / y) + (y < 0) * (float)M_PI + 0.5f * (float)M_PI;
 }
 
 /**
- * Dispatch with block_size=(n_samples, 1024)
+ * Dispatch with block_size=(n_samples, *), grid_size=(1, nRays/*)
+ * Index with (sample_idx, ray_idx)
  */
-__global__ void cu_sampleOnRays(glm::vec3 *o_sphericalCoords, float *o_depths, glm::vec3 *rays,
-                                glm::vec3 rayCenter, float range0, float rangeStep, uint n)
-{
+__global__ void cu_sampleOnRays(float *o_coords, float *o_depths, glm::vec3 *rays, uint nRays,
+                                glm::vec3 origin, Range range, bool outputRadius) {
     glm::uvec3 idx3 = IDX3;
-    uint rayIdx = flattenIdx({idx3.y, idx3.z, 0});
-    if (rayIdx >= n)
-        return;
     uint idx = flattenIdx(idx3);
-    float r_reciprocal = rangeStep * idx3.x + range0;
-    glm::vec3 p = _raySphereIntersect(rayCenter, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]);
-    o_sphericalCoords[idx] = glm::vec3(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal));
+    uint sampleIdx = idx3.x;
+    uint rayIdx = idx3.y;
+    if (rayIdx >= nRays)
+        return;
+    float r_reciprocal = range.get(sampleIdx);
+    glm::vec3 p = _raySphereIntersect(origin, rays[rayIdx], 1.0f / r_reciprocal, o_depths[idx]);
+    glm::vec3 sp(r_reciprocal, _getAngle(p.x, p.z), acos(p.y * r_reciprocal));
+    if (outputRadius)
+        ((glm::vec3 *)o_coords)[idx] = sp;
+    else
+        ((glm::vec2 *)o_coords)[idx] = {sp.y, sp.z};
 }
 
-void Sampler::sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords, sptr<CudaArray<float>> o_depths,
-                           sptr<CudaArray<glm::vec3>> rays,
-                           glm::vec3 rayCenter)
-{
-    dim3 blockSize(_samples, 1024 / _samples);
-    dim3 gridSize(1, (uint)ceil(rays->n() / (float)blockSize.y));
-    cu_sampleOnRays<<<gridSize, blockSize>>>(*o_sphericalCoords, *o_depths, *rays, rayCenter,
-                                             _dispRange.x,
-                                             (_dispRange.y - _dispRange.x) / (_samples - 1),
-                                             rays->n());
+void Sampler::sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
+                           sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter) {
+    dim3 blkSize(_dispRange.steps(), 1024 / _dispRange.steps()); // x: sample, y: ray
+    dim3 grdSize(1, ceilDiv(rays->n(), blkSize.y)); // integer ceil, as in Renderer::render
+    CU_INVOKE(cu_sampleOnRays)
+    (*o_coords, *o_depths, *rays, rays->n(), rayCenter, _dispRange, _outputRadius);
     CHECK_EX(cudaGetLastError());
 }
\ No newline at end of file
diff --git a/cpp/msl_infer/Sampler.h b/cpp/msl_infer/Sampler.h
index 4ebf3ab..3883a76 100644
--- a/cpp/msl_infer/Sampler.h
+++ b/cpp/msl_infer/Sampler.h
@@ -1,18 +1,15 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 
-class Sampler
-{
+class Sampler {
 public:
-    Sampler(glm::vec2 depthRange, uint samples) : _dispRange(1.0f / depthRange.x, 1.0f / depthRange.y),
-                                                  _samples(samples) {}
+    Sampler(glm::vec2 depthRange, uint samples, bool outputRadius)
+        : _dispRange(1.0f / depthRange, samples), _outputRadius(outputRadius) {}
 
-    void sampleOnRays(sptr<CudaArray<glm::vec3>> o_sphericalCoords,
-                      sptr<CudaArray<float>> o_depths,
-                      sptr<CudaArray<glm::vec3>> rays,
-                      glm::vec3 rayCenter);
+    void sampleOnRays(sptr<CudaArray<float>> o_coords, sptr<CudaArray<float>> o_depths,
+                      sptr<CudaArray<glm::vec3>> rays, glm::vec3 rayCenter);
 
 private:
-    glm::vec2 _dispRange;
-    uint _samples;
+    Range _dispRange;
+    bool _outputRadius;
 };
\ No newline at end of file
diff --git a/cpp/msl_infer/SynthesisPipeline.cpp b/cpp/msl_infer/SynthesisPipeline.cpp
index f7e04d6..9e8af36 100644
--- a/cpp/msl_infer/SynthesisPipeline.cpp
+++ b/cpp/msl_infer/SynthesisPipeline.cpp
@@ -1,18 +1,22 @@
 #include "SynthesisPipeline.h"
 
-SynthesisPipeline::SynthesisPipeline(
-    const std::string &netDir, bool isNmsl, uint batchSize,
-    uint samples) : _batchSize(batchSize),
-                    _samples(samples),
-                    _inferPipeline(new InferPipeline(netDir, isNmsl, batchSize, samples)),
-                    _rays(new CudaArray<glm::vec3>(batchSize)),
-                    _colors(new CudaArray<glm::vec4>(batchSize))
-{
-    _glResultBuffer = _createGlResultBuffer(_batchSize);
+SynthesisPipeline::SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples,
+                                     glm::vec2 depthRange, int encodeDim, int coordChns,
+                                     float enhanceSigma, float enhanceFe)
+    : _nRays(cam->res().x * cam->res().y),
+      _nSamples(nSamples),
+      _enhanceSigma(enhanceSigma),
+      _enhanceFe(enhanceFe),
+      _cam(cam),
+      _inferPipeline(new InferPipeline(net, _nRays, nSamples, depthRange, encodeDim, coordChns)),
+      _enhancement(new Enhancement(cam->res())),
+      _rays(new CudaArray<glm::vec3>(_nRays)),
+      _colors(new CudaArray<glm::vec4>(_nRays)) {
+    _glResultBuffer = _createGlResultBuffer(_nRays);
+    _glResultTextures.push_back(_createGlResultTexture(_cam->res()));
 }
 
-void SynthesisPipeline::run(View &view)
-{
+void SynthesisPipeline::run(View &view) {
     CudaEvent eStart, eGenRays, eInferred, eEnhanced;
 
     cudaEventRecord(eStart);
@@ -38,8 +42,9 @@ void SynthesisPipeline::run(View &view)
     cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
     {
         std::ostringstream sout;
-        sout << typeid(*this).name() << " => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
-             << "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)";
+        sout << typeid(*this).name() << " => Total: " << timeTotal
+             << "ms (Gen rays: " << timeGenRays << "ms, Infer: " << timeInfer
+             << "ms, Enhance: " << timeEnhance << "ms)";
         Logger::instance.info(sout.str());
     }
 
@@ -53,13 +58,21 @@ void SynthesisPipeline::run(View &view)
     _uploadResultToTextures();
 }
 
-GLuint SynthesisPipeline::getGlResultTexture(int index)
-{
-    return _glResultTextures[index];
+GLuint SynthesisPipeline::getGlResultTexture(int index) { return _glResultTextures[index]; }
+
+void SynthesisPipeline::_genRays(View &view) { view.transVectors(_rays, _cam->localRays()); }
+
+void SynthesisPipeline::_enhance() { _enhancement->run(_colors, _enhanceSigma, _enhanceFe); }
+
+void SynthesisPipeline::_uploadResultToTextures() {
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
+    glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _cam->res().x, _cam->res().y, GL_RGBA, GL_FLOAT, 0);
+    glBindTexture(GL_TEXTURE_2D, 0);
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 }
 
-GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res)
-{
+GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res) {
     GLuint textureID;
     glEnable(GL_TEXTURE_2D);
     glGenTextures(1, &textureID);
@@ -72,91 +85,11 @@ GLuint SynthesisPipeline::_createGlResultTexture(glm::uvec2 res)
     return textureID;
 }
 
-GLuint SynthesisPipeline::_createGlResultBuffer(uint elements)
-{
+GLuint SynthesisPipeline::_createGlResultBuffer(uint elements) {
     GLuint glBuffer;
     glGenBuffers(1, &glBuffer);
     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, glBuffer);
-    glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4),
-                 nullptr, GL_STREAM_DRAW);
+    glBufferData(GL_PIXEL_UNPACK_BUFFER, elements * sizeof(glm::vec4), nullptr, GL_STREAM_DRAW);
     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
     return glBuffer;
-}
-
-FoveaSynthesisPipeline::FoveaSynthesisPipeline(
-    glm::uvec2 res, float fov,
-    uint samples) : SynthesisPipeline("../nets/fovea_mono/", false, res.x * res.y, samples),
-                    _foveaCamera(fov, res / 2u, res),
-                    _enhancement(new Enhancement(res))
-{
-    _glResultTextures.push_back(_createGlResultTexture(res));
-}
-
-void FoveaSynthesisPipeline::_genRays(View &view)
-{
-    view.transVectors(_rays, _foveaCamera.localRays());
-}
-
-void FoveaSynthesisPipeline::_enhance()
-{
-    _enhancement->run(_colors, 3.0f, 0.2f);
-}
-
-void FoveaSynthesisPipeline::_uploadResultToTextures()
-{
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
-    glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _foveaCamera.res().x, _foveaCamera.res().y,
-                    GL_RGBA, GL_FLOAT, 0);
-    glBindTexture(GL_TEXTURE_2D, 0);
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-}
-
-PeriphSynthesisPipeline::PeriphSynthesisPipeline(
-    glm::uvec2 midRes, float midFov, glm::uvec2 periphRes, float periphFov,
-    uint samples) : SynthesisPipeline("../nets/periph/", false,
-                                      midRes.x * midRes.y + periphRes.x * periphRes.y,
-                                      samples),
-                    _midCamera(midFov, midRes / 2u, midRes),
-                    _periphCamera(periphFov, periphRes / 2u, periphRes),
-                    _midEnhancement(new Enhancement(midRes)),
-                    _periphEnhancement(new Enhancement(periphRes))
-{
-    uint midPixels = midRes.x * midRes.y;
-    uint periphPixels = periphRes.x * periphRes.y;
-    _midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*_rays, midPixels));
-    _periphRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(
-        (glm::vec3 *)*_rays + midPixels, periphPixels));
-    _glResultTextures.push_back(_createGlResultTexture(midRes));
-    _glResultTextures.push_back(_createGlResultTexture(periphRes));
-    _midColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(*_colors, midPixels));
-    _periphColors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(
-        (glm::vec4 *)*_colors + midPixels, periphPixels));
-}
-
-void PeriphSynthesisPipeline::_genRays(View &view)
-{
-    view.transVectors(_midRays, _midCamera.localRays());
-    view.transVectors(_periphRays, _periphCamera.localRays());
-}
-
-void PeriphSynthesisPipeline::_enhance()
-{
-    _midEnhancement->run(_midColors, 5.0f, 0.2f);
-    _periphEnhancement->run(_periphColors, 5.0f, 0.2f);
-}
-
-
-void PeriphSynthesisPipeline::_uploadResultToTextures()
-{
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, _glResultBuffer);
-    glBindTexture(GL_TEXTURE_2D, _glResultTextures[0]);
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, _midCamera.res().x, _midCamera.res().y,
-                    GL_RGBA, GL_FLOAT, 0);
-    glBindTexture(GL_TEXTURE_2D, _glResultTextures[1]);
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
-                    _periphCamera.res().x, _periphCamera.res().y, GL_RGBA, GL_FLOAT,
-                    (void *)(_midCamera.res().x * _midCamera.res().y * sizeof(glm::vec4)));
-    glBindTexture(GL_TEXTURE_2D, 0);
-    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-}
+}
\ No newline at end of file
diff --git a/cpp/msl_infer/SynthesisPipeline.h b/cpp/msl_infer/SynthesisPipeline.h
index d06856e..22a7153 100644
--- a/cpp/msl_infer/SynthesisPipeline.h
+++ b/cpp/msl_infer/SynthesisPipeline.h
@@ -1,71 +1,35 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 #include "InferPipeline.h"
 #include "View.h"
 #include "Enhancement.h"
 
-class SynthesisPipeline
-{
+class SynthesisPipeline {
 public:
-    SynthesisPipeline(const std::string &netDir, bool isNmsl,
-                      uint batchSize, uint samples);
+    SynthesisPipeline(sptr<Msl> net, sptr<Camera> cam, uint nSamples, glm::vec2 depthRange,
+                      int encodeDim, int coordChns, float enhanceSigma, float enhanceFe);
 
-    void run(View& view);
+    void run(View &view);
 
     GLuint getGlResultTexture(int index);
 
 protected:
-    uint _batchSize;
-    uint _samples;
-    std::vector<GLuint> _glResultTextures;
-    GLuint _glResultBuffer;
+    uint _nRays;
+    uint _nSamples;
+    float _enhanceSigma;
+    float _enhanceFe;
+    sptr<Camera> _cam;
     sptr<InferPipeline> _inferPipeline;
+    sptr<Enhancement> _enhancement;
     sptr<CudaArray<glm::vec3>> _rays;
     sptr<CudaArray<glm::vec4>> _colors;
+    std::vector<GLuint> _glResultTextures;
+    GLuint _glResultBuffer;
 
-    virtual void _genRays(View& view) = 0;
-    virtual void _enhance() = 0;
-    virtual void _uploadResultToTextures() = 0;
+    void _genRays(View &view);
+    void _enhance();
+    void _uploadResultToTextures();
 
     GLuint _createGlResultTexture(glm::uvec2 res);
     GLuint _createGlResultBuffer(uint elements);
-
-};
-
-class FoveaSynthesisPipeline : public SynthesisPipeline
-{
-public:
-    FoveaSynthesisPipeline(glm::uvec2 res, float fov, uint samples);
-
-protected:
-    virtual void _genRays(View& view);
-    virtual void _enhance();
-    virtual void _uploadResultToTextures();
-
-private:
-    Camera _foveaCamera;
-    sptr<Enhancement> _enhancement;
-};
-
-class PeriphSynthesisPipeline : public SynthesisPipeline
-{
-public:
-    PeriphSynthesisPipeline(glm::uvec2 midRes, float midFov,
-                            glm::uvec2 periphRes, float periphFov,
-                            uint samples);
-
-protected:
-    virtual void _genRays(View& view);
-    virtual void _enhance();
-    virtual void _uploadResultToTextures();
-
-private:
-    Camera _midCamera;
-    Camera _periphCamera;
-    sptr<CudaArray<glm::vec3>> _midRays;
-    sptr<CudaArray<glm::vec3>> _periphRays;
-    sptr<CudaArray<glm::vec4>> _midColors;
-    sptr<CudaArray<glm::vec4>> _periphColors;
-    sptr<Enhancement> _midEnhancement;
-    sptr<Enhancement> _periphEnhancement;
 };
\ No newline at end of file
diff --git a/cpp/msl_infer/View.cu b/cpp/msl_infer/View.cu
index 2e00dec..eda4f03 100644
--- a/cpp/msl_infer/View.cu
+++ b/cpp/msl_infer/View.cu
@@ -1,6 +1,6 @@
 #include "View.h"
 #include <cuda_runtime.h>
-#include "thread_index.h"
+#include "../utils/cuda.h"
 
 __global__ void cu_genLocalRays(glm::vec3 *o_rays, glm::vec2 f, glm::vec2 c, glm::uvec2 res)
 {
diff --git a/cpp/msl_infer/View.h b/cpp/msl_infer/View.h
index f8d4772..97d377b 100644
--- a/cpp/msl_infer/View.h
+++ b/cpp/msl_infer/View.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "Common.h"
+#include "../utils/common.h"
 
 
 class Camera {
diff --git a/cpp/msl_infer_test/Makefile b/cpp/msl_infer_test/Makefile
index 1d96fd8..f9184e9 100644
--- a/cpp/msl_infer_test/Makefile
+++ b/cpp/msl_infer_test/Makefile
@@ -1,6 +1,6 @@
 OUTNAME_RELEASE = msl_infer_test
 OUTNAME_DEBUG   = msl_infer_test_debug
-EXTRA_DIRECTORIES = ../msl_infer
+EXTRA_DIRECTORIES = ../msl_infer ../utils
 .NOTPARALLEL:
 MAKEFILE ?= ../Makefile.config
 include $(MAKEFILE)
diff --git a/cpp/msl_infer_test/main.cpp b/cpp/msl_infer_test/main.cpp
index 36ad4de..38eb0b7 100644
--- a/cpp/msl_infer_test/main.cpp
+++ b/cpp/msl_infer_test/main.cpp
@@ -7,388 +7,386 @@
 #include "../msl_infer/View.h"
 #include "../glm/gtx/transform.hpp"
 
-static const struct
-{
-	float x, y;
-	float u, v;
-} vertices[4] = {
-	{-1.0f, -1.0f, 0.f, 1.f},
-	{1.0f, -1.0f, 1.f, 1.f},
-	{1.0f, 1.0f, 1.f, 0.f},
-	{-1.0f, 1.0f, 0.f, 0.f}};
-
-static const char *vertex_shader_text =
-	"#version 300 es\n"
-	"uniform mat4 MVP;\n"
-	"in vec2 vUV;\n"
-	"in vec2 vPos;\n"
-	"out vec2 uv;\n"
-	"void main()\n"
-	"{\n"
-	"    gl_Position = MVP * vec4(vPos, 0.0, 1.0);\n"
-	"    uv = vUV;\n"
-	"}\n";
+static const struct {
+    float x, y;
+    float u, v;
+} vertices[4] = {{-1.0f, -1.0f, 0.f, 1.f},
+                 {1.0f, -1.0f, 1.f, 1.f},
+                 {1.0f, 1.0f, 1.f, 0.f},
+                 {-1.0f, 1.0f, 0.f, 0.f}};
+
+static const char *vertex_shader_text = "#version 300 es\n"
+                                        "uniform mat4 MVP;\n"
+                                        "in vec2 vUV;\n"
+                                        "in vec2 vPos;\n"
+                                        "out vec2 uv;\n"
+                                        "void main()\n"
+                                        "{\n"
+                                        "    gl_Position = MVP * vec4(vPos, 0.0, 1.0);\n"
+                                        "    uv = vUV;\n"
+                                        "}\n";
 
 static const char *fragment_shader_text =
-	"#version 300 es\n"
-	"#undef lowp\n"
-	"#undef mediump\n"
-	"#undef highp\n"
-	"precision mediump float;\n"
-	"out vec4 FragColor;\n"
-	"in vec2 uv;\n"
-	"uniform sampler2D tex;\n"
-	"uniform float R;\n"
-	"uniform vec2 foveaCenter;\n"
-	"uniform vec2 screenRes;\n"
-	"void main()\n"
-	"{\n"
-	"    if(R<1e-5) {\n"
-	"        FragColor = texture(tex, uv);\n"
-	"        return;\n"
-	"    }\n"
-	"    vec2 p = uv * screenRes;\n"
-	"    float r = distance(p, foveaCenter);\n"
-	"    vec2 coord = (p - foveaCenter) / R / 2.0 + 0.5;\n"
-	"    if(coord.x < 0.0 || coord.x > 1.0 || coord.y < 0.0 || coord.y > 1.0) {\n"
-	"        FragColor = vec4(0, 0, 0, 0);\n"
-	"        return;\n"
-	"    }\n"
-	"    vec4 c = texture(tex, coord);\n"
-	"    float alpha = 1.0 - smoothstep(R * 0.6, R, r);\n"
-	"    c.a = c.a * alpha;\n"
-	"    FragColor = c;\n"
-	"}\n";
-
-void inferFovea(void *o_imageData, View &view)
-{
-	glm::uvec2 foveaRes(128, 128);
-	size_t foveaPixels = foveaRes.x * foveaRes.y;
-	size_t totalPixels = foveaPixels;
-	size_t samples = 32;
-
-	Camera foveaCam(20, foveaRes / 2u, foveaRes);
-	InferPipeline inferPipeline("../nets/fovea_mono/", true, totalPixels, samples);
-
-	auto local_rays = foveaCam.localRays();
-	auto rays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(totalPixels));
-	auto colors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(totalPixels));
-
-	CudaEvent eStart, eGenRays, eInferred, eEnhanced;
-
-	cudaEventRecord(eStart);
-
-	view.transVectors(rays, local_rays);
-
-	cudaEventRecord(eGenRays);
-
-	inferPipeline.run(colors, rays, view.t(), true);
-
-	cudaEventRecord(eInferred);
-
-	// TODO Enhance
-
-	cudaEventRecord(eEnhanced);
-
-	CHECK_EX(cudaDeviceSynchronize());
-
-	float timeTotal, timeGenRays, timeInfer, timeEnhance;
-	cudaEventElapsedTime(&timeTotal, eStart, eEnhanced);
-	cudaEventElapsedTime(&timeGenRays, eStart, eGenRays);
-	cudaEventElapsedTime(&timeInfer, eGenRays, eInferred);
-	cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
-	{
-		std::ostringstream sout;
-		sout << "Fovea => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
-			 << "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)";
-		Logger::instance.info(sout.str());
-	}
-	cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
+    "#version 300 es\n"
+    "#undef lowp\n"
+    "#undef mediump\n"
+    "#undef highp\n"
+    "precision mediump float;\n"
+    "out vec4 FragColor;\n"
+    "in vec2 uv;\n"
+    "uniform sampler2D tex;\n"
+    "uniform float R;\n"
+    "uniform vec2 foveaCenter;\n"
+    "uniform vec2 screenRes;\n"
+    "void main()\n"
+    "{\n"
+    "    if(R<1e-5) {\n"
+    "        FragColor = texture(tex, uv);\n"
+    "        return;\n"
+    "    }\n"
+    "    vec2 p = uv * screenRes;\n"
+    "    float r = distance(p, foveaCenter);\n"
+    "    vec2 coord = (p - foveaCenter) / R / 2.0 + 0.5;\n"
+    "    if(coord.x < 0.0 || coord.x > 1.0 || coord.y < 0.0 || coord.y > 1.0) {\n"
+    "        FragColor = vec4(0, 0, 0, 0);\n"
+    "        return;\n"
+    "    }\n"
+    "    vec4 c = texture(tex, coord);\n"
+    "    float alpha = 1.0 - smoothstep(R * 0.6, R, r);\n"
+    "    c.a = c.a * alpha;\n"
+    "    FragColor = c;\n"
+    "}\n";
+
+/*void inferFovea(void *o_imageData, View &view) {
+    glm::uvec2 foveaRes(128, 128);
+    size_t foveaPixels = foveaRes.x * foveaRes.y;
+    size_t totalPixels = foveaPixels;
+    size_t samples = 32;
+
+    Camera foveaCam(20, foveaRes / 2u, foveaRes);
+    InferPipeline inferPipeline("../nets/fovea_mono/", true, totalPixels, samples);
+
+    auto local_rays = foveaCam.localRays();
+    auto rays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(totalPixels));
+    auto colors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(totalPixels));
+
+    CudaEvent eStart, eGenRays, eInferred, eEnhanced;
+
+    cudaEventRecord(eStart);
+
+    view.transVectors(rays, local_rays);
+
+    cudaEventRecord(eGenRays);
+
+    inferPipeline.run(colors, rays, view.t(), true);
+
+    cudaEventRecord(eInferred);
+
+    // TODO Enhance
+
+    cudaEventRecord(eEnhanced);
+
+    CHECK_EX(cudaDeviceSynchronize());
+
+    float timeTotal, timeGenRays, timeInfer, timeEnhance;
+    cudaEventElapsedTime(&timeTotal, eStart, eEnhanced);
+    cudaEventElapsedTime(&timeGenRays, eStart, eGenRays);
+    cudaEventElapsedTime(&timeInfer, eGenRays, eInferred);
+    cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
+    {
+        std::ostringstream sout;
+        sout << "Fovea => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
+             << "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)";
+        Logger::instance.info(sout.str());
+    }
+    cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
 }
 
-void inferOther(void *o_imageData, View &view)
-{
-	glm::uvec2 midRes(256, 256);
-	glm::uvec2 periphRes(230, 256);
-	size_t midPixels = midRes.x * midRes.y;
-	size_t periphPixels = periphRes.x * periphRes.y;
-	size_t totalPixels = midPixels + periphPixels;
-	size_t samples = 16;
-
-	Camera midCam(45.0f, {128.0f, 128.0f}, midRes);
-	Camera periphCam(110.0f, {115.0f, 128.0f}, periphRes);
-	InferPipeline inferPipeline("../nets/periph/", true, totalPixels, samples);
+void inferOther(void *o_imageData, View &view) {
+    glm::uvec2 midRes(256, 256);
+    glm::uvec2 periphRes(230, 256);
+    size_t midPixels = midRes.x * midRes.y;
+    size_t periphPixels = periphRes.x * periphRes.y;
+    size_t totalPixels = midPixels + periphPixels;
+    size_t samples = 16;
 
-	auto midLocalRays = midCam.localRays();
-	auto periphLocalRays = periphCam.localRays();
-	auto rays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(totalPixels));
-	auto midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*rays, midPixels));
-	auto periphRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>((glm::vec3 *)*rays + midPixels, periphPixels));
-	auto colors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(totalPixels));
+    Camera midCam(45.0f, {128.0f, 128.0f}, midRes);
+    Camera periphCam(110.0f, {115.0f, 128.0f}, periphRes);
+    InferPipeline inferPipeline("../nets/periph/", true, totalPixels, samples);
 
-	CudaEvent eStart, eGenRays, eInferred, eEnhanced;
+    auto midLocalRays = midCam.localRays();
+    auto periphLocalRays = periphCam.localRays();
+    auto rays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(totalPixels));
+    auto midRays = sptr<CudaArray<glm::vec3>>(new CudaArray<glm::vec3>(*rays, midPixels));
+    auto periphRays = sptr<CudaArray<glm::vec3>>(
+        new CudaArray<glm::vec3>((glm::vec3 *)*rays + midPixels, periphPixels));
+    auto colors = sptr<CudaArray<glm::vec4>>(new CudaArray<glm::vec4>(totalPixels));
 
-	cudaEventRecord(eStart);
+    CudaEvent eStart, eGenRays, eInferred, eEnhanced;
 
-	view.transVectors(midRays, midLocalRays);
-	view.transVectors(periphRays, periphLocalRays);
+    cudaEventRecord(eStart);
 
-	cudaEventRecord(eGenRays);
+    view.transVectors(midRays, midLocalRays);
+    view.transVectors(periphRays, periphLocalRays);
 
-	inferPipeline.run(colors, rays, view.t(), true);
+    cudaEventRecord(eGenRays);
 
-	cudaEventRecord(eInferred);
+    inferPipeline.run(colors, rays, view.t(), true);
 
-	// TODO Enhance
+    cudaEventRecord(eInferred);
 
-	cudaEventRecord(eEnhanced);
+    // TODO Enhance
 
-	CHECK_EX(cudaDeviceSynchronize());
+    cudaEventRecord(eEnhanced);
 
-	float timeTotal, timeGenRays, timeInfer, timeEnhance;
-	cudaEventElapsedTime(&timeTotal, eStart, eEnhanced);
-	cudaEventElapsedTime(&timeGenRays, eStart, eGenRays);
-	cudaEventElapsedTime(&timeInfer, eGenRays, eInferred);
-	cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
-	{
-		std::ostringstream sout;
-		sout << "Mid & Periph => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
-			 << "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)";
-		Logger::instance.info(sout.str());
-	}
-	cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
-}
+    CHECK_EX(cudaDeviceSynchronize());
 
-static void error_callback(int error, const char *description)
-{
-	fprintf(stderr, "Error: %s\n", description);
+    float timeTotal, timeGenRays, timeInfer, timeEnhance;
+    cudaEventElapsedTime(&timeTotal, eStart, eEnhanced);
+    cudaEventElapsedTime(&timeGenRays, eStart, eGenRays);
+    cudaEventElapsedTime(&timeInfer, eGenRays, eInferred);
+    cudaEventElapsedTime(&timeEnhance, eInferred, eEnhanced);
+    {
+        std::ostringstream sout;
+        sout << "Mid & Periph => Total: " << timeTotal << "ms (Gen rays: " << timeGenRays
+             << "ms, Infer: " << timeInfer << "ms, Enhance: " << timeEnhance << "ms)";
+        Logger::instance.info(sout.str());
+    }
+    cudaMemcpy(o_imageData, colors->getBuffer(), colors->size(), cudaMemcpyDeviceToHost);
+}*/
+
+static void error_callback(int error, const char *description) {
+    fprintf(stderr, "Error: %s\n", description);
 }
 
-static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods)
-{
-	if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
-		glfwSetWindowShouldClose(window, GLFW_TRUE);
+static void key_callback(GLFWwindow *window, int key, int scancode, int action, int mods) {
+    if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
+        glfwSetWindowShouldClose(window, GLFW_TRUE);
 }
 
-GLFWwindow *initGl(uint windowWidth, uint windowHeight)
-{
-	glfwSetErrorCallback(error_callback);
-	if (!glfwInit())
-		return nullptr;
-	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2);
-	glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
-	//glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
-	//glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
-	/*glfwWindowHint(GLFW_DEPTH_BITS, 0);
-	glfwWindowHint(GLFW_STENCIL_BITS, 0);
-
-	glfwWindowHint(GLFW_SRGB_CAPABLE, GL_TRUE);
-
-	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
-	glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
-
-	glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
-	*/
-	GLFWwindow *window = glfwCreateWindow(windowWidth, windowHeight, "LearnOpenGL", NULL, NULL);
-	if (!window)
-	{
-		glfwTerminate();
-		return nullptr;
-	}
-	glfwSetKeyCallback(window, key_callback);
-	glfwMakeContextCurrent(window);
-	glfwSwapInterval(1);
-
-	/*if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
+GLFWwindow *initGl(uint windowWidth, uint windowHeight) {
+    glfwSetErrorCallback(error_callback);
+    if (!glfwInit())
+        return nullptr;
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
+    // glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+    // glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
+    /*glfwWindowHint(GLFW_DEPTH_BITS, 0);
+    glfwWindowHint(GLFW_STENCIL_BITS, 0);
+
+    glfwWindowHint(GLFW_SRGB_CAPABLE, GL_TRUE);
+
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
+    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
+
+    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
+    */
+    GLFWwindow *window = glfwCreateWindow(windowWidth, windowHeight, "LearnOpenGL", NULL, NULL);
+    if (!window) {
+        glfwTerminate();
+        return nullptr;
+    }
+    glfwSetKeyCallback(window, key_callback);
+    glfwMakeContextCurrent(window);
+    glfwSwapInterval(1);
+
+    /*if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
     {
         std::cout << "Failed to initialize GLAD" << std::endl;
         return -1;
     }*/
 
-	glewInit();
-	glViewport(0, 0, windowWidth, windowHeight);
-	glClearColor(0.0f, 0.0f, 0.3f, 1.0f);
+    glewInit();
+    glViewport(0, 0, windowWidth, windowHeight);
+    glClearColor(0.0f, 0.0f, 0.3f, 1.0f);
 
-	Logger::instance.info("OpenGL is initialized");
+    Logger::instance.info("OpenGL is initialized");
 
-	return window;
+    return window;
 }
 
-GLuint createGlTexture(uint width, uint height)
-{
-	GLuint textureID;
-	glEnable(GL_TEXTURE_2D);
-	glGenTextures(1, &textureID);
-	glBindTexture(GL_TEXTURE_2D, textureID);
-	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_FLOAT, nullptr);
-	glBindTexture(GL_TEXTURE_2D, 0);
-	glDisable(GL_TEXTURE_2D);
-	return textureID;
+GLuint createGlTexture(uint width, uint height) {
+    GLuint textureID;
+    glEnable(GL_TEXTURE_2D);
+    glGenTextures(1, &textureID);
+    glBindTexture(GL_TEXTURE_2D, textureID);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_FLOAT, nullptr);
+    glBindTexture(GL_TEXTURE_2D, 0);
+    glDisable(GL_TEXTURE_2D);
+    return textureID;
 }
 
-void checkCompileErrors(unsigned int shader, std::string type)
-{
-	int success;
-	char infoLog[1024];
-	if (type != "PROGRAM")
-	{
-		glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
-		if (!success)
-		{
-			glGetShaderInfoLog(shader, 1024, NULL, infoLog);
-			std::cout << "ERROR::SHADER_COMPILATION_ERROR of type: " << type << "\n"
-					  << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
-		}
-	}
-	else
-	{
-		glGetProgramiv(shader, GL_LINK_STATUS, &success);
-		if (!success)
-		{
-			glGetProgramInfoLog(shader, 1024, NULL, infoLog);
-			std::cout << "ERROR::PROGRAM_LINKING_ERROR of type: " << type << "\n"
-					  << infoLog << "\n -- --------------------------------------------------- -- " << std::endl;
-		}
-	}
+void checkCompileErrors(unsigned int shader, std::string type) {
+    int success;
+    char infoLog[1024];
+    if (type != "PROGRAM") {
+        glGetShaderiv(shader, GL_COMPILE_STATUS, &success);
+        if (!success) {
+            glGetShaderInfoLog(shader, 1024, NULL, infoLog);
+            std::cout << "ERROR::SHADER_COMPILATION_ERROR of type: " << type << "\n"
+                      << infoLog << "\n -- --------------------------------------------------- -- "
+                      << std::endl;
+        }
+    } else {
+        glGetProgramiv(shader, GL_LINK_STATUS, &success);
+        if (!success) {
+            glGetProgramInfoLog(shader, 1024, NULL, infoLog);
+            std::cout << "ERROR::PROGRAM_LINKING_ERROR of type: " << type << "\n"
+                      << infoLog << "\n -- --------------------------------------------------- -- "
+                      << std::endl;
+        }
+    }
 }
 
-GLuint loadShaderProgram()
-{
-	GLuint vertex_shader, fragment_shader, program;
-	vertex_shader = glCreateShader(GL_VERTEX_SHADER);
-	glShaderSource(vertex_shader, 1, &vertex_shader_text, NULL);
-	glCompileShader(vertex_shader);
-	checkCompileErrors(vertex_shader, "VERTEX");
-
-	fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
-	glShaderSource(fragment_shader, 1, &fragment_shader_text, NULL);
-	glCompileShader(fragment_shader);
-	checkCompileErrors(fragment_shader, "FRAGMENT");
-
-	program = glCreateProgram();
-	glAttachShader(program, vertex_shader);
-	glAttachShader(program, fragment_shader);
-	glLinkProgram(program);
-	checkCompileErrors(program, "PROGRAM");
-
-	Logger::instance.info("Shader program is loaded");
-	return program;
+GLuint loadShaderProgram() {
+    GLuint vertex_shader, fragment_shader, program;
+    vertex_shader = glCreateShader(GL_VERTEX_SHADER);
+    glShaderSource(vertex_shader, 1, &vertex_shader_text, NULL);
+    glCompileShader(vertex_shader);
+    checkCompileErrors(vertex_shader, "VERTEX");
+
+    fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
+    glShaderSource(fragment_shader, 1, &fragment_shader_text, NULL);
+    glCompileShader(fragment_shader);
+    checkCompileErrors(fragment_shader, "FRAGMENT");
+
+    program = glCreateProgram();
+    glAttachShader(program, vertex_shader);
+    glAttachShader(program, fragment_shader);
+    glLinkProgram(program);
+    checkCompileErrors(program, "PROGRAM");
+
+    Logger::instance.info("Shader program is loaded");
+    return program;
 }
 
-int main(void)
-{
-	Logger::instance.logLevel = 3;
-
-	GLFWwindow *window;
-	GLuint vertex_buffer, program;
-	GLint mvp_location, vpos_location, vcol_location;
-
-	window = initGl(800, 800);
-
-	glGenBuffers(1, &vertex_buffer);
-	glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
-	glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
-
-	program = loadShaderProgram();
-	GLuint shaderProp_tex = glGetUniformLocation(program, "tex");
-	GLuint shaderProp_R = glGetUniformLocation(program, "R");
-	GLuint shaderProp_screenRes = glGetUniformLocation(program, "screenRes");
-	GLuint shaderProp_foveaCenter = glGetUniformLocation(program, "foveaCenter");
-
-	mvp_location = glGetUniformLocation(program, "MVP");
-	vpos_location = glGetAttribLocation(program, "vPos");
-	vcol_location = glGetAttribLocation(program, "vUV");
-
-	glEnableVertexAttribArray(vpos_location);
-	glVertexAttribPointer(vpos_location, 2, GL_FLOAT, GL_FALSE,
-						  sizeof(vertices[0]), (void *)0);
-	glEnableVertexAttribArray(vcol_location);
-	glVertexAttribPointer(vcol_location, 2, GL_FLOAT, GL_FALSE,
-						  sizeof(vertices[0]), (void *)(sizeof(float) * 2));
-
-	sptr<FoveaSynthesisPipeline> foveaSynthesisPipeline(
-		new FoveaSynthesisPipeline({128, 128}, 20, 32));
-	sptr<PeriphSynthesisPipeline> periphSynthesisPipeline(
-		new PeriphSynthesisPipeline({256, 256}, 45, {230, 256}, 110, 16));
-	View view({}, {});
-	auto glFoveaTex = foveaSynthesisPipeline->getGlResultTexture(0);
-	auto glMidTex = periphSynthesisPipeline->getGlResultTexture(0);
-	auto glPeriphTex = periphSynthesisPipeline->getGlResultTexture(1);
-
-	Logger::instance.info("Start main loop");
-
-	auto l = 1.428f;
-	glm::vec2 screenRes(1440.0f, 1600.0f);
-	glm::mat4 mvp = glm::ortho(-1.f, 1.f, -1.f, 1.f, 1.f, -1.f);
-
-	glEnable(GL_BLEND);
+int main(void) {
+    Logger::instance.logLevel = 3;
+
+    GLFWwindow *window;
+    GLuint vertex_buffer, program;
+    GLint mvp_location, vpos_location, vcol_location;
+
+    window = initGl(800, 800);
+
+    glGenBuffers(1, &vertex_buffer);
+    glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
+    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
+
+    program = loadShaderProgram();
+    GLuint shaderProp_tex = glGetUniformLocation(program, "tex");
+    GLuint shaderProp_R = glGetUniformLocation(program, "R");
+    GLuint shaderProp_screenRes = glGetUniformLocation(program, "screenRes");
+    GLuint shaderProp_foveaCenter = glGetUniformLocation(program, "foveaCenter");
+
+    mvp_location = glGetUniformLocation(program, "MVP");
+    vpos_location = glGetAttribLocation(program, "vPos");
+    vcol_location = glGetAttribLocation(program, "vUV");
+
+    glEnableVertexAttribArray(vpos_location);
+    glVertexAttribPointer(vpos_location, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]), (void *)0);
+    glEnableVertexAttribArray(vcol_location);
+    glVertexAttribPointer(vcol_location, 2, GL_FLOAT, GL_FALSE, sizeof(vertices[0]),
+                          (void *)(sizeof(float) * 2));
+
+    sptr<Msl> foveaNet(new Msl());
+    foveaNet->load("");
+    sptr<Msl> periphNet(new Msl());
+    periphNet->load("");
+    sptr<Camera> foveaCam(new Camera(20, {128, 128}, {256, 256}));
+    sptr<Camera> midCam(new Camera(45, {128, 128}, {256, 256}));
+    sptr<Camera> periphCam(new Camera(110, {115, 128}, {230, 256}));
+    uint nSamples = 64;
+    uint encodeDim = 6;
+    uint coordChns = 2;
+    glm::vec2 depthRange(1.0f, 7.0f);
+    sptr<SynthesisPipeline> synthesisPipelines[] = {
+        sptr<SynthesisPipeline>(new SynthesisPipeline(foveaNet, foveaCam, nSamples, depthRange,
+                                                      encodeDim, coordChns, 3.0f, 0.2f)),
+        sptr<SynthesisPipeline>(new SynthesisPipeline(periphNet, midCam, nSamples, depthRange,
+                                                      encodeDim, coordChns, 5.0f, 0.2f)),
+        sptr<SynthesisPipeline>(new SynthesisPipeline(periphNet, periphCam, nSamples, depthRange,
+                                                      encodeDim, coordChns, 5.0f, 0.2f)),
+    };
+    View view({}, {});
+    auto glFoveaTex = synthesisPipelines[0]->getGlResultTexture(0);
+    auto glMidTex = synthesisPipelines[1]->getGlResultTexture(0);
+    auto glPeriphTex = synthesisPipelines[2]->getGlResultTexture(0);
+
+    Logger::instance.info("Start main loop");
+
+    auto l = 1.428f;
+    glm::vec2 screenRes(1440.0f, 1600.0f);
+    glm::mat4 mvp = glm::ortho(-1.f, 1.f, -1.f, 1.f, 1.f, -1.f);
+
+    glEnable(GL_BLEND);
     glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
-	GLuint queries[1];
-	glGenQueries(1, queries);
+    GLuint queries[1];
+    glGenQueries(1, queries);
 
-	while (!glfwWindowShouldClose(window))
-	{
-		foveaSynthesisPipeline->run(view);
-		periphSynthesisPipeline->run(view);
+    while (!glfwWindowShouldClose(window)) {
+		for (int i = 0; i < 3; ++i)
+			synthesisPipelines[i]->run(view);
 
-		glClear(GL_COLOR_BUFFER_BIT);
+        glClear(GL_COLOR_BUFFER_BIT);
 
         // Start query 1
         glBeginQuery(GL_TIME_ELAPSED, queries[0]);
 
-		glUseProgram(program);
-		glUniformMatrix4fv(mvp_location, 1, GL_FALSE, (float *)&mvp[0][0]);
-		glUniform1i(shaderProp_tex, 0);
-		glEnable(GL_TEXTURE_2D);
-		glActiveTexture(GL_TEXTURE0);
+        glUseProgram(program);
+        glUniformMatrix4fv(mvp_location, 1, GL_FALSE, (float *)&mvp[0][0]);
+        glUniform1i(shaderProp_tex, 0);
+        glEnable(GL_TEXTURE_2D);
+        glActiveTexture(GL_TEXTURE0);
 
-		glUniform1f(shaderProp_R, 0.0f);
-		glUniform2f(shaderProp_screenRes, 1440, 1600);
-		glUniform2f(shaderProp_foveaCenter, 720, 800);
-		glBindTexture(GL_TEXTURE_2D, glPeriphTex);
-		glDrawArrays(GL_QUADS, 0, 4);
+        glUniform1f(shaderProp_R, 0.0f);
+        glUniform2f(shaderProp_screenRes, 1440, 1600);
+        glUniform2f(shaderProp_foveaCenter, 720, 800);
+        glBindTexture(GL_TEXTURE_2D, glPeriphTex);
+        glDrawArrays(GL_QUADS, 0, 4);
 
-		glUniform1f(shaderProp_R, screenRes.y * 0.5f * 0.414 / l);
-		glUniform2f(shaderProp_screenRes, 1440, 1600);
-		glUniform2f(shaderProp_foveaCenter, 720, 800);
-		glBindTexture(GL_TEXTURE_2D, glMidTex);
-		glDrawArrays(GL_QUADS, 0, 4);
+        glUniform1f(shaderProp_R, screenRes.y * 0.5f * 0.414 / l);
+        glUniform2f(shaderProp_screenRes, 1440, 1600);
+        glUniform2f(shaderProp_foveaCenter, 720, 800);
+        glBindTexture(GL_TEXTURE_2D, glMidTex);
+        glDrawArrays(GL_QUADS, 0, 4);
 
-		glUniform1f(shaderProp_R, screenRes.y * 0.5f * 0.176f / l);
-		glUniform2f(shaderProp_screenRes, 1440, 1600);
-		glUniform2f(shaderProp_foveaCenter, 720, 800);
-		glBindTexture(GL_TEXTURE_2D, glFoveaTex);
-		glDrawArrays(GL_QUADS, 0, 4);
+        glUniform1f(shaderProp_R, screenRes.y * 0.5f * 0.176f / l);
+        glUniform2f(shaderProp_screenRes, 1440, 1600);
+        glUniform2f(shaderProp_foveaCenter, 720, 800);
+        glBindTexture(GL_TEXTURE_2D, glFoveaTex);
+        glDrawArrays(GL_QUADS, 0, 4);
 
-		glDisable(GL_TEXTURE_2D);
+        glDisable(GL_TEXTURE_2D);
 
         glEndQuery(GL_TIME_ELAPSED);
 
         GLint available = 0;
-		while (!available)
+        while (!available)
             glGetQueryObjectiv(queries[0], GL_QUERY_RESULT_AVAILABLE, &available);
         // timer queries can contain more than 32 bits of data, so always
         // query them using the 64 bit types to avoid overflow
         GLuint64 timeElapsed = 0;
-		glGetQueryObjectui64v(queries[0], GL_QUERY_RESULT, &timeElapsed);
+        glGetQueryObjectui64v(queries[0], GL_QUERY_RESULT, &timeElapsed);
 
-		{
-			std::ostringstream sout;
-			sout << "Blending: " << timeElapsed / 10000 / 100.0f << "ms" << std::endl;
-			Logger::instance.info(sout.str());
-		}
+        {
+            std::ostringstream sout;
+            sout << "Blending: " << timeElapsed / 10000 / 100.0f << "ms" << std::endl;
+            Logger::instance.info(sout.str());
+        }
 
-		glfwSwapBuffers(window);
-		glfwPollEvents();
-	}
+        glfwSwapBuffers(window);
+        glfwPollEvents();
+    }
 
-	foveaSynthesisPipeline = nullptr;
-	periphSynthesisPipeline = nullptr;
+	for (int i = 0; i < 3; ++i)
+		synthesisPipelines[i] = nullptr;
 
-	glfwDestroyWindow(window);
+    glfwDestroyWindow(window);
 
-	glfwTerminate();
-	exit(EXIT_SUCCESS);
+    glfwTerminate();
+    exit(EXIT_SUCCESS);
 }
diff --git a/cpp/msl_infer/Formatter.h b/cpp/utils/Formatter.h
similarity index 100%
rename from cpp/msl_infer/Formatter.h
rename to cpp/utils/Formatter.h
diff --git a/cpp/msl_infer/Logger.cpp b/cpp/utils/Logger.cpp
similarity index 100%
rename from cpp/msl_infer/Logger.cpp
rename to cpp/utils/Logger.cpp
diff --git a/cpp/msl_infer/Logger.h b/cpp/utils/Logger.h
similarity index 95%
rename from cpp/msl_infer/Logger.h
rename to cpp/utils/Logger.h
index e02bbef..d89723f 100644
--- a/cpp/msl_infer/Logger.h
+++ b/cpp/utils/Logger.h
@@ -39,7 +39,7 @@ public:
 		return false;
 	}
 
-	virtual void log(nv::ILogger::Severity severity, const char* msg) override {
+	virtual void log(nv::ILogger::Severity severity, const char* msg) noexcept {
 		if ((int)severity > logLevel)
 			return;
 		if (externalLogFunc == nullptr) {
diff --git a/cpp/utils/Resource.h b/cpp/utils/Resource.h
new file mode 100644
index 0000000..f7f5f0e
--- /dev/null
+++ b/cpp/utils/Resource.h
@@ -0,0 +1,136 @@
+#pragma once
+#include <map>
+#include <vector>
+
+class Resource { // Abstract interface: a buffer pointer plus its size.
+  public:
+    virtual ~Resource() {}
+    // Pointer to the underlying buffer; what it points to is defined by the subclass.
+    virtual void *getBuffer() const = 0;
+    // Size of the resource; the subclasses in this file compute it as a byte count.
+    virtual size_t size() const = 0;
+};
+
+class CudaBuffer : public Resource { // Buffer that is cudaMalloc'ed (owned) or merely wrapped.
+  public:
+    CudaBuffer(void *buffer = nullptr, size_t size = 0) // wraps a pointer; never freed here
+        : _buffer(buffer), _ownBuffer(false), _size(size) {}
+    CudaBuffer(size_t size) : _buffer(nullptr), _ownBuffer(true), _size(size) {
+        CHECK_EX(cudaMalloc(&_buffer, size)); // owned allocation, released by the destructor
+    }
+    CudaBuffer(const CudaBuffer &rhs) = delete;
+    // NOTE(review): copy assignment is not deleted - confirm owning buffers are never assigned.
+    virtual ~CudaBuffer() {
+        if (!_ownBuffer || _buffer == nullptr) // wrapped or empty: nothing to free
+            return;
+        try { // a failure while freeing is logged below rather than propagated
+            CHECK_EX(cudaFree(_buffer));
+        } catch (std::exception &ex) {
+            Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
+        }
+        _buffer = nullptr;
+        _ownBuffer = false;
+    }
+    // Raw pointer to the buffer; the templated overload casts it to T *.
+    virtual void *getBuffer() const { return _buffer; }
+    template <class T> T *getBuffer() const { return (T *)getBuffer(); }
+    // Size given at construction (the byte count passed to cudaMalloc when owning).
+    virtual size_t size() const { return _size; }
+
+  private:
+    void *_buffer;   // buffer address, or nullptr
+    bool _ownBuffer; // true only when this object allocated _buffer itself
+    size_t _size;    // size recorded at construction
+};
+
+template <typename T> class CudaArray : public CudaBuffer { // CudaBuffer sized in elements of T
+  public:
+    CudaArray(size_t n) : CudaBuffer(n * sizeof(T)) {} // allocates room for n elements
+    CudaArray(T *buffer, size_t n) : CudaBuffer(buffer, n * sizeof(T)) {} // non-owning wrapper
+    CudaArray(const CudaArray<T> &rhs) = delete;
+    // Number of elements of T: the byte size divided by sizeof(T).
+    size_t n() const { return size() / sizeof(T); }
+    // Implicit conversion to the buffer pointer, cast to T *.
+    operator T *() { return (T *)getBuffer(); }
+};
+
+class GraphicsResource : public Resource { // Base class holding a cudaGraphicsResource_t.
+  public:
+    cudaGraphicsResource_t getHandler() { return _res; } // the stored handle (may be nullptr)
+    // Unregisters _res if it is set; a failure is logged instead of leaving the destructor.
+    virtual ~GraphicsResource() {
+        if (_res == nullptr)
+            return;
+        try {
+            CHECK_EX(cudaGraphicsUnregisterResource(_res));
+        } catch (std::exception &ex) {
+            Logger::instance.warning(std::string("Exception raised in destructor: ") + ex.what());
+        }
+        _res = nullptr;
+    }
+    // Value of _size as set by the subclass (0 until a subclass assigns it).
+    virtual size_t size() const { return _size; }
+
+  protected:
+    cudaGraphicsResource_t _res; // handle filled in by the subclass; nullptr when unset
+    size_t _size;                // size reported by size()
+    // Protected constructor: starts with no handle and size 0.
+    GraphicsResource() : _res(nullptr), _size(0) {}
+};
+
+template <typename T> class GlTextureResource : public GraphicsResource { // GL 2D texture in CUDA
+  public:
+    GlTextureResource(GLuint textureID, glm::uvec2 textureSize) {
+        CHECK_EX(cudaGraphicsGLRegisterImage(&_res, textureID, GL_TEXTURE_2D,
+                                             cudaGraphicsRegisterFlagsWriteDiscard));
+        _size = textureSize.x * textureSize.y * sizeof(T); // width * height * sizeof(T)
+        _textureSize = textureSize;
+    }
+    // Unmap (result ignored) before the base class unregisters; runs even if never mapped.
+    virtual ~GlTextureResource() { cudaGraphicsUnmapResources(1, &_res, 0); }
+    // Array obtained for sub-resource (0, 0) of _res, or nullptr when CHECK_EX throws.
+    virtual void *getBuffer() const {
+        cudaArray_t buffer;
+        try {
+            CHECK_EX(cudaGraphicsSubResourceGetMappedArray(&buffer, _res, 0, 0));
+        } catch (...) {
+            return nullptr;
+        }
+        return buffer;
+    }
+    // NOTE(review): the pointer returned here is the cudaArray_t handle cast to T * - confirm use.
+    operator T *() { return (T *)getBuffer(); }
+    // Texture dimensions passed to the constructor.
+    glm::uvec2 textureSize() { return _textureSize; }
+
+  private:
+    glm::uvec2 _textureSize;
+};
+
+class Resources { // Name -> Resource registry that also collects CUDA graphics handles.
+  public:
+    std::map<std::string, Resource *> resources; // raw pointers; never deleted by this class
+    std::vector<cudaGraphicsResource_t> graphicsResources; // handles of GraphicsResource entries
+    // Store res under name; a replaced entry's handle is not removed from graphicsResources.
+    void addResource(const std::string &name, Resource *res) {
+        auto gres = dynamic_cast<GraphicsResource *>(res);
+        if (gres != nullptr)
+            graphicsResources.push_back(gres->getHandler());
+        resources[name] = res;
+    }
+    // Forget all entries; the Resource objects themselves are left untouched.
+    void clear() {
+        resources.clear();
+        graphicsResources.clear();
+    }
+};
+
+// Copy arr to host memory and print it as floats, elemsPerRow elements of T per row.
+template <typename T>
+void dumpFloatArray(std::ostream &so, CudaArray<T> &arr, size_t maxDumpRows = 0,
+                    size_t elemsPerRow = 1) {
+    std::vector<T> hostArr(arr.n()); // host staging copy; released even if dumping throws
+    CHECK_EX(cudaMemcpy(hostArr.data(), arr.getBuffer(), arr.size(), cudaMemcpyDeviceToHost));
+    dumpHostBuffer<float>(so, hostArr.data(), arr.size(),
+                          sizeof(T) / sizeof(float) * elemsPerRow, maxDumpRows);
+}
\ No newline at end of file
diff --git a/cpp/utils/common.h b/cpp/utils/common.h
new file mode 100644
index 0000000..3065d6d
--- /dev/null
+++ b/cpp/utils/common.h
@@ -0,0 +1,138 @@
+#pragma once
+#include <memory>
+#include <stdexcept>
+#include <vector>
+#include <string>
+#include <sstream>
+#include <GL/glew.h>
+#include <cuda_gl_interop.h>
+#include "../glm/glm.hpp"
+#include "Logger.h"
+
+// Size in bytes of one element of tensor data type `t`; throws std::runtime_error for unlisted values.
+inline unsigned int getElementSize(nv::DataType t) {
+    switch (t) {
+    case nv::DataType::kINT32: // the 4-byte types share one branch
+    case nv::DataType::kFLOAT:
+        return 4;
+    case nv::DataType::kHALF:
+        return 2;
+    case nv::DataType::kBOOL: // the 1-byte types share one branch
+    case nv::DataType::kINT8:
+        return 1;
+    }
+    throw std::runtime_error("Invalid DataType.");
+    return 0; // never reached; kept after the throw as in the original
+}
+
+// Writes buf[0..n) to `os` separated by single spaces and ends the line with std::endl.
+// An empty row (n == 0) yields just the line ending instead of reading buf[0] out of bounds.
+template <typename T> void dumpRow(std::ostream &os, T *buf, size_t n) {
+    for (size_t i = 0; i < n; ++i)
+        (i == 0 ? os : os << " ") << buf[i];
+    os << std::endl;
+}
+
+// Prints `buf` (bufSize bytes, viewed as T) to `os`, rowCount items per row; the last row may be shorter.
+// With maxDumpRows == 0, or when there are no more than maxDumpRows rows, every row is printed;
+// otherwise the first maxDumpRows / 2 rows, a "..." line and the remaining trailing rows are printed.
+template <typename T>
+void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount,
+                    size_t maxDumpRows = 0) {
+    T *typedBuf = static_cast<T *>(buf);
+    size_t numItems = bufSize / sizeof(T);
+    if (numItems == 0 || rowCount == 0)
+        return; // nothing printable; also keeps the size_t arithmetic below from dividing by zero or wrapping
+    size_t rows = numItems / rowCount + (numItems % rowCount != 0 ? 1 : 0);
+    size_t nInLastRow = numItems - (rows - 1) * rowCount;
+    // Prints row `i`; only the final row can hold fewer than rowCount items.
+    auto dumpAt = [&](size_t i) {
+        dumpRow(os, typedBuf + i * rowCount, i + 1 == rows ? nInLastRow : rowCount);
+    };
+    if (maxDumpRows == 0 || maxDumpRows >= rows) {
+        for (size_t i = 0; i < rows; ++i)
+            dumpAt(i);
+        return;
+    }
+    size_t head = maxDumpRows / 2; // rows shown before the ellipsis; the tail gets the rest
+    for (size_t i = 0; i < head; ++i)
+        dumpAt(i);
+    os << "..." << std::endl;
+    for (size_t i = rows - (maxDumpRows - head); i < rows; ++i)
+        dumpAt(i);
+}
+
+class CudaStream { // owns one CUDA stream for the lifetime of the object
+public:
+    // Creates the stream; the status returned by cudaStreamCreate is not inspected.
+    CudaStream() { cudaStreamCreate(&stream); }
+    // Destroys the stream created by the constructor.
+    virtual ~CudaStream() { cudaStreamDestroy(stream); }
+    // Lets the wrapper be passed wherever a raw cudaStream_t is expected.
+    operator cudaStream_t() { return stream; }
+private:
+    cudaStream_t stream;
+};
+
+class CudaEvent { // owns one CUDA event for the lifetime of the object
+public:
+    // Creates the event; the status returned by cudaEventCreate is not inspected.
+    CudaEvent() { cudaEventCreate(&mEvent); }
+    // Destroys the event created by the constructor.
+    virtual ~CudaEvent() { cudaEventDestroy(mEvent); }
+    // Lets the wrapper be passed wherever a raw cudaEvent_t is expected.
+    operator cudaEvent_t() { return mEvent; }
+private:
+    cudaEvent_t mEvent;
+};
+
+struct CudaMapScope { // unmaps a set of CUDA graphics resources when it goes out of scope
+    std::vector<cudaGraphicsResource_t> resources_;
+    cudaStream_t stream_;
+    // Copies the handle list; nothing is mapped until map() is called.
+    CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
+                 cudaStream_t stream = nullptr)
+        : resources_(resources), stream_(stream) {}
+    // NOTE(review): unmaps unconditionally, even if map() was never called or failed.
+    ~CudaMapScope() {
+        if (resources_.empty())
+            return;
+        cudaGraphicsUnmapResources(resources_.size(), resources_.data(), stream_);
+    }
+    // Maps every resource on stream_; an empty list is reported as cudaSuccess.
+    cudaError_t map() {
+        return resources_.empty() ? cudaSuccess
+               : cudaGraphicsMapResources(resources_.size(), resources_.data(), stream_);
+    }
+};
+
+template <typename T> struct Destroy {
+    // Deleter functor: calls t->destroy() for non-null pointers, does nothing for nullptr.
+    void operator()(T *t) {
+        if (t) t->destroy();
+    }
+};
+
+class Range { // `steps` evenly spaced values: get(0) is bound.x and get(steps - 1) is approximately bound.y
+public:
+    Range(glm::vec2 bound, uint steps) // NOTE(review): steps == 1 divides by zero and steps == 0 wraps (steps - 1) - confirm callers pass steps >= 2
+        : _start(bound.x), _step((bound.y - bound.x) / (steps - 1)), _steps(steps) {}
+    // The accessors below are callable from both host and device code.
+    __host__ __device__ float get(uint i) { return _start + i * _step; } // i-th value; i is not range-checked
+    __host__ __device__ float start() { return _start; }
+    __host__ __device__ float stop() { return _start + _step * _steps; } // NOTE(review): one step past get(steps - 1), i.e. bound.y + _step - confirm this is intended
+    __host__ __device__ float steps() { return _steps; } // step count, converted to float by the return type
+
+private:
+    float _start; // bound.x
+    float _step; // spacing between consecutive values
+    uint _steps; // number of values
+};
+
+template <class T> using uptr = std::unique_ptr<T, ::Destroy<T>>; // unique_ptr whose deleter is ::Destroy<T>, i.e. T::destroy()
+template <class T> using sptr = std::shared_ptr<T>; // shorthand for std::shared_ptr
+// INTERVAL: (end - start) scaled by 1000 / CLOCKS_PER_SEC - milliseconds, presumably for clock() readings.
+#define INTERVAL(__start__, __end__) (((__end__) - (__start__)) / (float)CLOCKS_PER_SEC * 1000)
+
+#include "Resource.h"
+#include "Formatter.h"
\ No newline at end of file
diff --git a/cpp/utils/cuda.h b/cpp/utils/cuda.h
new file mode 100644
index 0000000..b394ee7
--- /dev/null
+++ b/cpp/utils/cuda.h
@@ -0,0 +1,11 @@
+#include "thread_index.h"
+
+#ifdef __INTELLISENSE__ // IntelliSense build: the macros drop the <<<...>>> kernel launch syntax
+#define CU_INVOKE(__func__) __func__
+#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__
+#else // real build: CU_INVOKE relies on variables named grdSize and blkSize in the calling scope
+#define CU_INVOKE(__func__) __func__<<<grdSize, blkSize>>>
+#define CU_INVOKE1(__func__, __grdSize__, __blkSize__) __func__<<<__grdSize__, __blkSize__>>>
+#endif
+
+inline uint ceilDiv(uint a, uint b) { return a / b + (a % b != 0 ? 1u : 0u); } // ceil(a / b) in exact integer math (float rounding breaks above 2^24); b must be non-zero
\ No newline at end of file
diff --git a/cpp/msl_infer/half.h b/cpp/utils/half.h
similarity index 100%
rename from cpp/msl_infer/half.h
rename to cpp/utils/half.h
diff --git a/cpp/msl_infer/thread_index.h b/cpp/utils/thread_index.h
similarity index 100%
rename from cpp/msl_infer/thread_index.h
rename to cpp/utils/thread_index.h
diff --git a/data/spherical_view_syn.py b/data/spherical_view_syn.py
index 2445ad3..e52d758 100644
--- a/data/spherical_view_syn.py
+++ b/data/spherical_view_syn.py
@@ -27,7 +27,7 @@ class SphericalViewSynDataset(object):
 
     def __init__(self, dataset_desc_path: str, load_images: bool = True,
                  load_depths: bool = False, load_bins: bool = False, c: int = color.RGB,
-                 calculate_rays: bool = True, res: Tuple[int, int] = None):
+                 calculate_rays: bool = True, res: Tuple[int, int] = None, load_views=None):
         """
         Initialize data loader for spherical view synthesis task
 
@@ -52,7 +52,7 @@ class SphericalViewSynDataset(object):
         self.load_bins = load_bins
 
         # Load dataset description file
-        self._load_desc(dataset_desc_path, res)
+        self._load_desc(dataset_desc_path, res, load_views)
 
         # Load view images
         if self.load_images:
@@ -98,7 +98,7 @@ class SphericalViewSynDataset(object):
         disp_val = (1 - input[..., 0, :, :]) * (disp_range[1] - disp_range[0]) + disp_range[0]
         return torch.reciprocal(disp_val)
 
-    def _load_desc(self, path, res=None):
+    def _load_desc(self, path, res=None, load_views=None):
         with open(path, 'r', encoding='utf-8') as file:
             data_desc = json.loads(file.read())
         if not data_desc.get('view_file_pattern'):
@@ -127,11 +127,17 @@ class SphericalViewSynDataset(object):
             [view.euler_to_matrix([rot[1], rot[0], 0]) for rot in data_desc['view_rots']]
             if len(data_desc['view_rots'][0]) == 2 else data_desc['view_rots'],
             device=device.default()).view(-1, 3, 3)  # (N, 3, 3)
-        #self.view_centers = self.view_centers[:6]
-        #self.view_rots = self.view_rots[:6]
+        self.view_idxs = torch.tensor(
+            data_desc['views'] if 'views' in data_desc else list(range(self.view_centers.size(0))),
+            device=device.default())
+
+        if load_views is not None:
+            self.view_centers = self.view_centers[load_views]
+            self.view_rots = self.view_rots[load_views]
+            self.view_idxs = self.view_idxs[load_views]
+        
         self.n_views = self.view_centers.size(0)
         self.n_pixels = self.n_views * self.view_res[0] * self.view_res[1]
-        self.view_idxs = data_desc['views'][:self.n_views] if 'views' in data_desc else range(self.n_views)
 
         if 'gl_coord' in data_desc and data_desc['gl_coord'] == True:
             print('Convert from OGL coordinate to DX coordinate (i. e. flip z axis)')
diff --git a/notebook/gen_demo_mono.ipynb b/notebook/gen_demo_mono.ipynb
index e1bde59..67753ad 100644
--- a/notebook/gen_demo_mono.ipynb
+++ b/notebook/gen_demo_mono.ipynb
@@ -70,6 +70,10 @@
     "    plt.subplot(133)\n",
     "    img.plot(images['layers_img'][2])\n",
     "    plt.figure(figsize=(12, 12))\n",
+    "    img.plot(images['overlaid'])\n",
+    "    plt.figure(figsize=(12, 12))\n",
+    "    img.plot(images['blended_raw'])\n",
+    "    plt.figure(figsize=(12, 12))\n",
     "    img.plot(images['blended'])\n",
     "\n",
     "\n",
@@ -87,7 +91,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -108,12 +112,12 @@
     "fovea_net = load_net(find_file('fovea'))\n",
     "periph_net = load_net(find_file('periph'))\n",
     "renderer = FoveatedNeuralRenderer(fov_list, res_list, nn.ModuleList([fovea_net, periph_net, periph_net]),\n",
-    "                                  res_full, using_mask=False, device=device.default())\n"
+    "                                  res_full, device=device.default())"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -129,13 +133,14 @@
     "    ],\n",
     "    'barbershop': [\n",
     "        [0, 0, 0,   0, 0,   0, 0],\n",
-    "        #[0, 0, 0, 20, 0, -300, 50],\n",
-    "        #[0, 0, 0, -140, -30, 150, -250],\n",
-    "        #[0, 0, 0, -60, -30, 75, -125],\n",
+    "        [0, 0, 0, 20, 0, -300, 50],\n",
+    "        [0, 0, 0, -140, -30, 150, -250],\n",
+    "        [0, 0, 0, -60, -30, 75, -125],\n",
+    "        [0, 0, 0,   -10, -5,   0, 0]\n",
     "    ],\n",
     "    'lobby': [\n",
-    "        #[0, 0, 0, 0, 0, 75, 0],\n",
-    "        #[0, 0, 0, 0, 0, 5, 150],\n",
+    "        [0, 0, 0, 0, 0, 75, 0],\n",
+    "        [0, 0, 0, 0, 0, 5, 150],\n",
     "        [0, 0, 0, -120, 0, 75, 50],\n",
     "    ]\n",
     "}\n",
@@ -143,14 +148,17 @@
     "for i, param in enumerate(params[scene]):\n",
     "    view = Trans(torch.tensor(param[:3], device=device.default()),\n",
     "                 torch.tensor(euler_to_matrix([-param[4], param[3], 0]), device=device.default()).view(3, 3))\n",
-    "    images = renderer(view, param[-2:])\n",
-    "    if False:\n",
+    "    images = renderer(view, param[-2:], using_mask=False, ret_raw=True)\n",
+    "    images['overlaid'] = renderer.foveation.synthesis(images['layers_raw'], param[-2:], do_blend=False)\n",
+    "    if True:\n",
     "        outputdir = '../__demo/mono/'\n",
     "        misc.create_dir(outputdir)\n",
     "        img.save(images['layers_img'][0], f'{outputdir}{scene}_{i}_fovea.png')\n",
     "        img.save(images['layers_img'][1], f'{outputdir}{scene}_{i}_mid.png')\n",
     "        img.save(images['layers_img'][2], f'{outputdir}{scene}_{i}_periph.png')\n",
     "        img.save(images['blended'], f'{outputdir}{scene}_{i}_blended.png')\n",
+    "        img.save(images['overlaid'], f'{outputdir}{scene}_{i}_overlaid.png')\n",
+    "        img.save(images['blended_raw'], f'{outputdir}{scene}_{i}_blended_raw.png')\n",
     "    else:\n",
     "        images = plot_images(images)\n"
    ]
@@ -212,8 +220,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.5 64-bit ('base': conda)",
-   "name": "python385jvsc74a57bd082066b63b621a9e3d15e3b7c11ca76da6238eff3834294910d715044bd0561e5"
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -231,9 +240,8 @@
    "interpreter": {
     "hash": "82066b63b621a9e3d15e3b7c11ca76da6238eff3834294910d715044bd0561e5"
    }
-  },
-  "orig_nbformat": 2
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
\ No newline at end of file
+}
diff --git a/notebook/gen_demo_stereo.ipynb b/notebook/gen_demo_stereo.ipynb
index a5d6c21..68aab2f 100644
--- a/notebook/gen_demo_stereo.ipynb
+++ b/notebook/gen_demo_stereo.ipynb
@@ -2,37 +2,44 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Set CUDA:0 as current device.\n"
+     ]
+    }
+   ],
    "source": [
     "import sys\n",
     "import os\n",
     "import torch\n",
+    "import torch.nn as nn\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "rootdir = os.path.abspath(sys.path[0] + '/../')\n",
     "sys.path.append(rootdir)\n",
     "\n",
-    "torch.cuda.set_device(2)\n",
+    "torch.cuda.set_device(0)\n",
     "print(\"Set CUDA:%d as current device.\" % torch.cuda.current_device())\n",
     "torch.autograd.set_grad_enabled(False)\n",
     "\n",
     "from data.spherical_view_syn import *\n",
     "from configs.spherical_view_syn import SphericalViewSynConfig\n",
     "from utils import netio\n",
-    "from utils import misc\n",
     "from utils import img\n",
     "from utils import device\n",
-    "from utils import view\n",
+    "from utils.view import *\n",
     "from components.fnr import FoveatedNeuralRenderer\n",
     "\n",
     "\n",
     "def load_net(path):\n",
     "    config = SphericalViewSynConfig()\n",
-    "    config.from_id(path[:-4])\n",
+    "    config.from_id(os.path.splitext(path)[0])\n",
     "    config.SAMPLE_PARAMS['perturb_sample'] = False\n",
-    "    # config.print()\n",
     "    net = config.create_net().to(device.default())\n",
     "    netio.load(path, net)\n",
     "    return net\n",
@@ -45,14 +52,14 @@
     "    return None\n",
     "\n",
     "\n",
-    "def load_views(data_desc_file) -> view.Trans:\n",
+    "def load_views(data_desc_file) -> Trans:\n",
     "    with open(data_desc_file, 'r', encoding='utf-8') as file:\n",
     "        data_desc = json.loads(file.read())\n",
     "        view_centers = torch.tensor(\n",
     "            data_desc['view_centers'], device=device.default()).view(-1, 3)\n",
     "        view_rots = torch.tensor(\n",
     "            data_desc['view_rots'], device=device.default()).view(-1, 3, 3)\n",
-    "        return view.Trans(view_centers, view_rots)\n",
+    "        return Trans(view_centers, view_rots)\n",
     "\n",
     "\n",
     "def plot_cross(center, res):\n",
@@ -78,115 +85,120 @@
     "        color=[0, 1, 0])\n",
     "\n",
     "\n",
-    "def plot_fovea(left_images, right_images, left_center, right_center):\n",
-    "    plt.figure(figsize=(8, 4))\n",
+    "def plot_figures(left_images, right_images, left_center, right_center):\n",
+    "    # Plot Fovea\n",
+    "    plt.figure(figsize=(12, 6))\n",
     "    plt.subplot(121)\n",
-    "    img.plot(left_images['fovea'])\n",
-    "    fovea_res = left_images['fovea'].size()[-2:]\n",
+    "    img.plot(left_images['layers_img'][0])\n",
+    "    fovea_res = left_images['layers_img'][0].size()[-2:]\n",
     "    plot_cross((0, 0), fovea_res)\n",
     "    plt.subplot(122)\n",
-    "    img.plot(right_images['fovea'])\n",
+    "    img.plot(right_images['layers_img'][0])\n",
     "    plot_cross((0, 0), fovea_res)\n",
     "\n",
+    "    # Plot Mid\n",
+    "    plt.figure(figsize=(12, 6))\n",
+    "    plt.subplot(121)\n",
+    "    img.plot(left_images['layers_img'][1])\n",
+    "    plt.subplot(122)\n",
+    "    img.plot(right_images['layers_img'][1])\n",
+    "\n",
+    "    # Plot Periph\n",
+    "    plt.figure(figsize=(12, 6))\n",
+    "    plt.subplot(121)\n",
+    "    img.plot(left_images['layers_img'][2])\n",
+    "    plt.subplot(122)\n",
+    "    img.plot(right_images['layers_img'][2])\n",
+    "\n",
+    "    # Plot Blended\n",
+    "    plt.figure(figsize=(12, 6))\n",
+    "    plt.subplot(121)\n",
+    "    img.plot(left_images['blended'])\n",
+    "    full_res = left_images['blended'].size()[-2:]\n",
+    "    plot_cross(left_center, full_res)\n",
+    "    plt.subplot(122)\n",
+    "    img.plot(right_images['blended'])\n",
+    "    plot_cross(right_center, full_res)\n",
+    "\n",
     "\n",
     "scenes = {\n",
-    "    'gas': '__0_user_study/us_gas_all_in_one',\n",
-    "    'mc': '__0_user_study/us_mc_all_in_one',\n",
-    "    'bedroom': 'bedroom_all_in_one',\n",
-    "    'gallery': 'gallery_all_in_one',\n",
-    "    'lobby': 'lobby_all_in_one'\n",
+    "    'classroom': 'classroom_all',\n",
+    "    'stones': 'stones_all',\n",
+    "    'barbershop': 'barbershop_all',\n",
+    "    'lobby': 'lobby_all'\n",
     "}\n",
     "\n",
+    "\n",
     "fov_list = [20, 45, 110]\n",
-    "res_list = [(128, 128), (256, 256), (256, 230)]\n",
+    "res_list = [(256, 256), (256, 256), (256, 230)]\n",
     "res_full = (1600, 1440)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Change working directory to  /home/dengnc/dvs/data/__new/classroom_all\n",
+      "Load net from fovea@snerffast4-rgb_e6_fc256x8_d1.00-7.00_s64_~p.pth ...\n",
+      "Load net from periph@snerffast4-rgb_e6_fc128x4_d1.00-7.00_s64_~p.pth ...\n"
+     ]
+    }
+   ],
    "source": [
-    "centers = {\n",
-    "    'gas': [\n",
-    "        [(3.5, 0), (-3.5, 0)],\n",
-    "        [(1.5, 0), (-1.5, 0)]\n",
-    "    ],\n",
-    "    'mc': [\n",
-    "        [(2, 0), (-2, 0)],\n",
-    "        [(2, 0), (-2, 0)]\n",
+    "params = {\n",
+    "    'classroom': [\n",
+    "        [(0, 0, 0, 0, 0), (1, -83), (-5, -83)],\n",
+    "        [(0, 0, 0, 0, 0), (-171, 55), (-178, 55)],\n",
+    "        [(0, 0, 0, 0, 0), (60, 55), (55, 55)],\n",
+    "        [(0, 0, 0, 0, 0), (138, 160), (130, 160)]\n",
     "    ],\n",
-    "    'bedroom': [\n",
-    "        [(5, 0), (-5, 0)],\n",
-    "        [(6, 0), (-6, 0)],\n",
-    "        [(5, 0), (-5, 0)]\n",
-    "    ],\n",
-    "    'gallery': [\n",
-    "        [(2.5, 0), (-2.5, 0)],\n",
-    "        [(11.5, 0), (-11.5, 0)]\n",
-    "    ]\n",
     "}\n",
-    "scene = 'bedroom'\n",
-    "os.chdir(os.path.join(rootdir, f'data/{scenes[scene]}'))\n",
+    "scene = 'classroom'\n",
+    "os.chdir(f'{rootdir}/data/__new/{scenes[scene]}')\n",
     "print('Change working directory to ', os.getcwd())\n",
     "\n",
     "fovea_net = load_net(find_file('fovea'))\n",
     "periph_net = load_net(find_file('periph'))\n",
-    "\n",
-    "# Load Dataset\n",
-    "views = load_views('demo.json')\n",
-    "print('Dataset loaded.')\n",
-    "print('views:', views.size())\n",
-    "gen = GenFinal(fov_list, res_list, res_full, fovea_net, periph_net,\n",
-    "               device=device.default())\n",
-    "\n",
-    "for view_idx in range(views.size()[0]):\n",
-    "    test_view = views.get(view_idx)\n",
-    "    left_images = gen(centers[scene][view_idx][0], view.Trans(\n",
-    "        test_view.trans_point(\n",
-    "            torch.tensor([-0.03, 0, 0], device=device.default())\n",
-    "        ), test_view.r), mono_trans=test_view)\n",
-    "    right_images = gen(centers[scene][view_idx][1], view.Trans(\n",
-    "        test_view.trans_point(\n",
-    "            torch.tensor([0.03, 0, 0], device=device.default())\n",
-    "        ), test_view.r), mono_trans=test_view)\n",
-    "    #plot_fovea(left_images, right_images, centers[scene][view_idx][0],\n",
-    "    #           centers[scene][view_idx][1])\n",
-    "    outputdir = '../__2_demo/mono_periph/stereo/'\n",
-    "    misc.create_dir(outputdir)\n",
-    "    # for key in images:\n",
-    "    key = 'blended'\n",
-    "    img.save(left_images[key], '%s%s_view%04d_%s_l.png' % (outputdir, scene, view_idx, key))\n",
-    "    img.save(right_images[key], '%s%s_view%04d_%s_r.png' % (outputdir, scene, view_idx, key))\n",
-    "    stereo_overlap = torch.cat([left_images['blended'][:, 0:1], right_images['blended'][:, 1:3]], dim=1)\n",
-    "    img.save(stereo_overlap, '%s%s_view%04d_%s_stereo.png' % (outputdir, scene, view_idx, key))\n",
-    "\n",
-    "    left_images = gen(centers[scene][view_idx][0], view.Trans(\n",
-    "        test_view.trans_point(\n",
-    "            torch.tensor([-0.03, 0, 0], device=device.default())\n",
-    "        ), test_view.r))\n",
-    "    right_images = gen(centers[scene][view_idx][1], view.Trans(\n",
-    "        test_view.trans_point(\n",
-    "            torch.tensor([0.03, 0, 0], device=device.default())\n",
-    "        ), test_view.r))\n",
-    "    #plot_fovea(left_images, right_images, centers[scene][view_idx][0],\n",
-    "    #           centers[scene][view_idx][1])\n",
-    "    outputdir = '../__2_demo/stereo/'\n",
-    "    misc.create_dir(outputdir)\n",
-    "    # for key in images:\n",
-    "    key = 'blended'\n",
-    "    img.save(left_images[key], '%s%s_view%04d_%s_l.png' % (outputdir, scene, view_idx, key))\n",
-    "    img.save(right_images[key], '%s%s_view%04d_%s_r.png' % (outputdir, scene, view_idx, key))\n",
-    "    stereo_overlap = torch.cat([left_images['blended'][:, 0:1], right_images['blended'][:, 1:3]], dim=1)\n",
-    "    img.save(stereo_overlap, '%s%s_view%04d_%s_stereo.png' % (outputdir, scene, view_idx, key))\n"
+    "renderer = FoveatedNeuralRenderer(fov_list, res_list,\n",
+    "                                  nn.ModuleList([fovea_net, periph_net, periph_net]),\n",
+    "                                  res_full, device=device.default())\n",
+    "\n",
+    "for i, param in enumerate(params[scene]):\n",
+    "    view = Trans(torch.tensor(param[0][:3], device=device.default()),\n",
+    "                 torch.tensor(euler_to_matrix([-param[0][4], param[0][3], 0]),\n",
+    "                              device=device.default()).view(3, 3))\n",
+    "    eye_offset = torch.tensor([0.03, 0, 0], device=device.default())\n",
+    "    left_view = Trans(view.trans_point(-eye_offset), view.r)\n",
+    "    right_view = Trans(view.trans_point(eye_offset), view.r)\n",
+    "    left_images, right_images = renderer(view, param[1], param[2],\n",
+    "                                         stereo_disparity=0.06, using_mask=False, ret_raw=False)\n",
+    "    if True:\n",
+    "        outputdir = '../__demo/stereo/'\n",
+    "        misc.create_dir(outputdir)\n",
+    "        img.save(left_images['blended'], '%s%s_%d_l.png' % (outputdir, scene, i))\n",
+    "        img.save(right_images['blended'], '%s%s_%d_r.png' % (outputdir, scene, i))\n",
+    "        stereo_overlap = torch.cat([\n",
+    "            left_images['blended'][:, 0:1],\n",
+    "            right_images['blended'][:, 1:3]\n",
+    "        ], dim=1)\n",
+    "        img.save(stereo_overlap, '%s%s_%d_stereo.png' % (outputdir, scene, i))\n",
+    "    else:\n",
+    "        plot_figures(left_images, right_images, param[1], param[2])\n"
    ]
   }
  ],
  "metadata": {
+  "interpreter": {
+   "hash": "82066b63b621a9e3d15e3b7c11ca76da6238eff3834294910d715044bd0561e5"
+  },
   "kernelspec": {
-   "display_name": "Python 3.7.9 64-bit ('pytorch': conda)",
-   "name": "python379jvsc74a57bd0660ca2a75467d3af74a68fcc6f40bc78ab96b99ff17d2f100b5ca821fbb183f2"
+   "display_name": "Python 3.8.5 64-bit ('base': conda)",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -198,7 +210,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.9"
+   "version": "3.8.5"
   },
   "orig_nbformat": 2
  },
diff --git a/notebook/test_foveation.ipynb b/notebook/test_foveation.ipynb
index e0ba19e..77417ad 100644
--- a/notebook/test_foveation.ipynb
+++ b/notebook/test_foveation.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -10,14 +10,15 @@
      "output_type": "stream",
      "text": [
       "Set CUDA:0 as current device.\n",
-      "14496(22.12%) pixels in layer 0 are masked as skipped\n",
-      "15980(24.38%) pixels in layer 1 are masked as skipped\n"
+      "Layer 0: 14106(21.52%) pixels are masked as skipped, 51430 pixels need to be inferred\n",
+      "Layer 1: 15586(23.78%) pixels are masked as skipped, 49950 pixels need to be inferred\n",
+      "Layer 2: 700(1.19%) pixels are masked as skipped, 58180 pixels need to be inferred\n"
      ]
     },
     {
      "data": {
-      "image/png": "\n",
-      "text/plain": "<Figure size 432x288 with 2 Axes>"
+      "image/png": "\n",
+      "text/plain": "<Figure size 864x288 with 3 Axes>"
      },
      "metadata": {
       "needs_background": "light"
@@ -45,20 +46,27 @@
     "\n",
     "\n",
     "foveation = Foveation([20, 45, 110], [(256, 256), (256, 256), (256, 230)], (1600, 1440))\n",
-    "layers_mask = foveation.get_layers_mask()\n",
-    "plt.figure()\n",
+    "layers_mask = foveation.get_layers_mask((0, 0))\n",
+    "plt.figure(figsize=(12, 4))\n",
     "for i, mask in enumerate(layers_mask):\n",
     "    colored_mask = torch.zeros(mask.size(0), mask.size(1), 3, device=mask.device)\n",
-    "    c = torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]], device=mask.device)\n",
+    "    c = torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 1, 1]], device=mask.device)\n",
     "    for bi in range(3):\n",
     "        region = torch.logical_and(mask > bi, mask < bi + 1)\n",
-    "        colored_mask[region] = c[bi] * (mask[region][..., None] - bi)\n",
+    "        colored_mask[region] = c[bi] + (c[-1] - c[bi]) * (mask[region][..., None] - bi)\n",
     "    plt.subplot(1, len(layers_mask), i + 1)\n",
     "    img.plot(colored_mask)\n",
     "    n_skipped = torch.sum(mask < 0)\n",
     "    n_tot = len(mask.flatten())\n",
-    "    print (f\"{n_skipped}({n_skipped / n_tot * 100:.2f}%) pixels in layer {i} are masked as skipped\")"
+    "    print (f\"Layer {i}: {n_skipped}({n_skipped / n_tot * 100:.2f}%) pixels are masked as skipped, {n_tot - n_skipped} pixels need to be inferred\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/notebook/test_spherical_view_syn.ipynb b/notebook/test_spherical_view_syn.ipynb
index d4e81cf..3046052 100644
--- a/notebook/test_spherical_view_syn.ipynb
+++ b/notebook/test_spherical_view_syn.ipynb
@@ -2,17 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Set CUDA:0 as current device.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import sys\n",
     "import os\n",
@@ -129,27 +121,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0 torch.Size([4, 3, 400, 400]) torch.Size([4, 400, 400, 3]) torch.Size([4, 400, 400, 3])\n"
-     ]
-    },
-    {
-     "data": {
-      "image/png": "\n",
-      "text/plain": "<Figure size 864x468 with 4 Axes>"
-     },
-     "metadata": {
-      "needs_background": "light"
-     },
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from data.spherical_view_syn import SphericalViewSynDataset\n",
     "from data.loader import FastDataLoader\n",
@@ -194,22 +168,23 @@
     "#DATA_DESC_FILE = f'{rootdir}/data/lobby_fovea_2021.01.18/train.json'\n",
     "#DATA_DESC_FILE = f'{rootdir}/data/__new/street_fovea_r360x80_t1.0/train1.json'\n",
     "#DATA_DESC_FILE = f'{rootdir}/data/__new/stones_fovea_r360x80_t1.0/train1.json'\n",
-    "DATA_DESC_FILE = f'{rootdir}/data/__new/lobby_periph_r360x180_t1.0/train1.json'\n",
+    "#DATA_DESC_FILE = f'{rootdir}/data/__new/lobby_periph_r360x180_t1.0/train1.json'\n",
+    "DATA_DESC_FILE = f'{rootdir}/data/__new/classroom_all/nerf_cvt.json'\n",
     "\n",
     "\n",
-    "dataset = SphericalViewSynDataset(DATA_DESC_FILE)\n",
+    "dataset = SphericalViewSynDataset(DATA_DESC_FILE, load_views=range(12))\n",
     "dataset.set_patch_size(1)\n",
     "res = dataset.view_res\n",
     "data_loader = FastDataLoader(dataset, res[0] * res[1], shuffle=False)\n",
     "\n",
     "selector = torch.arange(res[0] * res[1]).reshape(res[0], res[1])[::5, ::5].flatten()\n",
     "\n",
-    "for ri in range(0, 9):\n",
+    "for ri in range(0, 4):\n",
     "    r = ri * 2 + 1\n",
     "    p = None\n",
     "    centers = None\n",
     "    pixels = None\n",
-    "    idx_range = list(range(6, 12)) #+ list(range(24, 30)) + list(range(42, 48))\n",
+    "    idx_range = list(range(12)) #+ list(range(24, 30)) + list(range(42, 48))\n",
     "    idx = 0\n",
     "    for indices, patches, rays_o, rays_d in data_loader:\n",
     "        if idx not in idx_range:\n",
@@ -225,9 +200,9 @@
     "        pixels = pixels_ if pixels is None else np.concatenate((pixels, pixels_), axis=0)\n",
     "        idx += 1\n",
     "\n",
-    "    if ri % 2 == 0:\n",
-    "        plt.figure(facecolor='white', figsize=(20, 10))\n",
-    "    ax = plt.subplot(1, 2, ri % 2 + 1, projection='3d')\n",
+    "    plt.figure(facecolor='white', figsize=(20, 20))\n",
+    "    ax = plt.axes(projection='3d')\n",
+    "    #ax = plt.subplot(1, 2, ri % 2 + 1, projection='3d')\n",
     "    plt.xlabel('x')\n",
     "    plt.ylabel('z')\n",
     "    plt.title('r = %f' % r)\n",
@@ -235,19 +210,13 @@
     "    ax.scatter(p[:, 0], p[:, 2], p[:, 1], color=pixels, s=0.5)\n",
     "    ax.view_init(elev=0, azim=-90)\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.8.5 64-bit ('base': conda)",
-   "name": "python385jvsc74a57bd082066b63b621a9e3d15e3b7c11ca76da6238eff3834294910d715044bd0561e5"
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -263,5 +232,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
-}
\ No newline at end of file
+ "nbformat_minor": 4
+}
diff --git a/single_infer.sh b/single_infer.sh
new file mode 100755
index 0000000..fb6c2c0
--- /dev/null
+++ b/single_infer.sh
@@ -0,0 +1,30 @@
+#/usr/bin/bash
+
+datadir='data/__new/lobby_fovea_r360x80_t1.0'
+trainset='data/__new/lobby_fovea_r360x80_t1.0/train1.json'
+testset='data/__new/lobby_fovea_r360x80_t1.0/test1.json'
+epochs=50
+
+n_nets=$1
+nf=$2
+n_layers=$3
+
+configid="eval@snerffast${n_nets}-rgb_e6_fc${nf}x${n_layers}_d1.20-6.00_s64_~p"
+if [ ! -f "$datadir/$configid/model-epoch_$epochs.pth" ]; then
+    cont_epoch=0
+    for ((i=$epochs-1;i>0;i--)) do
+        if [ -f "$datadir/$configid/model-epoch_$i.pth" ]; then
+            cont_epoch=$i
+            break
+        fi
+    done
+    if [ ${cont_epoch} -gt 0 ]; then
+        python run_spherical_view_syn.py $trainset -e $epochs -m $configid/model-epoch_${cont_epoch}.pth
+    else
+        python run_spherical_view_syn.py $trainset -i $configid -e $epochs
+    fi
+fi
+if ! ls $datadir/$configid/output_$epochs/perf_* >/dev/null 2>&1; then
+    python run_spherical_view_syn.py $trainset -t -m $configid/model-epoch_$epochs.pth -o perf
+    python run_spherical_view_syn.py $testset -t -m $configid/model-epoch_$epochs.pth -o perf
+fi
\ No newline at end of file
diff --git a/tools/convert_data_desc.py b/tools/convert_data_desc.py
index 0432806..cf12b59 100644
--- a/tools/convert_data_desc.py
+++ b/tools/convert_data_desc.py
@@ -23,7 +23,7 @@ with open(data_desc_path, 'r') as fp:
 
 dataset_desc['cam_params'] = view.CameraParam.convert_camera_params(
     dataset_desc['cam_params'],
-    (dataset_desc['view_res']['x'], dataset_desc['view_res']['x']))
+    (dataset_desc['view_res']['y'], dataset_desc['view_res']['x']))
 
 dataset_desc['view_rots'] = [
     view.euler_to_matrix([rot[1], rot[0], 0])
diff --git a/tools/export_snerf_fast.py b/tools/export_snerf_fast.py
index 3b24ecc..ae5e96d 100644
--- a/tools/export_snerf_fast.py
+++ b/tools/export_snerf_fast.py
@@ -9,12 +9,10 @@ from typing import Mapping, List
 sys.path.append(os.path.abspath(sys.path[0] + '/../'))
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--device', type=int, default=0,
-                    help='Which CUDA device to use.')
-parser.add_argument('--batch-size', type=str,
-                    help='Resolution')
-parser.add_argument('model', type=str,
-                    help='Path of model to export')
+parser.add_argument('-b', '--batch-size', type=str, help='Resolution')
+parser.add_argument('-o', '--output', type=str)
+parser.add_argument('--device', type=int, default=0, help='Which CUDA device to use.')
+parser.add_argument('model', type=str, help='Path of model to export')
 opt = parser.parse_args()
 
 # Select device
@@ -28,30 +26,34 @@ from utils import device
 from configs.spherical_view_syn import SphericalViewSynConfig
 
 dir_path, model_file = os.path.split(opt.model)
+config_id = os.path.basename(os.path.abspath(dir_path))  # abspath: also works for '' and '.'
+
 batch_size = eval(opt.batch_size)
 batch_size_str = opt.batch_size.replace('*', 'x')
-outdir = f"output_{int(os.path.splitext(model_file)[0][12:])}"
 
-os.chdir(dir_path)
-misc.create_dir(outdir)
+if not opt.output:
+    epochs = os.path.splitext(model_file)[0][len("model-epoch_"):]  # "model-epoch_50.pth" -> "50"
+    outdir = os.path.join(dir_path, f"output_{epochs}")  # stays relative when dir_path is ''
+    output = os.path.join(outdir, f"net@{batch_size_str}.onnx")
+    misc.create_dir(outdir)
+else:
+    output = opt.output
 
-config = SphericalViewSynConfig()
 
 
-def load_net(path):
-    id=os.path.split(dir_path)[-1]#os.path.splitext(os.path.basename(path))[0]
-    config.from_id(id)
+def load_net():  # creates the net described by config_id and loads the weights of opt.model
+    config = SphericalViewSynConfig()
+    config.from_id(config_id)
     config.SAMPLE_PARAMS['perturb_sample'] = False
     config.name += batch_size_str
     config.print()
     net = config.create_net().to(device.default())
-    netio.load(path, net)
-    return net, id
+    netio.load(opt.model, net)  # path is used as given on the command line
+    return net
 
 
-def export_net(net: torch.nn.Module, name: str,
-               input: Mapping[str, List[int]], output_names: List[str]):
-    outpath = os.path.join(outdir, f"{name}@{batch_size_str}.onnx")
+def export_net(net: torch.nn.Module, path: str, input: Mapping[str, List[int]],
+               output_names: List[str]):
     input_tensors = tuple([
         torch.empty(size, device=device.default())
         for size in input.values()
@@ -59,21 +61,25 @@ def export_net(net: torch.nn.Module, name: str,
     onnx.export(
         net,
         input_tensors,
-        outpath,
+        path,
         export_params=True,  # store the trained parameter weights inside the model file
         verbose=True,
         opset_version=9,     # the ONNX version to export the model to
-        do_constant_folding=True, # whether to execute constant folding
-        input_names=input.keys(),   # the model's input names
-        output_names=output_names # the model's output names
+        do_constant_folding=True,  # whether to execute constant folding
+        input_names=list(input.keys()),   # the model's input names
+        output_names=output_names  # the model's output names
     )
-    print('Model exported to ' + outpath)
+    print('Model exported to ' + path)
 
 
 if __name__ == "__main__":
     with torch.no_grad():
-        net: SnerfFast = load_net(model_file)[0]
-        export_net(SnerfFastExport(net), 'net', {
-            'Encoded': [batch_size, net.n_samples, net.coord_encoder.out_dim],
-            'Depths': [batch_size, net.n_samples]
-        }, ['Colors'])
\ No newline at end of file
+        net: SnerfFast = load_net()
+        export_net(
+            SnerfFastExport(net),
+            output,
+            {
+                'Encoded': [batch_size, net.n_samples, net.coord_encoder.out_dim],
+                'Depths': [batch_size, net.n_samples]
+            },
+            ['Colors'])
diff --git a/tools/gen_eval_table.py b/tools/gen_eval_table.py
new file mode 100644
index 0000000..ba39a46
--- /dev/null
+++ b/tools/gen_eval_table.py
@@ -0,0 +1,126 @@
+"""Collect the evaluation results of the network-size sweep into one CSV file.
+
+For every (n_nets, n_layers, nf) combination this script reads
+  * the train/test value stored as the last "_"-separated field in the names
+    of the ``perf_r120x80*`` files in ``<datadir>/<configid>/output_50``, and
+  * the mean ``computeMs`` of ``<datadir>/eval_trt/time/eval_*.json``,
+and writes three tables (train, test, time) to ``<datadir>/eval_perf.csv``.
+Cells without data are written as ``-``.
+"""
+import sys
+import os
+import json
+
+rootdir = os.path.abspath(sys.path[0] + '/../')
+
+datadir = f"{rootdir}/data/__new/classroom_fovea_r360x80_t0.6"
+n_nets_arr = [1, 2, 4, 8]
+nf_arr = [64, 128, 256, 512, 1024]
+n_layers_arr = [2, 4, 8]
+
+head = "Nets,Layers," + ",".join(str(nf) for nf in nf_arr)
+
+MISSING = "-"  # placeholder for a cell without data
+# The test prefix extends the train prefix, so it has to be matched first.
+TEST_PREFIX = "perf_r120x80_test"
+TRAIN_PREFIX = "perf_r120x80"
+
+
+def _value_from_name(filename):
+    """Return the part of `filename` after its last "_", extension dropped."""
+    return os.path.splitext(filename)[0].split("_")[-1]
+
+
+def collect_perf(outputdir):
+    """Return the `(train, test)` values read from the perf files in `outputdir`.
+
+    The first file of each kind in sorted name order provides the value (the
+    order of `os.listdir` is arbitrary, sorting makes the choice reproducible).
+    Further files of the same kind are deleted from `outputdir`.
+    `MISSING` is returned for a kind that has no file.
+    """
+    found = {}
+    for name in sorted(os.listdir(outputdir)):
+        if name.startswith(TEST_PREFIX):
+            kind = "test"
+        elif name.startswith(TRAIN_PREFIX):
+            kind = "train"
+        else:
+            continue
+        if kind in found:
+            os.remove(os.path.join(outputdir, name))  # drop the duplicate result file
+        else:
+            found[kind] = _value_from_name(name)
+    return found.get("train", MISSING), found.get("test", MISSING)
+
+
+def collect_time(time_file):
+    """Return the mean `computeMs` of the entries in `time_file` with one decimal.
+
+    `MISSING` is returned when the file does not exist or holds no entries
+    (the mean of an empty list would otherwise raise ZeroDivisionError).
+    """
+    if not os.path.exists(time_file):
+        return MISSING
+    with open(time_file) as fp:
+        time_data = json.load(fp)
+    if not time_data:
+        return MISSING
+    mean_ms = sum(item['computeMs'] for item in time_data) / len(time_data)
+    return f"{mean_ms:.1f}"
+
+
+def format_table(rows):
+    """Return `rows` as CSV text, preceded by the `head` line.
+
+    `rows` holds one list of cell strings per (n_nets, n_layers) pair, n_nets
+    varying slowest. The "Nets" cell is only filled in the first row of each
+    n_nets group; the other rows of the group leave it empty.
+    """
+    lines = [head]
+    for i, row in enumerate(rows):
+        nets_idx, layers_idx = divmod(i, len(n_layers_arr))
+        nets_cell = str(n_nets_arr[nets_idx]) if layers_idx == 0 else ""
+        lines.append(f"{nets_cell},{n_layers_arr[layers_idx]}," + ",".join(row))
+    return "\n".join(lines) + "\n"
+
+
+def main():
+    """Collect all values of the sweep and write `<datadir>/eval_perf.csv`."""
+    perf_train_table = []
+    perf_test_table = []
+    perf_time_table = []
+    for n_nets in n_nets_arr:
+        for n_layers in n_layers_arr:
+            perf_train_row = []
+            perf_test_row = []
+            perf_time_row = []
+            for nf in nf_arr:
+                configid = f"eval@snerffast{n_nets}-rgb_e6_fc{nf}x{n_layers}_d1.00-7.00_s64_~p"
+                outputdir = f"{datadir}/{configid}/output_50"
+                if not os.path.isdir(outputdir):
+                    # No output directory for this configuration: all three cells stay empty
+                    perf_train_row.append(MISSING)
+                    perf_test_row.append(MISSING)
+                    perf_time_row.append(MISSING)
+                    continue
+                perf_train, perf_test = collect_perf(outputdir)
+                perf_train_row.append(perf_train)
+                perf_test_row.append(perf_test)
+                time_file = f"{datadir}/eval_trt/time/eval_{n_nets}x{nf}x{n_layers}.json"
+                perf_time_row.append(collect_time(time_file))
+            perf_train_table.append(perf_train_row)
+            perf_test_table.append(perf_test_row)
+            perf_time_table.append(perf_time_row)
+
+    with open(f"{datadir}/eval_perf.csv", "w") as fp:
+        fp.write("Train:\n")
+        fp.write(format_table(perf_train_table))
+        fp.write("Test:\n")
+        fp.write(format_table(perf_test_table))
+        fp.write("Time:\n")
+        fp.write(format_table(perf_time_table))
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
-- 
GitLab