Commit 5699ccbf authored by Nianchen Deng

sync

parent 338ae906
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Debug/Voxel Sampler Export 3D",
"type": "python",
"request": "launch",
"program": "debug/voxel_sampler_export3d.py",
"args": [
"-p",
"data/__new/barbershop_fovea_r360x80_t0.6/train_t0.3.json"
],
"console": "integratedTerminal"
},
{
"name": "Train",
"type": "python",
"request": "launch",
"program": "train.py",
"args": [
//"-c",
//"snerf_voxels",
"/home/dengnc/dvs/data/__new/barbershop_fovea_r360x80_t0.6/_nets/train_t0.3/snerfadvx_voxels_x4/checkpoint_10.tar",
"--prune",
"100",
"--split",
"100"
//"data/__new/barbershop_fovea_r360x80_t0.6/train_t0.3.json"
],
"console": "integratedTerminal"
},
{
"name": "Test",
"type": "python",
"request": "launch",
"program": "test.py",
"args": [
"-m",
"/home/dengnc/dvs/data/__new/barbershop_fovea_r360x80_t0.6/_nets/train_t0.3/snerfadv_voxels+ls2/checkpoint_50.tar",
"-o",
"perf",
"color",
"--output-type",
"image",
"/home/dengnc/dvs/data/__new/barbershop_fovea_r360x80_t0.6/test_t0.3.json",
"--views",
"1"
],
"console": "integratedTerminal"
}
]
}
@@ -13,6 +13,12 @@ Or ref to https://pytorch.org/get-started/locally/ for install guide
* tensorboard
* plyfile
```
$ conda install -c conda-forge plyfile
```
* (Optional) dash
```
...
import sys
import os
import argparse
sys.path.append(os.path.dirname(os.path.abspath(__file__)))  # make gen_utils importable when Blender runs this script
from gen_utils import GenPano
parser = argparse.ArgumentParser()
parser.add_argument('-r', '--radius', type=float, required=True)
parser.add_argument("-n", "--samples", type=int, required=True)
parser.add_argument("--cycles-device", type=str)
args = parser.parse_args(sys.argv[sys.argv.index("--") + 1:])
GenPano('output/pano', f'hr_r{args.radius:.1f}', samples=[args.samples], depth_range=[args.radius, 50])()
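# Invocation sketch (illustrative; Blender only forwards arguments after "--"
# to the script, which is why sys.argv is sliced at that marker above):
#
#   blender scene.blend --background --python <this_script>.py -- -r 1.0 -n 128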
import bpy
import math
import json
import os
import numpy as np
from typing import List, Tuple
from itertools import product
class Gen:
def __init__(self, root_dir: str, dataset_name: str, *,
res: Tuple[int, int],
fov: float,
samples: List[int]) -> None:
self.res = res
self.fov = fov
self.samples = samples
self.scene = bpy.context.scene
self.cam_obj = self.scene.camera
self.cam = self.cam_obj.data
self.scene.render.resolution_x = self.res[0]
self.scene.render.resolution_y = self.res[1]
self.init_camera()
self.root_dir = root_dir
self.data_dir = f"{root_dir}/{dataset_name}/"
self.data_name = dataset_name
self.data_desc_file = f'{root_dir}/{dataset_name}.json'
def init_camera(self):
if self.fov < 0:
self.cam.type = 'PANO'
self.cam.cycles.panorama_type = 'EQUIRECTANGULAR'
else:
self.cam.type = 'PERSP'
self.cam.lens_unit = 'FOV'
self.cam.angle = math.radians(self.fov)
self.cam.dof.use_dof = False
self.cam.clip_start = 0.1
self.cam.clip_end = 1000
def init_desc(self):
return None
def save_desc(self):
with open(self.data_desc_file, 'w') as fp:
json.dump(self.desc, fp, indent=4)
def add_sample(self, i, x: List[float], render_only=False):
self.cam_obj.location = x[:3]
if len(x) > 3:
self.cam_obj.rotation_euler = [math.radians(x[4]), math.radians(x[3]), 0]
self.scene.render.filepath = self.data_dir + self.desc['view_file_pattern'] % i
bpy.ops.render.render(write_still=True)
if not render_only:
self.desc['view_centers'].append(x[:3])
if len(x) > 3:
self.desc['view_rots'].append(x[3:])
self.save_desc()
def gen_grid(self):
start_view = len(self.desc['view_centers'])
ranges = [
np.linspace(self.desc['range']['min'][i],
self.desc['range']['max'][i],
self.desc['samples'][i])
for i in range(len(self.desc['samples']))
]
for i, x in enumerate(product(*ranges)):
if i >= start_view:
self.add_sample(i, list(x))
def gen_rand(self):
pass
def __call__(self):
os.makedirs(self.data_dir, exist_ok=True)
if os.path.exists(self.data_desc_file):
with open(self.data_desc_file, 'r') as fp:
self.desc = json.load(fp)
else:
self.desc = self.init_desc()
# Render missing views in data desc
for i in range(len(self.desc['view_centers'])):
if not os.path.exists(self.data_dir + self.desc['view_file_pattern'] % i):
x: List[float] = self.desc['view_centers'][i]
if 'view_rots' in self.desc:
x += self.desc['view_rots'][i]
self.add_sample(i, x, render_only=True)
if len(self.desc['samples']) == 1:
self.gen_rand()
else:
self.gen_grid()
class GenView(Gen):
def __init__(self, root_dir: str, dataset_name: str, *,
res: Tuple[int, int], fov: float, samples: List[int],
tbox: Tuple[float, float, float], rbox: Tuple[float, float]) -> None:
super().__init__(root_dir, dataset_name, res=res, fov=fov, samples=samples)
self.tbox = tbox
self.rbox = rbox
def init_desc(self):
return {
'view_file_pattern': 'view_%04d.png',
"gl_coord": True,
'view_res': {
'x': self.res[0],
'y': self.res[1]
},
'cam_params': {
'fov': self.fov,
'cx': 0.5,
'cy': 0.5,
'normalized': True
},
'range': {
'min': [-self.tbox[0] / 2, -self.tbox[1] / 2, -self.tbox[2] / 2,
-self.rbox[0] / 2, -self.rbox[1] / 2],
'max': [self.tbox[0] / 2, self.tbox[1] / 2, self.tbox[2] / 2,
self.rbox[0] / 2, self.rbox[1] / 2]
},
'samples': self.samples,
'view_centers': [],
'view_rots': []
}
def gen_rand(self):
start_view = len(self.desc['view_centers'])
n = self.desc['samples'][0] - start_view
range_min = np.array(self.desc['range']['min'])
range_max = np.array(self.desc['range']['max'])
samples = (range_max - range_min) * np.random.rand(n, 5) + range_min
for i in range(n):
self.add_sample(i + start_view, list(samples[i]))
class GenPano(Gen):
def __init__(self, root_dir: str, dataset_name: str, *,
samples: List[int], depth_range: Tuple[float, float],
tbox: Tuple[float, float, float] = None) -> None:
self.depth_range = depth_range
self.tbox = tbox
super().__init__(root_dir, dataset_name, res=[4096, 2048], fov=-1, samples=samples)
def init_desc(self):
range = {
'range': {
'min': [-self.tbox[0] / 2, -self.tbox[1] / 2, -self.tbox[2] / 2],
'max': [self.tbox[0] / 2, self.tbox[1] / 2, self.tbox[2] / 2]
}
} if self.tbox else {}
return {
"type": "pano",
'view_file_pattern': 'view_%04d.png',
"gl_coord": True,
'view_res': {
'x': self.res[0],
'y': self.res[1]
},
**range,
"depth_range": {
"min": self.depth_range[0],
"max": self.depth_range[1]
},
'samples': self.samples,
'view_centers': []
}
def gen_rand(self):
start_view = len(self.desc['view_centers'])
n = self.desc['samples'][0] - start_view
        r_max = self.desc['depth_range']['min']  # keep centers within the near depth bound
        pts = (np.random.rand(n * 5, 3) - 0.5) * 2 * r_max  # oversample a cube of half-size r_max
        samples = pts[np.linalg.norm(pts, axis=1) < r_max][:n]  # keep only points inside the ball
for i in range(n):
self.add_sample(i + start_view, list(samples[i]))
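# Minimal usage sketch (hypothetical values). This must run inside Blender so
# that bpy.context.scene and its camera exist; the numbers below are
# illustrative, not values used by any dataset above.
if __name__ == "__main__":
    GenView("output/view", "example",
            res=(512, 512), fov=60,              # perspective camera
            samples=[5, 5, 5, 3, 3],             # 5x5x5 positions x 3x3 rotations -> gen_grid
            tbox=(1.0, 1.0, 1.0), rbox=(120.0, 80.0))()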
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
''' Modified based on: https://github.com/erikwijmans/Pointnet2_PyTorch '''
from __future__ import (
division,
absolute_import,
with_statement,
print_function,
unicode_literals,
)
import os
import sys
from typing import Tuple

import torch
import torch.nn.functional as F
from torch.autograd import Function
import torch.nn as nn
import numpy as np

from utils.geometry import discretize_points
from utils.constants import HUGE_FLOAT

try:
    import builtins
except ImportError:
    import __builtin__ as builtins  # Python 2 fallback
try:
import clib._ext as _ext
except ImportError:
raise ImportError(
"Could not import _ext module.\n"
"Please see the setup instructions in the README"
)
class BallRayIntersect(Function):
@staticmethod
def forward(ctx, radius, n_max, points, ray_start, ray_dir):
inds, min_depth, max_depth = _ext.ball_intersect(
ray_start.float(), ray_dir.float(), points.float(), radius, n_max)
min_depth = min_depth.type_as(ray_start)
max_depth = max_depth.type_as(ray_start)
ctx.mark_non_differentiable(inds)
ctx.mark_non_differentiable(min_depth)
ctx.mark_non_differentiable(max_depth)
return inds, min_depth, max_depth
@staticmethod
def backward(ctx, a, b, c):
return None, None, None, None, None
ball_ray_intersect = BallRayIntersect.apply
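# Usage sketch (illustrative shapes): intersect rays (S, N, 3) with per-batch
# sphere centers (S, M, 3) of radius 0.1, keeping at most 16 hits per ray;
# -1 entries in `inds` are padding.
#
#   inds, t_near, t_far = ball_ray_intersect(0.1, 16, points, ray_start, ray_dir)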
class AABBRayIntersect(Function):
@staticmethod
def forward(ctx, voxelsize, n_max, points, ray_start, ray_dir):
# HACK: speed-up ray-voxel intersection by batching...
G = min(2048, int(2 * 10 ** 9 / points.numel())) # HACK: avoid out-of-memory
S, N = ray_start.shape[:2]
K = int(np.ceil(N / G))
        G, K = 1, N  # HACK: chunking disabled here; process all rays in one group
H = K * G
if H > N:
ray_start = torch.cat([ray_start, ray_start[:, :H - N]], 1)
ray_dir = torch.cat([ray_dir, ray_dir[:, :H - N]], 1)
ray_start = ray_start.reshape(S * G, K, 3)
ray_dir = ray_dir.reshape(S * G, K, 3)
points = points[None].expand(S * G, *points.size()).contiguous()
inds, min_depth, max_depth = _ext.aabb_intersect(
ray_start.float(), ray_dir.float(), points.float(), voxelsize, n_max)
min_depth = min_depth.type_as(ray_start)
max_depth = max_depth.type_as(ray_start)
inds = inds.reshape(S, H, -1)
min_depth = min_depth.reshape(S, H, -1)
max_depth = max_depth.reshape(S, H, -1)
if H > N:
inds = inds[:, :N]
min_depth = min_depth[:, :N]
max_depth = max_depth[:, :N]
ctx.mark_non_differentiable(inds)
ctx.mark_non_differentiable(min_depth)
ctx.mark_non_differentiable(max_depth)
return inds, min_depth, max_depth
@staticmethod
def backward(ctx, a, b, c):
return None, None, None, None, None
def aabb_ray_intersect(voxelsize: float, n_max: int, points: torch.Tensor, ray_start: torch.Tensor,
ray_dir: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
AABB-Ray intersect test
:param voxelsize `float`: size of a voxel
:param n_max `int`: maximum number of hits
:param points `Tensor(M, 3)`: voxels' centers
:param ray_start `Tensor(S, N, 3)`: rays' start positions
:param ray_dir `Tensor(S, N, 3)`: rays' directions
    :return `Tensor(S, N, n_max)`: indices of intersected voxels, or -1 for unused slots
    :return `Tensor(S, N, n_max)`: min depth of each intersected voxel
    :return `Tensor(S, N, n_max)`: max depth of each intersected voxel
"""
return AABBRayIntersect.apply(voxelsize, n_max, points, ray_start, ray_dir)
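# Example (illustrative): for voxel centers `centers` (M, 3) and rays (S, N, 3)
# on the same CUDA device,
#
#   inds, t_min, t_max = aabb_ray_intersect(0.25, 64, centers, rays_o, rays_d)
#   hit_mask = inds.ne(-1)    # (S, N, 64); -1 pads unused hit slots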
class SparseVoxelOctreeRayIntersect(Function):
@staticmethod
def forward(ctx, voxelsize, n_max, points, children, ray_start, ray_dir):
# HACK: avoid out-of-memory
G = min(2048, int(2 * 10 ** 9 / (points.numel() + children.numel())))
S, N = ray_start.shape[:2]
K = int(np.ceil(N / G))
        G, K = 1, N  # HACK: chunking disabled here; process all rays in one group
H = K * G
if H > N:
ray_start = torch.cat([ray_start, ray_start[:, :H - N]], 1)
ray_dir = torch.cat([ray_dir, ray_dir[:, :H - N]], 1)
ray_start = ray_start.reshape(S * G, K, 3)
ray_dir = ray_dir.reshape(S * G, K, 3)
points = points[None].expand(S * G, *points.size()).contiguous()
children = children[None].expand(S * G, *children.size()).contiguous()
inds, min_depth, max_depth = _ext.svo_intersect(
ray_start.float(), ray_dir.float(), points.float(), children.int(), voxelsize, n_max)
min_depth = min_depth.type_as(ray_start)
max_depth = max_depth.type_as(ray_start)
inds = inds.reshape(S, H, -1)
min_depth = min_depth.reshape(S, H, -1)
max_depth = max_depth.reshape(S, H, -1)
if H > N:
inds = inds[:, :N]
min_depth = min_depth[:, :N]
max_depth = max_depth[:, :N]
ctx.mark_non_differentiable(inds)
ctx.mark_non_differentiable(min_depth)
ctx.mark_non_differentiable(max_depth)
return inds, min_depth, max_depth
@staticmethod
def backward(ctx, a, b, c):
        return None, None, None, None, None, None
def octree_ray_intersect(voxelsize: float, n_max: int, points: torch.Tensor, children: torch.Tensor,
ray_start: torch.Tensor, ray_dir: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
Octree-Ray intersect test
:param voxelsize `float`: size of a voxel
:param n_max `int`: maximum number of hits
:param points `Tensor(M, 3)`: voxels' centers
:param children `Tensor(M, 9)`: flattened octree structure
:param ray_start `Tensor(S, N, 3)`: rays' start positions
:param ray_dir `Tensor(S, N, 3)`: rays' directions
    :return `Tensor(S, N, n_max)`: indices of intersected voxels, or -1 for unused slots
    :return `Tensor(S, N, n_max)`: min depth of each intersected voxel
    :return `Tensor(S, N, n_max)`: max depth of each intersected voxel
"""
return SparseVoxelOctreeRayIntersect.apply(voxelsize, n_max, points, children, ray_start,
ray_dir)
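# Sketch (assumes `centers` and `children` come from build_easy_octree at the
# end of this file, built with half_voxel = voxelsize / 2):
#
#   inds, t_min, t_max = octree_ray_intersect(voxelsize, 64, centers, children,
#                                             rays_o, rays_d)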
class TriangleRayIntersect(Function):
@staticmethod
def forward(ctx, cagesize, blur_ratio, n_max, points, faces, ray_start, ray_dir):
# HACK: speed-up ray-voxel intersection by batching...
G = min(2048, int(2 * 10 ** 9 / (3 * faces.numel()))) # HACK: avoid out-of-memory
S, N = ray_start.shape[:2]
K = int(np.ceil(N / G))
H = K * G
if H > N:
ray_start = torch.cat([ray_start, ray_start[:, :H - N]], 1)
ray_dir = torch.cat([ray_dir, ray_dir[:, :H - N]], 1)
ray_start = ray_start.reshape(S * G, K, 3)
ray_dir = ray_dir.reshape(S * G, K, 3)
face_points = F.embedding(faces.reshape(-1, 3), points.reshape(-1, 3))
face_points = face_points.unsqueeze(0).expand(S * G, *face_points.size()).contiguous()
inds, depth, uv = _ext.triangle_intersect(
ray_start.float(), ray_dir.float(), face_points.float(), cagesize, blur_ratio, n_max)
depth = depth.type_as(ray_start)
uv = uv.type_as(ray_start)
inds = inds.reshape(S, H, -1)
depth = depth.reshape(S, H, -1, 3)
uv = uv.reshape(S, H, -1)
if H > N:
inds = inds[:, :N]
depth = depth[:, :N]
uv = uv[:, :N]
ctx.mark_non_differentiable(inds)
ctx.mark_non_differentiable(depth)
ctx.mark_non_differentiable(uv)
return inds, depth, uv
@staticmethod
def backward(ctx, a, b, c):
        return None, None, None, None, None, None, None
triangle_ray_intersect = TriangleRayIntersect.apply
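# Usage sketch (illustrative): `points` are mesh vertices and `faces` integer
# triangle indices; hits come back sorted by depth with barycentric (u, v):
#
#   inds, depth, uv = triangle_ray_intersect(cagesize, blur_ratio, n_max,
#                                            points, faces, rays_o, rays_d)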
class UniformRaySampling(Function):
@staticmethod
def forward(ctx, pts_idx, min_depth, max_depth, step_size, max_ray_length, deterministic=False):
G, N, P = 256, pts_idx.size(0), pts_idx.size(1)
H = int(np.ceil(N / G)) * G
if H > N:
pts_idx = torch.cat([pts_idx, pts_idx[:H - N]], 0)
min_depth = torch.cat([min_depth, min_depth[:H - N]], 0)
max_depth = torch.cat([max_depth, max_depth[:H - N]], 0)
pts_idx = pts_idx.reshape(G, -1, P)
min_depth = min_depth.reshape(G, -1, P)
max_depth = max_depth.reshape(G, -1, P)
# pre-generate noise
max_steps = int(max_ray_length / step_size)
max_steps = max_steps + min_depth.size(-1) * 2
noise = min_depth.new_zeros(*min_depth.size()[:-1], max_steps)
if deterministic:
noise += 0.5
else:
noise = noise.uniform_()
# call cuda function
sampled_idx, sampled_depth, sampled_dists = _ext.uniform_ray_sampling(
pts_idx, min_depth.float(), max_depth.float(), noise.float(), step_size, max_steps)
sampled_depth = sampled_depth.type_as(min_depth)
sampled_dists = sampled_dists.type_as(min_depth)
sampled_idx = sampled_idx.reshape(H, -1)
sampled_depth = sampled_depth.reshape(H, -1)
sampled_dists = sampled_dists.reshape(H, -1)
if H > N:
sampled_idx = sampled_idx[: N]
sampled_depth = sampled_depth[: N]
sampled_dists = sampled_dists[: N]
max_len = sampled_idx.ne(-1).sum(-1).max()
sampled_idx = sampled_idx[:, :max_len]
sampled_depth = sampled_depth[:, :max_len]
sampled_dists = sampled_dists[:, :max_len]
ctx.mark_non_differentiable(sampled_idx)
ctx.mark_non_differentiable(sampled_depth)
ctx.mark_non_differentiable(sampled_dists)
return sampled_idx, sampled_depth, sampled_dists
@staticmethod
def backward(ctx, a, b, c):
return None, None, None, None, None, None
def uniform_ray_sampling(pts_idx: torch.Tensor, min_depth: torch.Tensor, max_depth: torch.Tensor,
step_size: float, max_ray_length: float, deterministic: bool = False) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
Sample along rays uniformly
:param pts_idx `Tensor(N, P)`: indices of voxels intersected with rays
:param min_depth `Tensor(N, P)`: min depth of intersections of rays and voxels
:param max_depth `Tensor(N, P)`: max depth of intersections of rays and voxels
:param step_size `float`: size of sampling step
:param max_ray_length `float`: maximum sampling depth along rays
:param deterministic `bool`: (optional) sample deterministically (or randomly), defaults to False
    :return `Tensor(N, P')`: voxel indices of sampled points
    :return `Tensor(N, P')`: depth of sampled points
    :return `Tensor(N, P')`: segment length of each sampled point
"""
return UniformRaySampling.apply(pts_idx, min_depth, max_depth, step_size, max_ray_length,
deterministic)
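# Pipeline sketch (illustrative): take the per-ray voxel hits from
# aabb_ray_intersect (squeezed to (N, P)) and march with a fixed step:
#
#   s_idx, s_depth, s_dists = uniform_ray_sampling(
#       inds[0], t_min[0], t_max[0], step_size=0.01, max_ray_length=10.0)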
class InverseCDFRaySampling(Function):
@staticmethod
def forward(ctx, pts_idx, min_depth, max_depth, probs, steps, fixed_step_size=-1, deterministic=False):
G, N, P = 200, pts_idx.size(0), pts_idx.size(1)
H = int(np.ceil(N / G)) * G
if H > N:
pts_idx = torch.cat([pts_idx, pts_idx[:1].expand(H - N, P)], 0)
min_depth = torch.cat([min_depth, min_depth[:1].expand(H - N, P)], 0)
max_depth = torch.cat([max_depth, max_depth[:1].expand(H - N, P)], 0)
probs = torch.cat([probs, probs[:1].expand(H - N, P)], 0)
steps = torch.cat([steps, steps[:1].expand(H - N)], 0)
# print(G, P, np.ceil(N / G), N, H, pts_idx.shape, min_depth.device)
pts_idx = pts_idx.reshape(G, -1, P)
min_depth = min_depth.reshape(G, -1, P)
max_depth = max_depth.reshape(G, -1, P)
probs = probs.reshape(G, -1, P)
steps = steps.reshape(G, -1)
# pre-generate noise
max_steps = steps.ceil().long().max() + P
noise = min_depth.new_zeros(*min_depth.size()[:-1], max_steps)
if deterministic:
noise += 0.5
else:
            noise = noise.uniform_().clamp(min=0.001, max=0.999)  # avoid exactly 0 or 1 at the CDF boundaries
# call cuda function
        chunk_size = 4 * G  # process in chunks to avoid running out of memory
results = [
_ext.inverse_cdf_sampling(
pts_idx[:, i:i + chunk_size].contiguous(),
min_depth.float()[:, i:i + chunk_size].contiguous(),
max_depth.float()[:, i:i + chunk_size].contiguous(),
noise.float()[:, i:i + chunk_size].contiguous(),
probs.float()[:, i:i + chunk_size].contiguous(),
steps.float()[:, i:i + chunk_size].contiguous(),
fixed_step_size)
for i in range(0, min_depth.size(1), chunk_size)
]
sampled_idx, sampled_depth, sampled_dists = [
torch.cat([r[i] for r in results], 1)
for i in range(3)
]
sampled_depth = sampled_depth.type_as(min_depth)
sampled_dists = sampled_dists.type_as(min_depth)
sampled_idx = sampled_idx.reshape(H, -1)
sampled_depth = sampled_depth.reshape(H, -1)
sampled_dists = sampled_dists.reshape(H, -1)
if H > N:
sampled_idx = sampled_idx[: N]
sampled_depth = sampled_depth[: N]
sampled_dists = sampled_dists[: N]
max_len = sampled_idx.ne(-1).sum(-1).max()
sampled_idx = sampled_idx[:, :max_len]
sampled_depth = sampled_depth[:, :max_len]
sampled_dists = sampled_dists[:, :max_len]
ctx.mark_non_differentiable(sampled_idx)
ctx.mark_non_differentiable(sampled_depth)
ctx.mark_non_differentiable(sampled_dists)
return sampled_idx, sampled_depth, sampled_dists
@staticmethod
def backward(ctx, a, b, c):
return None, None, None, None, None, None, None
def inverse_cdf_sampling(pts_idx: torch.Tensor, min_depth: torch.Tensor, max_depth: torch.Tensor,
probs: torch.Tensor, steps: torch.Tensor, fixed_step_size: float = -1,
deterministic: bool = False) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
Sample along rays by inverse CDF
:param pts_idx `Tensor(N, P)`: indices of voxels intersected with rays
:param min_depth `Tensor(N, P)`: min depth of intersections of rays and voxels
:param max_depth `Tensor(N, P)`: max depth of intersections of rays and voxels
    :param probs `Tensor(N, P)`: sampling weight of each ray-voxel segment (defines the CDF)
    :param steps `Tensor(N)`: expected number of samples along each ray
    :param fixed_step_size `float`: (optional) use this fixed step size if positive, defaults to -1 (disabled)
:param deterministic `bool`: (optional) sample deterministically (or randomly), defaults to False
    :return `Tensor(N, P')`: voxel indices of sampled points
    :return `Tensor(N, P')`: depth of sampled points
    :return `Tensor(N, P')`: segment length of each sampled point
"""
return InverseCDFRaySampling.apply(pts_idx, min_depth, max_depth, probs, steps, fixed_step_size,
deterministic)
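# Sketch (illustrative): draw roughly steps[i] importance samples per ray,
# weighted by the per-segment `probs`:
#
#   s_idx, s_depth, s_dists = inverse_cdf_sampling(
#       inds, t_min, t_max, probs, steps, fixed_step_size=-1)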
# Backup implementation of ray point sampling (pure PyTorch, no CUDA kernel)
@torch.no_grad()
def _parallel_ray_sampling(MARCH_SIZE, pts_idx, min_depth, max_depth, deterministic=False):
# uniform sampling
_min_depth = min_depth.min(1)[0]
_max_depth = max_depth.masked_fill(max_depth.eq(HUGE_FLOAT), 0).max(1)[0]
max_ray_length = (_max_depth - _min_depth).max()
delta = torch.arange(int(max_ray_length / MARCH_SIZE),
device=min_depth.device, dtype=min_depth.dtype)
delta = delta[None, :].expand(min_depth.size(0), delta.size(-1))
if deterministic:
delta = delta + 0.5
else:
delta = delta + delta.clone().uniform_().clamp(min=0.01, max=0.99)
delta = delta * MARCH_SIZE
sampled_depth = min_depth[:, :1] + delta
sampled_idx = (sampled_depth[:, :, None] >= min_depth[:, None, :]).sum(-1) - 1
sampled_idx = pts_idx.gather(1, sampled_idx)
# include all boundary points
sampled_depth = torch.cat([min_depth, max_depth, sampled_depth], -1)
sampled_idx = torch.cat([pts_idx, pts_idx, sampled_idx], -1)
# reorder
sampled_depth, ordered_index = sampled_depth.sort(-1)
sampled_idx = sampled_idx.gather(1, ordered_index)
sampled_dists = sampled_depth[:, 1:] - sampled_depth[:, :-1] # distances
sampled_depth = .5 * (sampled_depth[:, 1:] + sampled_depth[:, :-1]) # mid-points
# remove all invalid depths
min_ids = (sampled_depth[:, :, None] >= min_depth[:, None, :]).sum(-1) - 1
max_ids = (sampled_depth[:, :, None] >= max_depth[:, None, :]).sum(-1)
sampled_depth.masked_fill_(
(max_ids.ne(min_ids)) |
(sampled_depth > _max_depth[:, None]) |
(sampled_dists == 0.0), HUGE_FLOAT)
sampled_depth, ordered_index = sampled_depth.sort(-1) # sort again
sampled_masks = sampled_depth.eq(HUGE_FLOAT)
num_max_steps = (~sampled_masks).sum(-1).max()
sampled_depth = sampled_depth[:, :num_max_steps]
sampled_dists = sampled_dists.gather(1, ordered_index).masked_fill_(
sampled_masks, 0.0)[:, :num_max_steps]
sampled_idx = sampled_idx.gather(1, ordered_index).masked_fill_(
sampled_masks, -1)[:, :num_max_steps]
return sampled_idx, sampled_depth, sampled_dists
@torch.no_grad()
def parallel_ray_sampling(MARCH_SIZE, pts_idx, min_depth, max_depth, deterministic=False):
chunk_size = 4096
full_size = min_depth.shape[0]
if full_size <= chunk_size:
return _parallel_ray_sampling(MARCH_SIZE, pts_idx, min_depth, max_depth, deterministic=deterministic)
outputs = zip(*[
_parallel_ray_sampling(
MARCH_SIZE,
pts_idx[i:i + chunk_size], min_depth[i:i + chunk_size], max_depth[i:i + chunk_size],
deterministic=deterministic)
for i in range(0, full_size, chunk_size)])
sampled_idx, sampled_depth, sampled_dists = outputs
def padding_points(xs, pad):
if len(xs) == 1:
return xs[0]
maxlen = max([x.size(1) for x in xs])
full_size = sum([x.size(0) for x in xs])
xt = xs[0].new_ones(full_size, maxlen).fill_(pad)
st = 0
for i in range(len(xs)):
xt[st: st + xs[i].size(0), :xs[i].size(1)] = xs[i]
st += xs[i].size(0)
return xt
sampled_idx = padding_points(sampled_idx, -1)
sampled_depth = padding_points(sampled_depth, HUGE_FLOAT)
sampled_dists = padding_points(sampled_dists, 0.0)
return sampled_idx, sampled_depth, sampled_dists
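# Note: for at most 4096 rays this is identical to a direct
# _parallel_ray_sampling call; larger batches are split into 4096-ray chunks
# whose results are re-padded to a common length by padding_points, e.g.:
#
#   s_idx, s_depth, s_dists = parallel_ray_sampling(0.01, pts_idx, t_min, t_max)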
def build_easy_octree(points: torch.Tensor, half_voxel: float) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Build an octree.
:param points `Tensor(M, 3)`: centers of leaf voxels
:param half_voxel `float`: half size of voxel
:return `Tensor(M', 3)`: centers of all nodes in octree
:return `Tensor(M', 9)`: flattened octree structure
"""
coords, residual = discretize_points(points, half_voxel)
ranges = coords.max(0)[0] - coords.min(0)[0]
depths = torch.log2(ranges.max().float()).ceil_().long() - 1
center = (coords.max(0)[0] + coords.min(0)[0]) / 2
centers, children = _ext.build_octree(center, coords, int(depths))
centers = centers.float() * half_voxel + residual # transform back to float
return centers, children
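# A minimal end-to-end sketch (hypothetical sizes; requires a CUDA device and a
# built clib._ext). Leaf centers are assumed to lie on a grid of pitch
# `voxel_size`, as discretize_points expects.
if __name__ == "__main__":
    voxel_size = 0.5
    centers_leaf = torch.randint(0, 16, (1024, 3), device="cuda").float() \
                        .unique(dim=0) * voxel_size
    centers, children = build_easy_octree(centers_leaf, voxel_size / 2)
    rays_o = torch.zeros(1, 4096, 3, device="cuda")
    rays_d = F.normalize(torch.rand(1, 4096, 3, device="cuda") - 0.5, dim=-1)
    inds, t_min, t_max = octree_ray_intersect(voxel_size, 64, centers, children,
                                              rays_o, rays_d)
    print("rays with at least one hit:", inds.ne(-1).any(-1).sum().item())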
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <cmath>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
#define TOTAL_THREADS 512
inline int opt_n_threads(int work_size) {
const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
return max(min(1 << pow_2, TOTAL_THREADS), 1);
}
inline dim3 opt_block_config(int x, int y) {
const int x_threads = opt_n_threads(x);
const int y_threads =
max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
dim3 block_config(x_threads, y_threads, 1);
return block_config;
}
#define CUDA_CHECK_ERRORS() \
do { \
cudaError_t err = cudaGetLastError(); \
if (cudaSuccess != err) { \
fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \
cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \
__FILE__); \
exit(-1); \
} \
} while (0)
#endif
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
/*
* Copyright 1993-2009 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property and
* proprietary rights in and to this software and related documentation and
* any modifications thereto. Any use, reproduction, disclosure, or distribution
* of this software and related documentation without an express license
* agreement from NVIDIA Corporation is strictly prohibited.
*
*/
/*
This file implements common mathematical operations on vector types
(float3, float4 etc.) since these are not provided as standard by CUDA.
The syntax is modelled on the Cg standard library.
*/
#ifndef CUTIL_MATH_H
#define CUTIL_MATH_H
#include "cuda_runtime.h"
////////////////////////////////////////////////////////////////////////////////
typedef unsigned int uint;
typedef unsigned short ushort;
#ifndef __CUDACC__
#include <math.h>
inline float fminf(float a, float b)
{
return a < b ? a : b;
}
inline float fmaxf(float a, float b)
{
return a > b ? a : b;
}
inline int max(int a, int b)
{
return a > b ? a : b;
}
inline int min(int a, int b)
{
return a < b ? a : b;
}
inline float rsqrtf(float x)
{
return 1.0f / sqrtf(x);
}
#endif
// float functions
////////////////////////////////////////////////////////////////////////////////
// lerp
inline __device__ __host__ float lerp(float a, float b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float clamp(float f, float a, float b)
{
return fmaxf(a, fminf(f, b));
}
inline __device__ __host__ void swap(float &a, float &b)
{
float c = a;
a = b;
b = c;
}
inline __device__ __host__ void swap(int &a, int &b)
{
    int c = a;
    a = b;
    b = c;
}
// int2 functions
////////////////////////////////////////////////////////////////////////////////
// negate
inline __host__ __device__ int2 operator-(int2 &a)
{
return make_int2(-a.x, -a.y);
}
// addition
inline __host__ __device__ int2 operator+(int2 a, int2 b)
{
return make_int2(a.x + b.x, a.y + b.y);
}
inline __host__ __device__ void operator+=(int2 &a, int2 b)
{
a.x += b.x; a.y += b.y;
}
// subtract
inline __host__ __device__ int2 operator-(int2 a, int2 b)
{
return make_int2(a.x - b.x, a.y - b.y);
}
inline __host__ __device__ void operator-=(int2 &a, int2 b)
{
a.x -= b.x; a.y -= b.y;
}
// multiply
inline __host__ __device__ int2 operator*(int2 a, int2 b)
{
return make_int2(a.x * b.x, a.y * b.y);
}
inline __host__ __device__ int2 operator*(int2 a, int s)
{
return make_int2(a.x * s, a.y * s);
}
inline __host__ __device__ int2 operator*(int s, int2 a)
{
return make_int2(a.x * s, a.y * s);
}
inline __host__ __device__ void operator*=(int2 &a, int s)
{
a.x *= s; a.y *= s;
}
// float2 functions
////////////////////////////////////////////////////////////////////////////////
// additional constructors
inline __host__ __device__ float2 make_float2(float s)
{
return make_float2(s, s);
}
inline __host__ __device__ float2 make_float2(int2 a)
{
return make_float2(float(a.x), float(a.y));
}
// negate
inline __host__ __device__ float2 operator-(float2 &a)
{
return make_float2(-a.x, -a.y);
}
// addition
inline __host__ __device__ float2 operator+(float2 a, float2 b)
{
return make_float2(a.x + b.x, a.y + b.y);
}
inline __host__ __device__ void operator+=(float2 &a, float2 b)
{
a.x += b.x; a.y += b.y;
}
// subtract
inline __host__ __device__ float2 operator-(float2 a, float2 b)
{
return make_float2(a.x - b.x, a.y - b.y);
}
inline __host__ __device__ void operator-=(float2 &a, float2 b)
{
a.x -= b.x; a.y -= b.y;
}
// multiply
inline __host__ __device__ float2 operator*(float2 a, float2 b)
{
return make_float2(a.x * b.x, a.y * b.y);
}
inline __host__ __device__ float2 operator*(float2 a, float s)
{
return make_float2(a.x * s, a.y * s);
}
inline __host__ __device__ float2 operator*(float s, float2 a)
{
return make_float2(a.x * s, a.y * s);
}
inline __host__ __device__ void operator*=(float2 &a, float s)
{
a.x *= s; a.y *= s;
}
// divide
inline __host__ __device__ float2 operator/(float2 a, float2 b)
{
return make_float2(a.x / b.x, a.y / b.y);
}
inline __host__ __device__ float2 operator/(float2 a, float s)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ float2 operator/(float s, float2 a)
{
    return make_float2(s / a.x, s / a.y);
}
inline __host__ __device__ void operator/=(float2 &a, float s)
{
float inv = 1.0f / s;
a *= inv;
}
// lerp
inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float2 clamp(float2 v, float a, float b)
{
return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
}
inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
{
return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
}
// dot product
inline __host__ __device__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
// length
inline __host__ __device__ float length(float2 v)
{
return sqrtf(dot(v, v));
}
// normalize
inline __host__ __device__ float2 normalize(float2 v)
{
float invLen = rsqrtf(dot(v, v));
return v * invLen;
}
// floor
inline __host__ __device__ float2 floor(const float2 v)
{
return make_float2(floor(v.x), floor(v.y));
}
// reflect
inline __host__ __device__ float2 reflect(float2 i, float2 n)
{
return i - 2.0f * n * dot(n,i);
}
// absolute value
inline __host__ __device__ float2 fabs(float2 v)
{
return make_float2(fabs(v.x), fabs(v.y));
}
// float3 functions
////////////////////////////////////////////////////////////////////////////////
// additional constructors
inline __host__ __device__ float3 make_float3(float s)
{
return make_float3(s, s, s);
}
inline __host__ __device__ float3 make_float3(float2 a)
{
return make_float3(a.x, a.y, 0.0f);
}
inline __host__ __device__ float3 make_float3(float2 a, float s)
{
return make_float3(a.x, a.y, s);
}
inline __host__ __device__ float3 make_float3(float4 a)
{
return make_float3(a.x, a.y, a.z); // discards w
}
inline __host__ __device__ float3 make_float3(int3 a)
{
return make_float3(float(a.x), float(a.y), float(a.z));
}
// negate
inline __host__ __device__ float3 operator-(float3 &a)
{
return make_float3(-a.x, -a.y, -a.z);
}
// min
static __inline__ __host__ __device__ float3 fminf(float3 a, float3 b)
{
return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
}
// max
static __inline__ __host__ __device__ float3 fmaxf(float3 a, float3 b)
{
return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
}
// addition
inline __host__ __device__ float3 operator+(float3 a, float3 b)
{
return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ float3 operator+(float3 a, float b)
{
return make_float3(a.x + b, a.y + b, a.z + b);
}
inline __host__ __device__ void operator+=(float3 &a, float3 b)
{
a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ float3 operator-(float3 a, float3 b)
{
return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ float3 operator-(float3 a, float b)
{
return make_float3(a.x - b, a.y - b, a.z - b);
}
inline __host__ __device__ void operator-=(float3 &a, float3 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply
inline __host__ __device__ float3 operator*(float3 a, float3 b)
{
return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ float3 operator*(float3 a, float s)
{
return make_float3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ float3 operator*(float s, float3 a)
{
return make_float3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(float3 &a, float s)
{
a.x *= s; a.y *= s; a.z *= s;
}
inline __host__ __device__ void operator*=(float3 &a, float3 b)
{
    a.x *= b.x; a.y *= b.y; a.z *= b.z;
}
// divide
inline __host__ __device__ float3 operator/(float3 a, float3 b)
{
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ float3 operator/(float3 a, float s)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ float3 operator/(float s, float3 a)
{
    return make_float3(s / a.x, s / a.y, s / a.z);
}
inline __host__ __device__ void operator/=(float3 &a, float s)
{
float inv = 1.0f / s;
a *= inv;
}
// lerp
inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float3 clamp(float3 v, float a, float b)
{
return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
{
return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
// dot product
inline __host__ __device__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
// cross product
inline __host__ __device__ float3 cross(float3 a, float3 b)
{
return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
}
// length
inline __host__ __device__ float length(float3 v)
{
return sqrtf(dot(v, v));
}
// normalize
inline __host__ __device__ float3 normalize(float3 v)
{
float invLen = rsqrtf(dot(v, v));
return v * invLen;
}
// floor
inline __host__ __device__ float3 floor(const float3 v)
{
return make_float3(floor(v.x), floor(v.y), floor(v.z));
}
// reflect
inline __host__ __device__ float3 reflect(float3 i, float3 n)
{
return i - 2.0f * n * dot(n,i);
}
// absolute value
inline __host__ __device__ float3 fabs(float3 v)
{
return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
}
// float4 functions
////////////////////////////////////////////////////////////////////////////////
// additional constructors
inline __host__ __device__ float4 make_float4(float s)
{
return make_float4(s, s, s, s);
}
inline __host__ __device__ float4 make_float4(float3 a)
{
return make_float4(a.x, a.y, a.z, 0.0f);
}
inline __host__ __device__ float4 make_float4(float3 a, float w)
{
return make_float4(a.x, a.y, a.z, w);
}
inline __host__ __device__ float4 make_float4(int4 a)
{
return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
}
// negate
inline __host__ __device__ float4 operator-(float4 &a)
{
return make_float4(-a.x, -a.y, -a.z, -a.w);
}
// min
static __inline__ __host__ __device__ float4 fminf(float4 a, float4 b)
{
return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
}
// max
static __inline__ __host__ __device__ float4 fmaxf(float4 a, float4 b)
{
return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
}
// addition
inline __host__ __device__ float4 operator+(float4 a, float4 b)
{
return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
inline __host__ __device__ void operator+=(float4 &a, float4 b)
{
a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w;
}
// subtract
inline __host__ __device__ float4 operator-(float4 a, float4 b)
{
return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}
inline __host__ __device__ void operator-=(float4 &a, float4 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w;
}
// multiply
inline __host__ __device__ float4 operator*(float4 a, float s)
{
return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}
inline __host__ __device__ float4 operator*(float s, float4 a)
{
return make_float4(a.x * s, a.y * s, a.z * s, a.w * s);
}
inline __host__ __device__ void operator*=(float4 &a, float s)
{
a.x *= s; a.y *= s; a.z *= s; a.w *= s;
}
// divide
inline __host__ __device__ float4 operator/(float4 a, float4 b)
{
return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}
inline __host__ __device__ float4 operator/(float4 a, float s)
{
float inv = 1.0f / s;
return a * inv;
}
inline __host__ __device__ float4 operator/(float s, float4 a)
{
    return make_float4(s / a.x, s / a.y, s / a.z, s / a.w);
}
inline __host__ __device__ void operator/=(float4 &a, float s)
{
float inv = 1.0f / s;
a *= inv;
}
// lerp
inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
{
return a + t*(b-a);
}
// clamp
inline __device__ __host__ float4 clamp(float4 v, float a, float b)
{
return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
}
inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
{
return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
}
// dot product
inline __host__ __device__ float dot(float4 a, float4 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
// length
inline __host__ __device__ float length(float4 r)
{
return sqrtf(dot(r, r));
}
// normalize
inline __host__ __device__ float4 normalize(float4 v)
{
float invLen = rsqrtf(dot(v, v));
return v * invLen;
}
// floor
inline __host__ __device__ float4 floor(const float4 v)
{
return make_float4(floor(v.x), floor(v.y), floor(v.z), floor(v.w));
}
// absolute value
inline __host__ __device__ float4 fabs(float4 v)
{
return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
}
// int3 functions
////////////////////////////////////////////////////////////////////////////////
// additional constructors
inline __host__ __device__ int3 make_int3(int s)
{
return make_int3(s, s, s);
}
inline __host__ __device__ int3 make_int3(float3 a)
{
return make_int3(int(a.x), int(a.y), int(a.z));
}
// negate
inline __host__ __device__ int3 operator-(int3 &a)
{
return make_int3(-a.x, -a.y, -a.z);
}
// min
inline __host__ __device__ int3 min(int3 a, int3 b)
{
return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}
// max
inline __host__ __device__ int3 max(int3 a, int3 b)
{
return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}
// addition
inline __host__ __device__ int3 operator+(int3 a, int3 b)
{
return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ void operator+=(int3 &a, int3 b)
{
a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ int3 operator-(int3 a, int3 b)
{
return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ void operator-=(int3 &a, int3 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply
inline __host__ __device__ int3 operator*(int3 a, int3 b)
{
return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ int3 operator*(int3 a, int s)
{
return make_int3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ int3 operator*(int s, int3 a)
{
return make_int3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(int3 &a, int s)
{
a.x *= s; a.y *= s; a.z *= s;
}
// divide
inline __host__ __device__ int3 operator/(int3 a, int3 b)
{
return make_int3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ int3 operator/(int3 a, int s)
{
return make_int3(a.x / s, a.y / s, a.z / s);
}
inline __host__ __device__ int3 operator/(int s, int3 a)
{
    return make_int3(s / a.x, s / a.y, s / a.z);
}
inline __host__ __device__ void operator/=(int3 &a, int s)
{
a.x /= s; a.y /= s; a.z /= s;
}
// clamp
inline __device__ __host__ int clamp(int f, int a, int b)
{
return max(a, min(f, b));
}
inline __device__ __host__ int3 clamp(int3 v, int a, int b)
{
return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
{
return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
// uint3 functions
////////////////////////////////////////////////////////////////////////////////
// additional constructors
inline __host__ __device__ uint3 make_uint3(uint s)
{
return make_uint3(s, s, s);
}
inline __host__ __device__ uint3 make_uint3(float3 a)
{
return make_uint3(uint(a.x), uint(a.y), uint(a.z));
}
// min
inline __host__ __device__ uint3 min(uint3 a, uint3 b)
{
return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
}
// max
inline __host__ __device__ uint3 max(uint3 a, uint3 b)
{
return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
}
// addition
inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
{
return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
}
inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
{
a.x += b.x; a.y += b.y; a.z += b.z;
}
// subtract
inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
{
return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
}
inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
{
a.x -= b.x; a.y -= b.y; a.z -= b.z;
}
// multiply
inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
{
return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
}
inline __host__ __device__ uint3 operator*(uint3 a, uint s)
{
return make_uint3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ uint3 operator*(uint s, uint3 a)
{
return make_uint3(a.x * s, a.y * s, a.z * s);
}
inline __host__ __device__ void operator*=(uint3 &a, uint s)
{
a.x *= s; a.y *= s; a.z *= s;
}
// divide
inline __host__ __device__ uint3 operator/(uint3 a, uint3 b)
{
return make_uint3(a.x / b.x, a.y / b.y, a.z / b.z);
}
inline __host__ __device__ uint3 operator/(uint3 a, uint s)
{
return make_uint3(a.x / s, a.y / s, a.z / s);
}
inline __host__ __device__ uint3 operator/(uint s, uint3 a)
{
    return make_uint3(s / a.x, s / a.y, s / a.z);
}
inline __host__ __device__ void operator/=(uint3 &a, uint s)
{
a.x /= s; a.y /= s; a.z /= s;
}
// clamp
inline __device__ __host__ uint clamp(uint f, uint a, uint b)
{
return max(a, min(f, b));
}
inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
{
return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
}
inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
{
return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
}
#endif
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <torch/extension.h>
#include <utility>
std::tuple<at::Tensor, at::Tensor, at::Tensor> ball_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor points,
const float radius, const int n_max);
std::tuple<at::Tensor, at::Tensor, at::Tensor> aabb_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor points,
const float voxelsize, const int n_max);
std::tuple<at::Tensor, at::Tensor, at::Tensor> svo_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor points, at::Tensor children,
const float voxelsize, const int n_max);
std::tuple< at::Tensor, at::Tensor, at::Tensor > triangle_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor face_points,
const float cagesize, const float blur, const int n_max);
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <torch/extension.h>
#include <utility>
std::tuple<at::Tensor, at::Tensor> build_octree(at::Tensor center, at::Tensor points, int depth);
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <torch/extension.h>
#include <utility>
std::tuple<at::Tensor, at::Tensor, at::Tensor> uniform_ray_sampling(
at::Tensor pts_idx, at::Tensor min_depth, at::Tensor max_depth, at::Tensor uniform_noise,
const float step_size, const int max_steps);
std::tuple<at::Tensor, at::Tensor, at::Tensor> inverse_cdf_sampling(
at::Tensor pts_idx, at::Tensor min_depth, at::Tensor max_depth, at::Tensor uniform_noise,
at::Tensor probs, at::Tensor steps, float fixed_step_size);
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#pragma once
#include <ATen/cuda/CUDAContext.h>
#include <torch/extension.h>
#define CHECK_CUDA(x) \
do { \
TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor"); \
} while (0)
#define CHECK_CONTIGUOUS(x) \
do { \
TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \
} while (0)
#define CHECK_IS_INT(x) \
do { \
TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, \
#x " must be an int tensor"); \
} while (0)
#define CHECK_IS_FLOAT(x) \
do { \
TORCH_CHECK(x.scalar_type() == at::ScalarType::Float, \
#x " must be a float tensor"); \
} while (0)
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "intersect.h"
#include "octree.h"
#include "sample.h"
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("ball_intersect", &ball_intersect);
m.def("aabb_intersect", &aabb_intersect);
m.def("svo_intersect", &svo_intersect);
m.def("triangle_intersect", &triangle_intersect);
m.def("uniform_ray_sampling", &uniform_ray_sampling);
m.def("inverse_cdf_sampling", &inverse_cdf_sampling);
m.def("build_octree", &build_octree);
}
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "intersect.h"
#include "utils.h"
#include <utility>
void ball_intersect_point_kernel_wrapper(
int b, int n, int m, float radius, int n_max,
const float *ray_start, const float *ray_dir, const float *points,
int *idx, float *min_depth, float *max_depth);
std::tuple< at::Tensor, at::Tensor, at::Tensor > ball_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor points,
const float radius, const int n_max){
CHECK_CONTIGUOUS(ray_start);
CHECK_CONTIGUOUS(ray_dir);
CHECK_CONTIGUOUS(points);
CHECK_IS_FLOAT(ray_start);
CHECK_IS_FLOAT(ray_dir);
CHECK_IS_FLOAT(points);
CHECK_CUDA(ray_start);
CHECK_CUDA(ray_dir);
CHECK_CUDA(points);
at::Tensor idx =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Int));
at::Tensor min_depth =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
at::Tensor max_depth =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
ball_intersect_point_kernel_wrapper(points.size(0), points.size(1), ray_start.size(1),
radius, n_max,
        ray_start.data_ptr<float>(), ray_dir.data_ptr<float>(), points.data_ptr<float>(),
        idx.data_ptr<int>(), min_depth.data_ptr<float>(), max_depth.data_ptr<float>());
return std::make_tuple(idx, min_depth, max_depth);
}
void aabb_intersect_point_kernel_wrapper(
int b, int n, int m, float voxelsize, int n_max,
const float *ray_start, const float *ray_dir, const float *points,
int *idx, float *min_depth, float *max_depth);
std::tuple< at::Tensor, at::Tensor, at::Tensor > aabb_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor points,
const float voxelsize, const int n_max){
CHECK_CONTIGUOUS(ray_start);
CHECK_CONTIGUOUS(ray_dir);
CHECK_CONTIGUOUS(points);
CHECK_IS_FLOAT(ray_start);
CHECK_IS_FLOAT(ray_dir);
CHECK_IS_FLOAT(points);
CHECK_CUDA(ray_start);
CHECK_CUDA(ray_dir);
CHECK_CUDA(points);
at::Tensor idx =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Int));
at::Tensor min_depth =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
at::Tensor max_depth =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
aabb_intersect_point_kernel_wrapper(points.size(0), points.size(1), ray_start.size(1),
voxelsize, n_max,
        ray_start.data_ptr<float>(), ray_dir.data_ptr<float>(), points.data_ptr<float>(),
        idx.data_ptr<int>(), min_depth.data_ptr<float>(), max_depth.data_ptr<float>());
return std::make_tuple(idx, min_depth, max_depth);
}
void svo_intersect_point_kernel_wrapper(
int b, int n, int m, float voxelsize, int n_max,
const float *ray_start, const float *ray_dir, const float *points, const int *children,
int *idx, float *min_depth, float *max_depth);
std::tuple< at::Tensor, at::Tensor, at::Tensor > svo_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor points,
at::Tensor children, const float voxelsize, const int n_max){
CHECK_CONTIGUOUS(ray_start);
CHECK_CONTIGUOUS(ray_dir);
CHECK_CONTIGUOUS(points);
CHECK_CONTIGUOUS(children);
CHECK_IS_FLOAT(ray_start);
CHECK_IS_FLOAT(ray_dir);
CHECK_IS_FLOAT(points);
CHECK_CUDA(ray_start);
CHECK_CUDA(ray_dir);
CHECK_CUDA(points);
CHECK_CUDA(children);
at::Tensor idx =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Int));
at::Tensor min_depth =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
at::Tensor max_depth =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
svo_intersect_point_kernel_wrapper(points.size(0), points.size(1), ray_start.size(1),
voxelsize, n_max,
        ray_start.data_ptr<float>(), ray_dir.data_ptr<float>(), points.data_ptr<float>(),
        children.data_ptr<int>(), idx.data_ptr<int>(), min_depth.data_ptr<float>(), max_depth.data_ptr<float>());
return std::make_tuple(idx, min_depth, max_depth);
}
void triangle_intersect_point_kernel_wrapper(
int b, int n, int m, float cagesize, float blur, int n_max,
const float *ray_start, const float *ray_dir, const float *face_points,
int *idx, float *depth, float *uv);
std::tuple< at::Tensor, at::Tensor, at::Tensor > triangle_intersect(at::Tensor ray_start, at::Tensor ray_dir, at::Tensor face_points,
const float cagesize, const float blur, const int n_max){
CHECK_CONTIGUOUS(ray_start);
CHECK_CONTIGUOUS(ray_dir);
CHECK_CONTIGUOUS(face_points);
CHECK_IS_FLOAT(ray_start);
CHECK_IS_FLOAT(ray_dir);
CHECK_IS_FLOAT(face_points);
CHECK_CUDA(ray_start);
CHECK_CUDA(ray_dir);
CHECK_CUDA(face_points);
at::Tensor idx =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max},
at::device(ray_start.device()).dtype(at::ScalarType::Int));
at::Tensor depth =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max * 3},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
at::Tensor uv =
torch::zeros({ray_start.size(0), ray_start.size(1), n_max * 2},
at::device(ray_start.device()).dtype(at::ScalarType::Float));
triangle_intersect_point_kernel_wrapper(face_points.size(0), face_points.size(1), ray_start.size(1),
cagesize, blur, n_max,
        ray_start.data_ptr<float>(), ray_dir.data_ptr<float>(), face_points.data_ptr<float>(),
        idx.data_ptr<int>(), depth.data_ptr<float>(), uv.data_ptr<float>());
return std::make_tuple(idx, depth, uv);
}
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
#include "cutil_math.h" // required for float3 vector math
__global__ void ball_intersect_point_kernel(int b, int n, int m, float radius, int n_max,
const float *__restrict__ ray_start,
const float *__restrict__ ray_dir,
const float *__restrict__ points, int *__restrict__ idx,
float *__restrict__ min_depth,
float *__restrict__ max_depth) {
int batch_index = blockIdx.x;
points += batch_index * n * 3;
ray_start += batch_index * m * 3;
ray_dir += batch_index * m * 3;
idx += batch_index * m * n_max;
min_depth += batch_index * m * n_max;
max_depth += batch_index * m * n_max;
int index = threadIdx.x;
int stride = blockDim.x;
float radius2 = radius * radius;
for (int j = index; j < m; j += stride) {
float x0 = ray_start[j * 3 + 0];
float y0 = ray_start[j * 3 + 1];
float z0 = ray_start[j * 3 + 2];
float xw = ray_dir[j * 3 + 0];
float yw = ray_dir[j * 3 + 1];
float zw = ray_dir[j * 3 + 2];
for (int l = 0; l < n_max; ++l) {
idx[j * n_max + l] = -1;
}
for (int k = 0, cnt = 0; k < n && cnt < n_max; ++k) {
float x = points[k * 3 + 0] - x0;
float y = points[k * 3 + 1] - y0;
float z = points[k * 3 + 2] - z0;
float d2 = x * x + y * y + z * z;
float d2_proj = pow(x * xw + y * yw + z * zw, 2);
float r2 = d2 - d2_proj;
if (r2 < radius2) {
idx[j * n_max + cnt] = k;
float depth = sqrt(d2_proj);
float depth_blur = sqrt(radius2 - r2);
min_depth[j * n_max + cnt] = depth - depth_blur;
max_depth[j * n_max + cnt] = depth + depth_blur;
++cnt;
}
}
}
}
__device__ float2 RayAABBIntersection(const float3 &ori, const float3 &dir, const float3 &center,
float half_voxel) {
float f_low = 0;
float f_high = 100000.;
float f_dim_low, f_dim_high, temp, inv_ray_dir, start, aabb;
for (int d = 0; d < 3; ++d) {
switch (d) {
case 0:
inv_ray_dir = __fdividef(1.0f, dir.x);
start = ori.x;
aabb = center.x;
break;
case 1:
inv_ray_dir = __fdividef(1.0f, dir.y);
start = ori.y;
aabb = center.y;
break;
case 2:
inv_ray_dir = __fdividef(1.0f, dir.z);
start = ori.z;
aabb = center.z;
break;
}
f_dim_low = (aabb - half_voxel - start) * inv_ray_dir;
f_dim_high = (aabb + half_voxel - start) * inv_ray_dir;
// Make sure low is less than high
if (f_dim_high < f_dim_low) {
temp = f_dim_low;
f_dim_low = f_dim_high;
f_dim_high = temp;
}
        // If this dimension's high is less than the low we got, then we definitely missed.
        // Likewise if this dimension's low is greater than the high we got.
if (f_dim_high < f_low || f_dim_low > f_high)
return make_float2(-1.0f, -1.0f);
// Add the clip from this dimension to the previous results
f_low = max(f_dim_low, f_low);
f_high = min(f_dim_high, f_high);
if (f_low >= f_high - 1e-5f)
return make_float2(-1.0f, -1.0f);
}
return make_float2(f_low, f_high);
}
__global__ void aabb_intersect_point_kernel(int b, int n, int m, float voxelsize, int n_max,
const float *__restrict__ ray_start,
const float *__restrict__ ray_dir,
const float *__restrict__ points, int *__restrict__ idx,
float *__restrict__ min_depth,
float *__restrict__ max_depth) {
int batch_index = blockIdx.x;
points += batch_index * n * 3;
ray_start += batch_index * m * 3;
ray_dir += batch_index * m * 3;
idx += batch_index * m * n_max;
min_depth += batch_index * m * n_max;
max_depth += batch_index * m * n_max;
int index = threadIdx.x;
int stride = blockDim.x;
float half_voxel = voxelsize * 0.5;
for (int j = index; j < m; j += stride) {
for (int l = 0; l < n_max; ++l) {
idx[j * n_max + l] = -1;
}
for (int k = 0, cnt = 0; k < n && cnt < n_max; ++k) {
float2 depths = RayAABBIntersection(
make_float3(ray_start[j * 3 + 0], ray_start[j * 3 + 1], ray_start[j * 3 + 2]),
make_float3(ray_dir[j * 3 + 0], ray_dir[j * 3 + 1], ray_dir[j * 3 + 2]),
make_float3(points[k * 3 + 0], points[k * 3 + 1], points[k * 3 + 2]), half_voxel);
if (depths.x > -1.0f) {
idx[j * n_max + cnt] = k;
min_depth[j * n_max + cnt] = depths.x;
max_depth[j * n_max + cnt] = depths.y;
++cnt;
}
}
}
}
__global__ void svo_intersect_point_kernel(int b, int n, int m, float voxelsize, int n_max,
const float *__restrict__ ray_start,
const float *__restrict__ ray_dir,
const float *__restrict__ points,
const int *__restrict__ children, int *__restrict__ idx,
float *__restrict__ min_depth,
float *__restrict__ max_depth) {
    /*
    TODO: this is an inefficient implementation of the
    naive Ray -- Sparse Voxel Octree Intersection.
    It can be further improved using:
    Revelles, Jorge, Carlos Urena, and Miguel Lastra.
    "An efficient parametric algorithm for octree traversal." (2000).
    */
int batch_index = blockIdx.x;
points += batch_index * n * 3;
children += batch_index * n * 9;
ray_start += batch_index * m * 3;
ray_dir += batch_index * m * 3;
idx += batch_index * m * n_max;
min_depth += batch_index * m * n_max;
max_depth += batch_index * m * n_max;
int index = threadIdx.x;
int stride = blockDim.x;
float half_voxel = voxelsize * 0.5;
for (int j = index; j < m; j += stride) {
for (int l = 0; l < n_max; ++l) {
idx[j * n_max + l] = -1;
}
int stack[256] = {-1}; // DFS, initialize the stack
int ptr = 0, cnt = 0, k = -1;
stack[ptr] = n - 1; // ROOT node is always the last
while (ptr > -1 && cnt < n_max) {
assert((ptr < 256));
// evaluate the current node
k = stack[ptr];
float2 depths = RayAABBIntersection(
make_float3(ray_start[j * 3 + 0], ray_start[j * 3 + 1], ray_start[j * 3 + 2]),
make_float3(ray_dir[j * 3 + 0], ray_dir[j * 3 + 1], ray_dir[j * 3 + 2]),
make_float3(points[k * 3 + 0], points[k * 3 + 1], points[k * 3 + 2]),
half_voxel * float(children[k * 9 + 8]));
stack[ptr] = -1;
ptr--;
if (depths.x > -1.0f) { // ray did not miss the voxel
                // TODO: it should be possible to determine here which children are
                // actually hit, to further optimize the code
if (children[k * 9 + 8] == 1) { // this is a terminal node
idx[j * n_max + cnt] = k;
min_depth[j * n_max + cnt] = depths.x;
max_depth[j * n_max + cnt] = depths.y;
++cnt;
continue;
}
for (int u = 0; u < 8; u++) {
if (children[k * 9 + u] > -1) {
ptr++;
stack[ptr] = children[k * 9 + u]; // push child to the stack
}
}
}
}
}
}
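// Storage note: the octree is flattened so that each node owns 9 ints in
// `children`: slots 0-7 hold child node indices (-1 if absent) and slot 8 holds
// the node's edge length in units of the finest voxel (so 1 marks a leaf; see
// EasyOctree::finalize in octree.cpp below). The kernel above runs an
// explicit-stack DFS from the root (stored last), scales the AABB test by slot 8,
// and emits leaves until n_max hits are collected.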
__device__ float3 RayTriangleIntersection(const float3 &ori, const float3 &dir, const float3 &v0,
const float3 &v1, const float3 &v2, float blur) {
float3 v0v1 = v1 - v0;
float3 v0v2 = v2 - v0;
float3 v0O = ori - v0;
float3 dir_crs_v0v2 = cross(dir, v0v2);
float det = dot(v0v1, dir_crs_v0v2);
det = __fdividef(1.0f, det); // CUDA intrinsic function
float u = dot(v0O, dir_crs_v0v2) * det;
if ((u < 0.0f - blur) || (u > 1.0f + blur))
return make_float3(-1.0f, 0.0f, 0.0f);
float3 v0O_crs_v0v1 = cross(v0O, v0v1);
float v = dot(dir, v0O_crs_v0v1) * det;
if ((v < 0.0f - blur) || (v > 1.0f + blur))
return make_float3(-1.0f, 0.0f, 0.0f);
if (((u + v) < 0.0f - blur) || ((u + v) > 1.0f + blur))
return make_float3(-1.0f, 0.0f, 0.0f);
float t = dot(v0v2, v0O_crs_v0v1) * det;
return make_float3(t, u, v);
}
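// RayTriangleIntersection above is the Moller-Trumbore test with a `blur`
// tolerance on the barycentric bounds; it returns (t, u, v) and signals a miss
// with t = -1. Note that `det` is inverted without a near-zero guard, so rays
// (nearly) parallel to the triangle plane give unreliable results; a guarded
// variant (sketch, assuming callers can treat such rays as misses) would be:
//
//   if (fabsf(det) < 1e-8f)
//       return make_float3(-1.0f, 0.0f, 0.0f);
//   det = __fdividef(1.0f, det);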
__global__ void triangle_intersect_point_kernel(int b, int n, int m, float cagesize, float blur,
int n_max, const float *__restrict__ ray_start,
const float *__restrict__ ray_dir,
const float *__restrict__ face_points,
int *__restrict__ idx, float *__restrict__ depth,
float *__restrict__ uv) {
int batch_index = blockIdx.x;
face_points += batch_index * n * 9;
ray_start += batch_index * m * 3;
ray_dir += batch_index * m * 3;
idx += batch_index * m * n_max;
depth += batch_index * m * n_max * 3;
uv += batch_index * m * n_max * 2;
int index = threadIdx.x;
int stride = blockDim.x;
for (int j = index; j < m; j += stride) {
// go over rays
for (int l = 0; l < n_max; ++l) {
idx[j * n_max + l] = -1;
}
int cnt = 0;
for (int k = 0; k < n && cnt < n_max; ++k) {
// go over triangles
float3 tuv = RayTriangleIntersection(
make_float3(ray_start[j * 3 + 0], ray_start[j * 3 + 1], ray_start[j * 3 + 2]),
make_float3(ray_dir[j * 3 + 0], ray_dir[j * 3 + 1], ray_dir[j * 3 + 2]),
make_float3(face_points[k * 9 + 0], face_points[k * 9 + 1], face_points[k * 9 + 2]),
make_float3(face_points[k * 9 + 3], face_points[k * 9 + 4], face_points[k * 9 + 5]),
make_float3(face_points[k * 9 + 6], face_points[k * 9 + 7], face_points[k * 9 + 8]),
blur);
if (tuv.x > 0) {
int ki = k;
float d = tuv.x, u = tuv.y, v = tuv.z;
// sort
for (int l = 0; l < cnt; l++) {
if (d < depth[j * n_max * 3 + l * 3]) {
swap(ki, idx[j * n_max + l]);
swap(d, depth[j * n_max * 3 + l * 3]);
swap(u, uv[j * n_max * 2 + l * 2]);
swap(v, uv[j * n_max * 2 + l * 2 + 1]);
}
}
idx[j * n_max + cnt] = ki;
depth[j * n_max * 3 + cnt * 3] = d;
uv[j * n_max * 2 + cnt * 2] = u;
uv[j * n_max * 2 + cnt * 2 + 1] = v;
cnt++;
}
}
for (int l = 0; l < cnt; l++) {
// compute min_depth
if (l == 0)
depth[j * n_max * 3 + l * 3 + 1] = -cagesize;
else
depth[j * n_max * 3 + l * 3 + 1] =
-fminf(cagesize,
.5 * (depth[j * n_max * 3 + l * 3] - depth[j * n_max * 3 + l * 3 - 3]));
// compute max_depth
if (l == cnt - 1)
depth[j * n_max * 3 + l * 3 + 2] = cagesize;
else
depth[j * n_max * 3 + l * 3 + 2] =
fminf(cagesize,
.5 * (depth[j * n_max * 3 + l * 3 + 3] - depth[j * n_max * 3 + l * 3]));
}
}
}
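// Output layout: for each ray, `depth` stores one (t, near_offset, far_offset)
// triple per recorded triangle. The insertion pass keeps hits sorted by t, and
// the second loop converts the gaps between neighbouring t values into per-hit
// sampling intervals, clamped to [-cagesize, cagesize].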
void ball_intersect_point_kernel_wrapper(int b, int n, int m, float radius, int n_max,
const float *ray_start, const float *ray_dir,
const float *points, int *idx, float *min_depth,
float *max_depth) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
ball_intersect_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
b, n, m, radius, n_max, ray_start, ray_dir, points, idx, min_depth, max_depth);
CUDA_CHECK_ERRORS();
}
void aabb_intersect_point_kernel_wrapper(int b, int n, int m, float voxelsize, int n_max,
const float *ray_start, const float *ray_dir,
const float *points, int *idx, float *min_depth,
float *max_depth) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
aabb_intersect_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
b, n, m, voxelsize, n_max, ray_start, ray_dir, points, idx, min_depth, max_depth);
CUDA_CHECK_ERRORS();
}
void svo_intersect_point_kernel_wrapper(int b, int n, int m, float voxelsize, int n_max,
const float *ray_start, const float *ray_dir,
const float *points, const int *children, int *idx,
float *min_depth, float *max_depth) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
svo_intersect_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
b, n, m, voxelsize, n_max, ray_start, ray_dir, points, children, idx, min_depth, max_depth);
CUDA_CHECK_ERRORS();
}
void triangle_intersect_point_kernel_wrapper(int b, int n, int m, float cagesize, float blur,
int n_max, const float *ray_start,
const float *ray_dir, const float *face_points,
int *idx, float *depth, float *uv) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
triangle_intersect_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
b, n, m, cagesize, blur, n_max, ray_start, ray_dir, face_points, idx, depth, uv);
CUDA_CHECK_ERRORS();
}
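// Hypothetical host-side sketch (not part of this file): how the AABB wrapper
// above might be fed from ATen, assuming contiguous float CUDA tensors of shape
// (b, m, 3) for rays and (b, n, 3) for voxel centers:
//
//   at::Tensor idx = -torch::ones({b, m, n_max}, at::device(points.device()).dtype(at::kInt));
//   at::Tensor tmin = torch::zeros({b, m, n_max}, at::device(points.device()).dtype(at::kFloat));
//   at::Tensor tmax = torch::zeros({b, m, n_max}, at::device(points.device()).dtype(at::kFloat));
//   aabb_intersect_point_kernel_wrapper(b, n, m, voxelsize, n_max,
//       ray_start.data_ptr<float>(), ray_dir.data_ptr<float>(), points.data_ptr<float>(),
//       idx.data_ptr<int>(), tmin.data_ptr<float>(), tmax.data_ptr<float>());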
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "octree.h"
#include "utils.h"
#include <utility>
#include <queue>
#include <chrono>
using namespace std::chrono;
typedef struct OcTree
{
int depth;
int index;
at::Tensor center;
struct OcTree *children[8];
void init(at::Tensor center, int d, int i) {
this->center = center;
this->depth = d;
this->index = i;
for (int i=0; i<8; i++) this->children[i] = nullptr;
}
}OcTree;
class EasyOctree {
public:
OcTree *root;
int total;
int terminal;
at::Tensor all_centers;
at::Tensor all_children;
EasyOctree(at::Tensor center, int depth) {
root = new OcTree;
root->init(center, depth, -1);
total = -1;
terminal = -1;
}
    ~EasyOctree() {
        OcTree *p = root;
        destroy(p);
    }
    void destroy(OcTree * &p);
    void insert(OcTree * &p, at::Tensor point, int index);
    void finalize();
    std::pair<int, int> count(OcTree * &p);
};
void EasyOctree::destroy(OcTree * &p){
    if (p != nullptr) {
        for (int i=0; i<8; i++) {
            if (p->children[i] != nullptr) destroy(p->children[i]);
        }
        delete p;
        p = nullptr;
    }
}
void EasyOctree::insert(OcTree * &p, at::Tensor point, int index) {
at::Tensor diff = (point > p->center).to(at::kInt);
int idx = diff[0].item<int>() + 2 * diff[1].item<int>() + 4 * diff[2].item<int>();
if (p->depth == 0) {
p->children[idx] = new OcTree;
p->children[idx]->init(point, -1, index);
} else {
if (p->children[idx] == nullptr) {
int length = 1 << (p->depth - 1);
at::Tensor new_center = p->center + (2 * diff - 1) * length;
p->children[idx] = new OcTree;
p->children[idx]->init(new_center, p->depth-1, -1);
}
insert(p->children[idx], point, index);
}
}
std::pair<int, int> EasyOctree::count(OcTree * &p) {
int total = 0, terminal = 0;
for (int i=0; i<8; i++) {
if (p->children[i] != nullptr) {
std::pair<int, int> sub = count(p->children[i]);
total += sub.first;
terminal += sub.second;
}
}
total += 1;
if (p->depth == -1) terminal += 1;
return std::make_pair(total, terminal);
}
void EasyOctree::finalize() {
std::pair<int, int> outs = count(root);
total = outs.first; terminal = outs.second;
all_centers =
torch::zeros({outs.first, 3}, at::device(root->center.device()).dtype(at::ScalarType::Int));
all_children =
-torch::ones({outs.first, 9}, at::device(root->center.device()).dtype(at::ScalarType::Int));
int node_idx = outs.first - 1;
root->index = node_idx;
std::queue<OcTree*> all_leaves; all_leaves.push(root);
while (!all_leaves.empty()) {
OcTree* node_ptr = all_leaves.front();
all_leaves.pop();
for (int i=0; i<8; i++) {
if (node_ptr->children[i] != nullptr) {
if (node_ptr->children[i]->depth > -1) {
node_idx--;
node_ptr->children[i]->index = node_idx;
}
all_leaves.push(node_ptr->children[i]);
all_children[node_ptr->index][i] = node_ptr->children[i]->index;
}
}
all_children[node_ptr->index][8] = 1 << (node_ptr->depth + 1);
all_centers[node_ptr->index] = node_ptr->center;
}
assert (node_idx == outs.second);
}
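// Flattened layout produced by finalize(): leaves keep the indices assigned at
// insertion time (0 .. terminal-1), internal nodes are numbered downwards from
// total-1 with the root last; all_children[i][0..7] are child indices and
// all_children[i][8] is the node edge length in leaf-voxel units (1 << (depth+1)),
// matching what svo_intersect_point_kernel expects.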
std::tuple<at::Tensor, at::Tensor> build_octree(at::Tensor center, at::Tensor points, int depth) {
auto start = high_resolution_clock::now();
EasyOctree tree(center, depth);
for (int k=0; k<points.size(0); k++)
tree.insert(tree.root, points[k], k);
tree.finalize();
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
printf("Building EasyOctree done. total #nodes = %d, terminal #nodes = %d (time taken %f s)\n",
tree.total, tree.terminal, float(duration.count()) / 1000000.);
return std::make_tuple(tree.all_centers, tree.all_children);
}
\ No newline at end of file
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include "sample.h"
#include "utils.h"
#include <utility>
void uniform_ray_sampling_kernel_wrapper(
int b, int num_rays, int max_hits, int max_steps, float step_size,
const int *pts_idx, const float *min_depth, const float *max_depth, const float *uniform_noise,
int *sampled_idx, float *sampled_depth, float *sampled_dists);
void inverse_cdf_sampling_kernel_wrapper(
int b, int num_rays, int max_hits, int max_steps, float fixed_step_size,
const int *pts_idx, const float *min_depth, const float *max_depth,
const float *uniform_noise, const float *probs, const float *steps,
int *sampled_idx, float *sampled_depth, float *sampled_dists);
std::tuple< at::Tensor, at::Tensor, at::Tensor> uniform_ray_sampling(
at::Tensor pts_idx, at::Tensor min_depth, at::Tensor max_depth, at::Tensor uniform_noise,
const float step_size, const int max_steps){
CHECK_CONTIGUOUS(pts_idx);
CHECK_CONTIGUOUS(min_depth);
CHECK_CONTIGUOUS(max_depth);
CHECK_CONTIGUOUS(uniform_noise);
CHECK_IS_FLOAT(min_depth);
CHECK_IS_FLOAT(max_depth);
CHECK_IS_FLOAT(uniform_noise);
CHECK_IS_INT(pts_idx);
CHECK_CUDA(pts_idx);
CHECK_CUDA(min_depth);
CHECK_CUDA(max_depth);
CHECK_CUDA(uniform_noise);
at::Tensor sampled_idx =
-torch::ones({pts_idx.size(0), pts_idx.size(1), max_steps},
at::device(pts_idx.device()).dtype(at::ScalarType::Int));
at::Tensor sampled_depth =
torch::zeros({min_depth.size(0), min_depth.size(1), max_steps},
at::device(min_depth.device()).dtype(at::ScalarType::Float));
at::Tensor sampled_dists =
torch::zeros({min_depth.size(0), min_depth.size(1), max_steps},
at::device(min_depth.device()).dtype(at::ScalarType::Float));
uniform_ray_sampling_kernel_wrapper(min_depth.size(0), min_depth.size(1), min_depth.size(2), sampled_depth.size(2),
step_size,
        pts_idx.data_ptr<int>(), min_depth.data_ptr<float>(), max_depth.data_ptr<float>(),
        uniform_noise.data_ptr<float>(), sampled_idx.data_ptr<int>(),
        sampled_depth.data_ptr<float>(), sampled_dists.data_ptr<float>());
return std::make_tuple(sampled_idx, sampled_depth, sampled_dists);
}
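// Shape contract shared by both sampling bindings: pts_idx / min_depth /
// max_depth are (B, num_rays, max_hits) hit lists as produced by the
// intersection kernels; the returned sampled_idx / sampled_depth /
// sampled_dists are (B, num_rays, max_steps), with sampled_idx == -1 padding
// unused steps.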
std::tuple<at::Tensor, at::Tensor, at::Tensor> inverse_cdf_sampling(
at::Tensor pts_idx, at::Tensor min_depth, at::Tensor max_depth, at::Tensor uniform_noise,
at::Tensor probs, at::Tensor steps, float fixed_step_size) {
CHECK_CONTIGUOUS(pts_idx);
CHECK_CONTIGUOUS(min_depth);
CHECK_CONTIGUOUS(max_depth);
CHECK_CONTIGUOUS(probs);
CHECK_CONTIGUOUS(steps);
CHECK_CONTIGUOUS(uniform_noise);
CHECK_IS_FLOAT(min_depth);
CHECK_IS_FLOAT(max_depth);
CHECK_IS_FLOAT(uniform_noise);
CHECK_IS_FLOAT(probs);
CHECK_IS_FLOAT(steps);
CHECK_IS_INT(pts_idx);
CHECK_CUDA(pts_idx);
CHECK_CUDA(min_depth);
CHECK_CUDA(max_depth);
CHECK_CUDA(uniform_noise);
CHECK_CUDA(probs);
CHECK_CUDA(steps);
int max_steps = uniform_noise.size(-1);
at::Tensor sampled_idx =
-torch::ones({pts_idx.size(0), pts_idx.size(1), max_steps},
at::device(pts_idx.device()).dtype(at::ScalarType::Int));
at::Tensor sampled_depth =
torch::zeros({min_depth.size(0), min_depth.size(1), max_steps},
at::device(min_depth.device()).dtype(at::ScalarType::Float));
at::Tensor sampled_dists =
torch::zeros({min_depth.size(0), min_depth.size(1), max_steps},
at::device(min_depth.device()).dtype(at::ScalarType::Float));
inverse_cdf_sampling_kernel_wrapper(min_depth.size(0), min_depth.size(1), min_depth.size(2), sampled_depth.size(2), fixed_step_size,
        pts_idx.data_ptr<int>(), min_depth.data_ptr<float>(), max_depth.data_ptr<float>(),
        uniform_noise.data_ptr<float>(), probs.data_ptr<float>(), steps.data_ptr<float>(),
        sampled_idx.data_ptr<int>(), sampled_depth.data_ptr<float>(), sampled_dists.data_ptr<float>());
return std::make_tuple(sampled_idx, sampled_depth, sampled_dists);
}
\ No newline at end of file
// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the MIT license found in the
// LICENSE file in the root directory of this source tree.
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
#include "cutil_math.h" // required for float3 vector math
__global__ void uniform_ray_sampling_kernel(
int b, int num_rays,
int max_hits,
int max_steps,
float step_size,
const int *__restrict__ pts_idx,
const float *__restrict__ min_depth,
const float *__restrict__ max_depth,
const float *__restrict__ uniform_noise,
int *__restrict__ sampled_idx,
float *__restrict__ sampled_depth,
float *__restrict__ sampled_dists) {
int batch_index = blockIdx.x;
int index = threadIdx.x;
int stride = blockDim.x;
pts_idx += batch_index * num_rays * max_hits;
min_depth += batch_index * num_rays * max_hits;
max_depth += batch_index * num_rays * max_hits;
uniform_noise += batch_index * num_rays * max_steps;
sampled_idx += batch_index * num_rays * max_steps;
sampled_depth += batch_index * num_rays * max_steps;
sampled_dists += batch_index * num_rays * max_steps;
// loop over all rays
for (int j = index; j < num_rays; j += stride) {
int H = j * max_hits, K = j * max_steps;
int s = 0, ucur = 0, umin = 0, umax = 0;
float last_min_depth, last_max_depth, curr_depth;
// sort all depths
while (true) {
if ((umax == max_hits) || (ucur == max_steps) || (pts_idx[H + umax] == -1)) {
break; // reach the maximum
}
if (umin < max_hits) {
last_min_depth = min_depth[H + umin];
} else {
last_min_depth = 10000.0;
}
if (umax < max_hits) {
last_max_depth = max_depth[H + umax];
} else {
last_max_depth = 10000.0;
}
if (ucur < max_steps) {
curr_depth = min_depth[H] + (float(ucur) + uniform_noise[K + ucur]) * step_size;
}
if ((last_max_depth <= curr_depth) && (last_max_depth <= last_min_depth)) {
sampled_depth[K + s] = last_max_depth;
sampled_idx[K + s] = pts_idx[H + umax];
umax++; s++; continue;
}
if ((curr_depth <= last_min_depth) && (curr_depth <= last_max_depth)) {
sampled_depth[K + s] = curr_depth;
sampled_idx[K + s] = pts_idx[H + umin - 1];
ucur++; s++; continue;
}
if ((last_min_depth <= curr_depth) && (last_min_depth <= last_max_depth)) {
sampled_depth[K + s] = last_min_depth;
sampled_idx[K + s] = pts_idx[H + umin];
umin++; s++; continue;
}
}
float l_depth, r_depth;
int step = 0;
for (ucur = 0, umin = 0, umax = 0; ucur < max_steps - 1; ucur++) {
if (sampled_idx[K + ucur + 1] == -1) break;
l_depth = sampled_depth[K + ucur];
r_depth = sampled_depth[K + ucur + 1];
sampled_depth[K + ucur] = (l_depth + r_depth) * .5;
sampled_dists[K + ucur] = (r_depth - l_depth);
if ((umin < max_hits) && (sampled_depth[K + ucur] >= min_depth[H + umin]) && (pts_idx[H + umin] > -1)) umin++;
if ((umax < max_hits) && (sampled_depth[K + ucur] >= max_depth[H + umax]) && (pts_idx[H + umax] > -1)) umax++;
if ((umax == max_hits) || (pts_idx[H + umax] == -1)) break;
if ((umin - 1 == umax) && (sampled_dists[K + ucur] > 0)) {
sampled_depth[K + step] = sampled_depth[K + ucur];
sampled_dists[K + step] = sampled_dists[K + ucur];
sampled_idx[K + step] = sampled_idx[K + ucur];
step++;
}
}
for (int s = step; s < max_steps; s++) {
sampled_idx[K + s] = -1;
}
}
}
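// The first loop above merges three per-ray sorted streams -- voxel entry depths
// (min_depth), voxel exit depths (max_depth) and the jittered uniform samples --
// into one ordered depth list. The second pass turns neighbouring depths into
// midpoints and interval lengths, and compacts away samples that fall in the
// empty space between voxels.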
__global__ void inverse_cdf_sampling_kernel(
int b, int num_rays,
int max_hits,
int max_steps,
float fixed_step_size,
const int *__restrict__ pts_idx,
const float *__restrict__ min_depth,
const float *__restrict__ max_depth,
const float *__restrict__ uniform_noise,
const float *__restrict__ probs,
const float *__restrict__ steps,
int *__restrict__ sampled_idx,
float *__restrict__ sampled_depth,
float *__restrict__ sampled_dists) {
int batch_index = blockIdx.x;
int index = threadIdx.x;
int stride = blockDim.x;
pts_idx += batch_index * num_rays * max_hits;
min_depth += batch_index * num_rays * max_hits;
max_depth += batch_index * num_rays * max_hits;
probs += batch_index * num_rays * max_hits;
steps += batch_index * num_rays;
uniform_noise += batch_index * num_rays * max_steps;
sampled_idx += batch_index * num_rays * max_steps;
sampled_depth += batch_index * num_rays * max_steps;
sampled_dists += batch_index * num_rays * max_steps;
// loop over all rays
for (int j = index; j < num_rays; j += stride) {
int H = j * max_hits, K = j * max_steps;
int curr_bin = 0, s = 0; // current index (bin)
float curr_min_depth = min_depth[H]; // lower depth
float curr_max_depth = max_depth[H]; // upper depth
float curr_min_cdf = 0;
float curr_max_cdf = probs[H];
float step_size = 1.0 / steps[j];
float z_low = curr_min_depth;
int total_steps = int(ceil(steps[j]));
bool done = false;
// optional use a fixed step size
if (fixed_step_size > 0.0) step_size = fixed_step_size;
// sample points
for (int curr_step = 0; curr_step < total_steps; curr_step++) {
float curr_cdf = (float(curr_step) + uniform_noise[K + curr_step]) * step_size;
while (curr_cdf > curr_max_cdf) {
// first include max cdf
sampled_idx[K + s] = pts_idx[H + curr_bin];
sampled_dists[K + s] = (curr_max_depth - z_low);
sampled_depth[K + s] = (curr_max_depth + z_low) * .5;
// move to next cdf
curr_bin++;
s++;
if ((curr_bin >= max_hits) || (pts_idx[H + curr_bin] == -1)) {
done = true; break;
}
curr_min_depth = min_depth[H + curr_bin];
curr_max_depth = max_depth[H + curr_bin];
curr_min_cdf = curr_max_cdf;
curr_max_cdf = curr_max_cdf + probs[H + curr_bin];
z_low = curr_min_depth;
}
if (done) break;
// if the sampled cdf is inside bin
float u = (curr_cdf - curr_min_cdf) / (curr_max_cdf - curr_min_cdf);
float z = curr_min_depth + u * (curr_max_depth - curr_min_depth);
sampled_idx[K + s] = pts_idx[H + curr_bin];
sampled_dists[K + s] = (z - z_low);
sampled_depth[K + s] = (z + z_low) * .5;
z_low = z; s++;
}
// if there are bins still remained
        while ((z_low < curr_max_depth) && (!done)) {
sampled_idx[K + s] = pts_idx[H + curr_bin];
sampled_dists[K + s] = (curr_max_depth - z_low);
sampled_depth[K + s] = (curr_max_depth + z_low) * .5;
curr_bin++;
s++;
            if ((curr_bin >= max_hits) || (pts_idx[H + curr_bin] == -1))
break;
curr_min_depth = min_depth[H + curr_bin];
curr_max_depth = max_depth[H + curr_bin];
z_low = curr_min_depth;
}
}
}
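// Inverse-CDF sampling: probs[H + bin] is the probability mass of one hit
// interval, and [curr_min_cdf, curr_max_cdf) is its span of the cumulative
// distribution. Each stratified sample curr_cdf is mapped back to a depth by
// linear interpolation,
//   u = (cdf - cdf_lo) / (cdf_hi - cdf_lo),  z = z_lo + u * (z_hi - z_lo),
// so bins with larger mass receive proportionally more samples.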
void uniform_ray_sampling_kernel_wrapper(
int b, int num_rays, int max_hits, int max_steps, float step_size,
const int *pts_idx, const float *min_depth, const float *max_depth, const float *uniform_noise,
int *sampled_idx, float *sampled_depth, float *sampled_dists) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
uniform_ray_sampling_kernel<<<b, opt_n_threads(num_rays), 0, stream>>>(
b, num_rays, max_hits, max_steps, step_size, pts_idx,
min_depth, max_depth, uniform_noise, sampled_idx, sampled_depth, sampled_dists);
CUDA_CHECK_ERRORS();
}
void inverse_cdf_sampling_kernel_wrapper(
int b, int num_rays, int max_hits, int max_steps, float fixed_step_size,
const int *pts_idx, const float *min_depth, const float *max_depth,
const float *uniform_noise, const float *probs, const float *steps,
int *sampled_idx, float *sampled_depth, float *sampled_dists) {
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
inverse_cdf_sampling_kernel<<<b, opt_n_threads(num_rays), 0, stream>>>(
b, num_rays, max_hits, max_steps, fixed_step_size,
pts_idx, min_depth, max_depth, uniform_noise, probs, steps,
sampled_idx, sampled_depth, sampled_dists);
CUDA_CHECK_ERRORS();
}
\ No newline at end of file
{
"model": "NeRF",
"args": {
"color": "rgb",
"n_pot_encode": 10,
"n_dir_encode": 4,
"fc_params": {
"nf": 256,
"n_layers": 8,
"activation": "relu",
"skips": [ 4 ]
},
"n_featdim": 0,
"sample_range": [0, 10],
"n_samples": 256,
"perturb_sample": true,
"spherical": false,
"lindisp": false,
"raymarching_tolerance": 0,
"raymarching_chunk_size": -1
}
}
\ No newline at end of file
{
"model": "NeRF",
"args": {
"color": "rgb",
"n_pot_encode": 10,
"n_dir_encode": 4,
"fc_params": {
"nf": 256,
"n_layers": 8,
"activation": "relu",
"skips": [ 4 ]
},
"n_featdim": 0,
"space": "voxels",
"voxel_size": 0.5,
"sample_range": [0, 10],
"n_samples": 50,
"perturb_sample": true,
"spherical": false,
"lindisp": false,
"raymarching_tolerance": 0,
"raymarching_chunk_size": -1
}
}
\ No newline at end of file
{
"model": "NSVF",
"args": {
"color": "rgb",
"n_pot_encode": 10,
"n_dir_encode": 4,
"fc_params": {
"nf": 128,
"n_layers": 4,
"activation": "relu",
"skips": [ 4 ]
},
"n_featdim": 0,
"space": "octree",
"voxel_size": 0.5,
"sample_step_ratio": 0.2,
"perturb_sample": true,
"raymarching_tolerance": 0,
"raymarching_chunk_size": -1
}
}
\ No newline at end of file
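The three JSON files above are model presets (vanilla NeRF, NeRF over a voxel grid, and NSVF over an octree) consumed by the training scripts. A minimal sketch of reading one, assuming only the standard library (the path here is illustrative):

```
import json

# Load a model preset (illustrative path)
with open("nerf_voxels.json") as f:
    cfg = json.load(f)

model_name = cfg["model"]   # "NeRF" or "NSVF"
args = cfg["args"]          # network and sampling hyper-parameters
print(model_name, args["fc_params"]["n_layers"])
```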