Encoder.cu 2.94 KB
Newer Older
Nianchen Deng's avatar
Nianchen Deng committed
1
#include "Encoder.h"
Nianchen Deng's avatar
sync    
Nianchen Deng committed
2
#include "../utils/cuda.h"
Nianchen Deng's avatar
Nianchen Deng committed
3

Nianchen Deng's avatar
sync    
Nianchen Deng committed
4
5
/// idx3.z = 0: x, y, z, sin(x), sin(y), sin(z), cos(x), cos(y), cos(z)
/// idx3.z = 1: sin(2x), sin(2y), sin(2z), cos(2x), cos(2y), cos(2z)
Nianchen Deng's avatar
Nianchen Deng committed
6
/// ...
Nianchen Deng's avatar
sync    
Nianchen Deng committed
7
/// idx3.z = n_freq-1: sin(2^(n_freq-1)x), sin(2^(n_freq-1)y), sin(2^(n_freq-1)z),
Nianchen Deng's avatar
Nianchen Deng committed
8
///                    cos(2^(n_freq-1)x), cos(2^(n_freq-1)y), cos(2^(n_freq-1)z)
Nianchen Deng's avatar
Nianchen Deng committed
9
10
11
12
13
14
15
16
17
18
19
/**
 * @brief Copy the raw input channels into the first `inChns` slots of each output row.
 *
 * The kernel reads the sample index from idx3.x (guarded against `n`) and the channel index
 * from idx3.y, and takes `blockDim.y` as the number of input channels. The z dimension is not
 * read, so the expected dispatch is (n, in_chns).
 * NOTE(review): this assumes IDX3 yields the global thread index - confirm against utils/cuda.h.
 *
 * @param output encoded data, n x (in_chns * (nFreqs * 2 + 1))
 * @param input  coord data, n x in_chns
 * @param n      number of samples (rows)
 * @param nFreqs number of frequency bands; only used to derive the output row stride
 */
__global__ void cu_encode0(float *output, float *input, uint n, uint nFreqs) {
    glm::uvec3 idx3 = IDX3;
    if (idx3.x >= n)
        return;
    // Offsets are computed in size_t: with 32-bit arithmetic `i * outChns` wraps around once
    // n * outChns reaches 2^32, which would silently address the wrong element.
    size_t inChns = blockDim.y;
    size_t outChns = inChns * (static_cast<size_t>(nFreqs) * 2 + 1);
    size_t i = idx3.x, chn = idx3.y;
    output[i * outChns + chn] = input[i * inChns + chn];
}

Nianchen Deng's avatar
sync    
Nianchen Deng committed
20
/**
 * @brief Sin/cos frequency encoding of per-sample float channels.
 *
 * Each thread handles one (sample, channel, frequency) triple taken from idx3.x / idx3.y /
 * idx3.z. `blockDim.y` is used as the number of input channels and `blockDim.z` as the number
 * of frequencies, so the expected dispatch is (n, in_chns, n_freqs).
 *
 * Output row layout for one sample, in groups of `inChns` values:
 *   [ input | sin(f0 * input) | cos(f0 * input) | sin(f1 * input) | cos(f1 * input) | ... ]
 * The raw input group is written by the threads with frequency index 0.
 *
 * @param output encoded data, n x (in_chns * (n_freqs * 2 + 1))
 * @param input  coord data, n x in_chns
 * @param freqs  frequency multipliers, indexed by idx3.z
 * @param n      number of samples (rows)
 */
__global__ void cu_encode(float *output, float *input, float *freqs, uint n) {
    glm::uvec3 idx3 = IDX3;
    if (idx3.x >= n)
        return;
    // Offsets are computed in size_t: with 32-bit arithmetic `i * outChns` wraps around once
    // n * outChns reaches 2^32, which would silently address the wrong element.
    size_t inChns = blockDim.y, nFreqs = blockDim.z;
    size_t i = idx3.x, chn = idx3.y, freq = idx3.z;
    size_t outChns = inChns * (nFreqs * 2 + 1);
    size_t base = i * outChns + chn;
    float value = input[i * inChns + chn];
    if (freq == 0)
        output[base] = value; // pass the un-encoded channel through once per (sample, channel)
    // NOTE(review): __sincosf is the fast approximate intrinsic; its error grows for arguments
    // of large magnitude, which is what the higher frequencies produce. Confirm this precision
    // is acceptable, otherwise switch to sincosf.
    float s, c;
    __sincosf(freqs[freq] * value, &s, &c);
    output[base + inChns * (freq * 2 + 1)] = s;
    output[base + inChns * (freq * 2 + 2)] = c;
}

Nianchen Deng's avatar
Nianchen Deng committed
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/**
 * @brief Sin/cos frequency encoding for 2-component inputs, one glm::vec2 per sample.
 *
 * Each thread handles one (sample, frequency) pair taken from idx3.x / idx3.y; `blockDim.y` is
 * used as the number of frequencies, so the expected dispatch is (n, n_freqs).
 *
 * Output row layout for one sample, in glm::vec2 elements:
 *   [ input | sin(f0 * input) | cos(f0 * input) | sin(f1 * input) | cos(f1 * input) | ... ]
 * The raw input element is written by the thread with frequency index 0.
 *
 * @param output encoded data, n x (n_freqs * 2 + 1) vec2 elements
 * @param input  coord data, n vec2 elements
 * @param freqs  frequency multipliers, indexed by idx3.y
 * @param n      number of samples
 */
__global__ void cu_encode2(glm::vec2 *output, glm::vec2 *input, float *freqs, uint n) {
    glm::uvec3 idx3 = IDX3;
    if (idx3.x >= n)
        return;
    // Offsets are computed in size_t: with 32-bit arithmetic `i * outChns` wraps around once
    // n * outChns reaches 2^32, which would silently address the wrong element.
    size_t nFreqs = blockDim.y;
    size_t i = idx3.x, freq = idx3.y;
    size_t outChns = nFreqs * 2 + 1;
    size_t base = i * outChns;
    glm::vec2 value = input[i];
    if (freq == 0)
        output[base] = value; // pass the un-encoded coordinate through once per sample
    glm::vec2 x = freqs[freq] * value;
    glm::vec2 s, c;
    // Fast approximate intrinsic, applied to each component separately.
    __sincosf(x.x, &s.x, &c.x);
    __sincosf(x.y, &s.y, &c.y);
    output[base + (freq * 2 + 1)] = s;
    output[base + (freq * 2 + 2)] = c;
}

/**
 * @brief Log the input/output sizes, then launch cu_encode over all input samples.
 *
 * The number of samples is input->n() / _chns. An empty input returns right after logging,
 * without launching anything.
 *
 * @param output encoded data, n x out_chns
 * @param input coord data, n x in_chns
 */
void Encoder::encode(sptr<CudaArray<float>> output, sptr<CudaArray<float>> input) {
    std::ostringstream sout;
    sout << "Encoder => input size: (" << input->n() / _chns << ", " << _chns << "), output size: ("
         << output->n() / outDim() << ", " << outDim() << ")";
    Logger::instance.info(sout.str());

    uint n = input->n() / _chns;
    if (n == 0)
        return; // nothing to encode; a zero-sized grid would be an invalid launch configuration

    // Split a 1024-thread budget as (samples, channels, frequencies). When
    // _chns * _multires exceeds 1024 the quotient is 0; clamp it to 1 so the grid size is not
    // computed with a zero divisor. The oversized block then makes the launch fail, and that
    // failure is reported by the CHECK_EX below.
    uint threadsX = 1024 / _chns / _multires;
    if (threadsX == 0)
        threadsX = 1;

    // NOTE(review): CU_INVOKE presumably picks up the locals named `grdSize` and `blkSize` for
    // the launch configuration - keep these names; confirm against utils/cuda.h.
    dim3 blkSize(threadsX, _chns, _multires);
    dim3 grdSize(ceilDiv(n, blkSize.x), 1, 1);
    CU_INVOKE(cu_encode)(*output, *input, *_freqs, n);
    CHECK_EX(cudaGetLastError());
}

Nianchen Deng's avatar
sync    
Nianchen Deng committed
77
/**
 * @brief Build the frequency table 1, 2, 4, ... (one entry per `_multires`) and upload it
 *        into `_freqs`.
 *
 * `_freqs` is (re)allocated with `_multires` elements. When `_multires` is less than 1 no
 * values are generated and nothing is copied.
 */
void Encoder::_genFreqArray() {
    // Allocate the device array first so that a failure here cannot leak the host buffer.
    _freqs = sptr<CudaArray<float>>(new CudaArray<float>(_multires));
    if (_multires < 1)
        return; // no frequencies requested; writing arr[0] below would be out of bounds

    float *arr = new float[_multires];
    arr[0] = 1.0f;
    for (auto i = 1; i < _multires; ++i)
        arr[i] = arr[i - 1] * 2.0f; // each band doubles the previous frequency
    cudaError_t err =
        cudaMemcpy(_freqs->getBuffer(), arr, _multires * sizeof(float), cudaMemcpyHostToDevice);
    // Release the host buffer before checking the status, so an error raised by CHECK_EX does
    // not leak it.
    delete[] arr;
    CHECK_EX(err);
}