Common.h 3.02 KB
Newer Older
Nianchen Deng's avatar
Nianchen Deng committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#pragma once
#include <ctime>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <GL/glew.h>
#include <cuda_gl_interop.h>
#include "../glm/glm.hpp"
#include "Logger.h"

// Returns the size of one element of data type `t` (4 for kINT32/kFLOAT,
// 2 for kHALF, 1 for kBOOL/kINT8).
//
// Throws std::runtime_error("Invalid DataType.") for any value that is not
// handled by the switch below.
inline unsigned int getElementSize(nv::DataType t)
{
	// 0 is never a valid size, so it doubles as the "not handled" marker.
	unsigned int elementSize = 0;
	switch (t)
	{
	case nv::DataType::kFLOAT:
	case nv::DataType::kINT32:
		elementSize = 4;
		break;
	case nv::DataType::kHALF:
		elementSize = 2;
		break;
	case nv::DataType::kINT8:
	case nv::DataType::kBOOL:
		elementSize = 1;
		break;
	}
	if (elementSize == 0)
		throw std::runtime_error("Invalid DataType.");
	return elementSize;
}

// Writes the first `n` elements of `buf` to `os` on a single line, separated
// by single spaces and terminated with std::endl (which also flushes `os`).
//
// An empty row (n == 0) produces only the line terminator; `buf` is not
// dereferenced in that case, so it may be null.
template <typename T>
void dumpRow(std::ostream &os, T* buf, size_t n)
{
	for (size_t i = 0; i < n; ++i) {
		// Separator goes between elements only, never before the first one.
		if (i != 0)
			os << " ";
		os << buf[i];
	}
	os << std::endl;
}

// Prints the contents of a host buffer to `os`, interpreting it as an array
// of `T` and breaking it into lines of `rowCount` elements each (the last
// line may be shorter).
//
//   buf         - start of the buffer; reinterpreted as T*.
//   bufSize     - size of the buffer in bytes; trailing bytes that do not form
//                 a whole T are ignored.
//   rowCount    - number of elements printed per line. Must be non-zero.
//   maxDumpRows - 0 prints every row. Otherwise at most `maxDumpRows` rows are
//                 printed: the first maxDumpRows / 2 rows, a "..." line, then
//                 the remaining rows taken from the end of the buffer. When
//                 the buffer has no more than `maxDumpRows` rows, everything
//                 is printed and no "..." line is emitted.
//
// Nothing is printed when the buffer holds no complete element.
// Throws std::invalid_argument when `rowCount` is 0.
template <typename T>
void dumpHostBuffer(std::ostream &os, void *buf, size_t bufSize, size_t rowCount, size_t maxDumpRows = 0)
{
	if (rowCount == 0)
		throw std::invalid_argument("dumpHostBuffer: rowCount must be non-zero.");

	T *typedBuf = static_cast<T *>(buf);
	const size_t numItems = bufSize / sizeof(T);
	// With no items, `rows - 1` below would wrap around (size_t is unsigned).
	if (numItems == 0)
		return;

	// Ceiling division without the overflow risk of (numItems + rowCount - 1).
	const size_t rows = numItems / rowCount + (numItems % rowCount != 0 ? 1 : 0);
	const size_t lastRow = rows - 1;
	const size_t nInLastRow = numItems - lastRow * rowCount;

	if (maxDumpRows == 0 || maxDumpRows >= rows) {
		// Nothing to elide: print every full row before the last one.
		for (size_t i = 0; i < lastRow; ++i)
			dumpRow(os, typedBuf + i * rowCount, rowCount);
	} else {
		// Here maxDumpRows < rows, so `rows - maxDumpRows` cannot wrap.
		const size_t headRows = maxDumpRows / 2;
		for (size_t i = 0; i < headRows; ++i)
			dumpRow(os, typedBuf + i * rowCount, rowCount);
		os << "..." << std::endl;
		for (size_t i = rows - maxDumpRows + headRows; i < lastRow; ++i)
			dumpRow(os, typedBuf + i * rowCount, rowCount);
	}
	// The last row is always printed and may be shorter than the others.
	dumpRow(os, typedBuf + lastRow * rowCount, nInLastRow);
}

class CudaStream
{
public:
	CudaStream()
	{
		cudaStreamCreate(&stream);
	}

	operator cudaStream_t()
	{
		return stream;
	}

	virtual ~CudaStream()
	{
		cudaStreamDestroy(stream);
	}

private:
	cudaStream_t stream;
};

class CudaEvent
{
public:
	CudaEvent()
	{
		cudaEventCreate(&mEvent);
	}

	operator cudaEvent_t()
	{
		return mEvent;
	}

	virtual ~CudaEvent()
	{
		cudaEventDestroy(mEvent);
	}

private:
	cudaEvent_t mEvent;
};

// Scope guard for a set of CUDA graphics resources: map() maps them on
// `stream_`, and the destructor unmaps them on the same stream.
//
// The destructor skips the unmap only when a map() attempt failed and no
// earlier map() call on this object succeeded; in that situation this object
// has nothing mapped. If map() is never called the destructor still unmaps,
// which keeps the behaviour for callers that map the resources themselves.
//
// Not copyable: two copies would both unmap the same resources.
struct CudaMapScope
{
	std::vector<cudaGraphicsResource_t> resources_;
	cudaStream_t stream_;

	// Stores a copy of `resources` and the stream to use. Nothing is mapped
	// until map() is called.
	CudaMapScope(const std::vector<cudaGraphicsResource_t> &resources,
				 cudaStream_t stream = nullptr) : resources_(resources), stream_(stream) {}

	CudaMapScope(const CudaMapScope &) = delete;
	CudaMapScope &operator=(const CudaMapScope &) = delete;

	~CudaMapScope()
	{
		// The return value is deliberately ignored: a destructor must not throw.
		if (!skipUnmap_ && !resources_.empty())
			cudaGraphicsUnmapResources(static_cast<int>(resources_.size()),
									   resources_.data(), stream_);
	}

	// Maps all stored resources on `stream_`.
	// Returns the result of cudaGraphicsMapResources, or cudaSuccess when
	// there are no resources to map.
	cudaError_t map()
	{
		if (resources_.empty())
			return cudaSuccess;
		const cudaError_t err = cudaGraphicsMapResources(static_cast<int>(resources_.size()),
														 resources_.data(), stream_);
		if (err == cudaSuccess)
			mapped_ = true;
		// After a failure with no earlier success there is nothing of ours to
		// unmap; a failure after an earlier success must still unmap.
		skipUnmap_ = !mapped_;
		return err;
	}

private:
	bool mapped_ = false;	 // a map() call on this object has succeeded
	bool skipUnmap_ = false; // set once map() failed with nothing mapped by us
};

// Deleter functor for objects that are released through their destroy()
// member function. Calling it with a null pointer does nothing.
template <typename T>
struct Destroy
{
	void operator()(T *ptr)
	{
		// Nothing to release for a null pointer.
		if (ptr == nullptr)
			return;
		ptr->destroy();
	}
};

// Owning pointer whose deleter is ::Destroy<T>, i.e. the object is released
// by calling its destroy() member function.
template <class T>
using uptr = std::unique_ptr<T, ::Destroy<T>>;
// Shorthand for std::shared_ptr<T>.
template <class T>
using sptr = std::shared_ptr<T>;

// Converts the difference between two tick counts expressed in CLOCKS_PER_SEC
// units (for example values returned by std::clock()) into milliseconds, as a
// float. Both arguments are parenthesised, so arbitrary expressions may be
// passed. Requires CLOCKS_PER_SEC from <ctime> at the point of use.
#define INTERVAL(startTicks, endTicks) (((endTicks) - (startTicks)) / static_cast<float>(CLOCKS_PER_SEC) * 1000)

#include "Resource.h"
#include "Formatter.h"