// Copies the contents of `arr` from the device into a temporary host buffer
// and prints them to `so` through dumpHostBuffer().
//
// Every element of type T is viewed as sizeof(T) / sizeof(T2) consecutive
// values ("channels") of type T2, so a row of `elemsPerRow` elements is
// forwarded to dumpHostBuffer() as `channels * elemsPerRow` values.
//
// Parameters:
//   so          - stream that receives the dump (and the error message if the
//                 device-to-host copy fails).
//   arr         - array to dump; arr.getBuffer() is used as the copy source
//                 and arr.n() as the number of T elements.
//   maxDumpRows - forwarded unchanged to dumpHostBuffer(); presumably 0 means
//                 "no limit" - confirm against dumpHostBuffer().
//   elemsPerRow - number of T elements per printed row.
//
// If cudaMemcpy fails, the error string is written to `so` and nothing else
// is dumped (the host buffer would only hold uninitialised data).
//
// NOTE(review): T and T2 are not declared in the visible text - the template
// parameter list (and any template arguments on CudaArray / dumpHostBuffer)
// appears to have been lost; the declaration is kept exactly as found.
template void dumpArray(std::ostream &so, CudaArray &arr, size_t maxDumpRows = 0, size_t elemsPerRow = 1)
{
    // Owns the staging buffer so it is released on every exit path, including
    // the early return below and exceptions thrown while writing to `so`.
    struct HostBufferGuard
    {
        T2 *data;
        ~HostBufferGuard() { delete[] data; }
    };

    // Kept as size_t so the element-count arithmetic below is never done in int.
    const size_t channels = sizeof(T) / sizeof(T2);
    const size_t count = arr.n();
    const size_t byteCount = count * sizeof(T);

    HostBufferGuard host = { new T2[count * channels] };

    const cudaError_t status =
        cudaMemcpy(host.data, arr.getBuffer(), byteCount, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess)
    {
        so << "dumpArray: cudaMemcpy failed: " << cudaGetErrorString(status) << '\n';
        return;
    }

    dumpHostBuffer(so, host.data, byteCount, channels * elemsPerRow, maxDumpRows);
}