working buffering
This commit is contained in:
parent 109566d7b6
commit c4a2ce1b44
Makefile (2 changed lines)
@@ -1,6 +1,6 @@
 # Compiler and flags
 NVCC = nvcc
-CXXFLAGS = -I./src -I./hurricanedata -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs)
+CXXFLAGS = -I./src -I./hurricanedata -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs) -g -G
 COMPILE_OBJ_FLAGS = --device-c
 
 # Directories
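Note: the only Makefile change is appending -g -G to CXXFLAGS. With nvcc, -g emits host-side debug symbols and -G emits device-side debug information while disabling most device-code optimizations, so this is a debug-build setting; it is worth removing again before benchmarking the buffering.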
@@ -43,12 +43,13 @@ void DataReader::loadFile(T* dataOut, size_t fileIndex) {
 }
 
 template <typename T>
-void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& variableName) {
+void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) {
     NcFile data(filePathManager.getPath(fileIndex), NcFile::read);
 
     multimap<string, NcVar> vars = data.getVars();
 
-    NcVar var = vars.find(variableName)->second;
+    NcVar var = vars.find(varName)->second;
+    cout << "var = " << varName << "with size = " << var.getDim(0).getSize() << "\n";
 
     var.getVar(dataOut);
 }
@@ -21,9 +21,6 @@ public:
 
     size_t axisLength(size_t fileIndex, const std::string& axisName);
 
-    // template <typename T>
-    // void readAndAllocateAxis(T** axis_ptr, size_t *size, NcVar var) {
-
     ~DataReader();
 private:
     FilePathManager filePathManager;
@@ -8,10 +8,12 @@ __device__ float getVal(
     const size_t &latInd,
     const size_t &lonInd
 ) {
+    size_t valArrInd = (d.fieldInd + timeInd) / md.numberOfTimeStepsPerFile;
+    size_t trueTimeInd = (d.fieldInd + timeInd) % md.numberOfTimeStepsPerFile;
     size_t sizeSpatialData = md.widthSize*md.heightSize*md.depthSize;
     size_t size2DMapData = md.widthSize*md.heightSize;
-    return d.valArrays[0][
-        timeInd*sizeSpatialData
+    return d.valArrays[valArrInd][
+        trueTimeInd*sizeSpatialData
         + levInd*size2DMapData
         + latInd*md.widthSize
         + lonInd
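Note: these two divmod lines are the core of the buffering change: a field no longer lives in a single file, so a logical time index is split into a buffered-file slot and a step within that file. A worked sketch of the mapping (editor's illustration; the concrete numbers are assumed):

    #include <cstddef>
    #include <cstdio>

    int main() {
        // Mirrors the arithmetic added to getVal, with assumed numbers:
        // numberOfTimeStepsPerFile = 4, the field starts at global step 3,
        // and the caller asks for the field's local step timeInd = 2.
        size_t stepsPerFile = 4;
        size_t fieldInd = 3, timeInd = 2;
        size_t valArrInd   = (fieldInd + timeInd) / stepsPerFile; // = 1 -> d.valArrays[1]
        size_t trueTimeInd = (fieldInd + timeInd) % stepsPerFile; // = 1 -> second step in that file
        std::printf("file slot %zu, local step %zu\n", valArrInd, trueTimeInd);
        return 0;
    }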
@@ -14,6 +14,10 @@ struct FieldMetadata {
     double *lons;
     double *lats;
     double *levs;
+
+    size_t timeSize; // Number of different times
+
+    size_t numberOfTimeStepsPerFile;
 };
 
 using FieldMetadata = FieldMetadata;

@@ -21,16 +25,15 @@ using FieldMetadata = FieldMetadata;
 struct FieldData {
     static constexpr size_t FILESNUM = 2; // Number of files stored in a FieldData struct.
 
-    // An array of length FILESNUM storing pointers to 4D arrays stored in device memory.
-    float *valArrays[FILESNUM];
+    // A uniform array of length FILESNUM storing pointers to 4D arrays stored in device memory.
+    float **valArrays;
 
-    size_t timeSize; // Number of different times
-    // times is a managed Unified Memory array of size timeSize that indicates
-    // that getVal(md, d, t, i, j, k) is a value at time times[t].
-    int *times;
+    size_t fieldInd;
+    // times is a managed Unified Memory array of size (FILESNUM, numberOfTimeStepsPerFile)
+    int **times;
 };
 
-using FieldData = FieldData;
 
 extern __device__ float getVal(
     const FieldMetadata &md,
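Note: valArrays and times turn from inline arrays into double pointers here, so the pointer tables themselves now need an allocation that both the host (which fills them in setupField below) and the device (which runs getVal) can reach. A minimal sketch of that allocation, assuming Unified Memory is the intent and that fielddata.h and the CUDA runtime are in scope; note that the setupField added later passes sizeof(sizeof(float *)*FieldData::FILESNUM), which evaluates to sizeof(size_t) rather than the byte count of a FILESNUM-slot table:

    // Sketch (editor's, not verbatim from this commit): allocate the pointer
    // tables in Unified Memory so the host can fill them and a kernel can
    // dereference them.
    FieldData fd;
    cudaMallocManaged(&fd.valArrays, sizeof(float *) * FieldData::FILESNUM);
    cudaMallocManaged(&fd.times, sizeof(int *) * FieldData::FILESNUM);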
@@ -18,6 +18,8 @@ struct File {
     size_t size;
     float *h_data; // host data
     float *d_data; // device data
+    int *times;
+    size_t timeSize;
 };
 
 struct LoadFileJob {

@@ -53,6 +55,9 @@ public:
     size_t getSize(size_t fileIndex); // Most probably blocking
     void getAxis(GetAxisDoubleJob job);
     void getAxis(GetAxisIntJob job);
 
+    template <typename T>
+    std::pair<size_t, T *> getAxis(size_t fileIndex, const std::string& axisName); // Most probably blocking
     ~impl();
 
     File buffer[numBufferedFiles];
@@ -84,37 +89,44 @@ size_t GPUBuffer::impl::getSize(size_t fileIndex) {
     return future.get();
 }
 
-template <>
-pair<size_t, double *> GPUBuffer::getAxis(size_t fileIndex, const string& axisName) {
-    promise<pair<size_t, double *>> promise;
-    future<pair<size_t, double *>> future = promise.get_future();
-    {
-        lock_guard<mutex> lk(pImpl->queuecv_m);
-        pImpl->jobs.push(GetAxisDoubleJob{fileIndex, axisName, move(promise)});
-    }
-    pImpl->queuecv.notify_all();
-
-    future.wait();
-
-    return future.get();
+template <typename T>
+pair<size_t, T *> GPUBuffer::getAxis(size_t fileIndex, const string& axisName) {
+    return pImpl->getAxis<T>(fileIndex, axisName);
 }
+template pair<size_t, int *> GPUBuffer::getAxis<int>(size_t fileIndex, const string& axisName);
 template pair<size_t, double *> GPUBuffer::getAxis<double>(size_t fileIndex, const string& axisName);
 
 template <>
-pair<size_t, int *> GPUBuffer::getAxis(size_t fileIndex, const string& axisName) {
-    promise<pair<size_t, int *>> promise;
-    future<pair<size_t, int *>> future = promise.get_future();
+pair<size_t, double *> GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) {
+    promise<pair<size_t, double *>> promise;
+    future<pair<size_t, double *>> future = promise.get_future();
     {
-        lock_guard<mutex> lk(pImpl->queuecv_m);
-        pImpl->jobs.push(GetAxisIntJob{fileIndex, axisName, move(promise)});
+        lock_guard<mutex> lk(queuecv_m);
+        jobs.push(GetAxisDoubleJob{fileIndex, axisName, move(promise)});
     }
-    pImpl->queuecv.notify_all();
+    queuecv.notify_all();
 
     future.wait();
 
     return future.get();
 }
-template pair<size_t, int *> GPUBuffer::getAxis<int>(size_t fileIndex, const string& axisName);
+template pair<size_t, double *> GPUBuffer::impl::getAxis<double>(size_t fileIndex, const string& axisName);
+
+template <>
+pair<size_t, int *> GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) {
+    promise<pair<size_t, int *>> promise;
+    future<pair<size_t, int *>> future = promise.get_future();
+    {
+        lock_guard<mutex> lk(queuecv_m);
+        jobs.push(GetAxisIntJob{fileIndex, axisName, move(promise)});
+    }
+    queuecv.notify_all();
+
+    future.wait();
+
+    return future.get();
+}
+template pair<size_t, int *> GPUBuffer::impl::getAxis<int>(size_t fileIndex, const string& axisName);
 
 GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) {
     cudaStreamCreate(&iostream);
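Note: the public getAxis<T> is now a thin forwarder into the pImpl, and the per-type impl::getAxis bodies are the requester half of a promise/future handoff: push a job under queuecv_m, wake the worker thread via queuecv, then block on the future until the worker fulfills the promise (impl::getAxis(GetAxisDoubleJob) further down calls job.axis.set_value). A stripped-down, self-contained sketch of that pattern (names illustrative, not from this repository; shutdown handling omitted for brevity):

    #include <condition_variable>
    #include <future>
    #include <mutex>
    #include <queue>
    #include <thread>

    std::queue<std::promise<int>> jobs;
    std::mutex queue_m;
    std::condition_variable queue_cv;

    // Worker half: drain the queue and fulfill each promise.
    void worker() {
        std::unique_lock<std::mutex> lk(queue_m);
        for (;;) {
            queue_cv.wait(lk, [] { return !jobs.empty(); });
            std::promise<int> p = std::move(jobs.front());
            jobs.pop();
            lk.unlock();
            p.set_value(42); // stand-in for the blocking NetCDF read
            lk.lock();
        }
    }

    // Requester half: enqueue, wake the worker, block on the future.
    int request() {
        std::promise<int> p;
        std::future<int> f = p.get_future();
        {
            std::lock_guard<std::mutex> lk(queue_m);
            jobs.push(std::move(p));
        }
        queue_cv.notify_all();
        return f.get();
    }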
@@ -122,34 +134,26 @@ GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) {
     ioworker = make_unique<thread>([this]() { worker(); });
 
     size_t size = getSize(0);
-    cout << "size = " << size << "\n";
+    auto x = getAxis<int>(0, "time");
+    size_t sizeTime = x.first;
+    cudaFree(x.second);
     for (size_t i = 0; i < numBufferedFiles; i++) {
         {
             File &file = buffer[i];
             lock_guard<mutex> lk(file.m);
             cudaMallocHost(&file.h_data, sizeof(float)*size);
-            cudaError_t status = cudaMalloc(&file.d_data, sizeof(float)*size);
-            if (status != cudaSuccess)
-                cerr << "Error allocating device memory. Status code: " << status << "\n";
+            cudaMalloc(&file.d_data, sizeof(float)*size);
+            cudaMallocManaged(&file.times, sizeof(int)*sizeTime);
             file.size = size;
             file.valid = false;
+            file.timeSize = sizeTime;
         }
-        loadFile(i, i);
-        {
-            // lock_guard<mutex> lk(queuecv_m);
-            // LoadFileJob job = {
-            //     .fileIndex = i,
-            //     .bufferIndex = i
-            // };
-            // cout << "enqueue file load job\n";
-            // jobs.push(job);
-
-        }
+        // loadFile(i, i);
     }
 }
 
-GPUBuffer::~GPUBuffer() { }
+GPUBuffer::~GPUBuffer() {
+}
 
 GPUBuffer::impl::~impl() {
     {
@@ -162,6 +166,7 @@ GPUBuffer::impl::~impl() {
         File &file = buffer[i];
         cudaFree(file.d_data);
         cudaFree(file.h_data);
+        cudaFree(file.times);
     }
     cudaStreamDestroy(iostream);
 }
@@ -172,11 +177,12 @@ void GPUBuffer::impl::loadFile(LoadFileJob job) {
     {
         lock_guard<mutex> lk(file.m);
         assert(!file.valid);
+        dataReader.loadFile<int>(file.times, job.fileIndex, "time"); // TODO: Times dont store anything useful :(
+        cout << "times[1] (inside inside) " << file.times[1] << " for file with fileindex = " << job.fileIndex << "\n";
        dataReader.loadFile<float>(file.h_data, job.fileIndex);
         cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream);
         cudaStreamSynchronize(iostream);
         buffer[job.bufferIndex].valid = true;
-        cout << "loaded file with index" << job.bufferIndex << "\n";
     }
     file.cv.notify_all();
 }
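Note: loadFile keeps the staging pattern the buffer is built around: decode NetCDF into pinned host memory, cudaMemcpyAsync to the device on the dedicated iostream stream, then synchronize only that stream before marking the slot valid. A self-contained sketch of that pattern (illustrative names and sizes, not from this repo):

    #include <cuda_runtime.h>

    // Sketch: staged host-to-device upload through pinned memory on a
    // private stream, so the copy does not serialize the default stream.
    void stagedUpload(const float *src, size_t n) {
        cudaStream_t io;
        cudaStreamCreate(&io);

        float *h_staging, *d_dst;
        cudaMallocHost(&h_staging, n * sizeof(float)); // pinned => async copy is truly async
        cudaMalloc(&d_dst, n * sizeof(float));

        for (size_t i = 0; i < n; i++) h_staging[i] = src[i]; // stand-in for the file read
        cudaMemcpyAsync(d_dst, h_staging, n * sizeof(float), cudaMemcpyHostToDevice, io);
        cudaStreamSynchronize(io); // wait on this stream only

        cudaFree(d_dst);
        cudaFreeHost(h_staging);
        cudaStreamDestroy(io);
    }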
@@ -190,7 +196,7 @@ void GPUBuffer::impl::getAxis(GetAxisDoubleJob job) {
     pair<size_t, double *> array;
     array.first = dataReader.axisLength(job.fileIndex, job.axisName);
     cudaError_t status = cudaMallocManaged(&array.second, array.first*sizeof(double));
-    dataReader.loadFile<double>(array.second, job.fileIndex);
+    dataReader.loadFile<double>(array.second, job.fileIndex, job.axisName);
     job.axis.set_value(array);
 }
@@ -272,6 +278,8 @@ DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) {
 
     DataHandle dh = {
         .d_data = file.d_data,
+        .times = file.times,
+        .timeSize = file.timeSize,
         .size = file.size
     };
     return dh;
@@ -8,7 +8,9 @@
 #include "datareader.h"
 
 struct DataHandle {
-    float *d_data;
+    float *d_data; // Device memory
+    int* times; // Uniform memory
+    size_t timeSize;
     size_t size;
 };
@@ -1,60 +1,93 @@
 #include "gpubufferhandler.h"
 #include "fielddata.h"
-#include "gpubufferhelper.h"
 
-#include <netcdf>
+#include <iostream>
 
 using namespace std;
-using namespace netCDF;
 
-GPUBufferHandler::GPUBufferHandler(const std::string &path, std::string variableName):
-    filePathManager(path), variableName(variableName), presentTimeIndex(0), fileIndex(0) {
-    NcFile data(filePathManager.getPath(fileIndex), NcFile::read);
-
-    multimap<string, NcVar> vars = data.getVars();
-
+GPUBufferHandler::GPUBufferHandler(GPUBuffer& gpuBuffer):
+    gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) {
     cudaMallocManaged(&fmd, sizeof(FieldMetadata));
 
-    readAndAllocateAxis<double>(&fmd->lons, &fmd->widthSize, vars.find("lon")->second);
-    readAndAllocateAxis<double>(&fmd->lats, &fmd->heightSize, vars.find("lat")->second);
-    readAndAllocateAxis<double>(&fmd->levs, &fmd->depthSize, vars.find("lev")->second);
+    auto [widthSize, lons] = gpuBuffer.getAxis<double>(0, "lon");
+    fmd->widthSize = widthSize;
+    fmd->lons = lons;
+
+    auto [heightSize, lats] = gpuBuffer.getAxis<double>(0, "lat");
+    fmd->heightSize = heightSize;
+    fmd->lats = lats;
+
+    auto [depthSize, levs] = gpuBuffer.getAxis<double>(0, "lev");
+    fmd->depthSize = depthSize;
+    fmd->levs = levs;
+
+    for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) {
+        gpuBuffer.loadFile(fileInd, fileInd);
+        fileInd++;
+    }
+
+    fmd->numberOfTimeStepsPerFile = 4; // TODO: Maybe find a better way to do this.
+    fmd->timeSize = GPUBufferHandler::numberOfTimeStepsPerField;
+}
+
+FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
+    FieldData fd;
+    cudaMallocManaged(&fd.valArrays, sizeof(sizeof(float *)*FieldData::FILESNUM));
+    cudaMallocManaged(&fd.times, sizeof(sizeof(int *)*FieldData::FILESNUM));
+    size_t fieldDataInd = 0;
+    cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << "\n";
+    for (int i = bufferInd; i <= newEndBufferInd; i++) {
+        cout << "getting handle for " << i << "\n";
+        DataHandle x = gpuBuffer.getDataHandle(i);
+        fd.valArrays[fieldDataInd] = x.d_data;
+        fd.times[fieldDataInd] = x.times;
+        fieldDataInd++;
+    }
+    fd.fieldInd = fieldInd;
+
+    return fd;
 }
 
 FieldData GPUBufferHandler::nextFieldData() {
-    NcFile data(filePathManager.getPath(fileIndex), NcFile::read);
-
-    multimap<string, NcVar> vars = data.getVars();
-
-    FieldData fd;
-    size_t timeSize;
-    readAndAllocateAxis(&fd.times, &fd.timeSize, vars.find("time")->second);
-
-    NcVar var = vars.find(variableName)->second;
-
-    int length = 1;
-    for (NcDim dim: var.getDims()) {
-        length *= dim.getSize();
-    }
-
-    // Store NetCDF variable in pinned memory on host
-    float *h_array;
-    cudaMallocHost(&h_array, sizeof(float)*length);
-
-    var.getVar(h_array);
-
-    // Copy data to device
-    cudaError_t status = cudaMalloc(&fd.valArrays[0], sizeof(float)*length);
-    if (status != cudaSuccess)
-        cout << "Error allocating memory: " << status << "\n";
-
-    cudaMemcpyAsync(fd.valArrays[0], h_array, sizeof(float)*length, cudaMemcpyHostToDevice);
-
-    cudaDeviceSynchronize(); // Heavy hammer synchronisation // TODO: Use streams
-
-    cudaFreeHost(h_array);
-
-    return fd;
+    DataHandle x = gpuBuffer.getDataHandle(bufferInd);
+    size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile;
+    size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
+
+    size_t endFieldInd = (fieldInd + GPUBufferHandler::numberOfTimeStepsPerField - 1) % fmd->numberOfTimeStepsPerFile;
+    size_t endBufferInd = (bufferInd + (fieldInd + GPUBufferHandler::numberOfTimeStepsPerField - 1)/fmd->numberOfTimeStepsPerFile) % GPUBuffer::numBufferedFiles;
+
+    size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile;
+    size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
+
+    // size_t newBufferInd = (bufferInd + 1) % GPUBuffer::numBufferedFiles;
+    // size_t newFieldInd = (fieldInd + ((bufferInd + 1) / 4)) % x.timeSize;
+
+    // size_t endBufferInd = (bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) % GPUBuffer::numBufferedFiles;
+    // size_t endFieldInd = (fieldInd + ((bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) / 4)) % x.timeSize;
+
+    // size_t newEndBufferInd = (endBufferInd + 1) % GPUBuffer::numBufferedFiles;
+    // size_t newEndFieldInd = (endFieldInd + ((endBufferInd + 1) / 4)) % x.timeSize;
+
+    if(firstTimeStep) {
+        firstTimeStep = false;
+        return setupField(endFieldInd);
+    }
+
+    if (newBufferInd != bufferInd) {
+        fileInd++;
+        gpuBuffer.loadFile(fileInd, bufferInd);
+        bufferInd = newBufferInd;
+        fieldInd = newFieldInd;
+    }
+
+    if (newEndBufferInd != endBufferInd) {
+        // maybe dont do things?
+    }
+
+    return setupField(newEndBufferInd);
 }
 
 GPUBufferHandler::~GPUBufferHandler() {
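Note: the index bookkeeping in nextFieldData slides a window of numberOfTimeStepsPerField steps forward one step per call, across files of numberOfTimeStepsPerFile steps arranged in a ring of numBufferedFiles buffer slots; when the window's start crosses a file boundary, the vacated slot is refilled with the next file via gpuBuffer.loadFile(fileInd, bufferInd). A small sketch that just prints how the indices advance, using the commit's constants and an assumed numBufferedFiles = 2:

    #include <cstddef>
    #include <cstdio>

    int main() {
        const size_t stepsPerFile = 4;   // fmd->numberOfTimeStepsPerFile
        const size_t stepsPerField = 2;  // GPUBufferHandler::numberOfTimeStepsPerField
        const size_t slots = 2;          // assumed GPUBuffer::numBufferedFiles
        size_t fieldInd = 0, bufferInd = 0;
        for (int call = 0; call < 8; call++) {
            // Same formula as the commit: where does the window's last step land?
            size_t endBufferInd = (bufferInd + (fieldInd + stepsPerField - 1) / stepsPerFile) % slots;
            std::printf("call %d: window starts at slot %zu step %zu, ends in slot %zu\n",
                        call, bufferInd, fieldInd, endBufferInd);
            size_t newFieldInd = (fieldInd + 1) % stepsPerFile;
            size_t newBufferInd = (bufferInd + (fieldInd + 1) / stepsPerFile) % slots;
            if (newBufferInd != bufferInd) { /* here the handler refills the vacated slot */ }
            fieldInd = newFieldInd;
            bufferInd = newBufferInd;
        }
        return 0;
    }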
@@ -8,7 +8,7 @@
 
 class GPUBufferHandler {
 public:
-    GPUBufferHandler();
+    GPUBufferHandler(GPUBuffer& gpuBuffer);
 
     FieldData nextFieldData();
 
@@ -16,8 +16,15 @@ public:
 
     FieldMetadata *fmd;
 
+    static constexpr size_t numberOfTimeStepsPerField = 2; // TODO: Move this to fielddata
+
 private:
-    GPUBuffer
+    FieldData setupField(size_t endBufferInd);
+    GPUBuffer& gpuBuffer;
+    size_t fileInd;
+    size_t bufferInd;
+    size_t fieldInd;
+    bool firstTimeStep = true;
 };
 
 #endif //GPUBUFFERHANDLER_H
@@ -1,14 +0,0 @@
-#include <netcdf>
-#include <cassert>
-
-using namespace std;
-using namespace netCDF;
-
-template <typename T>
-void readAndAllocateAxis(T** axis_ptr, size_t *size, NcVar var) {
-    assert(var.getDimCount() == 1);
-    netCDF::NcDim dim = var.getDim(0);
-    *size = dim.getSize();
-    cudaError_t status = cudaMallocManaged(axis_ptr, *size*sizeof(T));
-    var.getVar(*axis_ptr);
-}
src/main.cu (58 changed lines)
@@ -1,5 +1,5 @@
 // #include "hurricanedata/fielddata.h"
-// #include "hurricanedata/gpubufferhandler.h"
+#include "hurricanedata/gpubufferhandler.h"
 #include "hurricanedata/datareader.h"
 #include "hurricanedata/gpubuffer.h"
 
@@ -10,32 +10,9 @@
 #include <memory>
 #include <iomanip>
 
-// Not parallel computation
-// __global__ void computeMean(float *ans, const FieldMetadata &fmd, FieldData fd) {
-//     float sum = 0;
-//     size_t num_not_masked_values = 0;
-//     for (int i = 0; i < fmd.widthSize; i++) {
-//         double xi = getVal(fmd, fd, 2, 20, 100, i);
-//         if (xi < 1E14) { /* If x is not missing value */
-//             num_not_masked_values++;
-//             sum += xi;
-//         }
-//     }
-//     *ans = sum/num_not_masked_values;
-// }
-
-__global__ void computeMean(float *ans, DataHandle dh) {
-    float sum = 0;
-    size_t num_not_masked_values = 0;
-    for (int i = 0; i < dh.size; i++) {
-        double xi = dh.d_data[i];
-        if (xi < 1E14) { /* If x is not missing value */
-            num_not_masked_values++;
-            sum += xi;
-        }
-    }
-    *ans = sum/num_not_masked_values;
+__global__ void getSingleValue(float *ans, const FieldMetadata &fmd, FieldData fd) {
+    float xi = getVal(fmd, fd, 1, 20, 100, 100);
+    *ans = xi;
 }
 
 int main() {
@@ -52,29 +29,26 @@ int main() {
 
     std::cout << "created buffer\n";
 
-    auto dataHandle = buffer.getDataHandle(0);
+    GPUBufferHandler bufferHandler(buffer);
 
-    std::cout << "got a data handle\n";
+    std::cout << "created buffer handler\n";
 
-    auto x = buffer.getAxis<int>(0, "time");
-    std::cout << "size of x=" << x.first << "\n";
-    std::cout << "x[1]= " <<x.second[1] << "\n";
+    auto fd = bufferHandler.nextFieldData();
 
-    // GPUBufferHandler buffer{path, variable};
+    std::cout << "aquired field\n";
 
-    // auto fd = buffer.nextFieldData();
+    float *ptr_test_read;
+    cudaMallocManaged(&ptr_test_read, sizeof(float));
 
-    float *ptr_mean;
-    cudaMallocManaged(&ptr_mean, sizeof(float));
-
-    computeMean<<<1, 1>>>(ptr_mean, dataHandle);
+    getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
 
     cudaDeviceSynchronize();
 
-    std::cout << "Mean = " << std::fixed << std::setprecision(6) << *ptr_mean << "\n";
+    std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
 
-    // cudaFree(fd.valArrays[0]);
-    cudaFree(ptr_mean);
+    // TODO: Write a example loop using buffering and measure it.
+    cudaFree(fd.valArrays[0]); // TODO: Free data properly in FieldData (maybe make an iterator)
+    cudaFree(ptr_test_read);
     return 0;
 }
@@ -46,5 +46,6 @@ print(Mean2)
 
 print(f"Why does {np.mean(row):.10f} not equal {sumval/n:.10f} ?!")
 
+
 # Close the NetCDF file
 ncfile.close()
@@ -0,0 +1,18 @@
+import numpy as np
+from netCDF4 import Dataset
+
+# Load the NetCDF file
+file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4'
+ncfile = Dataset(file_path, 'r')
+
+# Check the available variables in the file
+print(ncfile.variables.keys())
+
+Temp = ncfile.variables['T'][:]
+
+print(f"{Temp[1, 20, 100, 100]=}")
+print(f"{Temp.flat[12949732]=}")
+
+
+# Close the NetCDF file
+ncfile.close()
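Note: the new script cross-checks one element of the temperature field two ways: by subscript and by flat index. The flat index is the row-major flattening of (time, lev, lat, lon) = (1, 20, 100, 100), which works out exactly if the file has the usual MERRA-2 inst6_3d_ana_Np grid of 42 pressure levels, 361 latitudes, and 576 longitudes (assumed here; the commit does not state the shape). This is also the same indexing scheme getVal uses, so the script doubles as a check of the kernel's getSingleValue readout:

    #include <cstddef>
    #include <cstdio>

    // Sketch: row-major flat index of element (1, 20, 100, 100) on an
    // assumed (time, lev, lat, lon) grid with lev=42, lat=361, lon=576.
    int main() {
        const size_t lev = 42, lat = 361, lon = 576;
        size_t flat = ((1 * lev + 20) * lat + 100) * lon + 100;
        std::printf("%zu\n", flat); // prints 12949732, matching Temp.flat[12949732]
        return 0;
    }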