moved times to metadata, improved freeing, created example
This commit is contained in:
parent
c4a2ce1b44
commit
3c8e68ac1d
|
|
@ -49,7 +49,6 @@ void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) {
|
||||||
multimap<string, NcVar> vars = data.getVars();
|
multimap<string, NcVar> vars = data.getVars();
|
||||||
|
|
||||||
NcVar var = vars.find(varName)->second;
|
NcVar var = vars.find(varName)->second;
|
||||||
cout << "var = " << varName << "with size = " << var.getDim(0).getSize() << "\n";
|
|
||||||
|
|
||||||
var.getVar(dataOut);
|
var.getVar(dataOut);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,8 @@ struct FieldMetadata {
|
||||||
|
|
||||||
size_t timeSize; // Number of different times
|
size_t timeSize; // Number of different times
|
||||||
|
|
||||||
|
// times is a managed Unified Memory array of size numberOfTimeStepsPerFile
|
||||||
|
int *times;
|
||||||
size_t numberOfTimeStepsPerFile;
|
size_t numberOfTimeStepsPerFile;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -29,9 +31,6 @@ struct FieldData {
|
||||||
float **valArrays;
|
float **valArrays;
|
||||||
|
|
||||||
size_t fieldInd;
|
size_t fieldInd;
|
||||||
|
|
||||||
// times is a managed Unified Memory array of size (FILESNUM, numberOfTimeStepsPerFile)
|
|
||||||
int **times;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,6 @@ struct File {
|
||||||
size_t size;
|
size_t size;
|
||||||
float *h_data; // host data
|
float *h_data; // host data
|
||||||
float *d_data; // device data
|
float *d_data; // device data
|
||||||
int *times;
|
|
||||||
size_t timeSize;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct LoadFileJob {
|
struct LoadFileJob {
|
||||||
|
|
@ -143,10 +141,8 @@ GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) {
|
||||||
lock_guard<mutex> lk(file.m);
|
lock_guard<mutex> lk(file.m);
|
||||||
cudaMallocHost(&file.h_data, sizeof(float)*size);
|
cudaMallocHost(&file.h_data, sizeof(float)*size);
|
||||||
cudaMalloc(&file.d_data, sizeof(float)*size);
|
cudaMalloc(&file.d_data, sizeof(float)*size);
|
||||||
cudaMallocManaged(&file.times, sizeof(int)*sizeTime);
|
|
||||||
file.size = size;
|
file.size = size;
|
||||||
file.valid = false;
|
file.valid = false;
|
||||||
file.timeSize = sizeTime;
|
|
||||||
}
|
}
|
||||||
// loadFile(i, i);
|
// loadFile(i, i);
|
||||||
}
|
}
|
||||||
|
|
@ -165,20 +161,17 @@ GPUBuffer::impl::~impl() {
|
||||||
for (size_t i = 0; i < numBufferedFiles; i++) {
|
for (size_t i = 0; i < numBufferedFiles; i++) {
|
||||||
File &file = buffer[i];
|
File &file = buffer[i];
|
||||||
cudaFree(file.d_data);
|
cudaFree(file.d_data);
|
||||||
cudaFree(file.h_data);
|
cudaFreeHost(file.h_data);
|
||||||
cudaFree(file.times);
|
|
||||||
}
|
}
|
||||||
cudaStreamDestroy(iostream);
|
cudaStreamDestroy(iostream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUBuffer::impl::loadFile(LoadFileJob job) {
|
void GPUBuffer::impl::loadFile(LoadFileJob job) {
|
||||||
File &file = buffer[job.bufferIndex];
|
File &file = buffer[job.bufferIndex];
|
||||||
|
|
||||||
{
|
{
|
||||||
lock_guard<mutex> lk(file.m);
|
lock_guard<mutex> lk(file.m);
|
||||||
assert(!file.valid);
|
assert(!file.valid);
|
||||||
dataReader.loadFile<int>(file.times, job.fileIndex, "time"); // TODO: Times dont store anything useful :(
|
cout << "loading file with index " << job.fileIndex << "\n";
|
||||||
cout << "times[1] (inside inside) " << file.times[1] << " for file with fileindex = " << job.fileIndex << "\n";
|
|
||||||
dataReader.loadFile<float>(file.h_data, job.fileIndex);
|
dataReader.loadFile<float>(file.h_data, job.fileIndex);
|
||||||
cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream);
|
cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream);
|
||||||
cudaStreamSynchronize(iostream);
|
cudaStreamSynchronize(iostream);
|
||||||
|
|
@ -278,8 +271,6 @@ DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) {
|
||||||
|
|
||||||
DataHandle dh = {
|
DataHandle dh = {
|
||||||
.d_data = file.d_data,
|
.d_data = file.d_data,
|
||||||
.times = file.times,
|
|
||||||
.timeSize = file.timeSize,
|
|
||||||
.size = file.size
|
.size = file.size
|
||||||
};
|
};
|
||||||
return dh;
|
return dh;
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,6 @@
|
||||||
|
|
||||||
struct DataHandle {
|
struct DataHandle {
|
||||||
float *d_data; // Device memory
|
float *d_data; // Device memory
|
||||||
int* times; // Uniform memory
|
|
||||||
size_t timeSize;
|
|
||||||
size_t size;
|
size_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,28 +21,31 @@ gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) {
|
||||||
fmd->depthSize = depthSize;
|
fmd->depthSize = depthSize;
|
||||||
fmd->levs = levs;
|
fmd->levs = levs;
|
||||||
|
|
||||||
|
|
||||||
for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) {
|
for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) {
|
||||||
gpuBuffer.loadFile(fileInd,fileInd);
|
gpuBuffer.loadFile(fileInd,fileInd);
|
||||||
fileInd++;
|
fileInd++;
|
||||||
}
|
}
|
||||||
|
|
||||||
fmd->numberOfTimeStepsPerFile = 4; // TODO: Maybe find a better way to do this.
|
|
||||||
fmd->timeSize = GPUBufferHandler::numberOfTimeStepsPerField;
|
fmd->timeSize = GPUBufferHandler::numberOfTimeStepsPerField;
|
||||||
|
|
||||||
|
cudaMallocManaged(&fmd->times, sizeof(fmd->numberOfTimeStepsPerFile*sizeof(int)));
|
||||||
|
|
||||||
|
auto [numberOfTimeStepsPerFile, times] = gpuBuffer.getAxis<int>(0, "time");
|
||||||
|
fmd->numberOfTimeStepsPerFile = numberOfTimeStepsPerFile;
|
||||||
|
fmd->times = times;
|
||||||
|
|
||||||
|
cudaMallocManaged(&valArrays, sizeof(float *)*FieldData::FILESNUM);
|
||||||
}
|
}
|
||||||
|
|
||||||
FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
|
FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
|
||||||
|
|
||||||
FieldData fd;
|
FieldData fd;
|
||||||
cudaMallocManaged(&fd.valArrays, sizeof(sizeof(float *)*FieldData::FILESNUM));
|
|
||||||
cudaMallocManaged(&fd.times, sizeof(sizeof(int *)*FieldData::FILESNUM));
|
|
||||||
size_t fieldDataInd = 0;
|
size_t fieldDataInd = 0;
|
||||||
cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << "\n";
|
fd.valArrays = valArrays;
|
||||||
|
cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << " with a field index of " << fieldInd << "\n";
|
||||||
for (int i = bufferInd; i <= newEndBufferInd; i++) {
|
for (int i = bufferInd; i <= newEndBufferInd; i++) {
|
||||||
cout << "getting handle for " << i << "\n";
|
|
||||||
DataHandle x = gpuBuffer.getDataHandle(i);
|
DataHandle x = gpuBuffer.getDataHandle(i);
|
||||||
fd.valArrays[fieldDataInd] = x.d_data;
|
fd.valArrays[fieldDataInd] = x.d_data;
|
||||||
fd.times[fieldDataInd] = x.times;
|
|
||||||
fieldDataInd++;
|
fieldDataInd++;
|
||||||
}
|
}
|
||||||
fd.fieldInd = fieldInd;
|
fd.fieldInd = fieldInd;
|
||||||
|
|
@ -51,7 +54,6 @@ FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
|
||||||
}
|
}
|
||||||
|
|
||||||
FieldData GPUBufferHandler::nextFieldData() {
|
FieldData GPUBufferHandler::nextFieldData() {
|
||||||
|
|
||||||
DataHandle x = gpuBuffer.getDataHandle(bufferInd);
|
DataHandle x = gpuBuffer.getDataHandle(bufferInd);
|
||||||
size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile;
|
size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile;
|
||||||
size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
|
size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
|
||||||
|
|
@ -62,25 +64,17 @@ FieldData GPUBufferHandler::nextFieldData() {
|
||||||
size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile;
|
size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile;
|
||||||
size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
|
size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
|
||||||
|
|
||||||
// size_t newBufferInd = (bufferInd + 1) % GPUBuffer::numBufferedFiles;
|
|
||||||
// size_t newFieldInd = (fieldInd + ((bufferInd + 1) / 4)) % x.timeSize;
|
|
||||||
|
|
||||||
// size_t endBufferInd = (bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) % GPUBuffer::numBufferedFiles;
|
|
||||||
// size_t endFieldInd = (fieldInd + ((bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) / 4)) % x.timeSize;
|
|
||||||
|
|
||||||
// size_t newEndBufferInd = (endBufferInd + 1) % GPUBuffer::numBufferedFiles;
|
|
||||||
// size_t newEndFieldInd = (endFieldInd + ((endBufferInd + 1) / 4)) % x.timeSize;
|
|
||||||
|
|
||||||
if(firstTimeStep) {
|
if(firstTimeStep) {
|
||||||
firstTimeStep = false;
|
firstTimeStep = false;
|
||||||
return setupField(endFieldInd);
|
return setupField(endBufferInd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fieldInd = newFieldInd;
|
||||||
|
|
||||||
if (newBufferInd != bufferInd) {
|
if (newBufferInd != bufferInd) {
|
||||||
fileInd++;
|
fileInd++;
|
||||||
gpuBuffer.loadFile(fileInd, bufferInd);
|
gpuBuffer.loadFile(fileInd, bufferInd);
|
||||||
bufferInd = newBufferInd;
|
bufferInd = newBufferInd;
|
||||||
fieldInd = newFieldInd;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newEndBufferInd != endBufferInd) {
|
if (newEndBufferInd != endBufferInd) {
|
||||||
|
|
@ -94,5 +88,6 @@ GPUBufferHandler::~GPUBufferHandler() {
|
||||||
cudaFree(fmd->lons);
|
cudaFree(fmd->lons);
|
||||||
cudaFree(fmd->lats);
|
cudaFree(fmd->lats);
|
||||||
cudaFree(fmd->levs);
|
cudaFree(fmd->levs);
|
||||||
|
cudaFree(valArrays);
|
||||||
cudaFree(fmd);
|
cudaFree(fmd);
|
||||||
}
|
}
|
||||||
|
|
@ -18,6 +18,8 @@ public:
|
||||||
|
|
||||||
static constexpr size_t numberOfTimeStepsPerField = 2; // TODO: Move this to fielddata
|
static constexpr size_t numberOfTimeStepsPerField = 2; // TODO: Move this to fielddata
|
||||||
|
|
||||||
|
static void freeFieldData();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
FieldData setupField(size_t endBufferInd);
|
FieldData setupField(size_t endBufferInd);
|
||||||
GPUBuffer& gpuBuffer;
|
GPUBuffer& gpuBuffer;
|
||||||
|
|
@ -25,6 +27,8 @@ private:
|
||||||
size_t bufferInd;
|
size_t bufferInd;
|
||||||
size_t fieldInd;
|
size_t fieldInd;
|
||||||
bool firstTimeStep = true;
|
bool firstTimeStep = true;
|
||||||
|
|
||||||
|
float **valArrays;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //GPUBUFFERHANDLER_H
|
#endif //GPUBUFFERHANDLER_H
|
||||||
|
|
|
||||||
23
src/main.cu
23
src/main.cu
|
|
@ -16,7 +16,7 @@ __global__ void getSingleValue(float *ans, const FieldMetadata &fmd, FieldData f
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
std::string path = "data";
|
std::string path = "data/atmosphere_MERRA-wind-speed[179253532]";
|
||||||
|
|
||||||
std::string variable = "T";
|
std::string variable = "T";
|
||||||
|
|
||||||
|
|
@ -31,24 +31,23 @@ int main() {
|
||||||
|
|
||||||
GPUBufferHandler bufferHandler(buffer);
|
GPUBufferHandler bufferHandler(buffer);
|
||||||
|
|
||||||
std::cout << "created buffer handler\n";
|
|
||||||
|
|
||||||
auto fd = bufferHandler.nextFieldData();
|
|
||||||
|
|
||||||
std::cout << "aquired field\n";
|
|
||||||
|
|
||||||
float *ptr_test_read;
|
float *ptr_test_read;
|
||||||
cudaMallocManaged(&ptr_test_read, sizeof(float));
|
cudaMallocManaged(&ptr_test_read, sizeof(float));
|
||||||
|
|
||||||
getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
|
std::cout << "created buffer handler\n";
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
FieldData fd = bufferHandler.nextFieldData();
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
|
||||||
|
|
||||||
std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
// TODO: Write a example loop using buffering and measure it.
|
std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
cudaFree(fd.valArrays[0]); // TODO: Free data properly in FieldData (maybe make an iterator)
|
// TODO: Write an example loop using buffering and measure it.
|
||||||
|
|
||||||
|
// TODO: Free data properly in FieldData (maybe make an iterator)
|
||||||
cudaFree(ptr_test_read);
|
cudaFree(ptr_test_read);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue