From 3c8e68ac1d81d3c70d60ed65bd63fb5d0d7d0c18 Mon Sep 17 00:00:00 2001 From: Robin Date: Thu, 26 Dec 2024 17:23:07 +0100 Subject: [PATCH] moved times to metadata, improved freeing, created example --- src/hurricanedata/datareader.cu | 1 - src/hurricanedata/fielddata.h | 5 ++-- src/hurricanedata/gpubuffer.cu | 13 ++--------- src/hurricanedata/gpubuffer.h | 2 -- src/hurricanedata/gpubufferhandler.cu | 33 ++++++++++++--------------- src/hurricanedata/gpubufferhandler.h | 4 ++++ src/main.cu | 23 +++++++++---------- 7 files changed, 33 insertions(+), 48 deletions(-) diff --git a/src/hurricanedata/datareader.cu b/src/hurricanedata/datareader.cu index 50619ba..4534aa4 100644 --- a/src/hurricanedata/datareader.cu +++ b/src/hurricanedata/datareader.cu @@ -49,7 +49,6 @@ void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) { multimap vars = data.getVars(); NcVar var = vars.find(varName)->second; - cout << "var = " << varName << "with size = " << var.getDim(0).getSize() << "\n"; var.getVar(dataOut); } diff --git a/src/hurricanedata/fielddata.h b/src/hurricanedata/fielddata.h index e3073a3..d7ac532 100644 --- a/src/hurricanedata/fielddata.h +++ b/src/hurricanedata/fielddata.h @@ -17,6 +17,8 @@ struct FieldMetadata { size_t timeSize; // Number of different times + // times is a managed Unified Memory array of size numberOfTimeStepsPerFile + int *times; size_t numberOfTimeStepsPerFile; }; @@ -29,9 +31,6 @@ struct FieldData { float **valArrays; size_t fieldInd; - - // times is a managed Unified Memory array of size (FILESNUM, numberOfTimeStepsPerFile) - int **times; }; diff --git a/src/hurricanedata/gpubuffer.cu b/src/hurricanedata/gpubuffer.cu index c0392ea..7edac38 100644 --- a/src/hurricanedata/gpubuffer.cu +++ b/src/hurricanedata/gpubuffer.cu @@ -18,8 +18,6 @@ struct File { size_t size; float *h_data; // host data float *d_data; // device data - int *times; - size_t timeSize; }; struct LoadFileJob { @@ -143,10 +141,8 @@ GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) { lock_guard lk(file.m); cudaMallocHost(&file.h_data, sizeof(float)*size); cudaMalloc(&file.d_data, sizeof(float)*size); - cudaMallocManaged(&file.times, sizeof(int)*sizeTime); file.size = size; file.valid = false; - file.timeSize = sizeTime; } // loadFile(i, i); } @@ -165,20 +161,17 @@ GPUBuffer::impl::~impl() { for (size_t i = 0; i < numBufferedFiles; i++) { File &file = buffer[i]; cudaFree(file.d_data); - cudaFree(file.h_data); - cudaFree(file.times); + cudaFreeHost(file.h_data); } cudaStreamDestroy(iostream); } void GPUBuffer::impl::loadFile(LoadFileJob job) { File &file = buffer[job.bufferIndex]; - { lock_guard lk(file.m); assert(!file.valid); - dataReader.loadFile(file.times, job.fileIndex, "time"); // TODO: Times dont store anything useful :( - cout << "times[1] (inside inside) " << file.times[1] << " for file with fileindex = " << job.fileIndex << "\n"; + cout << "loading file with index " << job.fileIndex << "\n"; dataReader.loadFile(file.h_data, job.fileIndex); cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream); cudaStreamSynchronize(iostream); @@ -278,8 +271,6 @@ DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) { DataHandle dh = { .d_data = file.d_data, - .times = file.times, - .timeSize = file.timeSize, .size = file.size }; return dh; diff --git a/src/hurricanedata/gpubuffer.h b/src/hurricanedata/gpubuffer.h index 46d031d..aaf1768 100644 --- a/src/hurricanedata/gpubuffer.h +++ b/src/hurricanedata/gpubuffer.h @@ -9,8 +9,6 @@ struct DataHandle { float *d_data; // Device memory - int* times; // Uniform memory - size_t timeSize; size_t size; }; diff --git a/src/hurricanedata/gpubufferhandler.cu b/src/hurricanedata/gpubufferhandler.cu index 9eb7eb7..5ca09e0 100644 --- a/src/hurricanedata/gpubufferhandler.cu +++ b/src/hurricanedata/gpubufferhandler.cu @@ -21,28 +21,31 @@ gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) { fmd->depthSize = depthSize; fmd->levs = levs; - for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) { gpuBuffer.loadFile(fileInd,fileInd); fileInd++; } - fmd->numberOfTimeStepsPerFile = 4; // TODO: Maybe find a better way to do this. fmd->timeSize = GPUBufferHandler::numberOfTimeStepsPerField; + + cudaMallocManaged(&fmd->times, sizeof(fmd->numberOfTimeStepsPerFile*sizeof(int))); + + auto [numberOfTimeStepsPerFile, times] = gpuBuffer.getAxis(0, "time"); + fmd->numberOfTimeStepsPerFile = numberOfTimeStepsPerFile; + fmd->times = times; + + cudaMallocManaged(&valArrays, sizeof(float *)*FieldData::FILESNUM); } FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) { FieldData fd; - cudaMallocManaged(&fd.valArrays, sizeof(sizeof(float *)*FieldData::FILESNUM)); - cudaMallocManaged(&fd.times, sizeof(sizeof(int *)*FieldData::FILESNUM)); size_t fieldDataInd = 0; - cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << "\n"; + fd.valArrays = valArrays; + cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << " with a field index of " << fieldInd << "\n"; for (int i = bufferInd; i <= newEndBufferInd; i++) { - cout << "getting handle for " << i << "\n"; DataHandle x = gpuBuffer.getDataHandle(i); fd.valArrays[fieldDataInd] = x.d_data; - fd.times[fieldDataInd] = x.times; fieldDataInd++; } fd.fieldInd = fieldInd; @@ -51,7 +54,6 @@ FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) { } FieldData GPUBufferHandler::nextFieldData() { - DataHandle x = gpuBuffer.getDataHandle(bufferInd); size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile; size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles; @@ -62,25 +64,17 @@ FieldData GPUBufferHandler::nextFieldData() { size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile; size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles; - // size_t newBufferInd = (bufferInd + 1) % GPUBuffer::numBufferedFiles; - // size_t newFieldInd = (fieldInd + ((bufferInd + 1) / 4)) % x.timeSize; - - // size_t endBufferInd = (bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) % GPUBuffer::numBufferedFiles; - // size_t endFieldInd = (fieldInd + ((bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) / 4)) % x.timeSize; - - // size_t newEndBufferInd = (endBufferInd + 1) % GPUBuffer::numBufferedFiles; - // size_t newEndFieldInd = (endFieldInd + ((endBufferInd + 1) / 4)) % x.timeSize; - if(firstTimeStep) { firstTimeStep = false; - return setupField(endFieldInd); + return setupField(endBufferInd); } + fieldInd = newFieldInd; + if (newBufferInd != bufferInd) { fileInd++; gpuBuffer.loadFile(fileInd, bufferInd); bufferInd = newBufferInd; - fieldInd = newFieldInd; } if (newEndBufferInd != endBufferInd) { @@ -94,5 +88,6 @@ GPUBufferHandler::~GPUBufferHandler() { cudaFree(fmd->lons); cudaFree(fmd->lats); cudaFree(fmd->levs); + cudaFree(valArrays); cudaFree(fmd); } \ No newline at end of file diff --git a/src/hurricanedata/gpubufferhandler.h b/src/hurricanedata/gpubufferhandler.h index 9210eea..2f1cfcc 100644 --- a/src/hurricanedata/gpubufferhandler.h +++ b/src/hurricanedata/gpubufferhandler.h @@ -18,6 +18,8 @@ public: static constexpr size_t numberOfTimeStepsPerField = 2; // TODO: Move this to fielddata + static void freeFieldData(); + private: FieldData setupField(size_t endBufferInd); GPUBuffer& gpuBuffer; @@ -25,6 +27,8 @@ private: size_t bufferInd; size_t fieldInd; bool firstTimeStep = true; + + float **valArrays; }; #endif //GPUBUFFERHANDLER_H diff --git a/src/main.cu b/src/main.cu index 7eeadf9..b7da2a0 100644 --- a/src/main.cu +++ b/src/main.cu @@ -16,7 +16,7 @@ __global__ void getSingleValue(float *ans, const FieldMetadata &fmd, FieldData f } int main() { - std::string path = "data"; + std::string path = "data/atmosphere_MERRA-wind-speed[179253532]"; std::string variable = "T"; @@ -31,24 +31,23 @@ int main() { GPUBufferHandler bufferHandler(buffer); - std::cout << "created buffer handler\n"; - - auto fd = bufferHandler.nextFieldData(); - - std::cout << "aquired field\n"; - float *ptr_test_read; cudaMallocManaged(&ptr_test_read, sizeof(float)); - getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd); + std::cout << "created buffer handler\n"; + for (int i = 0; i < 10; i++) { + FieldData fd = bufferHandler.nextFieldData(); - cudaDeviceSynchronize(); + getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd); - std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n"; + cudaDeviceSynchronize(); - // TODO: Write a example loop using buffering and measure it. + std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n"; + } + + // TODO: Write an example loop using buffering and measure it. - cudaFree(fd.valArrays[0]); // TODO: Free data properly in FieldData (maybe make an iterator) + // TODO: Free data properly in FieldData (maybe make an iterator) cudaFree(ptr_test_read); return 0; }