From 3c8e68ac1d81d3c70d60ed65bd63fb5d0d7d0c18 Mon Sep 17 00:00:00 2001
From: Robin <r.s.b.sachsenweger.ballantyne@gmail.com>
Date: Thu, 26 Dec 2024 17:23:07 +0100
Subject: [PATCH] moved times to metadata, improved freeing, created example

---
 src/hurricanedata/datareader.cu       |  1 -
 src/hurricanedata/fielddata.h         |  5 ++--
 src/hurricanedata/gpubuffer.cu        | 13 ++---------
 src/hurricanedata/gpubuffer.h         |  2 --
 src/hurricanedata/gpubufferhandler.cu | 33 ++++++++++++---------------
 src/hurricanedata/gpubufferhandler.h  |  4 ++++
 src/main.cu                           | 23 +++++++++----------
 7 files changed, 33 insertions(+), 48 deletions(-)
diff --git a/src/hurricanedata/datareader.cu b/src/hurricanedata/datareader.cu
index 50619ba..4534aa4 100644
--- a/src/hurricanedata/datareader.cu
+++ b/src/hurricanedata/datareader.cu
@@ -49,7 +49,6 @@ void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) {
     multimap<string, NcVar> vars = data.getVars();
 
     NcVar var = vars.find(varName)->second;   
-    cout << "var = " << varName << "with size = " << var.getDim(0).getSize() << "\n";
 
     var.getVar(dataOut);
 }
diff --git a/src/hurricanedata/fielddata.h b/src/hurricanedata/fielddata.h
index e3073a3..d7ac532 100644
--- a/src/hurricanedata/fielddata.h
+++ b/src/hurricanedata/fielddata.h
@@ -17,6 +17,8 @@ struct FieldMetadata {
 
     size_t timeSize; // Number of different times
 
+    // times is a managed Unified Memory array of size numberOfTimeStepsPerFile
+    int *times;
     size_t numberOfTimeStepsPerFile;
 };
 
@@ -29,9 +31,6 @@ struct FieldData {
     float **valArrays;
 
     size_t fieldInd;
-
-    // times is a managed Unified Memory array of size (FILESNUM, numberOfTimeStepsPerFile)
-    int **times;
 };
 
 
diff --git a/src/hurricanedata/gpubuffer.cu b/src/hurricanedata/gpubuffer.cu
index c0392ea..7edac38 100644
--- a/src/hurricanedata/gpubuffer.cu
+++ b/src/hurricanedata/gpubuffer.cu
@@ -18,8 +18,6 @@ struct File {
     size_t size;
     float *h_data; // host data
     float *d_data; // device data
-    int *times;
-    size_t timeSize;
 };
 
 struct LoadFileJob {
@@ -143,10 +141,8 @@ GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) {
             lock_guard<mutex> lk(file.m);
             cudaMallocHost(&file.h_data, sizeof(float)*size);
             cudaMalloc(&file.d_data, sizeof(float)*size);
-            cudaMallocManaged(&file.times, sizeof(int)*sizeTime);
             file.size = size;
             file.valid = false;
-            file.timeSize = sizeTime;
         }
         // loadFile(i, i);
     }
@@ -165,20 +161,17 @@ GPUBuffer::impl::~impl() {
     for (size_t i = 0; i < numBufferedFiles; i++) {
         File &file = buffer[i];
         cudaFree(file.d_data);
-        cudaFree(file.h_data);
-        cudaFree(file.times);
+        cudaFreeHost(file.h_data);
     }
     cudaStreamDestroy(iostream);
 }
 
 void GPUBuffer::impl::loadFile(LoadFileJob job) {
     File &file = buffer[job.bufferIndex];
-
     {
         lock_guard<mutex> lk(file.m);
         assert(!file.valid);
-        dataReader.loadFile<int>(file.times, job.fileIndex, "time"); // TODO: Times dont store anything useful :(
-        cout << "times[1] (inside inside) " << file.times[1]  << " for file with fileindex = " << job.fileIndex <<  "\n";
+        cout << "loading file with index " << job.fileIndex << "\n";
         dataReader.loadFile<float>(file.h_data, job.fileIndex);
         cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream);
         cudaStreamSynchronize(iostream);
@@ -278,8 +271,6 @@ DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) {
 
     DataHandle dh = {
         .d_data = file.d_data,
-        .times = file.times,
-        .timeSize = file.timeSize,
         .size = file.size
     };
     return dh;
diff --git a/src/hurricanedata/gpubuffer.h b/src/hurricanedata/gpubuffer.h
index 46d031d..aaf1768 100644
--- a/src/hurricanedata/gpubuffer.h
+++ b/src/hurricanedata/gpubuffer.h
@@ -9,8 +9,6 @@
 
 struct DataHandle {
     float *d_data; // Device memory
-    int* times; // Uniform memory
-    size_t timeSize;
     size_t size;
 };
 
diff --git a/src/hurricanedata/gpubufferhandler.cu b/src/hurricanedata/gpubufferhandler.cu
index 9eb7eb7..5ca09e0 100644
--- a/src/hurricanedata/gpubufferhandler.cu
+++ b/src/hurricanedata/gpubufferhandler.cu
@@ -21,28 +21,31 @@ gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) {
     fmd->depthSize = depthSize;
     fmd->levs = levs;
 
-
     for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) {
         gpuBuffer.loadFile(fileInd,fileInd);
         fileInd++;
     }
 
-    fmd->numberOfTimeStepsPerFile = 4; // TODO: Maybe find a better way to do this.
     fmd->timeSize = GPUBufferHandler::numberOfTimeStepsPerField;
+
+    // Take the time axis straight from getAxis. No separate allocation for
+    // fmd->times: it would be orphaned when the pointer is assigned below.
+    auto [numberOfTimeStepsPerFile, times] = gpuBuffer.getAxis<int>(0, "time");
+    fmd->numberOfTimeStepsPerFile = numberOfTimeStepsPerFile;
+    fmd->times = times;
+
+    cudaMallocManaged(&valArrays, sizeof(float *)*FieldData::FILESNUM);
 }
 
 FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
     
     FieldData fd;
-    cudaMallocManaged(&fd.valArrays, sizeof(sizeof(float *)*FieldData::FILESNUM));
-    cudaMallocManaged(&fd.times, sizeof(sizeof(int *)*FieldData::FILESNUM));
     size_t fieldDataInd = 0;
-    cout << "getting field from files " << bufferInd  << " to " << newEndBufferInd << "\n";
+    fd.valArrays = valArrays;
+    cout << "getting field from files " << bufferInd  << " to " << newEndBufferInd << " with a field index of " << fieldInd << "\n";
     for (int i = bufferInd; i <= newEndBufferInd; i++) {
-        cout << "getting handle for " << i << "\n";
         DataHandle x = gpuBuffer.getDataHandle(i);
         fd.valArrays[fieldDataInd] = x.d_data;
-        fd.times[fieldDataInd] = x.times;
         fieldDataInd++;
     }
     fd.fieldInd = fieldInd;
@@ -51,7 +54,6 @@ FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
 }
 
 FieldData GPUBufferHandler::nextFieldData() {
-    
     DataHandle x = gpuBuffer.getDataHandle(bufferInd);
     size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile;
     size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
@@ -62,25 +64,17 @@ FieldData GPUBufferHandler::nextFieldData() {
     size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile;
     size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
 
-    // size_t newBufferInd = (bufferInd + 1) % GPUBuffer::numBufferedFiles;
-    // size_t newFieldInd = (fieldInd + ((bufferInd + 1) / 4)) % x.timeSize;
-
-    // size_t endBufferInd = (bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) % GPUBuffer::numBufferedFiles;
-    // size_t endFieldInd = (fieldInd + ((bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) / 4)) % x.timeSize;
-
-    // size_t newEndBufferInd = (endBufferInd + 1) % GPUBuffer::numBufferedFiles;
-    // size_t newEndFieldInd = (endFieldInd + ((endBufferInd + 1) / 4)) % x.timeSize;
-
     if(firstTimeStep) {
         firstTimeStep = false;
-        return setupField(endFieldInd);
+        return setupField(endBufferInd);
     } 
 
+    fieldInd = newFieldInd;
+
     if (newBufferInd != bufferInd) {
         fileInd++;
         gpuBuffer.loadFile(fileInd, bufferInd);
         bufferInd = newBufferInd;
-        fieldInd = newFieldInd;
     }
 
     if (newEndBufferInd != endBufferInd) {
@@ -94,5 +88,6 @@ GPUBufferHandler::~GPUBufferHandler() {
     cudaFree(fmd->lons);
     cudaFree(fmd->lats);
     cudaFree(fmd->levs);
+    cudaFree(valArrays);
     cudaFree(fmd);
 }
\ No newline at end of file
diff --git a/src/hurricanedata/gpubufferhandler.h b/src/hurricanedata/gpubufferhandler.h
index 9210eea..2f1cfcc 100644
--- a/src/hurricanedata/gpubufferhandler.h
+++ b/src/hurricanedata/gpubufferhandler.h
@@ -18,6 +18,8 @@ public:
 
     static constexpr size_t numberOfTimeStepsPerField = 2; // TODO: Move this to fielddata
 
+    static void freeFieldData();
+
 private:
     FieldData setupField(size_t endBufferInd);
     GPUBuffer& gpuBuffer;
@@ -25,6 +27,8 @@ private:
     size_t bufferInd;
     size_t fieldInd;
     bool firstTimeStep = true;
+
+    float **valArrays;
 };
 
 #endif //GPUBUFFERHANDLER_H
diff --git a/src/main.cu b/src/main.cu
index 7eeadf9..b7da2a0 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -16,7 +16,7 @@ __global__ void getSingleValue(float *ans, const FieldMetadata &fmd, FieldData f
 }
 
 int main() {
-    std::string path = "data";
+    std::string path = "data/atmosphere_MERRA-wind-speed[179253532]";
 
     std::string variable = "T";
 
@@ -31,24 +31,23 @@ int main() {
 
     GPUBufferHandler bufferHandler(buffer);
 
-    std::cout << "created buffer handler\n";
-
-    auto fd = bufferHandler.nextFieldData();
-
-    std::cout << "aquired field\n";
-
     float *ptr_test_read;
     cudaMallocManaged(&ptr_test_read, sizeof(float));
 
-    getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
+    std::cout << "created buffer handler\n";
+    for (int i = 0; i < 10; i++) {
+        FieldData fd = bufferHandler.nextFieldData();
 
-    cudaDeviceSynchronize();
+        getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
 
-    std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
+        cudaDeviceSynchronize();
 
-    // TODO: Write a example loop using buffering and measure it.
+        std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
+    }
+    
+    // TODO: Write an example loop using buffering and measure it.
 
-    cudaFree(fd.valArrays[0]); // TODO: Free data properly in FieldData (maybe make an iterator)
+    // TODO: Free data properly in FieldData (maybe make an iterator)
     cudaFree(ptr_test_read);
     return 0;
 }