Tried using DVR on actual data

This commit is contained in:
Martin Opat 2024-12-29 21:23:58 +01:00
parent f0c6141f2c
commit daf19578e7
2 changed files with 315 additions and 258 deletions

View File

@@ -1,276 +1,333 @@
#include <iostream>
#include <fstream>
#include <cmath>
#include <cuda_runtime.h>
#include <vector>
#include <algorithm>
#include "hurricanedata/datareader.h"
#include "linalg/linalg.h"
#include "objs/sphere.h"
#include "img/handler.h"

// TODO: Eventually, export this into a better place (i.e., such that we do not
// have to recompile every time we change a parameter)
// Volume dimensions match the trimmed hurricane dataset grid.
static const int VOLUME_WIDTH  = 49;
static const int VOLUME_HEIGHT = 51;
static const int VOLUME_DEPTH  = 42;
static const int IMAGE_WIDTH   = 2560;
static const int IMAGE_HEIGHT  = 1440;
// TODO: Right now uses simple variance, consider using something more advanced
// (e.g., some commonly-used noise map)
static const int SAMPLES_PER_PIXEL = 8;

// Volume dimensions in constant memory so the kernel can read them cheaply;
// set from host via cudaMemcpyToSymbol before launching.
__constant__ int d_volumeWidth;
__constant__ int d_volumeHeight;
__constant__ int d_volumeDepth;

// Device pointer to the volume data. TODO: Adjust according to how data is loaded
static float* d_volume = nullptr;
// Phong illumination for one sample point.
//
// normal    - surface normal at the sample (expected unit length)
// lightDir  - direction from the sample toward the light (expected unit length)
// viewDir   - direction from the sample toward the camera (expected unit length)
// baseColor - material colour the ambient/diffuse terms are scaled from
//
// Returns ambient + diffuse + specular; the result is NOT clamped, the caller
// clamps when converting to 8-bit.
__device__ Vec3 phongShading(const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& baseColor) {
    double ambientStrength  = 0.3;
    double diffuseStrength  = 0.8;
    double specularStrength = 0.5;
    int shininess = 32;

    Vec3 ambient = baseColor * ambientStrength;

    // Lambertian term, clamped so back-facing light contributes nothing.
    double diff = fmax(normal.dot(lightDir), 0.0);
    Vec3 diffuse = baseColor * (diffuseStrength * diff);

    // Reflect lightDir about the normal for the specular highlight.
    Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize();
    double spec = pow(fmax(viewDir.dot(reflectDir), 0.0), shininess);
    Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec);

    return ambient + diffuse + specular;
}
// Raycast + Phong direct volume rendering.
// One thread per pixel; each thread casts SAMPLES_PER_PIXEL rays and
// composites front-to-back through the volume, which is assumed to occupy
// the unit cube [0,1]^3. Launch with a 2D grid covering imageWidth x imageHeight.
// Reads d_volumeWidth/Height/Depth from constant memory (set by the host).
__global__ void raycastKernel(float* volumeData, unsigned char* framebuffer, int imageWidth, int imageHeight, Vec3 cameraPos, Vec3 cameraDir, Vec3 cameraUp, float fov, float stepSize, Vec3 lightPos) {
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    if (px >= imageWidth || py >= imageHeight) return;

    float accumR = 0.0f;
    float accumG = 0.0f;
    float accumB = 0.0f;

    // Multiple samples per pixel
    for (int s = 0; s < SAMPLES_PER_PIXEL; s++) {
        // Map pixel centre to [-1, 1]
        float u = ((px + 0.5f) / imageWidth ) * 2.0f - 1.0f;
        float v = ((py + 0.5f) / imageHeight) * 2.0f - 1.0f;

        // TODO: Move this (and all similar transformation code) to its own separate file
        float tanHalfFov = tanf(fov * 0.5f);
        u *= tanHalfFov;
        v *= tanHalfFov;

        // Build an orthonormal camera basis and the per-pixel ray direction.
        Vec3 cameraRight = (cameraDir.cross(cameraUp)).normalize();
        cameraUp = (cameraRight.cross(cameraDir)).normalize();
        Vec3 rayDir = (cameraDir + cameraRight*u + cameraUp*v).normalize();

        // Intersect the ray with the unit box [0,1]^3 (slab method).
        // TODO: Think about whether this is the best way to do this
        float tNear = 0.f; // TODO: These are also linear transforms, so move away
        float tFar = 1e6f;
        auto intersectAxis = [&](float start, float dirVal) {
            if (fabsf(dirVal) < 1e-10f) { // TODO: Add a constant - epsilon
                // Ray parallel to this slab: miss entirely if origin is outside it.
                if (start < 0.f || start > 1.f) {
                    tNear = 1e9f;
                    tFar = -1e9f;
                }
            } else {
                float t0 = (0.0f - start) / dirVal; // TODO: 0.0 and 1.0 depend on the box size -> move to a constant
                float t1 = (1.0f - start) / dirVal;
                if (t0 > t1) {
                    float tmp = t0;
                    t0 = t1;
                    t1 = tmp;
                }
                if (t0 > tNear) tNear = t0;
                if (t1 < tFar ) tFar = t1;
            }
        };
        intersectAxis(cameraPos.x, rayDir.x);
        intersectAxis(cameraPos.y, rayDir.y);
        intersectAxis(cameraPos.z, rayDir.z);
        if (tNear > tFar) continue; // No intersection
        if (tNear < 0.0f) tNear = 0.0f;

        // March the ray, compositing front-to-back until nearly opaque.
        float colorR = 0.f, colorG = 0.f, colorB = 0.f;
        float alphaAccum = 0.f;
        float tCurrent = tNear;
        while (tCurrent < tFar && alphaAccum < 0.65f) { // TODO: Idk what a good accumulation value is
            Vec3 pos = cameraPos + rayDir * tCurrent;

            // Convert world position to nearest volume indices.
            float fx = pos.x * (d_volumeWidth - 1);
            float fy = pos.y * (d_volumeHeight - 1);
            float fz = pos.z * (d_volumeDepth - 1);
            int ix = (int)roundf(fx);
            int iy = (int)roundf(fy);
            int iz = (int)roundf(fz);

            // Sample
            float density = sampleVolumeNearest(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz);

            // Basic transfer function. TODO: Move to a separate file, and then improve
            float alphaSample = density * 0.1f;
            // float alphaSample = 1.0f - expf(-density * 0.1f);
            Vec3 baseColor = Vec3(density, 0.1f*density, 1.f - density); // TODO: Implement a proper transfer function

            // If density ~ 0, skip shading
            if (density > 0.001f) {
                Vec3 grad = computeGradient(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz);
                Vec3 normal = -grad.normalize();
                Vec3 lightDir = (lightPos - pos).normalize();
                Vec3 viewDir = -rayDir.normalize();

                // Apply Phong
                Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor);

                // Compose (front-to-back "over" operator)
                colorR += (1.0f - alphaAccum) * shadedColor.x * alphaSample;
                colorG += (1.0f - alphaAccum) * shadedColor.y * alphaSample;
                colorB += (1.0f - alphaAccum) * shadedColor.z * alphaSample;
                alphaAccum += (1.0f - alphaAccum) * alphaSample;
            }
            tCurrent += stepSize;
        }
        accumR += colorR;
        accumG += colorG;
        accumB += colorB;
    }

    // Average samples
    accumR /= (float)SAMPLES_PER_PIXEL;
    accumG /= (float)SAMPLES_PER_PIXEL;
    accumB /= (float)SAMPLES_PER_PIXEL;

    // Final colour: clamp to [0,1] and scale to 8-bit RGB.
    int fbIndex = (py * imageWidth + px) * 3;
    framebuffer[fbIndex + 0] = (unsigned char)(fminf(accumR, 1.f) * 255);
    framebuffer[fbIndex + 1] = (unsigned char)(fminf(accumG, 1.f) * 255);
    framebuffer[fbIndex + 2] = (unsigned char)(fminf(accumB, 1.f) * 255);
}
// Loads one timestep of the temperature field ("T") from the trimmed dataset
// into temperatureData, resizing the vector to the file's sample count.
// NOTE(review): file index is hard-coded; TODO make it a parameter.
void getTemperature(std::vector<float>& temperatureData) {
    std::string path = "data/trimmed";
    std::string variable = "T";
    DataReader dataReader(path, variable);
    int idx = 5;
    size_t dataLength = dataReader.fileLength(idx);
    temperatureData.resize(dataLength);
    dataReader.loadFile(temperatureData.data(), idx);
}
// Loads one timestep of the horizontal wind components ("U", "V") and fills
// speedData with the per-sample wind speed sqrt(u^2 + v^2).
// NOTE(review): assumes the U and V files at the same index contain the same
// number of samples — confirm against DataReader's contract.
void getSpeed(std::vector<float>& speedData) {
    std::string path = "data/trimmed";
    std::string varU = "U";
    std::string varV = "V";
    DataReader dataReaderU(path, varU);
    DataReader dataReaderV(path, varV);
    int idx = 50; // hard-coded file index; TODO make this a parameter
    size_t dataLength = dataReaderU.fileLength(idx);
    speedData.resize(dataLength);
    std::vector<float> uData(dataLength);
    std::vector<float> vData(dataLength);
    dataReaderU.loadFile(uData.data(), idx);
    dataReaderV.loadFile(vData.data(), idx);
    // size_t index avoids the signed/unsigned comparison of the original
    // `int i < size_t dataLength`; std::hypot avoids intermediate
    // overflow/underflow compared to sqrt(u*u + v*v).
    for (size_t i = 0; i < dataLength; i++) {
        speedData[i] = std::hypot(uData[i], vData[i]);
    }
}
// Entry point: loads a wind-speed volume, clears missing-value sentinels,
// min-max normalizes it to [0,1], renders it with the DVR raycasting kernel,
// and writes the image to output.ppm.
int main(int argc, char** argv) {
    std::vector<float> data;
    // getTemperature(data);
    getSpeed(data);

    const size_t volumeCount =
        (size_t)VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH;
    // Guard the copy loop below: the original indexed data[] without checking
    // its size, which reads out of bounds when the file is smaller than the
    // configured volume.
    if (data.size() < volumeCount) {
        std::cerr << "Loaded " << data.size() << " samples but the volume needs "
                  << volumeCount << std::endl;
        return 1;
    }

    float* hostVolume = new float[volumeCount];
    for (size_t i = 0; i < volumeCount; i++) {
        // Discard values above a large threshold (supposedly missing-value
        // sentinels in the dataset).
        hostVolume[i] = (data[i] > 1000.0f) ? 0.0f : data[i];
    }

    // Min-max normalization to [0, 1]; skip when the volume is constant to
    // avoid a division by zero.
    float minVal = *std::min_element(hostVolume, hostVolume + volumeCount);
    float maxVal = *std::max_element(hostVolume, hostVolume + volumeCount);
    float range = maxVal - minVal;
    if (range > 0.0f) {
        for (size_t i = 0; i < volumeCount; i++) {
            hostVolume[i] = (hostVolume[i] - minVal) / range;
        }
    }

    // Allocate + copy data to GPU
    size_t volumeSize = sizeof(float) * volumeCount;
    cudaMalloc((void**)&d_volume, volumeSize);
    cudaMemcpy(d_volume, hostVolume, volumeSize, cudaMemcpyHostToDevice);

    int w = VOLUME_WIDTH, h = VOLUME_HEIGHT, d = VOLUME_DEPTH;
    cudaMemcpyToSymbol(d_volumeWidth, &w, sizeof(int));
    cudaMemcpyToSymbol(d_volumeHeight, &h, sizeof(int));
    cudaMemcpyToSymbol(d_volumeDepth, &d, sizeof(int));

    // Allocate framebuffer (RGB, 8 bits per channel)
    unsigned char* d_framebuffer;
    size_t fbSize = IMAGE_WIDTH * IMAGE_HEIGHT * 3 * sizeof(unsigned char);
    cudaMalloc((void**)&d_framebuffer, fbSize);
    cudaMemset(d_framebuffer, 0, fbSize);

    // Camera and Light
    Vec3 cameraPos(-0.7, -1.0, -2.0);
    Vec3 cameraDir(0.4, 0.6, 1.0);
    Vec3 cameraUp(0.0, 1.0, 0.0);
    float fov = 60.0f * (M_PI / 180.0f);
    float stepSize = 0.002f;
    Vec3 lightPos(1.5, 2.0, -1.0);

    // Launch kernel
    dim3 blockSize(16, 16);
    dim3 gridSize((IMAGE_WIDTH + blockSize.x - 1)/blockSize.x,
                  (IMAGE_HEIGHT + blockSize.y - 1)/blockSize.y);
    raycastKernel<<<gridSize, blockSize>>>(
        d_volume,
        d_framebuffer,
        IMAGE_WIDTH,
        IMAGE_HEIGHT,
        cameraPos,
        cameraDir.normalize(),
        cameraUp.normalize(),
        fov,
        stepSize,
        lightPos
    );
    // Launch-config errors are only visible via cudaGetLastError; async
    // execution errors surface at the synchronize.
    cudaError_t launchErr = cudaGetLastError();
    cudaDeviceSynchronize();
    if (launchErr != cudaSuccess) {
        std::cerr << "Kernel launch failed: "
                  << cudaGetErrorString(launchErr) << std::endl;
    }

    // Copy framebuffer back to CPU
    unsigned char* hostFramebuffer = new unsigned char[IMAGE_WIDTH * IMAGE_HEIGHT * 3];
    cudaMemcpy(hostFramebuffer, d_framebuffer, fbSize, cudaMemcpyDeviceToHost);

    // Export image
    saveImage("output.ppm", hostFramebuffer, IMAGE_WIDTH, IMAGE_HEIGHT);

    // Cleanup
    delete[] hostVolume;
    delete[] hostFramebuffer;
    cudaFree(d_volume);
    cudaFree(d_framebuffer);

    std::cout << "Phong-DVR rendering done. Image saved to output.ppm" << std::endl;
    return 0;
}
// // gpu-buffer-handler branch main
// #include "hurricanedata/fielddata.h"
// #include "hurricanedata/gpubufferhandler.h"
// #include "hurricanedata/datareader.h"
// #include "hurricanedata/gpubuffer.h"
// #include <cuda_runtime.h>
// #include <device_launch_parameters.h>
// #include <iostream>
// #include <cmath>
// #include <memory>
// #include <iomanip>
// __global__ void middleOfTwoValues(float *ans, const FieldMetadata &fmd, FieldData fd) {
// float xi = getVal(fmd, fd, 0, 20, 100, 100);
// float yi = getVal(fmd, fd, 1, 20, 100, 100);
// *ans = (xi+yi)/2;
// }
// int main() {
// // std::string path = "data/atmosphere_MERRA-wind-speed[179253532]";
// std::string path = "data/trimmed";
// std::string variable = "T";
// DataReader dataReader{path, variable};
// std::cout << "created datareader\n";
// GPUBuffer buffer (dataReader);
// std::cout << "created buffer\n";
// GPUBufferHandler bufferHandler(buffer);
// float *ptr_test_read;
// cudaMallocManaged(&ptr_test_read, sizeof(float));
// std::cout << "created buffer handler\n";
// for (int i = 0; i < 10; i++) {
// FieldData fd = bufferHandler.nextFieldData();
// middleOfTwoValues<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
// raycastKernel<<<gridSize, blockSize>>>(
// d_volume,
// d_framebuffer,
// IMAGE_WIDTH,
// IMAGE_HEIGHT,
// cameraPos,
// cameraDir.normalize(),
// cameraUp.normalize(),
// fov,
// stepSize,
// lightPos
// );
// cudaDeviceSynchronize(); // cudaDeviceSynchronize();
// std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
// }
// // Copy framebuffer back to CPU // // TODO: measure data transfer time in this example code.
// unsigned char* hostFramebuffer = new unsigned char[IMAGE_WIDTH * IMAGE_HEIGHT * 3]; // cudaFree(ptr_test_read);
// cudaMemcpy(hostFramebuffer, d_framebuffer, fbSize, cudaMemcpyDeviceToHost);
// // Export image
// saveImage("output.ppm", hostFramebuffer, IMAGE_WIDTH, IMAGE_HEIGHT);
// // Cleanup
// delete[] hostVolume;
// delete[] hostFramebuffer;
// cudaFree(d_volume);
// cudaFree(d_framebuffer);
// std::cout << "Phong-DVR rendering done. Image saved to output.ppm" << std::endl;
// return 0; // return 0;
// } // }
// gpu-buffer-handler branch main
#include "hurricanedata/fielddata.h"
#include "hurricanedata/gpubufferhandler.h"
#include "hurricanedata/datareader.h"
#include "hurricanedata/gpubuffer.h"
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <iostream>
#include <cmath>
#include <memory>
#include <iomanip>
// Reads two field samples that differ only in their first index (0 vs 1) at
// a fixed position, and writes their average through ans.
// Intended for a <<<1, 1>>> smoke-test launch.
__global__ void middleOfTwoValues(float *ans, const FieldMetadata &fmd, FieldData fd) {
    float first  = getVal(fmd, fd, 0, 20, 100, 100);
    float second = getVal(fmd, fd, 1, 20, 100, 100);
    *ans = (first + second) * 0.5f;
}
// Smoke test for the GPU buffer pipeline: repeatedly fetches the next field
// buffer, averages two samples on the device, and prints the result.
int main() {
    std::string path = "data/atmosphere_MERRA-wind-speed[179253532]";
    std::string variable = "T";
    DataReader dataReader{path, variable};
    std::cout << "created datareader\n";
    GPUBuffer buffer (dataReader);
    std::cout << "created buffer\n";
    GPUBufferHandler bufferHandler(buffer);

    float *ptr_test_read;
    // Managed allocation so both host and device can read the result.
    cudaError_t allocErr = cudaMallocManaged(&ptr_test_read, sizeof(float));
    if (allocErr != cudaSuccess) {
        std::cerr << "cudaMallocManaged failed: "
                  << cudaGetErrorString(allocErr) << "\n";
        return 1;
    }
    std::cout << "created buffer handler\n";

    for (int i = 0; i < 10; i++) {
        FieldData fd = bufferHandler.nextFieldData();
        // NOTE(review): *bufferHandler.fmd is passed by reference into device
        // code — confirm it lives in managed/device-visible memory.
        middleOfTwoValues<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
        // Launch-config errors only surface via cudaGetLastError; the
        // synchronize is also required before reading managed memory on host.
        cudaError_t launchErr = cudaGetLastError();
        cudaDeviceSynchronize();
        if (launchErr != cudaSuccess) {
            std::cerr << "kernel launch failed: "
                      << cudaGetErrorString(launchErr) << "\n";
            break;
        }
        std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
    }

    // TODO: measure data transfer time in this example code.
    cudaFree(ptr_test_read);
    return 0;
}