Merge pull request #3 from MartinOpat/ray-casting

Ray casting
2025-06-07 02:13:10 +02:00 · 2025-01-11 15:15:08 +01:00
parent 6f22230ead 340efd641d
commit 02cf845ef0
14 changed files with 411 additions and 116 deletions
--- a/src/consts.cu
+++ b/src/consts.cu
@@ -1,18 +1,58 @@
 #include "consts.h"
 // ----------------------- Colour mapping -----------------------
 __constant__ ColorStop d_stopsPythonLike[5];
 __constant__ ColorStop d_stopsGrayscale[2];
 __constant__ ColorStop d_stopsBluePurleRed[3];
 const ColorStop h_stopsPythonLike[] = {
        { 0.0f, Color3::init(0.2298057f, 0.29871797f, 0.75368315f) }, // Dark Blue
        { 0.25f, Color3::init(0.23437708f, 0.30554173f, 0.75967953f) }, // Mid Blue
        { 0.5f, Color3::init(0.27582712f, 0.36671692f, 0.81255294f) }, // White
        { 0.75f, Color3::init(0.79606387f, 0.84869321f, 0.93347147f) }, // Light Orange
        { 1.0f, Color3::init(0.70567316f, 0.01555616f, 0.15023281f) }  // Red
 };
 const ColorStop h_stopsGrayscale[] = {
        { 0.0f, Color3::init(0.0f, 0.0f, 0.0f) },  // No colour
        { 1.0f, Color3::init(1.0f, 1.0f, 1.0f) }   // White
 };
 const ColorStop h_stopsBluePurleRed[] = {
        { 0.0f, Color3::init(0.0f, 0.0f, 1.0f) },   // deep blue
        { 0.5f, Color3::init(0.5f, 0.0f, 0.5f) },   // purple
        { 1.0f, Color3::init(1.0f, 0.0f, 0.0f) }    // deep red
 };
 // ----------------------- Camera and Light -----------------------
 __device__ Point3 d_cameraPos;
 __device__ Vec3 d_cameraDir;
 __device__ Vec3 d_cameraUp;
 __device__ Point3 d_lightPos;
 __device__ Color3 d_backgroundColor;
-Point3 h_cameraPos = Point3::init(-0.7, -1.0, -2.0);
+// Point3 h_cameraPos = Point3::init(300.0f, 200.0f, -700.0f);  // Camera for full data set
-Vec3 h_cameraDir = Vec3::init(0.4, 0.6, 1.0).normalize();
+Point3 h_cameraPos = Point3::init(50.0f, -50.0f, -75.0f);  // Camera for partially trimmed data set
 Vec3 center = Vec3::init((float)VOLUME_WIDTH/2.0f, (float)VOLUME_HEIGHT/2.0f, (float)VOLUME_DEPTH/2.0f);
 Vec3 h_cameraDir = (center - h_cameraPos).normalize();
 Vec3 h_cameraUp = Vec3::init(0.0, 1.0, 0.0).normalize();
 Point3 h_lightPos = Point3::init(1.5, 2.0, -1.0);
 Color3 h_backgroundColor = Color3::init(0.1f, 0.1f, 0.1f);
 // Copy the above values to the device
 void copyConstantsToDevice() {
    // ----------------------- Colour mapping -----------------------
    cudaMemcpyToSymbol(d_stopsPythonLike, h_stopsPythonLike, sizeof(h_stopsPythonLike));
    cudaMemcpyToSymbol(d_stopsGrayscale, h_stopsGrayscale, sizeof(h_stopsGrayscale));
    cudaMemcpyToSymbol(d_stopsBluePurleRed, h_stopsBluePurleRed, sizeof(h_stopsBluePurleRed));
    // ----------------------- Camera and Light -----------------------
    cudaMemcpyToSymbol(d_cameraPos, &h_cameraPos, sizeof(Point3));
    cudaMemcpyToSymbol(d_cameraDir, &h_cameraDir, sizeof(Vec3));
    cudaMemcpyToSymbol(d_cameraUp, &h_cameraUp, sizeof(Vec3));
    cudaMemcpyToSymbol(d_lightPos, &h_lightPos, sizeof(Point3));
    cudaMemcpyToSymbol(d_backgroundColor, &h_backgroundColor, sizeof(Color3));
 }
--- a/src/consts.h
+++ b/src/consts.h
@@ -5,27 +5,37 @@
 #include <cmath>
 // --------------------------- Basic Constants ---------------------------
-const int VOLUME_WIDTH  = 49;
+// const int VOLUME_WIDTH  = 576;  // lon
-const int VOLUME_HEIGHT = 51;
+const int VOLUME_WIDTH  = 97;  // lon
-const int VOLUME_DEPTH  = 42;
+// const int VOLUME_HEIGHT = 361;  // lat
 const int VOLUME_HEIGHT = 71;  // lat
 const int VOLUME_DEPTH  = 42;  // lev
-const int IMAGE_WIDTH   = 800;
+const int IMAGE_WIDTH   = 1600;
-const int IMAGE_HEIGHT  = 600;
+const int IMAGE_HEIGHT  = 1200;
 const double epsilon = 1e-10f;
 const double infty   = 1e15f;  // This value is used to represent missing values in data
 // --------------------------- Dataset Constants ---------------------------
 const float MIN_TEMP = 210.0f;
 const float MAX_TEMP = 240.0f;
 const float MIN_SPEED = 0.0F;
 const float MAX_SPEED = 14.0f;
 // --------------------------- Raycasting Constants ---------------------------
-const int SAMPLES_PER_PIXEL = 8;  // TODO: Right now uses simple variance, consider using something more advanced (e.g., some commonly-used noise map)
+const int SAMPLES_PER_PIXEL = 4;
-const float alphaAcumLimit = 0.65f;   // TODO: Idk what a good accumulation value is
+const float alphaAcumLimit = 0.4f;   // TODO: Atm, this only works with sigmoid
 const float minAllowedDensity = 0.001f;
-const float stepSize = 0.002f;
+const float stepSize = 0.02f;
 // --------------------------- Illumination Constants ---------------------------
 // Shading consts
 const double ambientStrength  = 0.3;
 const double diffuseStrength  = 0.8;
 const double specularStrength = 0.5;
@@ -38,6 +48,22 @@ extern __device__ Vec3 d_cameraDir;
 extern __device__ Vec3 d_cameraUp;
 extern __device__ Point3 d_lightPos;
 // Background color
 extern __device__ Color3 d_backgroundColor;
 // --------------------------- Transfer Function Constants ---------------------------
 struct ColorStop {
    float pos;       // in [0,1]  
    Color3 color;
 };
 const int lenStopsPythonLike = 5;
 const int lenStopsGrayscale = 2;
 const int lenStopsBluePurpleRed = 3;
 extern __constant__ ColorStop d_stopsPythonLike[5];
 extern __constant__ ColorStop d_stopsGrayscale[2];
 extern __constant__ ColorStop d_stopsBluePurleRed[3];
 // --------------------------- Functions for handling external constants ---------------------------
 void copyConstantsToDevice();
--- a/src/gui/MainWindow.cpp
+++ b/src/gui/MainWindow.cpp
@@ -6,6 +6,17 @@
 #include "Shader.h"
 // TODO: Delete
 void saveImage2(const char* filename, unsigned char* framebuffer, int width, int height) {  // TODO: Figure out a better way to do this
    std::ofstream imageFile(filename, std::ios::out | std::ios::binary);
    imageFile << "P6\n" << width << " " << height << "\n255\n";
    for (int i = 0; i < width * height * 3; i++) {
        imageFile << framebuffer[i];
    }
    imageFile.close();
 }
 Window::Window(unsigned int w, unsigned int h) {
  this->w = w;
  this->h = h;
@@ -56,6 +67,14 @@ int Window::init(float* data) {
  while (!glfwWindowShouldClose(window)) {
    Window::tick();
  }
  // TODO: Remove this, this was just for ray-casting debug
  // Window::tick();
  // Window::tick();
  // // Save the image
  // unsigned char* pixels = new unsigned char[this->w * this->h * 3];
  // glReadPixels(0, 0, this->w, this->h, GL_RGB, GL_UNSIGNED_BYTE, pixels);
  // saveImage2("output.ppm", pixels, this->w, this->h);
  // delete[] pixels;
  Window::free(data);
  return 0;
--- a/src/illumination/FrameBuffer.cu
+++ b/src/illumination/FrameBuffer.cu
@@ -4,9 +4,9 @@
 __host__ FrameBuffer::FrameBuffer(unsigned int w, unsigned int h) : w(w), h(h) {}
-__device__ void FrameBuffer::writePixel(int x, int y, float r, float g, float b) {
+__device__ void FrameBuffer::writePixel(int x, int y, float r, float g, float b, float a) {
  int i = y * this->w + x;
  // the opengl buffer uses BGRA format; dunno why
-	this->buffer[i] = packUnorm4x8(b, g, r, 1.0f);
+	this->buffer[i] = packUnorm4x8(b, g, r, a);
 }
--- a/src/illumination/FrameBuffer.h
+++ b/src/illumination/FrameBuffer.h
@@ -14,7 +14,7 @@ public:
  unsigned int h;
  __host__ FrameBuffer(unsigned int w, unsigned int h);
-  __device__ void writePixel(int x, int y, float r, float g, float b);
+  __device__ void writePixel(int x, int y, float r, float g, float b, float a);
 };
 #endif // FRAMEBUFFER_H
--- a/src/illumination/Raycaster.cu
+++ b/src/illumination/Raycaster.cu
@@ -5,10 +5,11 @@
 #include "linalg/linalg.h"
 #include "consts.h"
 #include "transferFunction.h"
 #include "cuda_error.h"
-#include "shading.h"
+
 #include <iostream>
-#include "objs/sphere.h"
+#include <curand_kernel.h>
 // TODO: instead of IMAGEWIDTH and IMAGEHEIGHT this should reflect the windowSize;
@@ -20,14 +21,20 @@ __global__ void raycastKernel(float* volumeData, FrameBuffer framebuffer) {
    float accumR = 0.0f;
    float accumG = 0.0f;
    float accumB = 0.0f;
    float accumA = 1.0f * (float)SAMPLES_PER_PIXEL;
    // Initialize random state for ray scattering
    curandState randState;
    curand_init(1234, px + py * IMAGE_WIDTH, 0, &randState);
    // Multiple samples per pixel
    for (int s = 0; s < SAMPLES_PER_PIXEL; s++) {
        // Map to [-1, 1]
-        float u = ((px + 0.5f) / IMAGE_WIDTH ) * 2.0f - 1.0f;
+        float jitterU = (curand_uniform(&randState) - 0.5f) / IMAGE_WIDTH;
-        float v = ((py + 0.5f) / IMAGE_HEIGHT) * 2.0f - 1.0f;
+        float jitterV = (curand_uniform(&randState) - 0.5f) / IMAGE_HEIGHT;
        float u = ((px + 0.5f + jitterU) / IMAGE_WIDTH ) * 2.0f - 1.0f;
        float v = ((py + 0.5f + jitterV) / IMAGE_HEIGHT) * 2.0f - 1.0f;
        // TODO: Move this (and all similar transformation code) to its own separate file
        float tanHalfFov = tanf(fov * 0.5f);
        u *= tanHalfFov;
        v *= tanHalfFov;
@@ -37,95 +44,100 @@ __global__ void raycastKernel(float* volumeData, FrameBuffer framebuffer) {
        d_cameraUp = (cameraRight.cross(d_cameraDir)).normalize();
        Vec3 rayDir = (d_cameraDir + cameraRight*u + d_cameraUp*v).normalize();
-        // Intersect (for simplicity just a 3D box from 0 to 1 in all dimensions) - TODO: Think about whether this is the best way to do this
+        // Intersect
        float tNear = 0.0f;
        float tFar  = 1e6f;
-        auto intersectAxis = [&](float start, float dirVal) {
+        auto intersectAxis = [&](float start, float dir, float minV, float maxV) {
-            if (fabsf(dirVal) < epsilon) {
+            if (fabsf(dir) < epsilon) {
-                if (start < 0.f || start > 1.f) {
+                // Ray parallel to axis. If outside min..max, no intersection.
-                   tNear = 1e9f;
+                if (start < minV || start > maxV) {
                    tNear = 1e9f;
                    tFar  = -1e9f;
                }
            } else {
-                float t0 = (0.0f - start) / dirVal;
+                float t0 = (minV - start) / dir;
-                float t1 = (1.0f - start) / dirVal;
+                float t1 = (maxV - start) / dir;
-                if (t0>t1) { 
+                if (t0 > t1) {
-                    float tmp=t0; 
+                    float tmp = t0;
-                    t0=t1; 
+                    t0 = t1;
-                    t1=tmp; 
+                    t1 = tmp;
                }
-                if (t0>tNear) tNear = t0;
+                if (t0 > tNear) tNear = t0;
-                if (t1<tFar ) tFar  = t1;
+                if (t1 < tFar ) tFar  = t1;
            }
        };
-        intersectAxis(d_cameraPos.x, rayDir.x);
+        intersectAxis(d_cameraPos.x, rayDir.x, 0.0f, (float)VOLUME_HEIGHT);
-        intersectAxis(d_cameraPos.y, rayDir.y);
+        intersectAxis(d_cameraPos.y, rayDir.y, 0.0f, (float)VOLUME_WIDTH);
-        intersectAxis(d_cameraPos.z, rayDir.z);
+        intersectAxis(d_cameraPos.z, rayDir.z, 0.0f, (float)VOLUME_DEPTH);
-        if (tNear > tFar) continue;  // No intersectionn
+        if (tNear > tFar) {
-        if (tNear < 0.0f) tNear = 0.0f;
+          // No intersection -> Set to brackground color (multiply by SAMPLES_PER_PIXEL because we divide by it later)
          accumR = d_backgroundColor.x * (float)SAMPLES_PER_PIXEL;
          accumG = d_backgroundColor.y * (float)SAMPLES_PER_PIXEL;
          accumB = d_backgroundColor.z * (float)SAMPLES_PER_PIXEL;
          accumA = 1.0f * (float)SAMPLES_PER_PIXEL;
-        float colorR = 0.0f, colorG = 0.0f, colorB = 0.0f;
+        } else {
-        float alphaAccum = 0.0f;
+          if (tNear < 0.0f) tNear = 0.0f;
-        float tCurrent = tNear;
+          float colorR = 0.0f, colorG = 0.0f, colorB = 0.0f;
-        while (tCurrent < tFar && alphaAccum < alphaAcumLimit) {
+          float alphaAccum = 0.0f;
            Point3 pos = d_cameraPos + rayDir * tCurrent;
-            // Convert to volume indices
+          float t = tNear;  // Front to back
-            float fx = pos.x * (VOLUME_WIDTH  - 1);
+          while (t < tFar && alphaAccum < alphaAcumLimit) {
-            float fy = pos.y * (VOLUME_HEIGHT - 1);
+              Point3 pos = d_cameraPos + rayDir * t;
            float fz = pos.z * (VOLUME_DEPTH  - 1);
            int ix = (int)roundf(fx);
            int iy = (int)roundf(fy);
            int iz = (int)roundf(fz);
-            // Sample
+              // Convert to volume indices
-            float density = sampleVolumeNearest(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz);
+              int ix = (int)roundf(pos.x);
              int iy = (int)roundf(pos.y);
              int iz = (int)roundf(pos.z);
-            // Basic transfer function. TODO: Move to a separate file, and then improve
+              // Sample (pick appropriate method based on volume size) TODO: Add a way to pick this in GUI
-            float alphaSample = density * 0.1f;
+              // float density = sampleVolumeNearest(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz);
-            // float alphaSample = 1.0f - expf(-density * 0.1f);
+              float density = sampleVolumeTrilinear(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, pos.x, pos.y, pos.z);
            Color3 baseColor = Color3::init(density, 0.1f*density, 1.f - density);  // TODO: Implement a proper transfer function
-            // If density ~ 0, skip shading
+              // If density ~ 0, skip shading
-            if (density > minAllowedDensity) {
+              if (density > minAllowedDensity) {
                Vec3 grad = computeGradient(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz);
-                Vec3 normal = -grad.normalize();
+                float4 color = transferFunction(density, grad, pos, rayDir);  // This already returns the alpha-weighted color
-                Vec3 lightDir = (d_lightPos - pos).normalize();
+                //Accumulate color, and alpha
-                Vec3 viewDir  = -rayDir.normalize();
+                colorR = (1.0f - alphaAccum) * color.x + colorR;
                colorG = (1.0f - alphaAccum) * color.y + colorG;
                colorB = (1.0f - alphaAccum) * color.z + colorB;
                alphaAccum = (1 - alphaAccum) * color.w + alphaAccum;
-                // Apply Phong
+              }
                Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor);
                // Compose
                colorR     += (1.0f - alphaAccum) * shadedColor.x * alphaSample;
                colorG     += (1.0f - alphaAccum) * shadedColor.y * alphaSample;
                colorB     += (1.0f - alphaAccum) * shadedColor.z * alphaSample;
                alphaAccum += (1.0f - alphaAccum) * alphaSample;
            }
-            tCurrent += stepSize;
+              t += stepSize;
          }
          // Calculate final colour
          accumR += colorR;
          accumG += colorG;
          accumB += colorB;
          accumA += alphaAccum;
          // Blend with background (for transparency)
          float leftover = 1.0 - alphaAccum;
          accumR = accumR + leftover * d_backgroundColor.x;
          accumG = accumG + leftover * d_backgroundColor.y;
          accumB = accumB + leftover * d_backgroundColor.z;
        }
        accumR += colorR;
        accumG += colorG;
        accumB += colorB;
    }
    // Average samples
    accumR /= (float)SAMPLES_PER_PIXEL;
    accumG /= (float)SAMPLES_PER_PIXEL;
    accumB /= (float)SAMPLES_PER_PIXEL;
    accumA /= (float)SAMPLES_PER_PIXEL;
    // Final colour
-    framebuffer.writePixel(px, py, accumR, accumG, accumB);
+    framebuffer.writePixel(px, py, accumR, accumG, accumB, accumA);
    // int fbIndex = (py * IMAGE_WIDTH + px) * 3;
    // framebuffer[fbIndex + 0] = (unsigned char)(fminf(accumR, 1.f) * 255);
    // framebuffer[fbIndex + 1] = (unsigned char)(fminf(accumG, 1.f) * 255);
    // framebuffer[fbIndex + 2] = (unsigned char)(fminf(accumB, 1.f) * 255);
 }
--- a/src/illumination/shading.cu
+++ b/src/illumination/shading.cu
@@ -1,6 +1,5 @@
 #include "shading.h"
 // TODO: Consider wrapping this in a class (?)
 __device__ Vec3 phongShading(const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& baseColor) {
    Vec3 ambient = baseColor * ambientStrength;
    double diff = fmax(normal.dot(lightDir), 0.0);
--- a/src/illumination/transferFunction.cu
+++ b/src/illumination/transferFunction.cu
@@ -0,0 +1,133 @@
 #include "transferFunction.h"
 // Samples the voxel nearest to the given coordinates.
 __device__ float sampleVolumeNearest(float* volumeData, const int volW, const int volH, const int volD, int vx, int vy, int vz) {
    // x <-> height, y <-> width, z <-> depth <--- So far this is the best one
    if (vx < 0) vx = 0;
    if (vy < 0) vy = 0;
    if (vz < 0) vz = 0;
    if (vx >= volH) vx = volH  - 1;
    if (vy >= volW) vy = volW - 1;
    if (vz >= volD) vz = volD  - 1;
    int idx = vz * volW * volH + vx * volW + vy;
    return volumeData[idx];
 }
 // tri-linear interpolation - ready if necessary (but no visible improvement for full volume)
 __device__ float sampleVolumeTrilinear(float* volumeData, const int volW, const int volH, const int volD, float fx, float fy, float fz) {
    int ix = (int)floorf(fx);
    int iy = (int)floorf(fy);
    int iz = (int)floorf(fz);
    // Clamp indices to valid range
    int ix1 = min(ix + 1, volH - 1);
    int iy1 = min(iy + 1, volW - 1);
    int iz1 = min(iz + 1, volD - 1);
    ix = max(ix, 0);
    iy = max(iy, 0);
    iz = max(iz, 0);
    // Compute weights
    float dx = fx - ix;
    float dy = fy - iy;
    float dz = fz - iz;
    // Sample values
    float c00 = sampleVolumeNearest(volumeData, volW, volH, volD, ix, iy, iz) * (1.0f - dx) +
                sampleVolumeNearest(volumeData, volW, volH, volD, ix1, iy, iz) * dx;
    float c10 = sampleVolumeNearest(volumeData, volW, volH, volD, ix, iy1, iz) * (1.0f - dx) +
                sampleVolumeNearest(volumeData, volW, volH, volD, ix1, iy1, iz) * dx;
    float c01 = sampleVolumeNearest(volumeData, volW, volH, volD, ix, iy, iz1) * (1.0f - dx) +
                sampleVolumeNearest(volumeData, volW, volH, volD, ix1, iy, iz1) * dx;
    float c11 = sampleVolumeNearest(volumeData, volW, volH, volD, ix, iy1, iz1) * (1.0f - dx) +
                sampleVolumeNearest(volumeData, volW, volH, volD, ix1, iy1, iz1) * dx;
    float c0 = c00 * (1.0f - dy) + c10 * dy;
    float c1 = c01 * (1.0f - dy) + c11 * dy;
    return c0 * (1.0f - dz) + c1 * dz;
 }
 __device__ float opacityFromGradient(const Vec3 &grad) {
    float gradMag = grad.length();
    float k = 1e-6f;               // tweak (the smaller the value, the less opacity)  // TODO: Add a slider for this
    float alpha = 1.0f - expf(-k * gradMag);
    return alpha;
 }
 __device__ float opacitySigmoid(float val) {
    return 1.0f / (1.0f + expf(-250.f * (val - 0.5f)));  // TODO: Parametrize and add sliders
 }
 __device__ Color3 colorMap(float normalizedValues, const ColorStop stops[], int N) {
    // clamp to [0,1]
    normalizedValues = fminf(fmaxf(normalizedValues, 0.0f), 1.0f);
    // N stops => N-1 intervals
    for (int i = 0; i < N - 1; ++i) {
        float start = stops[i].pos;
        float end   = stops[i + 1].pos;
        if (normalizedValues >= start && normalizedValues <= end) {
            float localT = (normalizedValues - start) / (end - start);
            return interpolate(stops[i].color, stops[i + 1].color, localT);
        }
    }
    // fallback if something goes out of [0,1] or numerical issues
    return stops[N - 1].color;
 }
 // Transfer function
 __device__ float4 transferFunction(float density, const Vec3& grad, const Point3& pos, const Vec3& rayDir) {
  // --------------------------- Sample the volume ---------------------------
  // TODO: Somehow pick if to use temp of speed normalization ... or pass extremas as params.
  float normDensity = (density - MIN_TEMP) / (MAX_TEMP - MIN_TEMP);
  // float normDensity = (density - MIN_SPEED) / (MAX_SPEED - MIN_SPEED);
  normDensity = clamp(normDensity, 0.0f, 1.0f);
  // --------------------------- Map density to color ---------------------------
  // TODO: Add a way to pick stops here
  Color3 baseColor = colorMap(normDensity, d_stopsPythonLike, lenStopsPythonLike);
  // TODO: Add a way to pick different function for alpha
  float alpha = opacityFromGradient(grad);
  // alpha = 0.1f;
  alpha = opacitySigmoid(normDensity);
  // alpha = (1.0f - fabs(grad.normalize().dot(rayDir.normalize()))) * 0.8f + 0.2f;
  float alphaSample = density * alpha * 0.1;
  // --------------------------- Shading ---------------------------
  // Apply Phong
  Vec3 normal = -grad.normalize();
  Vec3 lightDir = (d_lightPos - pos).normalize();
  Vec3 viewDir  = -rayDir.normalize();
  Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor);
  // Compose
  float4 result;
  result.x = shadedColor.x * alphaSample;
  result.y = shadedColor.y * alphaSample;
  result.z = shadedColor.z * alphaSample;
  result.w = alpha;
  // --------------------------- Silhouettes ---------------------------
  // TODO: This is the black silhouette, technically if we are doing alpha based on gradient then it's kind of redundant (?) ... but could also be used for even more pronounced edges
  // TODO: Add a way to adjust the treshold (0.2f atm)
  // TODO: I don't think we should literally be doing this => use gradient based opacity => delete the below if-statement
  // if (fabs(grad.normalize().dot(rayDir.normalize())) < 0.2f) {
  //   result.x = 0.0f;
  //   result.y = 0.0f;
  //   result.z = 0.0f;
  //   result.w = 1.0f;
  // }
  return result;
 }
--- a/src/illumination/transferFunction.h
+++ b/src/illumination/transferFunction.h
@@ -0,0 +1,20 @@
 #ifndef TRANSFER_FUNCTION_H
 #define TRANSFER_FUNCTION_H
 #include "linalg/linalg.h"
 #include "consts.h"
 #include "shading.h"
 // --------------------------- Color mapping ---------------------------
 // --------------------------- Volume sampling ---------------------------
 __device__ float sampleVolumeNearest(float* volumeData, const int volW, const int volH, const int volD, int vx, int vy, int vz);
 __device__ float sampleVolumeTrilinear(float* volumeData, const int volW, const int volH, const int volD, float fx, float fy, float fz);
 // --------------------------- Transfer function ---------------------------
 __device__ float4 transferFunction(float density, const Vec3& grad, const Point3& pos, const Vec3& rayDir);
 #endif // TRANSFER_FUNCTION_H
--- a/src/linalg/mat.cu
+++ b/src/linalg/mat.cu
@@ -7,22 +7,21 @@ using namespace std;
 __device__ Vec3 computeGradient(float* volumeData, const int volW, const int volH, const int volD, int x, int y, int z) {
    // Finite difference for partial derivatives.
    // For boundary voxels - clamp to the boundary. 
    // Normal should point from higher to lower intensities
    int xm = max(x - 1, 0);
-    int xp = min(x + 1, volW  - 1);
+    int xp = min(x + 1, volH  - 1);
    int ym = max(y - 1, 0);
-    int yp = min(y + 1, volH - 1);
+    int yp = min(y + 1, volW - 1);
    int zm = max(z - 1, 0);
    int zp = min(z + 1, volD  - 1);
-    // Note: Assuming data is linearized (idx = z*w*h + y*w + x) TODO: Unlinearize if data not linear
+    // Note: Assuming data is linearized (idx = z * W * H + x * W + y;) TODO: Unlinearize if data not linear
-    float gx = volumeData[z  * volW * volH + y  * volW + xp]
+    float gx = volumeData[z  * volW * volH + xp  * volW + y]
-             - volumeData[z  * volW * volH + y  * volW + xm];
+             - volumeData[z  * volW * volH + xm  * volW + y];
-    float gy = volumeData[z  * volW * volH + yp * volW + x ]
+    float gy = volumeData[z  * volW * volH + x  * volW + yp]
-             - volumeData[z  * volW * volH + ym * volW + x ];
+              - volumeData[z  * volW * volH + x  * volW + ym];
-    float gz = volumeData[zp * volW * volH + y  * volW + x ]
+    float gz = volumeData[zp * volW * volH + x  * volW + y ]
-             - volumeData[zm * volW * volH + y  * volW + x ];
+              - volumeData[zm * volW * volH + x  * volW + y ];
    return Vec3::init(gx, gy, gz);
 };
@@ -44,3 +43,13 @@ __device__ unsigned int packUnorm4x8(float r, float g, float b, float a) {
 	return u.out;
 }
 // Clamp a value between a min and max value
 __device__ float clamp(float value, float min, float max) {
  return fmaxf(min, fminf(value, max));
 }
 // Normalize a float to the range [0, 1]
 __device__ float normalize(float value, float min, float max) {
  return (value - min) / (max - min);
 }
--- a/src/linalg/mat.h
+++ b/src/linalg/mat.h
@@ -2,9 +2,21 @@
 #define MAT_H
 #include "vec.h"
 #include "consts.h"
 __device__ Vec3 computeGradient(float* volumeData, const int volW, const int volH, const int volD, int x, int y, int z);
 __device__ unsigned int packUnorm4x8(float r, float g, float b, float a);
 __device__ float clamp(float value, float min, float max);
 __device__ float normalize(float value, float min, float max);
 // Interpolate between two values
 template <typename T>
 __device__ T interpolate(T start, T end, float t) {
  return start + t * (end - start);
 }
 #endif // MAT_H
--- a/src/linalg/vec.h
+++ b/src/linalg/vec.h
@@ -20,10 +20,17 @@ struct Vec3 {  // TODO: Maybe make this into a class ... maybe
    __host__ __device__ Vec3 operator*(double b) const { return Vec3::init(x * b, y * b, z * b); }
    __host__ __device__ Vec3& operator*=(double b) { x *= b; y *= b; z *= b; return *this; }
    __host__ __device__ Vec3 operator*(float b) const { return Vec3::init(x * b, y * b, z * b); }
    __host__ __device__ Vec3& operator*=(float b) { x *= b; y *= b; z *= b; return *this; }
    friend __host__ __device__ Vec3 operator*(float a, const Vec3& b) {
        return Vec3::init(a * b.x, a * b.y, a * b.z);
    }
    __host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; }
    __host__ __device__ Vec3 cross(const Vec3& b) const { return Vec3::init(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x); }
    __host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3::init(x / len, y / len, z / len); }
    __host__ __device__ double length() const { return sqrt(x * x + y * y + z * z); }
 };
 typedef Vec3 Point3;
--- a/src/main.cu
+++ b/src/main.cu
@@ -10,6 +10,7 @@
 #include <iostream>
 #include "linalg/linalg.h" 
 #include <vector>
 #include <numeric>
 static float* d_volume = nullptr;
@@ -21,6 +22,7 @@ static float* d_volume = nullptr;
 void getTemperature(std::vector<float>& temperatureData, int idx = 0) {
    std::string path = "data/trimmed";
    // std::string path = "data";
    std::string variable = "T";
    DataReader dataReader(path, variable);
    size_t dataLength = dataReader.fileLength(idx);
@@ -30,6 +32,7 @@ void getTemperature(std::vector<float>& temperatureData, int idx = 0) {
 void getSpeed(std::vector<float>& speedData, int idx = 0) {
    std::string path = "data/trimmed";
    // std::string path = "data";
    std::string varU = "U";
    std::string varV = "V";
@@ -52,27 +55,55 @@ void getSpeed(std::vector<float>& speedData, int idx = 0) {
 int main() {
  std::vector<float> data;
-  // getTemperature(data);
+  getTemperature(data, 0);
-  getSpeed(data);
+  // getSpeed(data, 294);
  std::cout << "DATA size: " << data.size() << std::endl;
-  // TODO: Eveontually remove debug below (i.e., eliminate for-loop etc.)
+  // TODO: Eventually, we should not need to load the volume like this
  // Generate debug volume data
  float* hostVolume = new float[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH];
-  // generateVolume(hostVolume, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH);
+  for (int i = 0; i < VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; i++) {
-  for (int i = 0; i < VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; i++) {  // TODO: This is technically an unnecessary artifact of the old code taking in a float* instead of a std::vector
+    hostVolume[i] = data[i + 0*VOLUME_DEPTH*VOLUME_HEIGHT*VOLUME_WIDTH];
-    // Discard temperatures above a small star (supposedly, missing temperature values)
+    // Discard missing values
-    hostVolume[i] = data[i];
+    if (data[i + 0*VOLUME_DEPTH*VOLUME_HEIGHT*VOLUME_WIDTH] + epsilon >= infty) hostVolume[i] = -infty;
    if (data[i] + epsilon >= infty) hostVolume[i] = 0.0f;
  }
-  // Min-max normalization
+  // Reverse the order of hostVolume - why is it upside down anyway?
-  float minVal = *std::min_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH);
+  for (int i = 0; i < VOLUME_WIDTH; i++) {
-  float maxVal = *std::max_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH);
+    for (int j = 0; j < VOLUME_HEIGHT; j++) {
-  for (int i = 0; i < VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; i++) {
+      for (int k = 0; k < VOLUME_DEPTH/2; k++) {
-    hostVolume[i] = (hostVolume[i] - minVal) / (maxVal - minVal);
+        float temp = hostVolume[i + j*VOLUME_WIDTH + k*VOLUME_WIDTH*VOLUME_HEIGHT];
        hostVolume[i + j*VOLUME_WIDTH + k*VOLUME_WIDTH*VOLUME_HEIGHT] = hostVolume[i + j*VOLUME_WIDTH + (VOLUME_DEPTH - 1 - k)*VOLUME_WIDTH*VOLUME_HEIGHT];
        hostVolume[i + j*VOLUME_WIDTH + (VOLUME_DEPTH - 1 - k)*VOLUME_WIDTH*VOLUME_HEIGHT] = temp;
      }
    }
  }
  // Store the half-way up slice data into a file TODO: Remove this debug
  std::ofstream myfile;
  myfile.open("halfwayup.txt");
  for (int i = 0; i < VOLUME_WIDTH; i++) {
    for (int j = 0; j < VOLUME_HEIGHT; j++) {
      myfile << hostVolume[i + j*VOLUME_WIDTH + VOLUME_DEPTH/2*VOLUME_WIDTH*VOLUME_HEIGHT] << " ";
    }
    myfile << std::endl;
  }
  myfile.close();
  // Print min, max, avg., and median values TODO: Remove this debug
  float minVal = *std::min_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH, [](float a, float b) {
    if (a <= epsilon) return false;
    if (b <= epsilon) return true;
    return a < b;
  });
  float maxVal = *std::max_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH);
  std::cout << "minVal: " << minVal << " maxVal: " << maxVal << std::endl;
  // // print min, max, avg., and median values <--- the code actually does not work when this snippet is enabled so probably TODO: Delete this later
  // std::sort(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH);
  // float sum = std::accumulate(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH, 0.0f);
  // float avg = sum / (VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH);
  // std::cout << "min: " << hostVolume[0] << " max: " << hostVolume[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH - 1] << " avg: " << avg << " median: " << hostVolume[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH / 2] << std::endl;
  // Allocate + copy data to GPU
  size_t volumeSize = sizeof(float) * VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH;
  cudaMalloc((void**)&d_volume, volumeSize);
--- a/src/objs/sphere.h
+++ b/src/objs/sphere.h
@@ -79,16 +79,3 @@ __host__ void generateVolume(float* volumeData, int volW, int volH, int volD) {
        }
    }
 }
 // Samples the voxel nearest to the given coordinates. TODO: Can be re-used in other places so move
 __device__ float sampleVolumeNearest(float* volumeData, const int volW, const int volH, const int volD, int vx, int vy, int vz) {
    if (vx < 0) vx = 0;
    if (vy < 0) vy = 0;
    if (vz < 0) vz = 0;
    if (vx >= volW) vx = volW  - 1;
    if (vy >= volH) vy = volH - 1;
    if (vz >= volD) vz = volD  - 1;
    int idx = vz * volW * volH + vy * volD + vx;
    return volumeData[idx];
 }