From ae599a3659366d13a2ac2146e104939af9a881b0 Mon Sep 17 00:00:00 2001 From: Martin Opat Date: Mon, 23 Dec 2024 22:31:36 +0100 Subject: [PATCH] Basic exampe of phong + ray-casting dvr finally works ... though unfortunatelly, loooots of TODOs still --- src/main.cu | 246 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 184 insertions(+), 62 deletions(-) diff --git a/src/main.cu b/src/main.cu index fafaec8..1b4ac0e 100644 --- a/src/main.cu +++ b/src/main.cu @@ -1,102 +1,224 @@ -#include -#include #include +#include #include +#include -#include "linalg/linalg.h" +#include "linalg/linalg.h" #include "objs/sphere.h" #include "img/handler.h" -#define WIDTH 3840 -#define HEIGHT 2160 -#define SAMPLES_PER_PIXEL 8 +// TODO: Eventually, export this into a better place (i.e., such that we do not have to recompile every time we change a parameter) +static const int VOLUME_WIDTH = 1024; +static const int VOLUME_HEIGHT = 1024; +static const int VOLUME_DEPTH = 1024; + +static const int IMAGE_WIDTH = 2560; +static const int IMAGE_HEIGHT = 1440; + +static const int SAMPLES_PER_PIXEL = 8; // TODO: Right now uses simple variance, consider using something more advanced (e.g., some commonly-used noise map) -__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { - double ambientStrength = 0.1; - double diffuseStrength = 0.8; +__constant__ int d_volumeWidth; +__constant__ int d_volumeHeight; +__constant__ int d_volumeDepth; + +static float* d_volume = nullptr; // TODO: Adjust according to how data is loaded + +// ---------------------------------------------------------------------------------------------------- +__device__ Vec3 phongShading(const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& baseColor) { + double ambientStrength = 0.3; + double diffuseStrength = 0.8; double specularStrength = 0.5; - int shininess = 64; + int shininess = 32; - Vec3 ambient = color * ambientStrength; - double diff = max(normal.dot(lightDir), 0.0); - Vec3 diffuse = color * (diffuseStrength * diff); + Vec3 ambient = baseColor * ambientStrength; + double diff = fmax(normal.dot(lightDir), 0.0); + Vec3 diffuse = baseColor * (diffuseStrength * diff); Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); + double spec = pow(fmax(viewDir.dot(reflectDir), 0.0), shininess); Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); return ambient + diffuse + specular; } -__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= WIDTH || y >= HEIGHT) return; +// Raycast + phong +__global__ void raycastKernel(float* volumeData, unsigned char* framebuffer, int imageWidth, int imageHeight, Vec3 cameraPos, Vec3 cameraDir, Vec3 cameraUp, float fov, float stepSize, Vec3 lightPos) { + int px = blockIdx.x * blockDim.x + threadIdx.x; + int py = blockIdx.y * blockDim.y + threadIdx.y; + if (px >= imageWidth || py >= imageHeight) return; - int pixelIndex = (y * WIDTH + x) * 3; - Vec3 rayOrigin(0, 0, 0); - Vec3 colCum(0, 0, 0); + float accumR = 0.0f; + float accumG = 0.0f; + float accumB = 0.0f; - double spp = static_cast(SAMPLES_PER_PIXEL); - for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) { - double u = (x + (sample / spp) - WIDTH / 2.0) / WIDTH; - double v = (y + (sample / spp) - HEIGHT / 2.0) / HEIGHT; - Vec3 rayDir(u, v, 1.0); - rayDir = rayDir.normalize(); + // Multiple samples per pixel + for (int s = 0; s < SAMPLES_PER_PIXEL; s++) { + // Map to [-1, 1] + float u = ((px + 0.5f) / imageWidth ) * 2.0f - 1.0f; + float v = ((py + 0.5f) / imageHeight) * 2.0f - 1.0f; - for (int i = 0; i < numSpheres; ++i) { - double t; - if (spheres[i].intersect(rayOrigin, rayDir, t)) { - Vec3 hitPoint = rayOrigin + rayDir * t; - Vec3 normal = (hitPoint - spheres[i].center).normalize(); - Vec3 lightDir = (lightPos - hitPoint).normalize(); - Vec3 viewDir = -rayDir; + // TODO: Move this (and all similar transformation code) to its own separate file + float tanHalfFov = tanf(fov * 0.5f); + u *= tanHalfFov; + v *= tanHalfFov; - colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color); + // Find ray direction + Vec3 cameraRight = (cameraDir.cross(cameraUp)).normalize(); + cameraUp = (cameraRight.cross(cameraDir)).normalize(); + Vec3 rayDir = (cameraDir + cameraRight*u + cameraUp*v).normalize(); + + // Intersect (for simplicity just a 3D box from 0 to 1 in all dimensions) - TODO: Think about whether this is the best way to do this + float tNear = 0.f; // TODO: These are also linear transforms, so move away + float tFar = 1e6f; + + auto intersectAxis = [&](float start, float dirVal) { + if (fabsf(dirVal) < 1e-10f) { // TDDO: Add a constant - epsilon + if (start < 0.f || start > 1.f) { + tNear = 1e9f; + tFar = -1e9f; + } + } else { + float t0 = (0.0f - start) / dirVal; // TODO: 0.0 and 1.0 depend on the box size -> move to a constant + float t1 = (1.0f - start) / dirVal; + if (t0>t1) { + float tmp=t0; + t0=t1; + t1=tmp; + } + if (t0>tNear) tNear = t0; + if (t1 tFar) continue; // No intersectionn + if (tNear < 0.0f) tNear = 0.0f; + + float colorR = 0.f, colorG = 0.f, colorB = 0.f; + float alphaAccum = 0.f; + + float tCurrent = tNear; + while (tCurrent < tFar && alphaAccum < 0.99f) { + Vec3 pos = cameraPos + rayDir * tCurrent; + + // Convert to volume indices + float fx = pos.x * (d_volumeWidth - 1); + float fy = pos.y * (d_volumeHeight - 1); + float fz = pos.z * (d_volumeDepth - 1); + int ix = (int)roundf(fx); + int iy = (int)roundf(fy); + int iz = (int)roundf(fz); + + // Sample + float density = sampleVolumeNearest(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz); + + // Basic transfer function. TODO: Move to a separate file, and then improve + float alphaSample = density * 0.05f; + Vec3 baseColor = Vec3(density, 0.1f*density, 1.f - density); + + // If density ~ 0, skip shading + if (density > 0.001f) { + Vec3 grad = computeGradient(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz); + Vec3 normal = -grad.normalize(); + + Vec3 lightDir = (lightPos - pos).normalize(); + Vec3 viewDir = -rayDir.normalize(); + + // Apply Phong + Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor); + + // Compose + colorR += (1.0f - alphaAccum) * shadedColor.x * alphaSample; + colorG += (1.0f - alphaAccum) * shadedColor.y * alphaSample; + colorB += (1.0f - alphaAccum) * shadedColor.z * alphaSample; + alphaAccum += (1.0f - alphaAccum) * alphaSample; + } + + tCurrent += stepSize; } + + accumR += colorR; + accumG += colorG; + accumB += colorB; } - // Average color across all samples - Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL); + // Average samples + accumR /= (float)SAMPLES_PER_PIXEL; + accumG /= (float)SAMPLES_PER_PIXEL; + accumB /= (float)SAMPLES_PER_PIXEL; - framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); - framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); - framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); + // Final colour + int fbIndex = (py * imageWidth + px) * 3; + framebuffer[fbIndex + 0] = (unsigned char)(fminf(accumR, 1.f) * 255); + framebuffer[fbIndex + 1] = (unsigned char)(fminf(accumG, 1.f) * 255); + framebuffer[fbIndex + 2] = (unsigned char)(fminf(accumB, 1.f) * 255); } +int main(int argc, char** argv) { + // Generate debug volume data + float* hostVolume = new float[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH]; + generateVolume(hostVolume, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH); + // Allocate + copy data to GPU + size_t volumeSize = sizeof(float) * VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; + cudaMalloc((void**)&d_volume, volumeSize); + cudaMemcpy(d_volume, hostVolume, volumeSize, cudaMemcpyHostToDevice); -int main() { - Sphere spheres[] = { - { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere - { Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere - { Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere - }; - int numSpheres = sizeof(spheres) / sizeof(Sphere); - Vec3 lightPos(5, 5, 0); + int w = VOLUME_WIDTH, h = VOLUME_HEIGHT, d = VOLUME_DEPTH; + cudaMemcpyToSymbol(d_volumeWidth, &w, sizeof(int)); + cudaMemcpyToSymbol(d_volumeHeight, &h, sizeof(int)); + cudaMemcpyToSymbol(d_volumeDepth, &d, sizeof(int)); + // Allocate framebuffer unsigned char* d_framebuffer; - unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; - Sphere* d_spheres; - cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); - cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere)); - cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice); + size_t fbSize = IMAGE_WIDTH * IMAGE_HEIGHT * 3 * sizeof(unsigned char); + cudaMalloc((void**)&d_framebuffer, fbSize); + cudaMemset(d_framebuffer, 0, fbSize); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, - (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); - renderKernel<<>>(d_framebuffer, d_spheres, numSpheres, lightPos); + // Camera and Light + Vec3 cameraPos(0.5, 0.5, -2.0); + Vec3 cameraDir(0.0, 0.0, 1.0); + Vec3 cameraUp(0.0, 1.0, 0.0); + float fov = 60.0f * (M_PI / 180.0f); + float stepSize = 0.002f; + Vec3 lightPos(1.5, 2.0, -1.0); + + // Launch kernel + dim3 blockSize(16, 16); + dim3 gridSize((IMAGE_WIDTH + blockSize.x - 1)/blockSize.x, + (IMAGE_HEIGHT + blockSize.y - 1)/blockSize.y); + + raycastKernel<<>>( + d_volume, + d_framebuffer, + IMAGE_WIDTH, + IMAGE_HEIGHT, + cameraPos, + cameraDir.normalize(), + cameraUp.normalize(), + fov, + stepSize, + lightPos + ); cudaDeviceSynchronize(); - cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); - saveImage("output.ppm", h_framebuffer, WIDTH, HEIGHT); + // Copy framebuffer back to CPU + unsigned char* hostFramebuffer = new unsigned char[IMAGE_WIDTH * IMAGE_HEIGHT * 3]; + cudaMemcpy(hostFramebuffer, d_framebuffer, fbSize, cudaMemcpyDeviceToHost); + // Export image + saveImage("output.ppm", hostFramebuffer, IMAGE_WIDTH, IMAGE_HEIGHT); + + // Cleanup + delete[] hostVolume; + delete[] hostFramebuffer; + cudaFree(d_volume); cudaFree(d_framebuffer); - cudaFree(d_spheres); - delete[] h_framebuffer; - std::cout << "High-resolution image saved as output.ppm" << std::endl; + std::cout << "Phong-DVR rendering done. Image saved to output_phong_dvr.ppm" << std::endl; return 0; }