#include "Raycaster.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "linalg/linalg.h"
#include "consts.h"
#include "cuda_error.h"
#include "shading.h"
#include "objs/sphere.h"
// BUG FIX: the original had a dangling `#include` with no header name;
// <cmath> supplies tanf/fabsf/roundf/fminf used below.
#include <cmath>

// Returns the value of the voxel nearest to (vx, vy, vz), clamping
// out-of-range indices to the volume border.
// volumeData is laid out [z][y][x]: row stride volW, slice stride volW * volH.
// TODO: Can be re-used in other places so move.
__device__ float sampleVolumeNearest(float* volumeData,
                                     const int volW, const int volH, const int volD,
                                     int vx, int vy, int vz)
{
    // Clamp so border rays sample the edge voxel instead of reading OOB.
    if (vx < 0) vx = 0;
    if (vy < 0) vy = 0;
    if (vz < 0) vz = 0;
    if (vx >= volW) vx = volW - 1;
    if (vy >= volH) vy = volH - 1;
    if (vz >= volD) vz = volD - 1;

    // BUG FIX: the row stride is the volume width, not its depth. The original
    // `vy * volD` mis-addressed every row of any non-cubic volume and could
    // read past the end of the buffer when volD > volW.
    int idx = vz * volW * volH + vy * volW + vx;
    return volumeData[idx];
}

// Tri-linearly interpolates the eight voxels surrounding the fractional
// coordinate (fx, fy, fz). Border handling is inherited from
// sampleVolumeNearest (clamped).
// Ready if necessary (but no visible improvement for full volume).
__device__ float sampleVolumeTrilinear(float* volumeData,
                                       const int volW, const int volH, const int volD,
                                       float fx, float fy, float fz)
{
    int ix = (int)fx;
    int iy = (int)fy;
    int iz = (int)fz;
    float dx = fx - ix;
    float dy = fy - iy;
    float dz = fz - iz;

    // Interpolate along x on each of the four cell edges...
    float c00 = sampleVolumeNearest(volumeData, volW, volH, volD, ix,     iy,     iz    ) * (1.0f - dx)
              + sampleVolumeNearest(volumeData, volW, volH, volD, ix + 1, iy,     iz    ) * dx;
    float c10 = sampleVolumeNearest(volumeData, volW, volH, volD, ix,     iy + 1, iz    ) * (1.0f - dx)
              + sampleVolumeNearest(volumeData, volW, volH, volD, ix + 1, iy + 1, iz    ) * dx;
    float c01 = sampleVolumeNearest(volumeData, volW, volH, volD, ix,     iy,     iz + 1) * (1.0f - dx)
              + sampleVolumeNearest(volumeData, volW, volH, volD, ix + 1, iy,     iz + 1) * dx;
    float c11 = sampleVolumeNearest(volumeData, volW, volH, volD, ix,     iy + 1, iz + 1) * (1.0f - dx)
              + sampleVolumeNearest(volumeData, volW, volH, volD, ix + 1, iy + 1, iz + 1) * dx;

    // ...then along y...
    float c0 = c00 * (1.0f - dy) + c10 * dy;
    float c1 = c01 * (1.0f - dy) + c11 * dy;

    // ...and finally along z.
    return c0 * (1.0f - dz) + c1 * dz;
}

// Maps one density sample (plus its gradient and world position) to an RGBA
// contribution for compositing along the ray.
// TODO: Move to a separate file, and then improve.
__device__ float4 transferFunction(float density, Vec3 grad, Point3 pos, Vec3 rayDir)
{
    float4 result;

    // Basic transfer function: opacity grows linearly with density.
    float alphaSample = density * 0.1f;
    // result.w = 1.0f - expf(-density * 0.1f);

    // Density-driven colour ramp. TODO: Implement a proper transfer function.
    Color3 baseColor = Color3::init(density, 0.1f * density, 1.f - density);

    // The gradient points toward increasing density, so the shading normal is
    // its negation.
    Vec3 normal = -grad.normalize();
    Vec3 lightDir = (d_lightPos - pos).normalize();
    Vec3 viewDir = -rayDir.normalize();

    // Apply Phong
    Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor);

    // Compose (pre-weighted by this sample's alpha).
    result.x = (1.0f - alphaSample) * shadedColor.x * alphaSample;
    result.y = (1.0f - alphaSample) * shadedColor.y * alphaSample;
    result.z = (1.0f - alphaSample) * shadedColor.z * alphaSample;
    result.w = (1.0f - alphaSample) * alphaSample;

    // Silhouette: where the density gradient is nearly perpendicular to the
    // view direction, paint an opaque black outline.
    if (grad.dot(viewDir) < epsilon / 100000.0f) {
        result.x = 0.0f;
        result.y = 0.0f;
        result.z = 0.0f;
        result.w = 1.0f;
    }

    return result;
}

// Casts one primary ray per (px, py) pixel through the volume box
// [0, VOLUME_*]^3 and accumulates shaded density samples into the framebuffer.
// Expected launch: 2D grid of 2D blocks covering IMAGE_WIDTH x IMAGE_HEIGHT.
// TODO: instead of IMAGE_WIDTH and IMAGE_HEIGHT this should reflect the
// windowSize.
__global__ void raycastKernel(float* volumeData, FrameBuffer framebuffer)
{
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    if (px >= IMAGE_WIDTH || py >= IMAGE_HEIGHT) return;

    float accumR = 0.0f;
    float accumG = 0.0f;
    float accumB = 0.0f;

    // Loop-invariant camera setup, hoisted out of the per-sample loop.
    // TODO: Move this (and all similar transformation code) to its own file.
    float tanHalfFov = tanf(fov * 0.5f);
    Vec3 cameraRight = (d_cameraDir.cross(d_cameraUp)).normalize();
    // BUG FIX: the original assigned the re-orthogonalised up vector back into
    // the device-global d_cameraUp from every thread — a data race. Use a
    // thread-local copy instead.
    Vec3 cameraUp = (cameraRight.cross(d_cameraDir)).normalize();

    // Multiple samples per pixel
    for (int s = 0; s < SAMPLES_PER_PIXEL; s++) {
        // Map pixel centre to NDC [-1, 1], scaled by the field of view.
        float u = ((px + 0.5f) / IMAGE_WIDTH ) * 2.0f - 1.0f;
        float v = ((py + 0.5f) / IMAGE_HEIGHT) * 2.0f - 1.0f;
        u *= tanHalfFov;
        v *= tanHalfFov;

        Vec3 rayDir = (d_cameraDir + cameraRight * u + cameraUp * v).normalize();

        // Intersect the axis-aligned volume box (slab method).
        // TODO: Think about whether this is the best way to do this.
        float tNear = 0.0f;
        float tFar  = 1e6f;
        auto intersectAxis = [&](float start, float dir, float minV, float maxV) {
            if (fabsf(dir) < epsilon) {
                // Ray parallel to axis. If outside min..max, no intersection.
                if (start < minV || start > maxV) {
                    tNear = 1e9f;
                    tFar  = -1e9f;
                }
            } else {
                float t0 = (minV - start) / dir;
                float t1 = (maxV - start) / dir;
                if (t0 > t1) { float tmp = t0; t0 = t1; t1 = tmp; }
                if (t0 > tNear) tNear = t0;
                if (t1 < tFar ) tFar  = t1;
            }
        };
        intersectAxis(d_cameraPos.x, rayDir.x, 0.0f, (float)VOLUME_WIDTH);
        intersectAxis(d_cameraPos.y, rayDir.y, 0.0f, (float)VOLUME_HEIGHT);
        intersectAxis(d_cameraPos.z, rayDir.z, 0.0f, (float)VOLUME_DEPTH);

        if (tNear > tFar) {
            // No intersection: light grey background.
            accumR = 0.9f;
            accumG = 0.9f;
            accumB = 0.9f;
        } else {
            // Start marching at the camera if it sits inside the volume.
            if (tNear < 0.0f) tNear = 0.0f;

            float colorR = 0.0f, colorG = 0.0f, colorB = 0.0f;
            float alphaAccum = 0.0f;
            float tCurrent = tNear;

            // March until we leave the box or become (nearly) opaque.
            while (tCurrent < tFar && alphaAccum < alphaAcumLimit) {
                Point3 pos = d_cameraPos + rayDir * tCurrent;

                // Convert world position to voxel indices.
                int ix = (int)roundf(pos.x);
                int iy = (int)roundf(pos.y);
                int iz = (int)roundf(pos.z);

                // Sample (pick appropriate method based on volume size).
                float density = sampleVolumeNearest(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz);
                // float density = sampleVolumeTrilinear(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, pos.x, pos.y, pos.z);

                // If density ~ 0, skip shading.
                if (density > minAllowedDensity) {
                    Vec3 grad = computeGradient(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz);
                    float4 color = transferFunction(density, grad, pos, rayDir);
                    colorR += color.x;
                    colorG += color.y;
                    colorB += color.z;
                    alphaAccum += color.w;
                }
                tCurrent += stepSize;
            }

            accumR += colorR;
            accumG += colorG;
            accumB += colorB;
            // float leftover = 1.0f - alphaAccum;
            // accumR = accumR + leftover * 0.9f;
            // accumG = accumG + leftover * 0.9f;
            // accumB = accumB + leftover * 0.9f;
        }
    }

    // Average samples
    accumR /= (float)SAMPLES_PER_PIXEL;
    accumG /= (float)SAMPLES_PER_PIXEL;
    accumB /= (float)SAMPLES_PER_PIXEL;

    // Final colour
    framebuffer.writePixel(px, py, accumR, accumG, accumB);
}

// Takes ownership of nothing: `resources` and `data` remain owned by the
// caller; only the FrameBuffer is allocated (and freed in resize/destructor).
Raycaster::Raycaster(cudaGraphicsResource_t resources, int w, int h, float* data)
{
    this->resources = resources;
    this->w = w;
    this->h = h;
    this->fb = new FrameBuffer(w, h);
    this->data = data;
    check_cuda_errors(cudaDeviceSynchronize());
}

// Maps the GL interop resource, launches the raycast kernel over the full
// image, and unmaps when done. Blocks until the frame is rendered.
void Raycaster::render()
{
    check_cuda_errors(cudaGraphicsMapResources(1, &this->resources));
    check_cuda_errors(cudaGraphicsResourceGetMappedPointer(
        (void**)&(this->fb->buffer), &(this->fb->buffer_size), resources));

    // FIXME: might not be the best parallelization configuration
    const int tx = 16;
    const int ty = 16;
    dim3 blockSize(tx, ty);
    // Ceil-divide so partial tiles at the right/bottom edges are covered.
    dim3 gridSize((this->w + tx - 1) / tx, (this->h + ty - 1) / ty);

    // TODO: pass camera info at some point.
    // The framebuffer struct is passed by value to the kernel each frame.
    // BUG FIX: the launch was written as `<<>>` (no execution configuration),
    // and the grid/block dimensions were declared in swapped roles.
    raycastKernel<<<gridSize, blockSize>>>(this->data, *this->fb);
    check_cuda_errors(cudaGetLastError());
    check_cuda_errors(cudaDeviceSynchronize());

    check_cuda_errors(cudaGraphicsUnmapResources(1, &this->resources));
}

// Recreates the framebuffer at the new window size.
// NOTE(review): the kernel still renders IMAGE_WIDTH x IMAGE_HEIGHT pixels;
// the launch in render() and the kernel bounds should reflect this size.
// (The original also computed an unused grid/block configuration here; it has
// been removed.)
void Raycaster::resize(int w, int h)
{
    this->w = w;
    this->h = h;
    delete this->fb;
    this->fb = new FrameBuffer(w, h);
    check_cuda_errors(cudaDeviceSynchronize());
}