commit 34971311cb6acd8fb1b13128c4676735b16bd698 Author: Martin Opat Date: Thu Nov 14 12:05:54 2024 +0100 Initiall commit diff --git a/a.out b/a.out new file mode 100755 index 0000000..3cf0e96 Binary files /dev/null and b/a.out differ diff --git a/cuda_install_guide.md b/cuda_install_guide.md new file mode 100644 index 0000000..9faa0ca --- /dev/null +++ b/cuda_install_guide.md @@ -0,0 +1,6 @@ +# Assuminng NVIDIA drivers present + +wget https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda_12.5.1_555.42.06_linux.run +chmod +x cuda_12.5.1_555.42.06_linux.run +sudo ./cuda_12.5.1_555.42.06_linux.run + diff --git a/hello_world b/hello_world new file mode 100755 index 0000000..bb85e75 Binary files /dev/null and b/hello_world differ diff --git a/hello_world.cu b/hello_world.cu new file mode 100644 index 0000000..d5941ee --- /dev/null +++ b/hello_world.cu @@ -0,0 +1,27 @@ +#include +#include + +#define cudaCheckError() { \ + cudaError_t e = cudaGetLastError(); \ + if (e != cudaSuccess) { \ + printf("CUDA error %s:%d: %s\n", __FILE__, __LINE__, \ + cudaGetErrorString(e)); \ + exit(EXIT_FAILURE); \ + } \ +} + +__global__ void hello_from_gpu() { + printf("Hello from GPU!\n"); +} + +int main() { + hello_from_gpu<<<1, 1>>>(); + cudaCheckError(); + + cudaDeviceSynchronize(); + cudaCheckError(); + + // Reset device + cudaDeviceReset(); + return 0; +} diff --git a/output.ppm b/output.ppm new file mode 100644 index 0000000..1d7b780 Binary files /dev/null and b/output.ppm differ diff --git a/test.cu b/test.cu new file mode 100644 index 0000000..b661dad --- /dev/null +++ b/test.cu @@ -0,0 +1,128 @@ +#include +#include +#include +#include + +#define WIDTH 800 +#define HEIGHT 600 + +struct Vec3 { + double x, y, z; + + __host__ __device__ Vec3() : x(0), y(0), z(0) {} + __host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {} + + __host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3(x + b.x, y + b.y, z + b.z); } + __host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3(x - b.x, y - b.y, z - b.z); } + __host__ __device__ Vec3 operator*(double b) const { return Vec3(x * b, y * b, z * b); } + __host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); } + __host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; } + __host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3(x / len, y / len, z / len); } +}; + +// Simple Phong lighting components +struct Sphere { + Vec3 center; + double radius; + Vec3 color; + + __device__ bool intersect(const Vec3& rayOrigin, const Vec3& rayDir, double& t) const { + Vec3 oc = rayOrigin - center; + double b = oc.dot(rayDir); + double c = oc.dot(oc) - radius * radius; + double h = b * b - c; + if (h < 0.0) return false; + h = sqrt(h); + t = -b - h; + return true; + } +}; + +__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { + double ambientStrength = 0.1; + double diffuseStrength = 0.8; + double specularStrength = 0.5; + int shininess = 32; + + // Ambient + Vec3 ambient = color * ambientStrength; + + // Diffuse + double diff = max(normal.dot(lightDir), 0.0); + Vec3 diffuse = color * (diffuseStrength * diff); + + // Specular + Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); + double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); + Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); + + return ambient + diffuse + specular; +} + +__global__ void renderKernel(unsigned char* framebuffer, Sphere sphere, Vec3 lightPos) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x >= WIDTH || y >= HEIGHT) return; + + int pixelIndex = (y * WIDTH + x) * 3; + + Vec3 rayOrigin(0, 0, 0); + Vec3 rayDir((x - WIDTH / 2.0) / WIDTH, (y - HEIGHT / 2.0) / HEIGHT, 1.0); + rayDir = rayDir.normalize(); + + double t; + if (sphere.intersect(rayOrigin, rayDir, t)) { + Vec3 hitPoint = rayOrigin + rayDir * t; + Vec3 normal = (hitPoint - sphere.center).normalize(); + Vec3 lightDir = (lightPos - hitPoint).normalize(); + Vec3 viewDir = -rayDir; + + Vec3 color = phongShading(hitPoint, normal, lightDir, viewDir, sphere.color); + + framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); + framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); + framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); + } else { + framebuffer[pixelIndex] = 0; + framebuffer[pixelIndex + 1] = 0; + framebuffer[pixelIndex + 2] = 0; + } +} + +void saveImage(const char* filename, unsigned char* framebuffer) { + std::ofstream imageFile(filename, std::ios::out | std::ios::binary); + imageFile << "P6\n" << WIDTH << " " << HEIGHT << "\n255\n"; + for (int i = 0; i < WIDTH * HEIGHT * 3; i++) { + imageFile << framebuffer[i]; + } + imageFile.close(); +} + +int main() { + // Initialize sphere and light source + Sphere sphere = { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }; // Red sphere + Vec3 lightPos(5, 5, 0); + + // Allocate framebuffer on device and host + unsigned char* d_framebuffer; + unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; + cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); + + // Launch + dim3 threadsPerBlock(16, 16); + dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, + (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); + renderKernel<<>>(d_framebuffer, sphere, lightPos); + cudaDeviceSynchronize(); + + // Copy result back to host and save + cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); + saveImage("output.ppm", h_framebuffer); + + // Clean up + cudaFree(d_framebuffer); + delete[] h_framebuffer; + + std::cout << "Image saved as output.ppm" << std::endl; + return 0; +} diff --git a/test_heavy.cu b/test_heavy.cu new file mode 100644 index 0000000..8e9cd2c --- /dev/null +++ b/test_heavy.cu @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include + +#define WIDTH 3840 +#define HEIGHT 2160 +#define SAMPLES_PER_PIXEL 8 + +struct Vec3 { + double x, y, z; + + __host__ __device__ Vec3() : x(0), y(0), z(0) {} + __host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {} + + __host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3(x + b.x, y + b.y, z + b.z); } + __host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3(x - b.x, y - b.y, z - b.z); } + __host__ __device__ Vec3 operator*(double b) const { return Vec3(x * b, y * b, z * b); } + __host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); } + __host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; } + __host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3(x / len, y / len, z / len); } +}; + +struct Sphere { + Vec3 center; + double radius; + Vec3 color; + + __device__ bool intersect(const Vec3& rayOrigin, const Vec3& rayDir, double& t) const { + Vec3 oc = rayOrigin - center; + double b = oc.dot(rayDir); + double c = oc.dot(oc) - radius * radius; + double h = b * b - c; + if (h < 0.0) return false; + h = sqrt(h); + t = -b - h; + return true; + } +}; + +__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { + double ambientStrength = 0.1; + double diffuseStrength = 0.8; + double specularStrength = 0.5; + int shininess = 64; + + Vec3 ambient = color * ambientStrength; + double diff = max(normal.dot(lightDir), 0.0); + Vec3 diffuse = color * (diffuseStrength * diff); + + Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); + double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); + Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); + + return ambient + diffuse + specular; +} + +__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x >= WIDTH || y >= HEIGHT) return; + + int pixelIndex = (y * WIDTH + x) * 3; + Vec3 rayOrigin(0, 0, 0); + Vec3 colCum(0, 0, 0); + + for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) { + double u = (x + (sample / static_cast(SAMPLES_PER_PIXEL)) - WIDTH / 2.0) / WIDTH; + double v = (y + (sample / static_cast(SAMPLES_PER_PIXEL)) - HEIGHT / 2.0) / HEIGHT; + Vec3 rayDir(u, v, 1.0); + rayDir = rayDir.normalize(); + + for (int i = 0; i < numSpheres; ++i) { + double t; + if (spheres[i].intersect(rayOrigin, rayDir, t)) { + Vec3 hitPoint = rayOrigin + rayDir * t; + Vec3 normal = (hitPoint - spheres[i].center).normalize(); + Vec3 lightDir = (lightPos - hitPoint).normalize(); + Vec3 viewDir = -rayDir; + + colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color); + } + } + } + + // Average color across all samples + Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL); + + framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); + framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); + framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); +} + +void saveImage(const char* filename, unsigned char* framebuffer) { + std::ofstream imageFile(filename, std::ios::out | std::ios::binary); + imageFile << "P6\n" << WIDTH << " " << HEIGHT << "\n255\n"; + for (int i = 0; i < WIDTH * HEIGHT * 3; i++) { + imageFile << framebuffer[i]; + } + imageFile.close(); +} + +int main() { + Sphere spheres[] = { + { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere + { Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere + { Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere + }; + int numSpheres = sizeof(spheres) / sizeof(Sphere); + Vec3 lightPos(5, 5, 0); + + unsigned char* d_framebuffer; + unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; + Sphere* d_spheres; + cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); + cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere)); + cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice); + + dim3 threadsPerBlock(16, 16); + dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, + (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); + renderKernel<<>>(d_framebuffer, d_spheres, numSpheres, lightPos); + cudaDeviceSynchronize(); + + cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); + saveImage("output.ppm", h_framebuffer); + + cudaFree(d_framebuffer); + cudaFree(d_spheres); + delete[] h_framebuffer; + + std::cout << "High-resolution image saved as output.ppm" << std::endl; + return 0; +}