commit 34971311cb6acd8fb1b13128c4676735b16bd698
Author: Martin Opat <martinopat.opat@gmail.com>
Date:   Thu Nov 14 12:05:54 2024 +0100

    Initial commit

diff --git a/a.out b/a.out
new file mode 100755
index 0000000..3cf0e96
Binary files /dev/null and b/a.out differ
diff --git a/cuda_install_guide.md b/cuda_install_guide.md
new file mode 100644
index 0000000..9faa0ca
--- /dev/null
+++ b/cuda_install_guide.md
@@ -0,0 +1,6 @@
+# Assuming NVIDIA drivers are present
+
+wget https://developer.download.nvidia.com/compute/cuda/12.5.1/local_installers/cuda_12.5.1_555.42.06_linux.run
+chmod +x cuda_12.5.1_555.42.06_linux.run
+sudo ./cuda_12.5.1_555.42.06_linux.run
+
diff --git a/hello_world b/hello_world
new file mode 100755
index 0000000..bb85e75
Binary files /dev/null and b/hello_world differ
diff --git a/hello_world.cu b/hello_world.cu
new file mode 100644
index 0000000..d5941ee
--- /dev/null
+++ b/hello_world.cu
@@ -0,0 +1,27 @@
+#include <iostream>
+#include <cuda_runtime.h>
+
+#define cudaCheckError() do { /* exit if a CUDA runtime call has left an error pending */ \
+    cudaError_t err_ = cudaGetLastError();                      \
+    if (err_ != cudaSuccess) {                                  \
+        printf("CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
+               cudaGetErrorString(err_));                       \
+        exit(EXIT_FAILURE);                                     \
+    }                                                           \
+} while (0) /* no trailing ';': the call site supplies it, which keeps the macro safe in if/else */
+
+__global__ void hello_from_gpu() {  // kernel: every thread that runs it prints one greeting line
+    printf("Hello from GPU!\n");    // device-side printf
+}
+
+int main() {
+    hello_from_gpu<<<1, 1>>>();  // a single block containing a single thread
+    cudaCheckError();            // catches errors reported for the launch itself
+
+    // Wait for the kernel to finish; cudaCheckError() then picks up any error the wait returned.
+    cudaDeviceSynchronize();
+    cudaCheckError();
+
+    // Reset device; a failed reset now yields a non-zero exit code instead of being ignored.
+    return cudaDeviceReset() == cudaSuccess ? 0 : EXIT_FAILURE;
+}
diff --git a/output.ppm b/output.ppm
new file mode 100644
index 0000000..1d7b780
Binary files /dev/null and b/output.ppm differ
diff --git a/test.cu b/test.cu
new file mode 100644
index 0000000..b661dad
--- /dev/null
+++ b/test.cu
@@ -0,0 +1,128 @@
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+#include <iostream>
+#include <fstream>
+
+#define WIDTH 800
+#define HEIGHT 600
+
+struct Vec3 {  // three double components; every member is callable from host and device code
+    double x, y, z;
+    // Construction: all-zero by default, or from explicit components.
+    __host__ __device__ Vec3() : x(0.0), y(0.0), z(0.0) {}
+    __host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {}
+    // Component-wise arithmetic, dot product and normalization; each returns a new value.
+    __host__ __device__ Vec3 operator+(const Vec3& rhs) const { return Vec3(x + rhs.x, y + rhs.y, z + rhs.z); }
+    __host__ __device__ Vec3 operator-(const Vec3& rhs) const { return Vec3(x - rhs.x, y - rhs.y, z - rhs.z); }
+    __host__ __device__ Vec3 operator*(double scale) const { return Vec3(x * scale, y * scale, z * scale); }
+    __host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); }
+    __host__ __device__ double dot(const Vec3& rhs) const { return x * rhs.x + y * rhs.y + z * rhs.z; }
+    __host__ __device__ Vec3 normalize() const { const double length = sqrt(dot(*this)); return Vec3(x / length, y / length, z / length); }
+};
+
+// Sphere primitive (center, radius, surface color) with a ray intersection test.
+struct Sphere {
+    Vec3 center;
+    double radius;
+    Vec3 color;
+    // Ray/sphere test; rayDir must be unit length (the quadratic is solved with leading coefficient 1).
+    // Returns true only when the nearer root lies in front of rayOrigin; t is meaningful only then.
+    __device__ bool intersect(const Vec3& rayOrigin, const Vec3& rayDir, double& t) const {
+        Vec3 oc = rayOrigin - center;
+        double b = oc.dot(rayDir);
+        double c = oc.dot(oc) - radius * radius;
+        double h = b * b - c;
+        if (h < 0.0) return false;  // negative discriminant: the ray's line misses the sphere
+        t = -b - sqrt(h);           // nearer of the two roots
+        return t > 0.0;             // previously a sphere behind the origin was reported as a hit
+    }
+};
+
+// Phong shading for a single light: ambient + diffuse + white specular highlight.
+// `point` is accepted but not used; the direction arguments are only combined through dot products.
+__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) {
+    const double kAmbient = 0.1, kDiffuse = 0.8, kSpecular = 0.5;
+    const int shininess = 32;  // specular exponent
+
+    // Diffuse factor, clamped at zero for surfaces that face away from the light.
+    const double nDotL = normal.dot(lightDir);
+    const double lambert = max(nDotL, 0.0);
+
+    // Mirror lightDir about the normal, then compare the result with the view direction.
+    const Vec3 mirrored = (normal * (2.0 * nDotL) - lightDir).normalize();
+    const double highlight = kSpecular * pow(max(viewDir.dot(mirrored), 0.0), shininess);
+
+    // Terms are summed in the order ambient, diffuse, specular.
+    const Vec3 ambientTerm = color * kAmbient;
+    const Vec3 diffuseTerm = color * (kDiffuse * lambert);
+    const Vec3 specularTerm(highlight, highlight, highlight);
+    return ambientTerm + diffuseTerm + specularTerm;
+}
+
+// One pixel per thread: casts a ray from the origin through pixel (x, y), shades the sphere with
+// phongShading on a hit and writes black otherwise. framebuffer holds 3 bytes per pixel, row by row.
+__global__ void renderKernel(unsigned char* framebuffer, Sphere sphere, Vec3 lightPos) {
+    const int px = blockIdx.x * blockDim.x + threadIdx.x;
+    const int py = blockIdx.y * blockDim.y + threadIdx.y;
+    if (px >= WIDTH || py >= HEIGHT) return;  // threads past the image edge do nothing
+
+    const Vec3 eye(0, 0, 0);
+    const Vec3 dir = Vec3((px - WIDTH / 2.0) / WIDTH, (py - HEIGHT / 2.0) / HEIGHT, 1.0).normalize();
+
+    // Start from black; the channels are overwritten below when the ray hits the sphere.
+    unsigned char r = 0, g = 0, b = 0;
+    double t;
+    if (sphere.intersect(eye, dir, t)) {
+        const Vec3 hit = eye + dir * t;
+        const Vec3 n = (hit - sphere.center).normalize();
+        const Vec3 toLight = (lightPos - hit).normalize();
+        const Vec3 shaded = phongShading(hit, n, toLight, -dir, sphere.color);
+        // Cap each channel at 1.0 before scaling it to a byte.
+        r = static_cast<unsigned char>(fmin(shaded.x, 1.0) * 255);
+        g = static_cast<unsigned char>(fmin(shaded.y, 1.0) * 255);
+        b = static_cast<unsigned char>(fmin(shaded.z, 1.0) * 255);
+    }
+
+    unsigned char* out = framebuffer + (py * WIDTH + px) * 3;
+    out[0] = r;
+    out[1] = g;
+    out[2] = b;
+}
+
+// Writes framebuffer (WIDTH * HEIGHT pixels, 3 bytes each) to filename as a binary PPM ("P6") image.
+void saveImage(const char* filename, unsigned char* framebuffer) {
+    std::ofstream imageFile(filename, std::ios::out | std::ios::binary);
+    if (!imageFile) { std::cerr << "Cannot open " << filename << " for writing" << std::endl; return; }
+    imageFile << "P6\n" << WIDTH << " " << HEIGHT << "\n255\n";
+    imageFile.write(reinterpret_cast<const char*>(framebuffer), static_cast<std::streamsize>(WIDTH) * HEIGHT * 3);  // one bulk write
+    if (!imageFile) std::cerr << "Error while writing " << filename << std::endl;
+}  // the ofstream destructor closes the file
+
+// Prints `what` and the CUDA error text to stderr when status is not cudaSuccess; returns true on success.
+static bool cudaOk(cudaError_t status, const char* what) {
+    if (status != cudaSuccess) std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
+    return status == cudaSuccess;
+}
+
+// Renders a single red sphere on the GPU and writes output.ppm; returns 1 if a CUDA call fails.
+int main() {
+    Sphere sphere = { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) };  // Red sphere
+    Vec3 lightPos(5, 5, 0);
+    const size_t numBytes = static_cast<size_t>(WIDTH) * HEIGHT * 3;  // 3 bytes (RGB) per pixel
+
+    unsigned char* d_framebuffer = NULL;
+    if (!cudaOk(cudaMalloc(&d_framebuffer, numBytes), "cudaMalloc")) return 1;
+    unsigned char* h_framebuffer = new unsigned char[numBytes];
+
+    dim3 threadsPerBlock(16, 16);
+    dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x,
+                   (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y);  // rounded up so edge pixels are covered
+    renderKernel<<<numBlocks, threadsPerBlock>>>(d_framebuffer, sphere, lightPos);
+    // cudaGetLastError reports launch failures; the synchronize call reports failures during execution.
+    bool ok = cudaOk(cudaGetLastError(), "renderKernel launch") && cudaOk(cudaDeviceSynchronize(), "renderKernel") &&
+              cudaOk(cudaMemcpy(h_framebuffer, d_framebuffer, numBytes, cudaMemcpyDeviceToHost), "cudaMemcpy");
+    if (ok) { saveImage("output.ppm", h_framebuffer); std::cout << "Image saved as output.ppm" << std::endl; }
+    cudaFree(d_framebuffer);
+    delete[] h_framebuffer;
+    return ok ? 0 : 1;
+}
diff --git a/test_heavy.cu b/test_heavy.cu
new file mode 100644
index 0000000..8e9cd2c
--- /dev/null
+++ b/test_heavy.cu
@@ -0,0 +1,135 @@
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+#include <iostream>
+#include <fstream>
+#include <cmath>
+
+#define WIDTH 3840
+#define HEIGHT 2160
+#define SAMPLES_PER_PIXEL 8
+
+struct Vec3 {  // three double components; every member is callable from host and device code
+    double x, y, z;
+    // Construction: all-zero by default, or from explicit components.
+    __host__ __device__ Vec3() : x(0.0), y(0.0), z(0.0) {}
+    __host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {}
+    // Component-wise arithmetic, dot product and normalization; each returns a new value.
+    __host__ __device__ Vec3 operator+(const Vec3& rhs) const { return Vec3(x + rhs.x, y + rhs.y, z + rhs.z); }
+    __host__ __device__ Vec3 operator-(const Vec3& rhs) const { return Vec3(x - rhs.x, y - rhs.y, z - rhs.z); }
+    __host__ __device__ Vec3 operator*(double scale) const { return Vec3(x * scale, y * scale, z * scale); }
+    __host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); }
+    __host__ __device__ double dot(const Vec3& rhs) const { return x * rhs.x + y * rhs.y + z * rhs.z; }
+    __host__ __device__ Vec3 normalize() const { const double length = sqrt(dot(*this)); return Vec3(x / length, y / length, z / length); }
+};
+
+struct Sphere {  // sphere primitive: center, radius and surface color
+    Vec3 center;
+    double radius;
+    Vec3 color;
+    // Ray/sphere test; rayDir must be unit length. Returns true only for a hit in front of rayOrigin.
+    __device__ bool intersect(const Vec3& rayOrigin, const Vec3& rayDir, double& t) const {
+        Vec3 oc = rayOrigin - center;
+        double b = oc.dot(rayDir);
+        double c = oc.dot(oc) - radius * radius;
+        double h = b * b - c;
+        if (h < 0.0) return false;  // negative discriminant: the ray's line misses the sphere
+        // Nearer of the two roots; t is meaningful to the caller only when true is returned.
+        t = -b - sqrt(h);
+        return t > 0.0;             // previously a sphere behind the origin was reported as a hit
+    }
+};
+
+// Phong shading for a single light: ambient + diffuse + white specular highlight; `point` is unused.
+__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) {
+    const double kAmbient = 0.1, kDiffuse = 0.8, kSpecular = 0.5;
+    const int shininess = 64;  // specular exponent
+
+    // Diffuse factor, clamped at zero for surfaces that face away from the light.
+    const double nDotL = normal.dot(lightDir);
+    const double lambert = max(nDotL, 0.0);
+
+    // Mirror lightDir about the normal, then compare the result with the view direction.
+    const Vec3 mirrored = (normal * (2.0 * nDotL) - lightDir).normalize();
+    const double highlight = kSpecular * pow(max(viewDir.dot(mirrored), 0.0), shininess);
+
+    // Terms are summed in the order ambient, diffuse, specular.
+    return color * kAmbient + color * (kDiffuse * lambert) + Vec3(highlight, highlight, highlight);
+}
+
+// One thread per pixel: averages SAMPLES_PER_PIXEL rays, each shaded at the nearest sphere it hits
+// (misses contribute black), and stores the result as 3 bytes at framebuffer[(y * WIDTH + x) * 3].
+__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) {
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= WIDTH || y >= HEIGHT) return;  // threads past the image edge do nothing
+
+    int pixelIndex = (y * WIDTH + x) * 3;
+    Vec3 rayOrigin(0, 0, 0);
+    Vec3 colCum(0, 0, 0);
+
+    for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) {
+        double offset = sample / static_cast<double>(SAMPLES_PER_PIXEL);  // sub-pixel shift in [0, 1)
+        Vec3 rayDir = Vec3((x + offset - WIDTH / 2.0) / WIDTH, (y + offset - HEIGHT / 2.0) / HEIGHT, 1.0).normalize();
+        // Keep only the closest hit in front of the origin; adding every hit blended in occluded spheres.
+        int nearest = -1;
+        double nearestT = 0.0;
+        for (int i = 0; i < numSpheres; ++i) {
+            double t;
+            if (!spheres[i].intersect(rayOrigin, rayDir, t) || t <= 0.0) continue;
+            if (nearest < 0 || t < nearestT) { nearest = i; nearestT = t; }
+        }
+        if (nearest < 0) continue;  // miss: this sample contributes black
+
+        Vec3 hitPoint = rayOrigin + rayDir * nearestT;
+        Vec3 normal = (hitPoint - spheres[nearest].center).normalize();
+        Vec3 lightDir = (lightPos - hitPoint).normalize();
+        colCum = colCum + phongShading(hitPoint, normal, lightDir, -rayDir, spheres[nearest].color);
+    }
+
+    Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL);  // average color across all samples
+    framebuffer[pixelIndex] = static_cast<unsigned char>(fmin(color.x, 1.0) * 255);
+    framebuffer[pixelIndex + 1] = static_cast<unsigned char>(fmin(color.y, 1.0) * 255);
+    framebuffer[pixelIndex + 2] = static_cast<unsigned char>(fmin(color.z, 1.0) * 255);
+}
+
+// Writes framebuffer (WIDTH * HEIGHT pixels, 3 bytes each) to filename as a binary PPM ("P6") image.
+void saveImage(const char* filename, unsigned char* framebuffer) {
+    std::ofstream imageFile(filename, std::ios::out | std::ios::binary);
+    if (!imageFile) { std::cerr << "Cannot open " << filename << " for writing" << std::endl; return; }
+    imageFile << "P6\n" << WIDTH << " " << HEIGHT << "\n255\n";
+    imageFile.write(reinterpret_cast<const char*>(framebuffer), static_cast<std::streamsize>(WIDTH) * HEIGHT * 3);  // one bulk write
+    if (!imageFile) std::cerr << "Error while writing " << filename << std::endl;
+}  // the ofstream destructor closes the file
+
+// Prints `what` and the CUDA error text to stderr when status is not cudaSuccess; returns true on success.
+static bool cudaOk(cudaError_t status, const char* what) {
+    if (status != cudaSuccess) std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
+    return status == cudaSuccess;
+}
+
+int main() {  // renders three spheres on the GPU and writes output.ppm; returns 1 if a CUDA call fails
+    Sphere spheres[] = {
+        { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) },  // Red sphere
+        { Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere
+        { Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) }  // Blue sphere
+    };
+    const int numSpheres = sizeof(spheres) / sizeof(Sphere);
+    const size_t numBytes = static_cast<size_t>(WIDTH) * HEIGHT * 3;  // 3 bytes (RGB) per pixel
+    Vec3 lightPos(5, 5, 0);
+    unsigned char* d_framebuffer = NULL;  // stays NULL if allocation fails; cudaFree(NULL) does nothing
+    Sphere* d_spheres = NULL;
+    unsigned char* h_framebuffer = new unsigned char[numBytes];
+    dim3 threadsPerBlock(16, 16);
+    dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y);
+    bool ok = cudaOk(cudaMalloc(&d_framebuffer, numBytes), "cudaMalloc(framebuffer)") &&
+              cudaOk(cudaMalloc(&d_spheres, sizeof(spheres)), "cudaMalloc(spheres)") &&
+              cudaOk(cudaMemcpy(d_spheres, spheres, sizeof(spheres), cudaMemcpyHostToDevice), "cudaMemcpy(spheres)");
+    if (ok) renderKernel<<<numBlocks, threadsPerBlock>>>(d_framebuffer, d_spheres, numSpheres, lightPos);
+    ok = ok && cudaOk(cudaGetLastError(), "renderKernel launch") && cudaOk(cudaDeviceSynchronize(), "renderKernel") &&
+         cudaOk(cudaMemcpy(h_framebuffer, d_framebuffer, numBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(framebuffer)");
+    if (ok) { saveImage("output.ppm", h_framebuffer); std::cout << "High-resolution image saved as output.ppm" << std::endl; }
+    cudaFree(d_framebuffer);
+    cudaFree(d_spheres);
+    delete[] h_framebuffer;
+    return ok ? 0 : 1;
+}