diff --git a/.gitignore b/.gitignore index 44ce3ac..1c3a1df 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,11 @@ .vscode # Build -build build/ -build/* +*.o +# Data +data/ + +# Resulting images +*.ppm diff --git a/README.md b/README.md index 15c50f1..45b5a9e 100644 --- a/README.md +++ b/README.md @@ -1 +1,22 @@ -# cuda-based-raytrace \ No newline at end of file +# cuda-based-raytrace + +## How to run +If necessary clean previous build: +```bash +make clean +``` + +Then build (currently always debug build): +```bash +make all +``` + +Run: +```bash +./build/main +``` + +To open the resulting image you can use (Ubuntu): +```bash +xdg-open output.ppm +``` \ No newline at end of file diff --git a/load-modules.sh b/load-modules.sh new file mode 100755 index 0000000..c6fcab4 --- /dev/null +++ b/load-modules.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +# Load the needed modules in Habrok + +ml CUDA +ml netCDF-C++4 diff --git a/makefile b/makefile index 18993ff..90d9c3c 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,7 @@ # Compiler and flags NVCC = nvcc -CXXFLAGS = -I./src -I./linalg -I./img -I./objs -std=c++17 +CXXFLAGS = -I./src -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs) -g -G +COMPILE_OBJ_FLAGS = --device-c # Directories SRC_DIR = src @@ -8,7 +9,7 @@ BUILD_DIR = build # Files TARGET = $(BUILD_DIR)/main -SRC_FILES = $(wildcard $(SRC_DIR)/*.cu) +SRC_FILES := $(shell find $(SRC_DIR) -type f \( -name '*.cu' -o -name '*.cpp' \)) OBJ_FILES = $(patsubst $(SRC_DIR)/%.cu,$(BUILD_DIR)/%.o,$(SRC_FILES)) # Default target @@ -16,11 +17,12 @@ all: $(TARGET) # Build the main target $(TARGET): $(OBJ_FILES) | $(BUILD_DIR) - $(NVCC) $(CXXFLAGS) -o $@ $^ + $(NVCC) $(CXXFLAGS) $^ -o $@ # Compile object files $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cu - $(NVCC) $(CXXFLAGS) -c $< -o $@ + @mkdir -p $(dir $@) + $(NVCC) $(CXXFLAGS) $(COMPILE_OBJ_FLAGS) -c $< -o $@ # Debug build debug: CXXFLAGS += -g @@ -34,4 +36,4 @@ clean: $(BUILD_DIR): mkdir -p $(BUILD_DIR) -.PHONY: all clean debug +.PHONY: all clean debug \ No newline at end of file diff --git a/output.ppm b/output.ppm deleted file mode 100644 index 1d7b780..0000000 Binary files a/output.ppm and /dev/null differ diff --git a/src/consts.cu b/src/consts.cu new file mode 100644 index 0000000..18cf40a --- /dev/null +++ b/src/consts.cu @@ -0,0 +1,18 @@ +#include "consts.h" + +__device__ Point3 d_cameraPos; +__device__ Vec3 d_cameraDir; +__device__ Vec3 d_cameraUp; +__device__ Point3 d_lightPos; + +Point3 h_cameraPos = Point3::init(-0.7, -1.0, -2.0); +Vec3 h_cameraDir = Vec3::init(0.4, 0.6, 1.0).normalize(); +Vec3 h_cameraUp = Vec3::init(0.0, 1.0, 0.0).normalize(); +Point3 h_lightPos = Point3::init(1.5, 2.0, -1.0); + +void copyConstantsToDevice() { + cudaMemcpyToSymbol(d_cameraPos, &h_cameraPos, sizeof(Point3)); + cudaMemcpyToSymbol(d_cameraDir, &h_cameraDir, sizeof(Vec3)); + cudaMemcpyToSymbol(d_cameraUp, &h_cameraUp, sizeof(Vec3)); + cudaMemcpyToSymbol(d_lightPos, &h_lightPos, sizeof(Point3)); +} \ No newline at end of file diff --git a/src/consts.h b/src/consts.h new file mode 100644 index 0000000..774c297 --- /dev/null +++ b/src/consts.h @@ -0,0 +1,44 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "linalg/vec.h" +#include + +// --------------------------- Basic Constants --------------------------- +const int VOLUME_WIDTH = 49; +const int VOLUME_HEIGHT = 51; +const int VOLUME_DEPTH = 42; + +const int IMAGE_WIDTH = 2560; +const int IMAGE_HEIGHT = 1440; + +const double epsilon = 1e-10f; +const double infty = 1e15f; // This vlalue is used to represent missing values in data + + +// --------------------------- Raycasting Constants --------------------------- +const int SAMPLES_PER_PIXEL = 8; // TODO: Right now uses simple variance, consider using something more advanced (e.g., some commonly-used noise map) + +const float alphaAcumLimit = 0.65f; // TODO: Idk what a good accumulation value is +const float minAllowedDensity = 0.001f; + +const float stepSize = 0.002f; + + +// --------------------------- Illumination Constants --------------------------- +const double ambientStrength = 0.3; +const double diffuseStrength = 0.8; +const double specularStrength = 0.5; +const int shininess = 32; +const float fov = 60.0f * (M_PI / 180.0f); + +// Camera and Light +extern __device__ Point3 d_cameraPos; +extern __device__ Vec3 d_cameraDir; +extern __device__ Vec3 d_cameraUp; +extern __device__ Point3 d_lightPos; + +// --------------------------- Functions for handling external constants --------------------------- +void copyConstantsToDevice(); + +#endif // CONSTS_H \ No newline at end of file diff --git a/src/hurricanedata/datareader.cu b/src/hurricanedata/datareader.cu new file mode 100644 index 0000000..f62f316 --- /dev/null +++ b/src/hurricanedata/datareader.cu @@ -0,0 +1,77 @@ +#include "datareader.h" + +#include +#include + +using namespace std; +using namespace netCDF; + +DataReader::DataReader(const std::string &path, std::string variableName): +filePathManager(path), variableName(variableName) { } + +size_t DataReader::fileLength(size_t fileIndex) { + NcFile data(filePathManager.getPath(fileIndex), NcFile::read); + + multimap vars = data.getVars(); + + NcVar var = vars.find(variableName)->second; + + size_t length = 1; + for (NcDim dim: var.getDims()) { + length *= dim.getSize(); + } + + return length; +} + +size_t DataReader::axisLength(size_t fileIndex, const std::string& axisName) { + NcFile data(filePathManager.getPath(fileIndex), NcFile::read); + + multimap vars = data.getVars(); + + NcVar var = vars.find(axisName)->second; + + assert(var.getDimCount() == 1); + + netCDF::NcDim dim = var.getDim(0); + return dim.getSize(); +} + +template +void DataReader::loadFile(T* dataOut, size_t fileIndex) { + loadFile(dataOut, fileIndex, variableName); +} + +template +void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) { + // TODO: We could make the index wrap around so that we dont lose data + // using fileIndex % NUMBEROFFILES here. + + NcFile data(filePathManager.getPath(fileIndex), NcFile::read); + + multimap vars = data.getVars(); + + NcVar var = vars.find(varName)->second; + + var.getVar(dataOut); +} + +template void DataReader::loadFile(float* dataOut, size_t fileIndex, const string& variableName); +template void DataReader::loadFile(int* dataOut, size_t fileIndex, const string& variableName); +template void DataReader::loadFile(double* dataOut, size_t fileIndex, const string& variableName); +template void DataReader::loadFile(double* dataOut, size_t fileIndex); +template void DataReader::loadFile(float* dataOut, size_t fileIndex); +template void DataReader::loadFile(int* dataOut, size_t fileIndex); + +DataReader::~DataReader() { + +} + +// template +// void DataReader::readAndAllocateAxis(T** axis_ptr, size_t *size, const string &varName) { +// assert(var.getDimCount() == 1); +// netCDF::NcDim dim = var.getDim(0); +// *size = dim.getSize(); +// cudaError_t status = cudaMallocManaged(axis_ptr, *size*sizeof(T)); +// var.getVar(*axis_ptr); +// } \ No newline at end of file diff --git a/src/hurricanedata/datareader.h b/src/hurricanedata/datareader.h new file mode 100644 index 0000000..9c82aea --- /dev/null +++ b/src/hurricanedata/datareader.h @@ -0,0 +1,56 @@ +#ifndef DATAREADER_H +#define DATAREADER_H + +#include +#include +#include + +#include "filepathmanager.h" + + +/** + * @brief Simple wrapper around all the NetCDF functionality. + */ +class DataReader { +public: + /** + * @brief Constructor. + * @param path Path to the directory containing all the .nc4 files. + * @param variableName The variable we are interested in. + */ + DataReader(const std::string &path, std::string variableName); + + /** + * @brief The length of the flat data in variableName. + * Used for allocating the right amount of memory. + */ + size_t fileLength(size_t fileIndex); + + /** + * @brief Write all the data in file fileIndex of variable we re interested in into the dataOut. + */ + template + void loadFile(T* dataOut, size_t fileIndex); + + /** + * @brief Write all the data in file fileIndex of variable variableName into the dataOut. + * @param dataOut pointer to memory that should be written to. + * @param fileIndex the index of the file we want to load into memory. + * @param variableName the name of the variable + */ + template + void loadFile(T* dataOut, size_t fileIndex, const std::string& variableName); + + /** + * @brief Get size of a variable. + */ + size_t axisLength(size_t fileIndex, const std::string& axisName); + + ~DataReader(); +private: + FilePathManager filePathManager; + std::string variableName; +}; + + +#endif //DATAREADER_H diff --git a/src/hurricanedata/fielddata.cu b/src/hurricanedata/fielddata.cu new file mode 100644 index 0000000..38b0d25 --- /dev/null +++ b/src/hurricanedata/fielddata.cu @@ -0,0 +1,21 @@ +#include "fielddata.h" + +__device__ float getVal( + const FieldMetadata &md, + const FieldData &d, + const size_t &timeInd, + const size_t &levInd, + const size_t &latInd, + const size_t &lonInd +) { + size_t valArrInd = (d.fieldInd + timeInd) / md.numberOfTimeStepsPerFile; + size_t trueTimeInd = (d.fieldInd + timeInd) % md.numberOfTimeStepsPerFile; + size_t sizeSpatialData = md.widthSize*md.heightSize*md.depthSize; + size_t size2DMapData = md.widthSize*md.heightSize; + return d.valArrays[valArrInd][ + trueTimeInd*sizeSpatialData + + levInd*size2DMapData + + latInd*md.widthSize + + lonInd + ]; +} \ No newline at end of file diff --git a/src/hurricanedata/fielddata.h b/src/hurricanedata/fielddata.h new file mode 100644 index 0000000..4a3257c --- /dev/null +++ b/src/hurricanedata/fielddata.h @@ -0,0 +1,58 @@ +#ifndef FIELDDATA_H +#define FIELDDATA_H + +#include + +struct FieldMetadata { + size_t widthSize; // Number of different longitudes + size_t heightSize; // Number of different latitudes + size_t depthSize; // Number of different levels + size_t timeSize; // Number of different times + + // lons is a managed Unified Memory array of size widthCount that indicates + // that getVal(t, i, j, k) is a value with longitude of lons[i]. + // The other such arrays are similarly defined. + double *lons; + double *lats; + double *levs; + int *times; + + size_t numberOfTimeStepsPerFile; +}; + +using FieldMetadata = FieldMetadata; + +/** + * @brief Allows for accessing data of a time-slice of a scalar field + * that may start in the middle of a file or go range over multiple files + * by holding references to multiple files at a time. + * + * @note Use the getVal method to index into it and get values. + */ +struct FieldData { + static constexpr size_t FILESNUM = 2; // Number of files stored in a FieldData struct. + static constexpr size_t numberOfTimeStepsPerField = 2; + + size_t fieldInd; // Indicates + + // A uniform array of length FILESNUM storing pointers to 4D arrays stored in device memory. + float **valArrays; + +}; + +/** + * @brief Get the scalar field value at a particular index (timeInd, levInd, latInd, lonInd). + * Note that the timeInd may be counter-intuitive. See explanation in gpubufferhandler.h. + * + * @return scalar field float value. + */ +extern __device__ float getVal( + const FieldMetadata &md, + const FieldData &d, + const size_t &timeInd, + const size_t &levInd, + const size_t &latInd, + const size_t &lonInd +); + +#endif //FIELDDATA_H diff --git a/src/hurricanedata/filepathmanager.cpp b/src/hurricanedata/filepathmanager.cpp new file mode 100644 index 0000000..03c7f39 --- /dev/null +++ b/src/hurricanedata/filepathmanager.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include +#include + +#include "filepathmanager.h" + +namespace fs = std::filesystem; + +FilePathManager::FilePathManager(const std::string& path) { + // Check if the directory exists + if (!fs::exists(path) || !fs::is_directory(path)) { + std::cerr << "Error: Directory does not exist or is not valid." << std::endl; + exit(1); + } + + // Iterate over directory entries + for (const auto& entry : fs::directory_iterator(path)) { + if (entry.is_regular_file() && entry.path().extension() == ".nc4") { + fileNames.push_back(entry.path().string()); + } + } + + // Sort file names lexicographically + std::sort(fileNames.begin(), fileNames.end()); + + // Print sorted file names + std::cout << "Files in directory '" << path << "' sorted lexicographically:\n"; + for (const auto& fileName : fileNames) { + std::cout << fileName << std::endl; + } +} + +const char* FilePathManager::getPath(size_t index) const { + return fileNames.at(index).c_str(); +} + + +FilePathManager::~FilePathManager() { } \ No newline at end of file diff --git a/src/hurricanedata/filepathmanager.h b/src/hurricanedata/filepathmanager.h new file mode 100644 index 0000000..96f9ec5 --- /dev/null +++ b/src/hurricanedata/filepathmanager.h @@ -0,0 +1,28 @@ +#ifndef FILEPATHMANAGER_H +#define FILEPATHMANAGER_H + +#include +#include + +/** + * @brief Simple class that is responsible for mapping a file index to a file path. + * So for example: + * index = 0 -> MERRA2_400.inst6_3d_ana_Np.20120101.nc4 + * index = 1 -> MERRA2_400.inst6_3d_ana_Np.20120102.nc4 + * index = 2 -> MERRA2_400.inst6_3d_ana_Np.20120103.nc4 + * index = 3 -> MERRA2_400.inst6_3d_ana_Np.20120104.nc4 + * etc... + */ +class FilePathManager { +public: + FilePathManager(const std::string& path); + size_t getNumberOfFiles(); + const char* getPath(size_t index) const; + + ~FilePathManager(); + +private: + std::vector fileNames; +}; + +#endif //FILEPATHMANAGER_H \ No newline at end of file diff --git a/src/hurricanedata/gpubuffer.cu b/src/hurricanedata/gpubuffer.cu new file mode 100644 index 0000000..7edac38 --- /dev/null +++ b/src/hurricanedata/gpubuffer.cu @@ -0,0 +1,277 @@ +#include "gpubuffer.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +struct File { + condition_variable cv; + mutex m; + bool valid; + size_t size; + float *h_data; // host data + float *d_data; // device data +}; + +struct LoadFileJob { + size_t fileIndex; + size_t bufferIndex; +}; + +struct GetSizeJob { + size_t fileIndex; + promise size; +}; + +struct GetAxisDoubleJob { + size_t fileIndex; + string axisName; + promise> axis; +}; + +struct GetAxisIntJob { + size_t fileIndex; + string axisName; + promise> axis; +}; + +using Job = variant; + +class GPUBuffer::impl { +public: + impl(DataReader& dataReader); + void loadFile(LoadFileJob job); + void loadFile(size_t fileIndex, size_t bufferIndex); + void getSize(GetSizeJob job); + size_t getSize(size_t fileIndex); // Most probably blocking + void getAxis(GetAxisDoubleJob job); + void getAxis(GetAxisIntJob job); + + template + std::pair getAxis(size_t fileIndex, const std::string& axisName); // Most probably blocking + ~impl(); + + File buffer[numBufferedFiles]; + + // Thread worker things + void worker(); + queue jobs; + unique_ptr ioworker; + cudaStream_t iostream; + DataReader& dataReader; + condition_variable queuecv; + mutex queuecv_m; + bool workerRunning = true; +}; + +GPUBuffer::GPUBuffer(DataReader& dataReader): pImpl(make_unique(dataReader)) { } + +size_t GPUBuffer::impl::getSize(size_t fileIndex) { + promise promise; + future future = promise.get_future(); + { + lock_guard lk(queuecv_m); + jobs.push(GetSizeJob{fileIndex, move(promise)}); + } + queuecv.notify_all(); + + future.wait(); + + return future.get(); +} + +template +pair GPUBuffer::getAxis(size_t fileIndex, const string& axisName) { + return pImpl->getAxis(fileIndex, axisName); +} +template pair GPUBuffer::getAxis(size_t fileIndex, const string& axisName); +template pair GPUBuffer::getAxis(size_t fileIndex, const string& axisName); + +template <> +pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) { + promise> promise; + future> future = promise.get_future(); + { + lock_guard lk(queuecv_m); + jobs.push(GetAxisDoubleJob{fileIndex, axisName, move(promise)}); + } + queuecv.notify_all(); + + future.wait(); + + return future.get(); +} +template pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName); + +template <> +pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) { + promise> promise; + future> future = promise.get_future(); + { + lock_guard lk(queuecv_m); + jobs.push(GetAxisIntJob{fileIndex, axisName, move(promise)}); + } + queuecv.notify_all(); + + future.wait(); + + return future.get(); +} +template pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName); + +GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) { + cudaStreamCreate(&iostream); + + ioworker = make_unique([this]() { worker(); }); + + size_t size = getSize(0); + auto x = getAxis(0, "time"); + size_t sizeTime = x.first; + cudaFree(x.second); + for (size_t i = 0; i < numBufferedFiles; i++) { + { + File &file = buffer[i]; + lock_guard lk(file.m); + cudaMallocHost(&file.h_data, sizeof(float)*size); + cudaMalloc(&file.d_data, sizeof(float)*size); + file.size = size; + file.valid = false; + } + // loadFile(i, i); + } +} + +GPUBuffer::~GPUBuffer() { +} + +GPUBuffer::impl::~impl() { + { + lock_guard lk(queuecv_m); + workerRunning = false; + } + queuecv.notify_all(); + ioworker->join(); + for (size_t i = 0; i < numBufferedFiles; i++) { + File &file = buffer[i]; + cudaFree(file.d_data); + cudaFreeHost(file.h_data); + } + cudaStreamDestroy(iostream); +} + +void GPUBuffer::impl::loadFile(LoadFileJob job) { + File &file = buffer[job.bufferIndex]; + { + lock_guard lk(file.m); + assert(!file.valid); + cout << "loading file with index " << job.fileIndex << "\n"; + dataReader.loadFile(file.h_data, job.fileIndex); + cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream); + cudaStreamSynchronize(iostream); + buffer[job.bufferIndex].valid = true; + } + file.cv.notify_all(); +} + +void GPUBuffer::impl::getSize(GetSizeJob job) { + size_t size = dataReader.fileLength(job.fileIndex); + job.size.set_value(size); +} + +void GPUBuffer::impl::getAxis(GetAxisDoubleJob job) { + pair array; + array.first = dataReader.axisLength(job.fileIndex, job.axisName); + cudaError_t status = cudaMallocManaged(&array.second, array.first*sizeof(double)); + dataReader.loadFile(array.second, job.fileIndex, job.axisName); + job.axis.set_value(array); +} + +void GPUBuffer::impl::getAxis(GetAxisIntJob job) { + pair array; + array.first = dataReader.axisLength(job.fileIndex, job.axisName); + cudaError_t status = cudaMallocManaged(&array.second, array.first*sizeof(int)); + dataReader.loadFile(array.second, job.fileIndex, job.axisName); + job.axis.set_value(array); +} + +void GPUBuffer::loadFile(size_t fileIndex, size_t bufferIndex) { + pImpl->loadFile(fileIndex, bufferIndex); +} + +void GPUBuffer::impl::loadFile(size_t fileIndex, size_t bufferIndex) { + LoadFileJob job = { + .fileIndex = fileIndex, + .bufferIndex = bufferIndex + }; + + // Main thread theoretically blocks on 2 mutexes here + // but it _should_ never have to wait for them. + { + File &file = buffer[bufferIndex]; + + std::unique_lock lk(file.m, std::defer_lock); + bool lockval = lk.try_lock(); + if (!lockval) { + cout << "waited on GPUBuffer during loadFile orchestration :(\n"; + lk.lock(); + } + file.valid = false; + } + { + std::unique_lock lk(queuecv_m, std::defer_lock); + bool lockval = lk.try_lock(); + if (!lockval) { + cout << "waited on IOworker queue during loadFile orchestration :(\n"; + lk.lock(); + } + jobs.push(job); + } + queuecv.notify_all(); +} + +void GPUBuffer::impl::worker() { + while(workerRunning) { + Job job; + { + unique_lock lk(queuecv_m); + queuecv.wait(lk, [this]{ return !workerRunning || !jobs.empty(); }); + if (!workerRunning) { + return; + } + + job = move(jobs.front()); + jobs.pop(); + } + if(holds_alternative(job)) { + loadFile(get(job)); + } else if(holds_alternative(job)) { + getSize(move(get(job))); + } else if(holds_alternative(job)) { + getAxis(move(get(job))); + } else if(holds_alternative(job)) { + getAxis(move(get(job))); + } + + } +} + +DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) { + File &file = pImpl->buffer[bufferIndex]; + + // TODO: Might be nice to measure the blocking time here. + unique_lock lk(file.m); + file.cv.wait(lk, [this, bufferIndex]{ return pImpl->buffer[bufferIndex].valid; }); + + DataHandle dh = { + .d_data = file.d_data, + .size = file.size + }; + return dh; +} \ No newline at end of file diff --git a/src/hurricanedata/gpubuffer.h b/src/hurricanedata/gpubuffer.h new file mode 100644 index 0000000..b953abe --- /dev/null +++ b/src/hurricanedata/gpubuffer.h @@ -0,0 +1,60 @@ +#ifndef GPUBUFFER_H +#define GPUBUFFER_H + +#include +#include +#include + +#include "datareader.h" + +struct DataHandle { + float *d_data; // Device memory + size_t size; + // Could include the data stored in host memory h_data in this handle if it were needed. +}; + +/** + * @brief Handles the asynchronous (un)loading data to the GPU. The rest of the + * application should not have to interface directly with this class. Getting data + * should go over the GPUBufferHandler class. + * + * @note This class uses a queue to give loading jobs to the worker thread. + * NetCDF-C is not thread safe so you may never read data using netCDF directly + * on any other thread than this worker thread. + * + * Assumes all the data in the nc4 files is the same size. + * + */ +class GPUBuffer { +public: + static constexpr size_t numBufferedFiles = 3; // Number of files stored in memory at one time. + + GPUBuffer(DataReader& dataReader); + + /** + * @brief Asynchronously tells the GPUBuffer to eventually load a particular file index + * into a buffer index (in which part of the buffer the file should be stored). + */ + void loadFile(size_t fileIndex, size_t bufferIndex); // No blocking + + /** + * @brief Get the values stored in a particular buffer index + * @return A struct DataHandle that points to the memory and gives its size. + */ + DataHandle getDataHandle(size_t bufferIndex); // Potentially blocking + + /** + * @brief Get the data from an axis (e.g. longitude) and its size. + * @note This is a blocking operation, so it makes a job for the worker + * to read the data and then waits untill the job is completed. + */ + template + std::pair getAxis(size_t fileIndex, const std::string& axisName); // Most probably blocking + + ~GPUBuffer(); +private: + class impl; + std::experimental::propagate_const> pImpl; +}; + +#endif //GPUBUFFER_H diff --git a/src/hurricanedata/gpubufferhandler.cu b/src/hurricanedata/gpubufferhandler.cu new file mode 100644 index 0000000..606a47c --- /dev/null +++ b/src/hurricanedata/gpubufferhandler.cu @@ -0,0 +1,93 @@ +#include "gpubufferhandler.h" +#include "fielddata.h" + +#include + +using namespace std; + +GPUBufferHandler::GPUBufferHandler(GPUBuffer& gpuBuffer): +gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) { + cudaMallocManaged(&fmd, sizeof(FieldMetadata)); + + auto [widthSize, lons] = gpuBuffer.getAxis(0, "lon"); + fmd->widthSize = widthSize; + fmd->lons = lons; + + auto [heightSize, lats] = gpuBuffer.getAxis(0, "lat"); + fmd->heightSize = heightSize; + fmd->lats = lats; + + auto [depthSize, levs] = gpuBuffer.getAxis(0, "lev"); + fmd->depthSize = depthSize; + fmd->levs = levs; + + for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) { + gpuBuffer.loadFile(fileInd,fileInd); + fileInd++; + } + + fmd->timeSize = FieldData::numberOfTimeStepsPerField; + + cudaMallocManaged(&fmd->times, sizeof(fmd->numberOfTimeStepsPerFile*sizeof(int))); + + auto [numberOfTimeStepsPerFile, times] = gpuBuffer.getAxis(0, "time"); + fmd->numberOfTimeStepsPerFile = numberOfTimeStepsPerFile; + fmd->times = times; + + cudaMallocManaged(&valArrays, sizeof(float *)*FieldData::FILESNUM); +} + +FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) { + + FieldData fd; + size_t fieldDataInd = 0; + fd.valArrays = valArrays; + cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << " with a field index of " << fieldInd << "\n"; + for (int i = bufferInd; i <= newEndBufferInd; i++) { + DataHandle x = gpuBuffer.getDataHandle(i); + fd.valArrays[fieldDataInd] = x.d_data; + fieldDataInd++; + } + fd.fieldInd = fieldInd; + + return fd; +} + +FieldData GPUBufferHandler::nextFieldData() { + DataHandle x = gpuBuffer.getDataHandle(bufferInd); + size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile; + size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles; + + size_t endFieldInd = (fieldInd + FieldData::numberOfTimeStepsPerField - 1) % fmd->numberOfTimeStepsPerFile; + size_t endBufferInd = (bufferInd + (fieldInd + FieldData::numberOfTimeStepsPerField - 1)/fmd->numberOfTimeStepsPerFile) % GPUBuffer::numBufferedFiles; + + size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile; + size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles; + + if(firstTimeStep) { + firstTimeStep = false; + return setupField(endBufferInd); + } + + fieldInd = newFieldInd; + + if (newBufferInd != bufferInd) { + gpuBuffer.loadFile(fileInd, bufferInd); + fileInd++; + bufferInd = newBufferInd; + } + + if (newEndBufferInd != endBufferInd) { + // maybe dont do things? + } + + return setupField(newEndBufferInd); +} + +GPUBufferHandler::~GPUBufferHandler() { + cudaFree(fmd->lons); + cudaFree(fmd->lats); + cudaFree(fmd->levs); + cudaFree(valArrays); + cudaFree(fmd); +} \ No newline at end of file diff --git a/src/hurricanedata/gpubufferhandler.h b/src/hurricanedata/gpubufferhandler.h new file mode 100644 index 0000000..2b4338b --- /dev/null +++ b/src/hurricanedata/gpubufferhandler.h @@ -0,0 +1,64 @@ +#ifndef GPUBUFFERHANDLER_H +#define GPUBUFFERHANDLER_H + +#include "fielddata.h" +#include "gpubuffer.h" + +#include + +/** + * @brief Responsible for deciding when the GPUBuffer should load and unload files. + * Also assembles and gives access to FieldData. + * + * You will need to interface with this class. + */ +class GPUBufferHandler { +public: + GPUBufferHandler(GPUBuffer& gpuBuffer); + + /** + * @brief Produces a FieldData which can be used to retrieve values for a time-slice + * into a scalar field with a width of FieldData::numberOfTimeStepsPerField. + * + * @details This method always increments the start point of the time-slice + * by 1. See below: + * + * time steps = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ... + * files (4 time steps per file) = [0 1 2 3][4 5 6 7][8 9 10 11][12 13 ... + * nextFieldData() (1st call) = [0 1] + * nextFieldData() (2nd call) = [1 2] + * nextFieldData() (3rd call) = [2 3] + * nextFieldData() (4th call) = [3 4] + * nextFieldData() (5th call) = [4 5] + * etc... + * + * When getting values from a FieldData using the getVal method, + * the time index is relative to the start of the time-slice. + * + * This means that for d = nextFieldData() (4th call), + * getVal(.., fieldData=d, ..., timeInd = 1, ...) gives a value at + * absolute time step 5 as seen above. + */ + FieldData nextFieldData(); + + ~GPUBufferHandler(); + + /** + * You can get the FieldMetaData from here. + */ + FieldMetadata *fmd; + + static void freeFieldData(); + +private: + FieldData setupField(size_t endBufferInd); + GPUBuffer& gpuBuffer; + size_t fileInd; + size_t bufferInd; + size_t fieldInd; + bool firstTimeStep = true; + + float **valArrays; +}; + +#endif //GPUBUFFERHANDLER_H diff --git a/src/illumination/illumination.h b/src/illumination/illumination.h new file mode 100644 index 0000000..6e9b6f0 --- /dev/null +++ b/src/illumination/illumination.h @@ -0,0 +1,7 @@ +#ifndef ILLUMINATION_H +#define ILLUMINATION_H + +#include "raycaster.h" +#include "shading.h" + +#endif // ILLUMINATION_H \ No newline at end of file diff --git a/src/illumination/raycaster.h b/src/illumination/raycaster.h new file mode 100644 index 0000000..a1c4292 --- /dev/null +++ b/src/illumination/raycaster.h @@ -0,0 +1,126 @@ +#ifndef RAYCASTER_H +#define RAYCASTER_H + +#include +#include "linalg/linalg.h" +#include "consts.h" +#include "shading.h" + + +// Raycast + phong, TODO: Consider wrapping in a class +__global__ void raycastKernel(float* volumeData, unsigned char* framebuffer) { + int px = blockIdx.x * blockDim.x + threadIdx.x; + int py = blockIdx.y * blockDim.y + threadIdx.y; + if (px >= IMAGE_WIDTH || py >= IMAGE_HEIGHT) return; + + float accumR = 0.0f; + float accumG = 0.0f; + float accumB = 0.0f; + + // Multiple samples per pixel + for (int s = 0; s < SAMPLES_PER_PIXEL; s++) { + // Map to [-1, 1] + float u = ((px + 0.5f) / IMAGE_WIDTH ) * 2.0f - 1.0f; + float v = ((py + 0.5f) / IMAGE_HEIGHT) * 2.0f - 1.0f; + + // TODO: Move this (and all similar transformation code) to its own separate file + float tanHalfFov = tanf(fov * 0.5f); + u *= tanHalfFov; + v *= tanHalfFov; + + // Find ray direction + Vec3 cameraRight = (d_cameraDir.cross(d_cameraUp)).normalize(); + d_cameraUp = (cameraRight.cross(d_cameraDir)).normalize(); + Vec3 rayDir = (d_cameraDir + cameraRight*u + d_cameraUp*v).normalize(); + + // Intersect (for simplicity just a 3D box from 0 to 1 in all dimensions) - TODO: Think about whether this is the best way to do this + float tNear = 0.0f; + float tFar = 1e6f; + auto intersectAxis = [&](float start, float dirVal) { + if (fabsf(dirVal) < epsilon) { + if (start < 0.f || start > 1.f) { + tNear = 1e9f; + tFar = -1e9f; + } + } else { + float t0 = (0.0f - start) / dirVal; + float t1 = (1.0f - start) / dirVal; + if (t0>t1) { + float tmp=t0; + t0=t1; + t1=tmp; + } + if (t0>tNear) tNear = t0; + if (t1 tFar) continue; // No intersectionn + if (tNear < 0.0f) tNear = 0.0f; + + float colorR = 0.0f, colorG = 0.0f, colorB = 0.0f; + float alphaAccum = 0.0f; + + float tCurrent = tNear; + while (tCurrent < tFar && alphaAccum < alphaAcumLimit) { + Point3 pos = d_cameraPos + rayDir * tCurrent; + + // Convert to volume indices + float fx = pos.x * (VOLUME_WIDTH - 1); + float fy = pos.y * (VOLUME_HEIGHT - 1); + float fz = pos.z * (VOLUME_DEPTH - 1); + int ix = (int)roundf(fx); + int iy = (int)roundf(fy); + int iz = (int)roundf(fz); + + // Sample + float density = sampleVolumeNearest(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz); + + // Basic transfer function. TODO: Move to a separate file, and then improve + float alphaSample = density * 0.1f; + // float alphaSample = 1.0f - expf(-density * 0.1f); + Color3 baseColor = Color3::init(density, 0.1f*density, 1.f - density); // TODO: Implement a proper transfer function + + // If density ~ 0, skip shading + if (density > minAllowedDensity) { + Vec3 grad = computeGradient(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz); + Vec3 normal = -grad.normalize(); + + Vec3 lightDir = (d_lightPos - pos).normalize(); + Vec3 viewDir = -rayDir.normalize(); + + // Apply Phong + Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor); + + // Compose + colorR += (1.0f - alphaAccum) * shadedColor.x * alphaSample; + colorG += (1.0f - alphaAccum) * shadedColor.y * alphaSample; + colorB += (1.0f - alphaAccum) * shadedColor.z * alphaSample; + alphaAccum += (1.0f - alphaAccum) * alphaSample; + } + + tCurrent += stepSize; + } + + accumR += colorR; + accumG += colorG; + accumB += colorB; + } + + // Average samples + accumR /= (float)SAMPLES_PER_PIXEL; + accumG /= (float)SAMPLES_PER_PIXEL; + accumB /= (float)SAMPLES_PER_PIXEL; + + // Final colour + int fbIndex = (py * IMAGE_WIDTH + px) * 3; + framebuffer[fbIndex + 0] = (unsigned char)(fminf(accumR, 1.f) * 255); + framebuffer[fbIndex + 1] = (unsigned char)(fminf(accumG, 1.f) * 255); + framebuffer[fbIndex + 2] = (unsigned char)(fminf(accumB, 1.f) * 255); +} + +#endif // RAYCASTER_H \ No newline at end of file diff --git a/src/illumination/shading.h b/src/illumination/shading.h new file mode 100644 index 0000000..fa3ee8e --- /dev/null +++ b/src/illumination/shading.h @@ -0,0 +1,20 @@ +#ifndef SHADING_H +#define SHADING_H + +#include "linalg/linalg.h" +#include "consts.h" + +// TODO: Consider wrapping this in a class (?) +__device__ Vec3 phongShading(const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& baseColor) { + Vec3 ambient = baseColor * ambientStrength; + double diff = fmax(normal.dot(lightDir), 0.0); + Vec3 diffuse = baseColor * (diffuseStrength * diff); + + Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); + double spec = pow(fmax(viewDir.dot(reflectDir), 0.0), shininess); + Vec3 specular = Vec3::init(1.0, 1.0, 1.0) * (specularStrength * spec); + + return ambient + diffuse + specular; +} + +#endif // SHADING_H \ No newline at end of file diff --git a/src/linalg/mat.h b/src/linalg/mat.h index 6f70f09..92f6fdc 100644 --- a/src/linalg/mat.h +++ b/src/linalg/mat.h @@ -1 +1,24 @@ #pragma once + +__device__ Vec3 computeGradient(float* volumeData, const int volW, const int volH, const int volD, int x, int y, int z) { + // Finite difference for partial derivatives. + // For boundary voxels - clamp to the boundary. + // Normal should point from higher to lower intensities + + int xm = max(x - 1, 0); + int xp = min(x + 1, volW - 1); + int ym = max(y - 1, 0); + int yp = min(y + 1, volH - 1); + int zm = max(z - 1, 0); + int zp = min(z + 1, volD - 1); + + // Note: Assuming data is linearized (idx = z*w*h + y*w + x) TODO: Unlinearize if data not linear + float gx = volumeData[z * volW * volH + y * volW + xp] + - volumeData[z * volW * volH + y * volW + xm]; + float gy = volumeData[z * volW * volH + yp * volW + x ] + - volumeData[z * volW * volH + ym * volW + x ]; + float gz = volumeData[zp * volW * volH + y * volW + x ] + - volumeData[zm * volW * volH + y * volW + x ]; + + return Vec3::init(gx, gy, gz); +} diff --git a/src/linalg/vec.h b/src/linalg/vec.h index ef505bd..9fc6745 100644 --- a/src/linalg/vec.h +++ b/src/linalg/vec.h @@ -3,16 +3,28 @@ #include #include -struct Vec3 { // TODO: Maybe make this into a class +struct Vec3 { // TODO: Maybe make this into a class ... maybe double x, y, z; - __host__ __device__ Vec3() : x(0), y(0), z(0) {} - __host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {} + static __host__ __device__ Vec3 init(double x, double y, double z) {Vec3 v = {x, y, z}; return v;} + static __host__ __device__ Vec3 zero() { return Vec3::init(0, 0, 0); } + static __host__ __device__ Vec3 init() { return zero(); } + static __host__ __device__ Vec3 init(const double (&arr)[3]) { return Vec3::init(arr[0], arr[1], arr[2]); } + + __host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3::init(x + b.x, y + b.y, z + b.z); } + __host__ __device__ Vec3& operator+=(const Vec3& b) { x += b.x; y += b.y; z += b.z; return *this; } + + __host__ __device__ Vec3 operator-() const { return Vec3::init(-x, -y, -z); } + __host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3::init(x - b.x, y - b.y, z - b.z); } + __host__ __device__ Vec3& operator-=(const Vec3& b) { x -= b.x; y -= b.y; z -= b.z; return *this; } + + __host__ __device__ Vec3 operator*(double b) const { return Vec3::init(x * b, y * b, z * b); } + __host__ __device__ Vec3& operator*=(double b) { x *= b; y *= b; z *= b; return *this; } - __host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3(x + b.x, y + b.y, z + b.z); } - __host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3(x - b.x, y - b.y, z - b.z); } - __host__ __device__ Vec3 operator*(double b) const { return Vec3(x * b, y * b, z * b); } - __host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); } __host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; } - __host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3(x / len, y / len, z / len); } -}; \ No newline at end of file + __host__ __device__ Vec3 cross(const Vec3& b) const { return Vec3::init(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x); } + __host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3::init(x / len, y / len, z / len); } +}; + +typedef Vec3 Point3; +typedef Vec3 Color3; \ No newline at end of file diff --git a/src/main.cu b/src/main.cu index fafaec8..3c7b38a 100644 --- a/src/main.cu +++ b/src/main.cu @@ -1,102 +1,112 @@ -#include -#include #include +#include #include +#include +#include +#include -#include "linalg/linalg.h" +#include "hurricanedata/datareader.h" +#include "linalg/linalg.h" #include "objs/sphere.h" #include "img/handler.h" - -#define WIDTH 3840 -#define HEIGHT 2160 -#define SAMPLES_PER_PIXEL 8 +#include "consts.h" +#include "illumination/illumination.h" -__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { - double ambientStrength = 0.1; - double diffuseStrength = 0.8; - double specularStrength = 0.5; - int shininess = 64; +static float* d_volume = nullptr; - Vec3 ambient = color * ambientStrength; - double diff = max(normal.dot(lightDir), 0.0); - Vec3 diffuse = color * (diffuseStrength * diff); - Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); - Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); - - return ambient + diffuse + specular; +void getTemperature(std::vector& temperatureData, int idx = 0) { + std::string path = "data/trimmed"; + std::string variable = "T"; + DataReader dataReader(path, variable); + size_t dataLength = dataReader.fileLength(idx); + temperatureData.resize(dataLength); + dataReader.loadFile(temperatureData.data(), idx); } -__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= WIDTH || y >= HEIGHT) return; +void getSpeed(std::vector& speedData, int idx = 0) { + std::string path = "data/trimmed"; + std::string varU = "U"; + std::string varV = "V"; - int pixelIndex = (y * WIDTH + x) * 3; - Vec3 rayOrigin(0, 0, 0); - Vec3 colCum(0, 0, 0); + DataReader dataReaderU(path, varU); + DataReader dataReaderV(path, varV); - double spp = static_cast(SAMPLES_PER_PIXEL); - for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) { - double u = (x + (sample / spp) - WIDTH / 2.0) / WIDTH; - double v = (y + (sample / spp) - HEIGHT / 2.0) / HEIGHT; - Vec3 rayDir(u, v, 1.0); - rayDir = rayDir.normalize(); + size_t dataLength = dataReaderU.fileLength(idx); + speedData.resize(dataLength); + std::vector uData(dataLength); + std::vector vData(dataLength); - for (int i = 0; i < numSpheres; ++i) { - double t; - if (spheres[i].intersect(rayOrigin, rayDir, t)) { - Vec3 hitPoint = rayOrigin + rayDir * t; - Vec3 normal = (hitPoint - spheres[i].center).normalize(); - Vec3 lightDir = (lightPos - hitPoint).normalize(); - Vec3 viewDir = -rayDir; + dataReaderU.loadFile(uData.data(), idx); + dataReaderV.loadFile(vData.data(), idx); - colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color); - } - } + for (int i = 0; i < dataLength; i++) { + speedData[i] = sqrt(uData[i]*uData[i] + vData[i]*vData[i]); + } +} + +int main(int argc, char** argv) { + std::vector data; + // getTemperature(data); + getSpeed(data); + + + // TODO: Eveontually remove debug below (i.e., eliminate for-loop etc.) + // Generate debug volume data + float* hostVolume = new float[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH]; + // generateVolume(hostVolume, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH); + for (int i = 0; i < VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; i++) { // TODO: This is technically an unnecessary artifact of the old code taking in a float* instead of a std::vector + // Discard temperatures above a small star (supposedly, missing temperature values) + hostVolume[i] = data[i]; + if (data[i] + epsilon >= infty) hostVolume[i] = 0.0f; } - // Average color across all samples - Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL); + // Min-max normalization + float minVal = *std::min_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH); + float maxVal = *std::max_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH); + for (int i = 0; i < VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; i++) { + hostVolume[i] = (hostVolume[i] - minVal) / (maxVal - minVal); + } - framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); - framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); - framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); -} - - - -int main() { - Sphere spheres[] = { - { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere - { Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere - { Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere - }; - int numSpheres = sizeof(spheres) / sizeof(Sphere); - Vec3 lightPos(5, 5, 0); + // Allocate + copy data to GPU + size_t volumeSize = sizeof(float) * VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; + cudaMalloc((void**)&d_volume, volumeSize); + cudaMemcpy(d_volume, hostVolume, volumeSize, cudaMemcpyHostToDevice); + // Allocate framebuffer unsigned char* d_framebuffer; - unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; - Sphere* d_spheres; - cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); - cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere)); - cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice); + size_t fbSize = IMAGE_WIDTH * IMAGE_HEIGHT * 3 * sizeof(unsigned char); + cudaMalloc((void**)&d_framebuffer, fbSize); + cudaMemset(d_framebuffer, 0, fbSize); - dim3 threadsPerBlock(16, 16); - dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, - (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); - renderKernel<<>>(d_framebuffer, d_spheres, numSpheres, lightPos); + // Copy external constants from consts.h to cuda + copyConstantsToDevice(); + + // Launch kernel + dim3 blockSize(16, 16); // TODO: Figure out a good size for parallelization + dim3 gridSize((IMAGE_WIDTH + blockSize.x - 1)/blockSize.x, + (IMAGE_HEIGHT + blockSize.y - 1)/blockSize.y); + + raycastKernel<<>>( + d_volume, + d_framebuffer + ); cudaDeviceSynchronize(); - cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); - saveImage("output.ppm", h_framebuffer, WIDTH, HEIGHT); + // Copy framebuffer back to CPU + unsigned char* hostFramebuffer = new unsigned char[IMAGE_WIDTH * IMAGE_HEIGHT * 3]; + cudaMemcpy(hostFramebuffer, d_framebuffer, fbSize, cudaMemcpyDeviceToHost); + // Export image + saveImage("output.ppm", hostFramebuffer, IMAGE_WIDTH, IMAGE_HEIGHT); + + // Cleanup + delete[] hostVolume; + delete[] hostFramebuffer; + cudaFree(d_volume); cudaFree(d_framebuffer); - cudaFree(d_spheres); - delete[] h_framebuffer; - std::cout << "High-resolution image saved as output.ppm" << std::endl; + std::cout << "Phong-DVR rendering done. Image saved to output.ppm" << std::endl; return 0; -} +} \ No newline at end of file diff --git a/src/objs/sphere.h b/src/objs/sphere.h index a9b20b4..dbac251 100644 --- a/src/objs/sphere.h +++ b/src/objs/sphere.h @@ -5,6 +5,7 @@ #include "linalg/linalg.h" +// TODO: This is technically just for debugging, but if it is to be used outside of that, it should be a made into a proper class (I mean, just look at those functions below, it screams "add class attributes") struct Sphere { Vec3 center; double radius; @@ -20,4 +21,74 @@ struct Sphere { t = -b - h; return true; } -}; \ No newline at end of file +}; + +// A function to generate two concentric spherical shells +__host__ void generateVolume(float* volumeData, int volW, int volH, int volD) { + int cx = volW / 2; + int cy = volH / 2; + int cz = volD / 2; + float maxRadius = static_cast(volW) * 0.5f; + + // Two shells + float shell1Inner = 0.2f * maxRadius; + float shell1Outer = 0.3f * maxRadius; + float shell2Inner = 0.4f * maxRadius; + float shell2Outer = 0.5f * maxRadius; + + float shell1Intensity = 0.8f; + float shell2Intensity = 0.6f; + + for (int z = 0; z < volD; ++z) { + for (int y = 0; y < volH; ++y) { + for (int x = 0; x < volW; ++x) { + float dx = (float)(x - cx); + float dy = (float)(y - cy); + float dz = (float)(z - cz); + float dist = sqrtf(dx*dx + dy*dy + dz*dz); + + float intensity = 0.0f; + + // Shell 1 + if (dist >= shell1Inner && dist <= shell1Outer) { + float mid = 0.5f * (shell1Inner + shell1Outer); + if (dist < mid) { + float inFactor = (dist - shell1Inner) / (mid - shell1Inner); + intensity += shell1Intensity * inFactor; + } else { + float outFactor = (shell1Outer - dist) / (shell1Outer - mid); + intensity += shell1Intensity * outFactor; + } + } + + // Shell 2 + if (dist >= shell2Inner && dist <= shell2Outer) { + float mid = 0.5f * (shell2Inner + shell2Outer); + if (dist < mid) { + float inFactor = (dist - shell2Inner) / (mid - shell2Inner); + intensity += shell2Intensity * inFactor; + } else { + float outFactor = (shell2Outer - dist) / (shell2Outer - mid); + intensity += shell2Intensity * outFactor; + } + } + + if (intensity > 1.0f) intensity = 1.0f; + volumeData[z * volW * volH + y * volW + x] = intensity; + } + } + } +} + +// Samples the voxel nearest to the given coordinates. TODO: Can be re-used in other places so move +__device__ float sampleVolumeNearest(float* volumeData, const int volW, const int volH, const int volD, int vx, int vy, int vz) { + if (vx < 0) vx = 0; + if (vy < 0) vy = 0; + if (vz < 0) vz = 0; + if (vx >= volW) vx = volW - 1; + if (vy >= volH) vy = volH - 1; + if (vz >= volD) vz = volD - 1; + + int idx = vz * volW * volH + vy * volD + vx; + return volumeData[idx]; +} \ No newline at end of file diff --git a/src/tests/a.out b/src/tests/a.out deleted file mode 100755 index 3cf0e96..0000000 Binary files a/src/tests/a.out and /dev/null differ diff --git a/src/tests/deprecated_test.cu b/src/tests/deprecated_test.cu deleted file mode 100644 index b661dad..0000000 --- a/src/tests/deprecated_test.cu +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include -#include -#include - -#define WIDTH 800 -#define HEIGHT 600 - -struct Vec3 { - double x, y, z; - - __host__ __device__ Vec3() : x(0), y(0), z(0) {} - __host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {} - - __host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3(x + b.x, y + b.y, z + b.z); } - __host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3(x - b.x, y - b.y, z - b.z); } - __host__ __device__ Vec3 operator*(double b) const { return Vec3(x * b, y * b, z * b); } - __host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); } - __host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; } - __host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3(x / len, y / len, z / len); } -}; - -// Simple Phong lighting components -struct Sphere { - Vec3 center; - double radius; - Vec3 color; - - __device__ bool intersect(const Vec3& rayOrigin, const Vec3& rayDir, double& t) const { - Vec3 oc = rayOrigin - center; - double b = oc.dot(rayDir); - double c = oc.dot(oc) - radius * radius; - double h = b * b - c; - if (h < 0.0) return false; - h = sqrt(h); - t = -b - h; - return true; - } -}; - -__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { - double ambientStrength = 0.1; - double diffuseStrength = 0.8; - double specularStrength = 0.5; - int shininess = 32; - - // Ambient - Vec3 ambient = color * ambientStrength; - - // Diffuse - double diff = max(normal.dot(lightDir), 0.0); - Vec3 diffuse = color * (diffuseStrength * diff); - - // Specular - Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); - Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); - - return ambient + diffuse + specular; -} - -__global__ void renderKernel(unsigned char* framebuffer, Sphere sphere, Vec3 lightPos) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= WIDTH || y >= HEIGHT) return; - - int pixelIndex = (y * WIDTH + x) * 3; - - Vec3 rayOrigin(0, 0, 0); - Vec3 rayDir((x - WIDTH / 2.0) / WIDTH, (y - HEIGHT / 2.0) / HEIGHT, 1.0); - rayDir = rayDir.normalize(); - - double t; - if (sphere.intersect(rayOrigin, rayDir, t)) { - Vec3 hitPoint = rayOrigin + rayDir * t; - Vec3 normal = (hitPoint - sphere.center).normalize(); - Vec3 lightDir = (lightPos - hitPoint).normalize(); - Vec3 viewDir = -rayDir; - - Vec3 color = phongShading(hitPoint, normal, lightDir, viewDir, sphere.color); - - framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); - framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); - framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); - } else { - framebuffer[pixelIndex] = 0; - framebuffer[pixelIndex + 1] = 0; - framebuffer[pixelIndex + 2] = 0; - } -} - -void saveImage(const char* filename, unsigned char* framebuffer) { - std::ofstream imageFile(filename, std::ios::out | std::ios::binary); - imageFile << "P6\n" << WIDTH << " " << HEIGHT << "\n255\n"; - for (int i = 0; i < WIDTH * HEIGHT * 3; i++) { - imageFile << framebuffer[i]; - } - imageFile.close(); -} - -int main() { - // Initialize sphere and light source - Sphere sphere = { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }; // Red sphere - Vec3 lightPos(5, 5, 0); - - // Allocate framebuffer on device and host - unsigned char* d_framebuffer; - unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; - cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); - - // Launch - dim3 threadsPerBlock(16, 16); - dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, - (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); - renderKernel<<>>(d_framebuffer, sphere, lightPos); - cudaDeviceSynchronize(); - - // Copy result back to host and save - cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); - saveImage("output.ppm", h_framebuffer); - - // Clean up - cudaFree(d_framebuffer); - delete[] h_framebuffer; - - std::cout << "Image saved as output.ppm" << std::endl; - return 0; -} diff --git a/src/tests/hello_world b/src/tests/hello_world deleted file mode 100755 index bb85e75..0000000 Binary files a/src/tests/hello_world and /dev/null differ diff --git a/src/tests/hello_world.cu b/src/tests/hello_world.cu deleted file mode 100644 index a3b2c6a..0000000 --- a/src/tests/hello_world.cu +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include - -__global__ void hello_from_gpu() { - printf("Hello from GPU!\n"); -} - -int main() { - hello_from_gpu<<<1, 1>>>(); - - cudaDeviceSynchronize(); - - // Reset device - cudaDeviceReset(); - return 0; -} diff --git a/src/tests/output.ppm b/src/tests/output.ppm deleted file mode 100644 index 1d7b780..0000000 Binary files a/src/tests/output.ppm and /dev/null differ diff --git a/src/tests/test_heavy.cu b/src/tests/test_heavy.cu deleted file mode 100644 index 1f70108..0000000 --- a/src/tests/test_heavy.cu +++ /dev/null @@ -1,102 +0,0 @@ -#include -#include -#include -#include - -#include "linalg/linalg.h" -#include "sphere.h" -#include "img/handler.h" - -#define WIDTH 3840 -#define HEIGHT 2160 -#define SAMPLES_PER_PIXEL 8 - - -__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { - double ambientStrength = 0.1; - double diffuseStrength = 0.8; - double specularStrength = 0.5; - int shininess = 64; - - Vec3 ambient = color * ambientStrength; - double diff = max(normal.dot(lightDir), 0.0); - Vec3 diffuse = color * (diffuseStrength * diff); - - Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); - Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); - - return ambient + diffuse + specular; -} - -__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= WIDTH || y >= HEIGHT) return; - - int pixelIndex = (y * WIDTH + x) * 3; - Vec3 rayOrigin(0, 0, 0); - Vec3 colCum(0, 0, 0); - - double spp = static_cast(SAMPLES_PER_PIXEL); - for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) { - double u = (x + (sample / spp) - WIDTH / 2.0) / WIDTH; - double v = (y + (sample / spp) - HEIGHT / 2.0) / HEIGHT; - Vec3 rayDir(u, v, 1.0); - rayDir = rayDir.normalize(); - - for (int i = 0; i < numSpheres; ++i) { - double t; - if (spheres[i].intersect(rayOrigin, rayDir, t)) { - Vec3 hitPoint = rayOrigin + rayDir * t; - Vec3 normal = (hitPoint - spheres[i].center).normalize(); - Vec3 lightDir = (lightPos - hitPoint).normalize(); - Vec3 viewDir = -rayDir; - - colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color); - } - } - } - - // Average color across all samples - Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL); - - framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); - framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); - framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); -} - - - -int main() { - Sphere spheres[] = { - { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere - { Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere - { Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere - }; - int numSpheres = sizeof(spheres) / sizeof(Sphere); - Vec3 lightPos(5, 5, 0); - - unsigned char* d_framebuffer; - unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; - Sphere* d_spheres; - cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); - cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere)); - cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice); - - dim3 threadsPerBlock(16, 16); - dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, - (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); - renderKernel<<>>(d_framebuffer, d_spheres, numSpheres, lightPos); - cudaDeviceSynchronize(); - - cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); - saveImage("output.ppm", h_framebuffer, WIDTH, HEIGHT); - - cudaFree(d_framebuffer); - cudaFree(d_spheres); - delete[] h_framebuffer; - - std::cout << "High-resolution image saved as output.ppm" << std::endl; - return 0; -}