diff --git a/.gitignore b/.gitignore index 6d4600a..1c3a1df 100644 --- a/.gitignore +++ b/.gitignore @@ -2,10 +2,11 @@ .vscode # Build -build build/ -build/* *.o +# Data +data/ + # Resulting images *.ppm diff --git a/load-modules.sh b/load-modules.sh new file mode 100755 index 0000000..c6fcab4 --- /dev/null +++ b/load-modules.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +# Load the needed modules in Habrok + +ml CUDA +ml netCDF-C++4 diff --git a/makefile b/makefile index 18993ff..be9c702 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,7 @@ # Compiler and flags NVCC = nvcc -CXXFLAGS = -I./src -I./linalg -I./img -I./objs -std=c++17 +CXXFLAGS = -I./src -I./hurricanedata -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs) -g -G +COMPILE_OBJ_FLAGS = --device-c # Directories SRC_DIR = src @@ -8,7 +9,7 @@ BUILD_DIR = build # Files TARGET = $(BUILD_DIR)/main -SRC_FILES = $(wildcard $(SRC_DIR)/*.cu) +SRC_FILES := $(shell find $(SRC_DIR) -type f \( -name '*.cu' -o -name '*.cpp' \)) OBJ_FILES = $(patsubst $(SRC_DIR)/%.cu,$(BUILD_DIR)/%.o,$(SRC_FILES)) # Default target @@ -16,11 +17,12 @@ all: $(TARGET) # Build the main target $(TARGET): $(OBJ_FILES) | $(BUILD_DIR) - $(NVCC) $(CXXFLAGS) -o $@ $^ + $(NVCC) $(CXXFLAGS) $^ -o $@ # Compile object files $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cu - $(NVCC) $(CXXFLAGS) -c $< -o $@ + @mkdir -p $(dir $@) + $(NVCC) $(CXXFLAGS) $(COMPILE_OBJ_FLAGS) -c $< -o $@ # Debug build debug: CXXFLAGS += -g @@ -34,4 +36,4 @@ clean: $(BUILD_DIR): mkdir -p $(BUILD_DIR) -.PHONY: all clean debug +.PHONY: all clean debug \ No newline at end of file diff --git a/src/hurricanedata/datareader.cu b/src/hurricanedata/datareader.cu new file mode 100644 index 0000000..f62f316 --- /dev/null +++ b/src/hurricanedata/datareader.cu @@ -0,0 +1,77 @@ +#include "datareader.h" + +#include +#include + +using namespace std; +using namespace netCDF; + +DataReader::DataReader(const std::string &path, std::string variableName): +filePathManager(path), variableName(variableName) { } + +size_t DataReader::fileLength(size_t fileIndex) { + NcFile data(filePathManager.getPath(fileIndex), NcFile::read); + + multimap vars = data.getVars(); + + NcVar var = vars.find(variableName)->second; + + size_t length = 1; + for (NcDim dim: var.getDims()) { + length *= dim.getSize(); + } + + return length; +} + +size_t DataReader::axisLength(size_t fileIndex, const std::string& axisName) { + NcFile data(filePathManager.getPath(fileIndex), NcFile::read); + + multimap vars = data.getVars(); + + NcVar var = vars.find(axisName)->second; + + assert(var.getDimCount() == 1); + + netCDF::NcDim dim = var.getDim(0); + return dim.getSize(); +} + +template +void DataReader::loadFile(T* dataOut, size_t fileIndex) { + loadFile(dataOut, fileIndex, variableName); +} + +template +void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) { + // TODO: We could make the index wrap around so that we dont lose data + // using fileIndex % NUMBEROFFILES here. + + NcFile data(filePathManager.getPath(fileIndex), NcFile::read); + + multimap vars = data.getVars(); + + NcVar var = vars.find(varName)->second; + + var.getVar(dataOut); +} + +template void DataReader::loadFile(float* dataOut, size_t fileIndex, const string& variableName); +template void DataReader::loadFile(int* dataOut, size_t fileIndex, const string& variableName); +template void DataReader::loadFile(double* dataOut, size_t fileIndex, const string& variableName); +template void DataReader::loadFile(double* dataOut, size_t fileIndex); +template void DataReader::loadFile(float* dataOut, size_t fileIndex); +template void DataReader::loadFile(int* dataOut, size_t fileIndex); + +DataReader::~DataReader() { + +} + +// template +// void DataReader::readAndAllocateAxis(T** axis_ptr, size_t *size, const string &varName) { +// assert(var.getDimCount() == 1); +// netCDF::NcDim dim = var.getDim(0); +// *size = dim.getSize(); +// cudaError_t status = cudaMallocManaged(axis_ptr, *size*sizeof(T)); +// var.getVar(*axis_ptr); +// } \ No newline at end of file diff --git a/src/hurricanedata/datareader.h b/src/hurricanedata/datareader.h new file mode 100644 index 0000000..9c82aea --- /dev/null +++ b/src/hurricanedata/datareader.h @@ -0,0 +1,56 @@ +#ifndef DATAREADER_H +#define DATAREADER_H + +#include +#include +#include + +#include "filepathmanager.h" + + +/** + * @brief Simple wrapper around all the NetCDF functionality. + */ +class DataReader { +public: + /** + * @brief Constructor. + * @param path Path to the directory containing all the .nc4 files. + * @param variableName The variable we are interested in. + */ + DataReader(const std::string &path, std::string variableName); + + /** + * @brief The length of the flat data in variableName. + * Used for allocating the right amount of memory. + */ + size_t fileLength(size_t fileIndex); + + /** + * @brief Write all the data in file fileIndex of variable we re interested in into the dataOut. + */ + template + void loadFile(T* dataOut, size_t fileIndex); + + /** + * @brief Write all the data in file fileIndex of variable variableName into the dataOut. + * @param dataOut pointer to memory that should be written to. + * @param fileIndex the index of the file we want to load into memory. + * @param variableName the name of the variable + */ + template + void loadFile(T* dataOut, size_t fileIndex, const std::string& variableName); + + /** + * @brief Get size of a variable. + */ + size_t axisLength(size_t fileIndex, const std::string& axisName); + + ~DataReader(); +private: + FilePathManager filePathManager; + std::string variableName; +}; + + +#endif //DATAREADER_H diff --git a/src/hurricanedata/fielddata.cu b/src/hurricanedata/fielddata.cu new file mode 100644 index 0000000..38b0d25 --- /dev/null +++ b/src/hurricanedata/fielddata.cu @@ -0,0 +1,21 @@ +#include "fielddata.h" + +__device__ float getVal( + const FieldMetadata &md, + const FieldData &d, + const size_t &timeInd, + const size_t &levInd, + const size_t &latInd, + const size_t &lonInd +) { + size_t valArrInd = (d.fieldInd + timeInd) / md.numberOfTimeStepsPerFile; + size_t trueTimeInd = (d.fieldInd + timeInd) % md.numberOfTimeStepsPerFile; + size_t sizeSpatialData = md.widthSize*md.heightSize*md.depthSize; + size_t size2DMapData = md.widthSize*md.heightSize; + return d.valArrays[valArrInd][ + trueTimeInd*sizeSpatialData + + levInd*size2DMapData + + latInd*md.widthSize + + lonInd + ]; +} \ No newline at end of file diff --git a/src/hurricanedata/fielddata.h b/src/hurricanedata/fielddata.h new file mode 100644 index 0000000..4a3257c --- /dev/null +++ b/src/hurricanedata/fielddata.h @@ -0,0 +1,58 @@ +#ifndef FIELDDATA_H +#define FIELDDATA_H + +#include + +struct FieldMetadata { + size_t widthSize; // Number of different longitudes + size_t heightSize; // Number of different latitudes + size_t depthSize; // Number of different levels + size_t timeSize; // Number of different times + + // lons is a managed Unified Memory array of size widthCount that indicates + // that getVal(t, i, j, k) is a value with longitude of lons[i]. + // The other such arrays are similarly defined. + double *lons; + double *lats; + double *levs; + int *times; + + size_t numberOfTimeStepsPerFile; +}; + +using FieldMetadata = FieldMetadata; + +/** + * @brief Allows for accessing data of a time-slice of a scalar field + * that may start in the middle of a file or go range over multiple files + * by holding references to multiple files at a time. + * + * @note Use the getVal method to index into it and get values. + */ +struct FieldData { + static constexpr size_t FILESNUM = 2; // Number of files stored in a FieldData struct. + static constexpr size_t numberOfTimeStepsPerField = 2; + + size_t fieldInd; // Indicates + + // A uniform array of length FILESNUM storing pointers to 4D arrays stored in device memory. + float **valArrays; + +}; + +/** + * @brief Get the scalar field value at a particular index (timeInd, levInd, latInd, lonInd). + * Note that the timeInd may be counter-intuitive. See explanation in gpubufferhandler.h. + * + * @return scalar field float value. + */ +extern __device__ float getVal( + const FieldMetadata &md, + const FieldData &d, + const size_t &timeInd, + const size_t &levInd, + const size_t &latInd, + const size_t &lonInd +); + +#endif //FIELDDATA_H diff --git a/src/hurricanedata/filepathmanager.cpp b/src/hurricanedata/filepathmanager.cpp new file mode 100644 index 0000000..03c7f39 --- /dev/null +++ b/src/hurricanedata/filepathmanager.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include +#include + +#include "filepathmanager.h" + +namespace fs = std::filesystem; + +FilePathManager::FilePathManager(const std::string& path) { + // Check if the directory exists + if (!fs::exists(path) || !fs::is_directory(path)) { + std::cerr << "Error: Directory does not exist or is not valid." << std::endl; + exit(1); + } + + // Iterate over directory entries + for (const auto& entry : fs::directory_iterator(path)) { + if (entry.is_regular_file() && entry.path().extension() == ".nc4") { + fileNames.push_back(entry.path().string()); + } + } + + // Sort file names lexicographically + std::sort(fileNames.begin(), fileNames.end()); + + // Print sorted file names + std::cout << "Files in directory '" << path << "' sorted lexicographically:\n"; + for (const auto& fileName : fileNames) { + std::cout << fileName << std::endl; + } +} + +const char* FilePathManager::getPath(size_t index) const { + return fileNames.at(index).c_str(); +} + + +FilePathManager::~FilePathManager() { } \ No newline at end of file diff --git a/src/hurricanedata/filepathmanager.h b/src/hurricanedata/filepathmanager.h new file mode 100644 index 0000000..96f9ec5 --- /dev/null +++ b/src/hurricanedata/filepathmanager.h @@ -0,0 +1,28 @@ +#ifndef FILEPATHMANAGER_H +#define FILEPATHMANAGER_H + +#include +#include + +/** + * @brief Simple class that is responsible for mapping a file index to a file path. + * So for example: + * index = 0 -> MERRA2_400.inst6_3d_ana_Np.20120101.nc4 + * index = 1 -> MERRA2_400.inst6_3d_ana_Np.20120102.nc4 + * index = 2 -> MERRA2_400.inst6_3d_ana_Np.20120103.nc4 + * index = 3 -> MERRA2_400.inst6_3d_ana_Np.20120104.nc4 + * etc... + */ +class FilePathManager { +public: + FilePathManager(const std::string& path); + size_t getNumberOfFiles(); + const char* getPath(size_t index) const; + + ~FilePathManager(); + +private: + std::vector fileNames; +}; + +#endif //FILEPATHMANAGER_H \ No newline at end of file diff --git a/src/hurricanedata/gpubuffer.cu b/src/hurricanedata/gpubuffer.cu new file mode 100644 index 0000000..7edac38 --- /dev/null +++ b/src/hurricanedata/gpubuffer.cu @@ -0,0 +1,277 @@ +#include "gpubuffer.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +struct File { + condition_variable cv; + mutex m; + bool valid; + size_t size; + float *h_data; // host data + float *d_data; // device data +}; + +struct LoadFileJob { + size_t fileIndex; + size_t bufferIndex; +}; + +struct GetSizeJob { + size_t fileIndex; + promise size; +}; + +struct GetAxisDoubleJob { + size_t fileIndex; + string axisName; + promise> axis; +}; + +struct GetAxisIntJob { + size_t fileIndex; + string axisName; + promise> axis; +}; + +using Job = variant; + +class GPUBuffer::impl { +public: + impl(DataReader& dataReader); + void loadFile(LoadFileJob job); + void loadFile(size_t fileIndex, size_t bufferIndex); + void getSize(GetSizeJob job); + size_t getSize(size_t fileIndex); // Most probably blocking + void getAxis(GetAxisDoubleJob job); + void getAxis(GetAxisIntJob job); + + template + std::pair getAxis(size_t fileIndex, const std::string& axisName); // Most probably blocking + ~impl(); + + File buffer[numBufferedFiles]; + + // Thread worker things + void worker(); + queue jobs; + unique_ptr ioworker; + cudaStream_t iostream; + DataReader& dataReader; + condition_variable queuecv; + mutex queuecv_m; + bool workerRunning = true; +}; + +GPUBuffer::GPUBuffer(DataReader& dataReader): pImpl(make_unique(dataReader)) { } + +size_t GPUBuffer::impl::getSize(size_t fileIndex) { + promise promise; + future future = promise.get_future(); + { + lock_guard lk(queuecv_m); + jobs.push(GetSizeJob{fileIndex, move(promise)}); + } + queuecv.notify_all(); + + future.wait(); + + return future.get(); +} + +template +pair GPUBuffer::getAxis(size_t fileIndex, const string& axisName) { + return pImpl->getAxis(fileIndex, axisName); +} +template pair GPUBuffer::getAxis(size_t fileIndex, const string& axisName); +template pair GPUBuffer::getAxis(size_t fileIndex, const string& axisName); + +template <> +pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) { + promise> promise; + future> future = promise.get_future(); + { + lock_guard lk(queuecv_m); + jobs.push(GetAxisDoubleJob{fileIndex, axisName, move(promise)}); + } + queuecv.notify_all(); + + future.wait(); + + return future.get(); +} +template pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName); + +template <> +pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) { + promise> promise; + future> future = promise.get_future(); + { + lock_guard lk(queuecv_m); + jobs.push(GetAxisIntJob{fileIndex, axisName, move(promise)}); + } + queuecv.notify_all(); + + future.wait(); + + return future.get(); +} +template pair GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName); + +GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) { + cudaStreamCreate(&iostream); + + ioworker = make_unique([this]() { worker(); }); + + size_t size = getSize(0); + auto x = getAxis(0, "time"); + size_t sizeTime = x.first; + cudaFree(x.second); + for (size_t i = 0; i < numBufferedFiles; i++) { + { + File &file = buffer[i]; + lock_guard lk(file.m); + cudaMallocHost(&file.h_data, sizeof(float)*size); + cudaMalloc(&file.d_data, sizeof(float)*size); + file.size = size; + file.valid = false; + } + // loadFile(i, i); + } +} + +GPUBuffer::~GPUBuffer() { +} + +GPUBuffer::impl::~impl() { + { + lock_guard lk(queuecv_m); + workerRunning = false; + } + queuecv.notify_all(); + ioworker->join(); + for (size_t i = 0; i < numBufferedFiles; i++) { + File &file = buffer[i]; + cudaFree(file.d_data); + cudaFreeHost(file.h_data); + } + cudaStreamDestroy(iostream); +} + +void GPUBuffer::impl::loadFile(LoadFileJob job) { + File &file = buffer[job.bufferIndex]; + { + lock_guard lk(file.m); + assert(!file.valid); + cout << "loading file with index " << job.fileIndex << "\n"; + dataReader.loadFile(file.h_data, job.fileIndex); + cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream); + cudaStreamSynchronize(iostream); + buffer[job.bufferIndex].valid = true; + } + file.cv.notify_all(); +} + +void GPUBuffer::impl::getSize(GetSizeJob job) { + size_t size = dataReader.fileLength(job.fileIndex); + job.size.set_value(size); +} + +void GPUBuffer::impl::getAxis(GetAxisDoubleJob job) { + pair array; + array.first = dataReader.axisLength(job.fileIndex, job.axisName); + cudaError_t status = cudaMallocManaged(&array.second, array.first*sizeof(double)); + dataReader.loadFile(array.second, job.fileIndex, job.axisName); + job.axis.set_value(array); +} + +void GPUBuffer::impl::getAxis(GetAxisIntJob job) { + pair array; + array.first = dataReader.axisLength(job.fileIndex, job.axisName); + cudaError_t status = cudaMallocManaged(&array.second, array.first*sizeof(int)); + dataReader.loadFile(array.second, job.fileIndex, job.axisName); + job.axis.set_value(array); +} + +void GPUBuffer::loadFile(size_t fileIndex, size_t bufferIndex) { + pImpl->loadFile(fileIndex, bufferIndex); +} + +void GPUBuffer::impl::loadFile(size_t fileIndex, size_t bufferIndex) { + LoadFileJob job = { + .fileIndex = fileIndex, + .bufferIndex = bufferIndex + }; + + // Main thread theoretically blocks on 2 mutexes here + // but it _should_ never have to wait for them. + { + File &file = buffer[bufferIndex]; + + std::unique_lock lk(file.m, std::defer_lock); + bool lockval = lk.try_lock(); + if (!lockval) { + cout << "waited on GPUBuffer during loadFile orchestration :(\n"; + lk.lock(); + } + file.valid = false; + } + { + std::unique_lock lk(queuecv_m, std::defer_lock); + bool lockval = lk.try_lock(); + if (!lockval) { + cout << "waited on IOworker queue during loadFile orchestration :(\n"; + lk.lock(); + } + jobs.push(job); + } + queuecv.notify_all(); +} + +void GPUBuffer::impl::worker() { + while(workerRunning) { + Job job; + { + unique_lock lk(queuecv_m); + queuecv.wait(lk, [this]{ return !workerRunning || !jobs.empty(); }); + if (!workerRunning) { + return; + } + + job = move(jobs.front()); + jobs.pop(); + } + if(holds_alternative(job)) { + loadFile(get(job)); + } else if(holds_alternative(job)) { + getSize(move(get(job))); + } else if(holds_alternative(job)) { + getAxis(move(get(job))); + } else if(holds_alternative(job)) { + getAxis(move(get(job))); + } + + } +} + +DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) { + File &file = pImpl->buffer[bufferIndex]; + + // TODO: Might be nice to measure the blocking time here. + unique_lock lk(file.m); + file.cv.wait(lk, [this, bufferIndex]{ return pImpl->buffer[bufferIndex].valid; }); + + DataHandle dh = { + .d_data = file.d_data, + .size = file.size + }; + return dh; +} \ No newline at end of file diff --git a/src/hurricanedata/gpubuffer.h b/src/hurricanedata/gpubuffer.h new file mode 100644 index 0000000..b953abe --- /dev/null +++ b/src/hurricanedata/gpubuffer.h @@ -0,0 +1,60 @@ +#ifndef GPUBUFFER_H +#define GPUBUFFER_H + +#include +#include +#include + +#include "datareader.h" + +struct DataHandle { + float *d_data; // Device memory + size_t size; + // Could include the data stored in host memory h_data in this handle if it were needed. +}; + +/** + * @brief Handles the asynchronous (un)loading data to the GPU. The rest of the + * application should not have to interface directly with this class. Getting data + * should go over the GPUBufferHandler class. + * + * @note This class uses a queue to give loading jobs to the worker thread. + * NetCDF-C is not thread safe so you may never read data using netCDF directly + * on any other thread than this worker thread. + * + * Assumes all the data in the nc4 files is the same size. + * + */ +class GPUBuffer { +public: + static constexpr size_t numBufferedFiles = 3; // Number of files stored in memory at one time. + + GPUBuffer(DataReader& dataReader); + + /** + * @brief Asynchronously tells the GPUBuffer to eventually load a particular file index + * into a buffer index (in which part of the buffer the file should be stored). + */ + void loadFile(size_t fileIndex, size_t bufferIndex); // No blocking + + /** + * @brief Get the values stored in a particular buffer index + * @return A struct DataHandle that points to the memory and gives its size. + */ + DataHandle getDataHandle(size_t bufferIndex); // Potentially blocking + + /** + * @brief Get the data from an axis (e.g. longitude) and its size. + * @note This is a blocking operation, so it makes a job for the worker + * to read the data and then waits untill the job is completed. + */ + template + std::pair getAxis(size_t fileIndex, const std::string& axisName); // Most probably blocking + + ~GPUBuffer(); +private: + class impl; + std::experimental::propagate_const> pImpl; +}; + +#endif //GPUBUFFER_H diff --git a/src/hurricanedata/gpubufferhandler.cu b/src/hurricanedata/gpubufferhandler.cu new file mode 100644 index 0000000..606a47c --- /dev/null +++ b/src/hurricanedata/gpubufferhandler.cu @@ -0,0 +1,93 @@ +#include "gpubufferhandler.h" +#include "fielddata.h" + +#include + +using namespace std; + +GPUBufferHandler::GPUBufferHandler(GPUBuffer& gpuBuffer): +gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) { + cudaMallocManaged(&fmd, sizeof(FieldMetadata)); + + auto [widthSize, lons] = gpuBuffer.getAxis(0, "lon"); + fmd->widthSize = widthSize; + fmd->lons = lons; + + auto [heightSize, lats] = gpuBuffer.getAxis(0, "lat"); + fmd->heightSize = heightSize; + fmd->lats = lats; + + auto [depthSize, levs] = gpuBuffer.getAxis(0, "lev"); + fmd->depthSize = depthSize; + fmd->levs = levs; + + for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) { + gpuBuffer.loadFile(fileInd,fileInd); + fileInd++; + } + + fmd->timeSize = FieldData::numberOfTimeStepsPerField; + + cudaMallocManaged(&fmd->times, sizeof(fmd->numberOfTimeStepsPerFile*sizeof(int))); + + auto [numberOfTimeStepsPerFile, times] = gpuBuffer.getAxis(0, "time"); + fmd->numberOfTimeStepsPerFile = numberOfTimeStepsPerFile; + fmd->times = times; + + cudaMallocManaged(&valArrays, sizeof(float *)*FieldData::FILESNUM); +} + +FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) { + + FieldData fd; + size_t fieldDataInd = 0; + fd.valArrays = valArrays; + cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << " with a field index of " << fieldInd << "\n"; + for (int i = bufferInd; i <= newEndBufferInd; i++) { + DataHandle x = gpuBuffer.getDataHandle(i); + fd.valArrays[fieldDataInd] = x.d_data; + fieldDataInd++; + } + fd.fieldInd = fieldInd; + + return fd; +} + +FieldData GPUBufferHandler::nextFieldData() { + DataHandle x = gpuBuffer.getDataHandle(bufferInd); + size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile; + size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles; + + size_t endFieldInd = (fieldInd + FieldData::numberOfTimeStepsPerField - 1) % fmd->numberOfTimeStepsPerFile; + size_t endBufferInd = (bufferInd + (fieldInd + FieldData::numberOfTimeStepsPerField - 1)/fmd->numberOfTimeStepsPerFile) % GPUBuffer::numBufferedFiles; + + size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile; + size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles; + + if(firstTimeStep) { + firstTimeStep = false; + return setupField(endBufferInd); + } + + fieldInd = newFieldInd; + + if (newBufferInd != bufferInd) { + gpuBuffer.loadFile(fileInd, bufferInd); + fileInd++; + bufferInd = newBufferInd; + } + + if (newEndBufferInd != endBufferInd) { + // maybe dont do things? + } + + return setupField(newEndBufferInd); +} + +GPUBufferHandler::~GPUBufferHandler() { + cudaFree(fmd->lons); + cudaFree(fmd->lats); + cudaFree(fmd->levs); + cudaFree(valArrays); + cudaFree(fmd); +} \ No newline at end of file diff --git a/src/hurricanedata/gpubufferhandler.h b/src/hurricanedata/gpubufferhandler.h new file mode 100644 index 0000000..2b4338b --- /dev/null +++ b/src/hurricanedata/gpubufferhandler.h @@ -0,0 +1,64 @@ +#ifndef GPUBUFFERHANDLER_H +#define GPUBUFFERHANDLER_H + +#include "fielddata.h" +#include "gpubuffer.h" + +#include + +/** + * @brief Responsible for deciding when the GPUBuffer should load and unload files. + * Also assembles and gives access to FieldData. + * + * You will need to interface with this class. + */ +class GPUBufferHandler { +public: + GPUBufferHandler(GPUBuffer& gpuBuffer); + + /** + * @brief Produces a FieldData which can be used to retrieve values for a time-slice + * into a scalar field with a width of FieldData::numberOfTimeStepsPerField. + * + * @details This method always increments the start point of the time-slice + * by 1. See below: + * + * time steps = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ... + * files (4 time steps per file) = [0 1 2 3][4 5 6 7][8 9 10 11][12 13 ... + * nextFieldData() (1st call) = [0 1] + * nextFieldData() (2nd call) = [1 2] + * nextFieldData() (3rd call) = [2 3] + * nextFieldData() (4th call) = [3 4] + * nextFieldData() (5th call) = [4 5] + * etc... + * + * When getting values from a FieldData using the getVal method, + * the time index is relative to the start of the time-slice. + * + * This means that for d = nextFieldData() (4th call), + * getVal(.., fieldData=d, ..., timeInd = 1, ...) gives a value at + * absolute time step 5 as seen above. + */ + FieldData nextFieldData(); + + ~GPUBufferHandler(); + + /** + * You can get the FieldMetaData from here. + */ + FieldMetadata *fmd; + + static void freeFieldData(); + +private: + FieldData setupField(size_t endBufferInd); + GPUBuffer& gpuBuffer; + size_t fileInd; + size_t bufferInd; + size_t fieldInd; + bool firstTimeStep = true; + + float **valArrays; +}; + +#endif //GPUBUFFERHANDLER_H diff --git a/src/main.cu b/src/main.cu index 1b4ac0e..cba55f9 100644 --- a/src/main.cu +++ b/src/main.cu @@ -1,224 +1,276 @@ -#include -#include -#include +// #include +// #include +// #include +// #include + +// #include "linalg/linalg.h" +// #include "objs/sphere.h" +// #include "img/handler.h" + +// // TODO: Eventually, export this into a better place (i.e., such that we do not have to recompile every time we change a parameter) +// static const int VOLUME_WIDTH = 1024; +// static const int VOLUME_HEIGHT = 1024; +// static const int VOLUME_DEPTH = 1024; + +// static const int IMAGE_WIDTH = 2560; +// static const int IMAGE_HEIGHT = 1440; + +// static const int SAMPLES_PER_PIXEL = 8; // TODO: Right now uses simple variance, consider using something more advanced (e.g., some commonly-used noise map) + + +// __constant__ int d_volumeWidth; +// __constant__ int d_volumeHeight; +// __constant__ int d_volumeDepth; + +// static float* d_volume = nullptr; // TODO: Adjust according to how data is loaded + +// // ---------------------------------------------------------------------------------------------------- +// __device__ Vec3 phongShading(const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& baseColor) { +// double ambientStrength = 0.3; +// double diffuseStrength = 0.8; +// double specularStrength = 0.5; +// int shininess = 32; + +// Vec3 ambient = baseColor * ambientStrength; +// double diff = fmax(normal.dot(lightDir), 0.0); +// Vec3 diffuse = baseColor * (diffuseStrength * diff); + +// Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); +// double spec = pow(fmax(viewDir.dot(reflectDir), 0.0), shininess); +// Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); + +// return ambient + diffuse + specular; +// } + +// // Raycast + phong +// __global__ void raycastKernel(float* volumeData, unsigned char* framebuffer, int imageWidth, int imageHeight, Vec3 cameraPos, Vec3 cameraDir, Vec3 cameraUp, float fov, float stepSize, Vec3 lightPos) { +// int px = blockIdx.x * blockDim.x + threadIdx.x; +// int py = blockIdx.y * blockDim.y + threadIdx.y; +// if (px >= imageWidth || py >= imageHeight) return; + +// float accumR = 0.0f; +// float accumG = 0.0f; +// float accumB = 0.0f; + +// // Multiple samples per pixel +// for (int s = 0; s < SAMPLES_PER_PIXEL; s++) { +// // Map to [-1, 1] +// float u = ((px + 0.5f) / imageWidth ) * 2.0f - 1.0f; +// float v = ((py + 0.5f) / imageHeight) * 2.0f - 1.0f; + +// // TODO: Move this (and all similar transformation code) to its own separate file +// float tanHalfFov = tanf(fov * 0.5f); +// u *= tanHalfFov; +// v *= tanHalfFov; + +// // Find ray direction +// Vec3 cameraRight = (cameraDir.cross(cameraUp)).normalize(); +// cameraUp = (cameraRight.cross(cameraDir)).normalize(); +// Vec3 rayDir = (cameraDir + cameraRight*u + cameraUp*v).normalize(); + +// // Intersect (for simplicity just a 3D box from 0 to 1 in all dimensions) - TODO: Think about whether this is the best way to do this +// float tNear = 0.f; // TODO: These are also linear transforms, so move away +// float tFar = 1e6f; + +// auto intersectAxis = [&](float start, float dirVal) { +// if (fabsf(dirVal) < 1e-10f) { // TDDO: Add a constant - epsilon +// if (start < 0.f || start > 1.f) { +// tNear = 1e9f; +// tFar = -1e9f; +// } +// } else { +// float t0 = (0.0f - start) / dirVal; // TODO: 0.0 and 1.0 depend on the box size -> move to a constant +// float t1 = (1.0f - start) / dirVal; +// if (t0>t1) { +// float tmp=t0; +// t0=t1; +// t1=tmp; +// } +// if (t0>tNear) tNear = t0; +// if (t1 tFar) continue; // No intersectionn +// if (tNear < 0.0f) tNear = 0.0f; + +// float colorR = 0.f, colorG = 0.f, colorB = 0.f; +// float alphaAccum = 0.f; + +// float tCurrent = tNear; +// while (tCurrent < tFar && alphaAccum < 0.99f) { +// Vec3 pos = cameraPos + rayDir * tCurrent; + +// // Convert to volume indices +// float fx = pos.x * (d_volumeWidth - 1); +// float fy = pos.y * (d_volumeHeight - 1); +// float fz = pos.z * (d_volumeDepth - 1); +// int ix = (int)roundf(fx); +// int iy = (int)roundf(fy); +// int iz = (int)roundf(fz); + +// // Sample +// float density = sampleVolumeNearest(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz); + +// // Basic transfer function. TODO: Move to a separate file, and then improve +// float alphaSample = density * 0.05f; +// Vec3 baseColor = Vec3(density, 0.1f*density, 1.f - density); + +// // If density ~ 0, skip shading +// if (density > 0.001f) { +// Vec3 grad = computeGradient(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz); +// Vec3 normal = -grad.normalize(); + +// Vec3 lightDir = (lightPos - pos).normalize(); +// Vec3 viewDir = -rayDir.normalize(); + +// // Apply Phong +// Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor); + +// // Compose +// colorR += (1.0f - alphaAccum) * shadedColor.x * alphaSample; +// colorG += (1.0f - alphaAccum) * shadedColor.y * alphaSample; +// colorB += (1.0f - alphaAccum) * shadedColor.z * alphaSample; +// alphaAccum += (1.0f - alphaAccum) * alphaSample; +// } + +// tCurrent += stepSize; +// } + +// accumR += colorR; +// accumG += colorG; +// accumB += colorB; +// } + +// // Average samples +// accumR /= (float)SAMPLES_PER_PIXEL; +// accumG /= (float)SAMPLES_PER_PIXEL; +// accumB /= (float)SAMPLES_PER_PIXEL; + +// // Final colour +// int fbIndex = (py * imageWidth + px) * 3; +// framebuffer[fbIndex + 0] = (unsigned char)(fminf(accumR, 1.f) * 255); +// framebuffer[fbIndex + 1] = (unsigned char)(fminf(accumG, 1.f) * 255); +// framebuffer[fbIndex + 2] = (unsigned char)(fminf(accumB, 1.f) * 255); +// } + +// int main(int argc, char** argv) { +// // Generate debug volume data +// float* hostVolume = new float[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH]; +// generateVolume(hostVolume, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH); + +// // Allocate + copy data to GPU +// size_t volumeSize = sizeof(float) * VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; +// cudaMalloc((void**)&d_volume, volumeSize); +// cudaMemcpy(d_volume, hostVolume, volumeSize, cudaMemcpyHostToDevice); + +// int w = VOLUME_WIDTH, h = VOLUME_HEIGHT, d = VOLUME_DEPTH; +// cudaMemcpyToSymbol(d_volumeWidth, &w, sizeof(int)); +// cudaMemcpyToSymbol(d_volumeHeight, &h, sizeof(int)); +// cudaMemcpyToSymbol(d_volumeDepth, &d, sizeof(int)); + +// // Allocate framebuffer +// unsigned char* d_framebuffer; +// size_t fbSize = IMAGE_WIDTH * IMAGE_HEIGHT * 3 * sizeof(unsigned char); +// cudaMalloc((void**)&d_framebuffer, fbSize); +// cudaMemset(d_framebuffer, 0, fbSize); + +// // Camera and Light +// Vec3 cameraPos(0.5, 0.5, -2.0); +// Vec3 cameraDir(0.0, 0.0, 1.0); +// Vec3 cameraUp(0.0, 1.0, 0.0); +// float fov = 60.0f * (M_PI / 180.0f); +// float stepSize = 0.002f; +// Vec3 lightPos(1.5, 2.0, -1.0); + +// // Launch kernel +// dim3 blockSize(16, 16); +// dim3 gridSize((IMAGE_WIDTH + blockSize.x - 1)/blockSize.x, +// (IMAGE_HEIGHT + blockSize.y - 1)/blockSize.y); + +// raycastKernel<<>>( +// d_volume, +// d_framebuffer, +// IMAGE_WIDTH, +// IMAGE_HEIGHT, +// cameraPos, +// cameraDir.normalize(), +// cameraUp.normalize(), +// fov, +// stepSize, +// lightPos +// ); +// cudaDeviceSynchronize(); + +// // Copy framebuffer back to CPU +// unsigned char* hostFramebuffer = new unsigned char[IMAGE_WIDTH * IMAGE_HEIGHT * 3]; +// cudaMemcpy(hostFramebuffer, d_framebuffer, fbSize, cudaMemcpyDeviceToHost); + +// // Export image +// saveImage("output.ppm", hostFramebuffer, IMAGE_WIDTH, IMAGE_HEIGHT); + +// // Cleanup +// delete[] hostVolume; +// delete[] hostFramebuffer; +// cudaFree(d_volume); +// cudaFree(d_framebuffer); + +// std::cout << "Phong-DVR rendering done. Image saved to output.ppm" << std::endl; +// return 0; +// } + +// gpu-buffer-handler branch main +#include "hurricanedata/fielddata.h" +#include "hurricanedata/gpubufferhandler.h" +#include "hurricanedata/datareader.h" +#include "hurricanedata/gpubuffer.h" + #include +#include +#include +#include +#include +#include -#include "linalg/linalg.h" -#include "objs/sphere.h" -#include "img/handler.h" - -// TODO: Eventually, export this into a better place (i.e., such that we do not have to recompile every time we change a parameter) -static const int VOLUME_WIDTH = 1024; -static const int VOLUME_HEIGHT = 1024; -static const int VOLUME_DEPTH = 1024; - -static const int IMAGE_WIDTH = 2560; -static const int IMAGE_HEIGHT = 1440; - -static const int SAMPLES_PER_PIXEL = 8; // TODO: Right now uses simple variance, consider using something more advanced (e.g., some commonly-used noise map) - - -__constant__ int d_volumeWidth; -__constant__ int d_volumeHeight; -__constant__ int d_volumeDepth; - -static float* d_volume = nullptr; // TODO: Adjust according to how data is loaded - -// ---------------------------------------------------------------------------------------------------- -__device__ Vec3 phongShading(const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& baseColor) { - double ambientStrength = 0.3; - double diffuseStrength = 0.8; - double specularStrength = 0.5; - int shininess = 32; - - Vec3 ambient = baseColor * ambientStrength; - double diff = fmax(normal.dot(lightDir), 0.0); - Vec3 diffuse = baseColor * (diffuseStrength * diff); - - Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(fmax(viewDir.dot(reflectDir), 0.0), shininess); - Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); - - return ambient + diffuse + specular; +__global__ void middleOfTwoValues(float *ans, const FieldMetadata &fmd, FieldData fd) { + float xi = getVal(fmd, fd, 0, 20, 100, 100); + float yi = getVal(fmd, fd, 1, 20, 100, 100); + *ans = (xi+yi)/2; } -// Raycast + phong -__global__ void raycastKernel(float* volumeData, unsigned char* framebuffer, int imageWidth, int imageHeight, Vec3 cameraPos, Vec3 cameraDir, Vec3 cameraUp, float fov, float stepSize, Vec3 lightPos) { - int px = blockIdx.x * blockDim.x + threadIdx.x; - int py = blockIdx.y * blockDim.y + threadIdx.y; - if (px >= imageWidth || py >= imageHeight) return; +int main() { + std::string path = "data/atmosphere_MERRA-wind-speed[179253532]"; - float accumR = 0.0f; - float accumG = 0.0f; - float accumB = 0.0f; + std::string variable = "T"; - // Multiple samples per pixel - for (int s = 0; s < SAMPLES_PER_PIXEL; s++) { - // Map to [-1, 1] - float u = ((px + 0.5f) / imageWidth ) * 2.0f - 1.0f; - float v = ((py + 0.5f) / imageHeight) * 2.0f - 1.0f; + DataReader dataReader{path, variable}; - // TODO: Move this (and all similar transformation code) to its own separate file - float tanHalfFov = tanf(fov * 0.5f); - u *= tanHalfFov; - v *= tanHalfFov; + std::cout << "created datareader\n"; - // Find ray direction - Vec3 cameraRight = (cameraDir.cross(cameraUp)).normalize(); - cameraUp = (cameraRight.cross(cameraDir)).normalize(); - Vec3 rayDir = (cameraDir + cameraRight*u + cameraUp*v).normalize(); + GPUBuffer buffer (dataReader); - // Intersect (for simplicity just a 3D box from 0 to 1 in all dimensions) - TODO: Think about whether this is the best way to do this - float tNear = 0.f; // TODO: These are also linear transforms, so move away - float tFar = 1e6f; + std::cout << "created buffer\n"; - auto intersectAxis = [&](float start, float dirVal) { - if (fabsf(dirVal) < 1e-10f) { // TDDO: Add a constant - epsilon - if (start < 0.f || start > 1.f) { - tNear = 1e9f; - tFar = -1e9f; - } - } else { - float t0 = (0.0f - start) / dirVal; // TODO: 0.0 and 1.0 depend on the box size -> move to a constant - float t1 = (1.0f - start) / dirVal; - if (t0>t1) { - float tmp=t0; - t0=t1; - t1=tmp; - } - if (t0>tNear) tNear = t0; - if (t1 tFar) continue; // No intersectionn - if (tNear < 0.0f) tNear = 0.0f; + std::cout << "created buffer handler\n"; + for (int i = 0; i < 10; i++) { + FieldData fd = bufferHandler.nextFieldData(); - float colorR = 0.f, colorG = 0.f, colorB = 0.f; - float alphaAccum = 0.f; + middleOfTwoValues<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd); - float tCurrent = tNear; - while (tCurrent < tFar && alphaAccum < 0.99f) { - Vec3 pos = cameraPos + rayDir * tCurrent; - - // Convert to volume indices - float fx = pos.x * (d_volumeWidth - 1); - float fy = pos.y * (d_volumeHeight - 1); - float fz = pos.z * (d_volumeDepth - 1); - int ix = (int)roundf(fx); - int iy = (int)roundf(fy); - int iz = (int)roundf(fz); - - // Sample - float density = sampleVolumeNearest(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz); - - // Basic transfer function. TODO: Move to a separate file, and then improve - float alphaSample = density * 0.05f; - Vec3 baseColor = Vec3(density, 0.1f*density, 1.f - density); - - // If density ~ 0, skip shading - if (density > 0.001f) { - Vec3 grad = computeGradient(volumeData, d_volumeWidth, d_volumeHeight, d_volumeDepth, ix, iy, iz); - Vec3 normal = -grad.normalize(); - - Vec3 lightDir = (lightPos - pos).normalize(); - Vec3 viewDir = -rayDir.normalize(); - - // Apply Phong - Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor); - - // Compose - colorR += (1.0f - alphaAccum) * shadedColor.x * alphaSample; - colorG += (1.0f - alphaAccum) * shadedColor.y * alphaSample; - colorB += (1.0f - alphaAccum) * shadedColor.z * alphaSample; - alphaAccum += (1.0f - alphaAccum) * alphaSample; - } - - tCurrent += stepSize; - } - - accumR += colorR; - accumG += colorG; - accumB += colorB; + cudaDeviceSynchronize(); + std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n"; } - - // Average samples - accumR /= (float)SAMPLES_PER_PIXEL; - accumG /= (float)SAMPLES_PER_PIXEL; - accumB /= (float)SAMPLES_PER_PIXEL; - - // Final colour - int fbIndex = (py * imageWidth + px) * 3; - framebuffer[fbIndex + 0] = (unsigned char)(fminf(accumR, 1.f) * 255); - framebuffer[fbIndex + 1] = (unsigned char)(fminf(accumG, 1.f) * 255); - framebuffer[fbIndex + 2] = (unsigned char)(fminf(accumB, 1.f) * 255); -} - -int main(int argc, char** argv) { - // Generate debug volume data - float* hostVolume = new float[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH]; - generateVolume(hostVolume, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH); - - // Allocate + copy data to GPU - size_t volumeSize = sizeof(float) * VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; - cudaMalloc((void**)&d_volume, volumeSize); - cudaMemcpy(d_volume, hostVolume, volumeSize, cudaMemcpyHostToDevice); - - int w = VOLUME_WIDTH, h = VOLUME_HEIGHT, d = VOLUME_DEPTH; - cudaMemcpyToSymbol(d_volumeWidth, &w, sizeof(int)); - cudaMemcpyToSymbol(d_volumeHeight, &h, sizeof(int)); - cudaMemcpyToSymbol(d_volumeDepth, &d, sizeof(int)); - - // Allocate framebuffer - unsigned char* d_framebuffer; - size_t fbSize = IMAGE_WIDTH * IMAGE_HEIGHT * 3 * sizeof(unsigned char); - cudaMalloc((void**)&d_framebuffer, fbSize); - cudaMemset(d_framebuffer, 0, fbSize); - - // Camera and Light - Vec3 cameraPos(0.5, 0.5, -2.0); - Vec3 cameraDir(0.0, 0.0, 1.0); - Vec3 cameraUp(0.0, 1.0, 0.0); - float fov = 60.0f * (M_PI / 180.0f); - float stepSize = 0.002f; - Vec3 lightPos(1.5, 2.0, -1.0); - - // Launch kernel - dim3 blockSize(16, 16); - dim3 gridSize((IMAGE_WIDTH + blockSize.x - 1)/blockSize.x, - (IMAGE_HEIGHT + blockSize.y - 1)/blockSize.y); - - raycastKernel<<>>( - d_volume, - d_framebuffer, - IMAGE_WIDTH, - IMAGE_HEIGHT, - cameraPos, - cameraDir.normalize(), - cameraUp.normalize(), - fov, - stepSize, - lightPos - ); - cudaDeviceSynchronize(); - - // Copy framebuffer back to CPU - unsigned char* hostFramebuffer = new unsigned char[IMAGE_WIDTH * IMAGE_HEIGHT * 3]; - cudaMemcpy(hostFramebuffer, d_framebuffer, fbSize, cudaMemcpyDeviceToHost); - - // Export image - saveImage("output.ppm", hostFramebuffer, IMAGE_WIDTH, IMAGE_HEIGHT); - - // Cleanup - delete[] hostVolume; - delete[] hostFramebuffer; - cudaFree(d_volume); - cudaFree(d_framebuffer); - - std::cout << "Phong-DVR rendering done. Image saved to output_phong_dvr.ppm" << std::endl; + + // TODO: measure data transfer time in this example code. + cudaFree(ptr_test_read); return 0; -} +} \ No newline at end of file diff --git a/src/tests/a.out b/src/tests/a.out deleted file mode 100755 index 3cf0e96..0000000 Binary files a/src/tests/a.out and /dev/null differ diff --git a/src/tests/cg_example.cu b/src/tests/cg_example.cu deleted file mode 100644 index fafaec8..0000000 --- a/src/tests/cg_example.cu +++ /dev/null @@ -1,102 +0,0 @@ -#include -#include -#include -#include - -#include "linalg/linalg.h" -#include "objs/sphere.h" -#include "img/handler.h" - -#define WIDTH 3840 -#define HEIGHT 2160 -#define SAMPLES_PER_PIXEL 8 - - -__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { - double ambientStrength = 0.1; - double diffuseStrength = 0.8; - double specularStrength = 0.5; - int shininess = 64; - - Vec3 ambient = color * ambientStrength; - double diff = max(normal.dot(lightDir), 0.0); - Vec3 diffuse = color * (diffuseStrength * diff); - - Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); - Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); - - return ambient + diffuse + specular; -} - -__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= WIDTH || y >= HEIGHT) return; - - int pixelIndex = (y * WIDTH + x) * 3; - Vec3 rayOrigin(0, 0, 0); - Vec3 colCum(0, 0, 0); - - double spp = static_cast(SAMPLES_PER_PIXEL); - for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) { - double u = (x + (sample / spp) - WIDTH / 2.0) / WIDTH; - double v = (y + (sample / spp) - HEIGHT / 2.0) / HEIGHT; - Vec3 rayDir(u, v, 1.0); - rayDir = rayDir.normalize(); - - for (int i = 0; i < numSpheres; ++i) { - double t; - if (spheres[i].intersect(rayOrigin, rayDir, t)) { - Vec3 hitPoint = rayOrigin + rayDir * t; - Vec3 normal = (hitPoint - spheres[i].center).normalize(); - Vec3 lightDir = (lightPos - hitPoint).normalize(); - Vec3 viewDir = -rayDir; - - colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color); - } - } - } - - // Average color across all samples - Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL); - - framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); - framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); - framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); -} - - - -int main() { - Sphere spheres[] = { - { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere - { Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere - { Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere - }; - int numSpheres = sizeof(spheres) / sizeof(Sphere); - Vec3 lightPos(5, 5, 0); - - unsigned char* d_framebuffer; - unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; - Sphere* d_spheres; - cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); - cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere)); - cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice); - - dim3 threadsPerBlock(16, 16); - dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, - (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); - renderKernel<<>>(d_framebuffer, d_spheres, numSpheres, lightPos); - cudaDeviceSynchronize(); - - cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); - saveImage("output.ppm", h_framebuffer, WIDTH, HEIGHT); - - cudaFree(d_framebuffer); - cudaFree(d_spheres); - delete[] h_framebuffer; - - std::cout << "High-resolution image saved as output.ppm" << std::endl; - return 0; -} diff --git a/src/tests/deprecated_test.cu b/src/tests/deprecated_test.cu deleted file mode 100644 index b661dad..0000000 --- a/src/tests/deprecated_test.cu +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include -#include -#include - -#define WIDTH 800 -#define HEIGHT 600 - -struct Vec3 { - double x, y, z; - - __host__ __device__ Vec3() : x(0), y(0), z(0) {} - __host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {} - - __host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3(x + b.x, y + b.y, z + b.z); } - __host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3(x - b.x, y - b.y, z - b.z); } - __host__ __device__ Vec3 operator*(double b) const { return Vec3(x * b, y * b, z * b); } - __host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); } - __host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; } - __host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3(x / len, y / len, z / len); } -}; - -// Simple Phong lighting components -struct Sphere { - Vec3 center; - double radius; - Vec3 color; - - __device__ bool intersect(const Vec3& rayOrigin, const Vec3& rayDir, double& t) const { - Vec3 oc = rayOrigin - center; - double b = oc.dot(rayDir); - double c = oc.dot(oc) - radius * radius; - double h = b * b - c; - if (h < 0.0) return false; - h = sqrt(h); - t = -b - h; - return true; - } -}; - -__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { - double ambientStrength = 0.1; - double diffuseStrength = 0.8; - double specularStrength = 0.5; - int shininess = 32; - - // Ambient - Vec3 ambient = color * ambientStrength; - - // Diffuse - double diff = max(normal.dot(lightDir), 0.0); - Vec3 diffuse = color * (diffuseStrength * diff); - - // Specular - Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); - Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); - - return ambient + diffuse + specular; -} - -__global__ void renderKernel(unsigned char* framebuffer, Sphere sphere, Vec3 lightPos) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= WIDTH || y >= HEIGHT) return; - - int pixelIndex = (y * WIDTH + x) * 3; - - Vec3 rayOrigin(0, 0, 0); - Vec3 rayDir((x - WIDTH / 2.0) / WIDTH, (y - HEIGHT / 2.0) / HEIGHT, 1.0); - rayDir = rayDir.normalize(); - - double t; - if (sphere.intersect(rayOrigin, rayDir, t)) { - Vec3 hitPoint = rayOrigin + rayDir * t; - Vec3 normal = (hitPoint - sphere.center).normalize(); - Vec3 lightDir = (lightPos - hitPoint).normalize(); - Vec3 viewDir = -rayDir; - - Vec3 color = phongShading(hitPoint, normal, lightDir, viewDir, sphere.color); - - framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); - framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); - framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); - } else { - framebuffer[pixelIndex] = 0; - framebuffer[pixelIndex + 1] = 0; - framebuffer[pixelIndex + 2] = 0; - } -} - -void saveImage(const char* filename, unsigned char* framebuffer) { - std::ofstream imageFile(filename, std::ios::out | std::ios::binary); - imageFile << "P6\n" << WIDTH << " " << HEIGHT << "\n255\n"; - for (int i = 0; i < WIDTH * HEIGHT * 3; i++) { - imageFile << framebuffer[i]; - } - imageFile.close(); -} - -int main() { - // Initialize sphere and light source - Sphere sphere = { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }; // Red sphere - Vec3 lightPos(5, 5, 0); - - // Allocate framebuffer on device and host - unsigned char* d_framebuffer; - unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; - cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); - - // Launch - dim3 threadsPerBlock(16, 16); - dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, - (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); - renderKernel<<>>(d_framebuffer, sphere, lightPos); - cudaDeviceSynchronize(); - - // Copy result back to host and save - cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); - saveImage("output.ppm", h_framebuffer); - - // Clean up - cudaFree(d_framebuffer); - delete[] h_framebuffer; - - std::cout << "Image saved as output.ppm" << std::endl; - return 0; -} diff --git a/src/tests/hello_world b/src/tests/hello_world deleted file mode 100755 index bb85e75..0000000 Binary files a/src/tests/hello_world and /dev/null differ diff --git a/src/tests/hello_world.cu b/src/tests/hello_world.cu deleted file mode 100644 index a3b2c6a..0000000 --- a/src/tests/hello_world.cu +++ /dev/null @@ -1,16 +0,0 @@ -#include -#include - -__global__ void hello_from_gpu() { - printf("Hello from GPU!\n"); -} - -int main() { - hello_from_gpu<<<1, 1>>>(); - - cudaDeviceSynchronize(); - - // Reset device - cudaDeviceReset(); - return 0; -} diff --git a/src/tests/output.ppm b/src/tests/output.ppm deleted file mode 100644 index 1d7b780..0000000 Binary files a/src/tests/output.ppm and /dev/null differ diff --git a/src/tests/test_heavy.cu b/src/tests/test_heavy.cu deleted file mode 100644 index 1f70108..0000000 --- a/src/tests/test_heavy.cu +++ /dev/null @@ -1,102 +0,0 @@ -#include -#include -#include -#include - -#include "linalg/linalg.h" -#include "sphere.h" -#include "img/handler.h" - -#define WIDTH 3840 -#define HEIGHT 2160 -#define SAMPLES_PER_PIXEL 8 - - -__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) { - double ambientStrength = 0.1; - double diffuseStrength = 0.8; - double specularStrength = 0.5; - int shininess = 64; - - Vec3 ambient = color * ambientStrength; - double diff = max(normal.dot(lightDir), 0.0); - Vec3 diffuse = color * (diffuseStrength * diff); - - Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize(); - double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess); - Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec); - - return ambient + diffuse + specular; -} - -__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= WIDTH || y >= HEIGHT) return; - - int pixelIndex = (y * WIDTH + x) * 3; - Vec3 rayOrigin(0, 0, 0); - Vec3 colCum(0, 0, 0); - - double spp = static_cast(SAMPLES_PER_PIXEL); - for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) { - double u = (x + (sample / spp) - WIDTH / 2.0) / WIDTH; - double v = (y + (sample / spp) - HEIGHT / 2.0) / HEIGHT; - Vec3 rayDir(u, v, 1.0); - rayDir = rayDir.normalize(); - - for (int i = 0; i < numSpheres; ++i) { - double t; - if (spheres[i].intersect(rayOrigin, rayDir, t)) { - Vec3 hitPoint = rayOrigin + rayDir * t; - Vec3 normal = (hitPoint - spheres[i].center).normalize(); - Vec3 lightDir = (lightPos - hitPoint).normalize(); - Vec3 viewDir = -rayDir; - - colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color); - } - } - } - - // Average color across all samples - Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL); - - framebuffer[pixelIndex] = static_cast(fmin(color.x, 1.0) * 255); - framebuffer[pixelIndex + 1] = static_cast(fmin(color.y, 1.0) * 255); - framebuffer[pixelIndex + 2] = static_cast(fmin(color.z, 1.0) * 255); -} - - - -int main() { - Sphere spheres[] = { - { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere - { Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere - { Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere - }; - int numSpheres = sizeof(spheres) / sizeof(Sphere); - Vec3 lightPos(5, 5, 0); - - unsigned char* d_framebuffer; - unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3]; - Sphere* d_spheres; - cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3); - cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere)); - cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice); - - dim3 threadsPerBlock(16, 16); - dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x, - (HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y); - renderKernel<<>>(d_framebuffer, d_spheres, numSpheres, lightPos); - cudaDeviceSynchronize(); - - cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost); - saveImage("output.ppm", h_framebuffer, WIDTH, HEIGHT); - - cudaFree(d_framebuffer); - cudaFree(d_spheres); - delete[] h_framebuffer; - - std::cout << "High-resolution image saved as output.ppm" << std::endl; - return 0; -} diff --git a/test_read.py b/test_read.py new file mode 100644 index 0000000..e37c4a3 --- /dev/null +++ b/test_read.py @@ -0,0 +1,51 @@ +import numpy as np +from netCDF4 import Dataset + +# Load the NetCDF file +file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4' +ncfile = Dataset(file_path, 'r') + +# Check the available variables in the file +print(ncfile.variables.keys()) + +U = ncfile.variables['T'][:] + +# Check the shape of the variable +print("Shape of U:", U.shape) + +# Compute the mean of the variable across all axes (for all elements in U) +U_mean = np.mean(U) +U_sum = np.sum(U) + +# Print the mean +print("Mean of U:", U_mean) + +print("Sum of U:", U_sum) + +sumval = 0 +row = U[2,20,100] + +print("Shape of row", row.shape) +print("Row data for first 20 entries", row[0:5]) + +row = row[0:5] +print(f"{type(row)} {row.dtype}") + +n = 0 +for val in row: + #if not np.ma.is_masked(val): + n+=1 + sumval += np.float64(val) +Mean1 = np.mean(row) +Mean2 = sumval/n + +print(type(Mean1)) +print(type(Mean2)) +print(Mean1) +print(Mean2) + +print(f"Why does {np.mean(row):.10f} not equal {sumval/n:.10f} ?!") + + +# Close the NetCDF file +ncfile.close() \ No newline at end of file diff --git a/test_read2.py b/test_read2.py new file mode 100644 index 0000000..b137d14 --- /dev/null +++ b/test_read2.py @@ -0,0 +1,18 @@ +import numpy as np +from netCDF4 import Dataset + +# Load the NetCDF file +file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4' +ncfile = Dataset(file_path, 'r') + +# Check the available variables in the file +print(ncfile.variables.keys()) + +Temp = ncfile.variables['T'][:] + +print(f"{Temp[1, 20, 100, 100]=}") +print(f"{Temp.flat[12949732]=}") + + +# Close the NetCDF file +ncfile.close() \ No newline at end of file diff --git a/test_read3.py b/test_read3.py new file mode 100644 index 0000000..335b7e9 --- /dev/null +++ b/test_read3.py @@ -0,0 +1,30 @@ +import numpy as np +from netCDF4 import Dataset + +# file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4' +# ncfile = Dataset(file_path, 'r') + +file_paths = [ + 'data/atmosphere_MERRA-wind-speed[179253532]/MERRA2_400.inst6_3d_ana_Np.20120101.nc4', + 'data/atmosphere_MERRA-wind-speed[179253532]/MERRA2_400.inst6_3d_ana_Np.20120102.nc4', + 'data/atmosphere_MERRA-wind-speed[179253532]/MERRA2_400.inst6_3d_ana_Np.20120103.nc4' +] + +ncfiles = [Dataset(file_path) for file_path in file_paths] + + +# print(f"{Temp[0, 20, 100, 100]=}") + +for i in range(10): + Temp = ncfiles[i//4].variables['T'][:] + x = Temp[i%4, 20, 100, 100] + + Temp2 = ncfiles[(i+1)//4].variables['T'][:] + y = Temp2[(i+1)%4, 20, 100, 100] + print(f"{(x+y)/2=}") + + + +# Close the NetCDF file +for ncfile in ncfiles: + ncfile.close() \ No newline at end of file