commit
c92d0bc2a9
|
|
@ -2,7 +2,11 @@
|
||||||
.vscode
|
.vscode
|
||||||
|
|
||||||
# Build
|
# Build
|
||||||
build
|
|
||||||
build/
|
build/
|
||||||
build/*
|
*.o
|
||||||
|
|
||||||
|
# Data
|
||||||
|
data/
|
||||||
|
|
||||||
|
# Resulting images
|
||||||
|
*.ppm
|
||||||
|
|
|
||||||
21
README.md
21
README.md
|
|
@ -1 +1,22 @@
|
||||||
# cuda-based-raytrace
|
# cuda-based-raytrace
|
||||||
|
|
||||||
|
## How to run
|
||||||
|
If necessary clean previous build:
|
||||||
|
```bash
|
||||||
|
make clean
|
||||||
|
```
|
||||||
|
|
||||||
|
Then build (currently always debug build):
|
||||||
|
```bash
|
||||||
|
make all
|
||||||
|
```
|
||||||
|
|
||||||
|
Run:
|
||||||
|
```bash
|
||||||
|
./build/main
|
||||||
|
```
|
||||||
|
|
||||||
|
To open the resulting image you can use (Ubuntu):
|
||||||
|
```bash
|
||||||
|
xdg-open output.ppm
|
||||||
|
```
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Load the needed modules in Habrok
|
||||||
|
|
||||||
|
ml CUDA
|
||||||
|
ml netCDF-C++4
|
||||||
10
makefile
10
makefile
|
|
@ -1,6 +1,7 @@
|
||||||
# Compiler and flags
|
# Compiler and flags
|
||||||
NVCC = nvcc
|
NVCC = nvcc
|
||||||
CXXFLAGS = -I./src -I./linalg -I./img -I./objs -std=c++17
|
CXXFLAGS = -I./src -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs) -g -G
|
||||||
|
COMPILE_OBJ_FLAGS = --device-c
|
||||||
|
|
||||||
# Directories
|
# Directories
|
||||||
SRC_DIR = src
|
SRC_DIR = src
|
||||||
|
|
@ -8,7 +9,7 @@ BUILD_DIR = build
|
||||||
|
|
||||||
# Files
|
# Files
|
||||||
TARGET = $(BUILD_DIR)/main
|
TARGET = $(BUILD_DIR)/main
|
||||||
SRC_FILES = $(wildcard $(SRC_DIR)/*.cu)
|
SRC_FILES := $(shell find $(SRC_DIR) -type f \( -name '*.cu' -o -name '*.cpp' \))
|
||||||
OBJ_FILES = $(patsubst $(SRC_DIR)/%.cu,$(BUILD_DIR)/%.o,$(SRC_FILES))
|
OBJ_FILES = $(patsubst $(SRC_DIR)/%.cu,$(BUILD_DIR)/%.o,$(SRC_FILES))
|
||||||
|
|
||||||
# Default target
|
# Default target
|
||||||
|
|
@ -16,11 +17,12 @@ all: $(TARGET)
|
||||||
|
|
||||||
# Build the main target
|
# Build the main target
|
||||||
$(TARGET): $(OBJ_FILES) | $(BUILD_DIR)
|
$(TARGET): $(OBJ_FILES) | $(BUILD_DIR)
|
||||||
$(NVCC) $(CXXFLAGS) -o $@ $^
|
$(NVCC) $(CXXFLAGS) $^ -o $@
|
||||||
|
|
||||||
# Compile object files
|
# Compile object files
|
||||||
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cu
|
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cu
|
||||||
$(NVCC) $(CXXFLAGS) -c $< -o $@
|
@mkdir -p $(dir $@)
|
||||||
|
$(NVCC) $(CXXFLAGS) $(COMPILE_OBJ_FLAGS) -c $< -o $@
|
||||||
|
|
||||||
# Debug build
|
# Debug build
|
||||||
debug: CXXFLAGS += -g
|
debug: CXXFLAGS += -g
|
||||||
|
|
|
||||||
BIN
output.ppm
BIN
output.ppm
Binary file not shown.
|
|
@ -0,0 +1,18 @@
|
||||||
|
#include "consts.h"
|
||||||
|
|
||||||
|
__device__ Point3 d_cameraPos;
|
||||||
|
__device__ Vec3 d_cameraDir;
|
||||||
|
__device__ Vec3 d_cameraUp;
|
||||||
|
__device__ Point3 d_lightPos;
|
||||||
|
|
||||||
|
Point3 h_cameraPos = Point3::init(-0.7, -1.0, -2.0);
|
||||||
|
Vec3 h_cameraDir = Vec3::init(0.4, 0.6, 1.0).normalize();
|
||||||
|
Vec3 h_cameraUp = Vec3::init(0.0, 1.0, 0.0).normalize();
|
||||||
|
Point3 h_lightPos = Point3::init(1.5, 2.0, -1.0);
|
||||||
|
|
||||||
|
void copyConstantsToDevice() {
|
||||||
|
cudaMemcpyToSymbol(d_cameraPos, &h_cameraPos, sizeof(Point3));
|
||||||
|
cudaMemcpyToSymbol(d_cameraDir, &h_cameraDir, sizeof(Vec3));
|
||||||
|
cudaMemcpyToSymbol(d_cameraUp, &h_cameraUp, sizeof(Vec3));
|
||||||
|
cudaMemcpyToSymbol(d_lightPos, &h_lightPos, sizeof(Point3));
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
#ifndef CONSTS_H
|
||||||
|
#define CONSTS_H
|
||||||
|
|
||||||
|
#include "linalg/vec.h"
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
// --------------------------- Basic Constants ---------------------------
|
||||||
|
const int VOLUME_WIDTH = 49;
|
||||||
|
const int VOLUME_HEIGHT = 51;
|
||||||
|
const int VOLUME_DEPTH = 42;
|
||||||
|
|
||||||
|
const int IMAGE_WIDTH = 2560;
|
||||||
|
const int IMAGE_HEIGHT = 1440;
|
||||||
|
|
||||||
|
const double epsilon = 1e-10f;
|
||||||
|
const double infty = 1e15f; // This vlalue is used to represent missing values in data
|
||||||
|
|
||||||
|
|
||||||
|
// --------------------------- Raycasting Constants ---------------------------
|
||||||
|
const int SAMPLES_PER_PIXEL = 8; // TODO: Right now uses simple variance, consider using something more advanced (e.g., some commonly-used noise map)
|
||||||
|
|
||||||
|
const float alphaAcumLimit = 0.65f; // TODO: Idk what a good accumulation value is
|
||||||
|
const float minAllowedDensity = 0.001f;
|
||||||
|
|
||||||
|
const float stepSize = 0.002f;
|
||||||
|
|
||||||
|
|
||||||
|
// --------------------------- Illumination Constants ---------------------------
|
||||||
|
const double ambientStrength = 0.3;
|
||||||
|
const double diffuseStrength = 0.8;
|
||||||
|
const double specularStrength = 0.5;
|
||||||
|
const int shininess = 32;
|
||||||
|
const float fov = 60.0f * (M_PI / 180.0f);
|
||||||
|
|
||||||
|
// Camera and Light
|
||||||
|
extern __device__ Point3 d_cameraPos;
|
||||||
|
extern __device__ Vec3 d_cameraDir;
|
||||||
|
extern __device__ Vec3 d_cameraUp;
|
||||||
|
extern __device__ Point3 d_lightPos;
|
||||||
|
|
||||||
|
// --------------------------- Functions for handling external constants ---------------------------
|
||||||
|
void copyConstantsToDevice();
|
||||||
|
|
||||||
|
#endif // CONSTS_H
|
||||||
|
|
@ -0,0 +1,77 @@
|
||||||
|
#include "datareader.h"
|
||||||
|
|
||||||
|
#include <netcdf>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace netCDF;
|
||||||
|
|
||||||
|
DataReader::DataReader(const std::string &path, std::string variableName):
|
||||||
|
filePathManager(path), variableName(variableName) { }
|
||||||
|
|
||||||
|
size_t DataReader::fileLength(size_t fileIndex) {
|
||||||
|
NcFile data(filePathManager.getPath(fileIndex), NcFile::read);
|
||||||
|
|
||||||
|
multimap<string, NcVar> vars = data.getVars();
|
||||||
|
|
||||||
|
NcVar var = vars.find(variableName)->second;
|
||||||
|
|
||||||
|
size_t length = 1;
|
||||||
|
for (NcDim dim: var.getDims()) {
|
||||||
|
length *= dim.getSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t DataReader::axisLength(size_t fileIndex, const std::string& axisName) {
|
||||||
|
NcFile data(filePathManager.getPath(fileIndex), NcFile::read);
|
||||||
|
|
||||||
|
multimap<string, NcVar> vars = data.getVars();
|
||||||
|
|
||||||
|
NcVar var = vars.find(axisName)->second;
|
||||||
|
|
||||||
|
assert(var.getDimCount() == 1);
|
||||||
|
|
||||||
|
netCDF::NcDim dim = var.getDim(0);
|
||||||
|
return dim.getSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void DataReader::loadFile(T* dataOut, size_t fileIndex) {
|
||||||
|
loadFile(dataOut, fileIndex, variableName);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) {
|
||||||
|
// TODO: We could make the index wrap around so that we dont lose data
|
||||||
|
// using fileIndex % NUMBEROFFILES here.
|
||||||
|
|
||||||
|
NcFile data(filePathManager.getPath(fileIndex), NcFile::read);
|
||||||
|
|
||||||
|
multimap<string, NcVar> vars = data.getVars();
|
||||||
|
|
||||||
|
NcVar var = vars.find(varName)->second;
|
||||||
|
|
||||||
|
var.getVar(dataOut);
|
||||||
|
}
|
||||||
|
|
||||||
|
template void DataReader::loadFile<float>(float* dataOut, size_t fileIndex, const string& variableName);
|
||||||
|
template void DataReader::loadFile<int>(int* dataOut, size_t fileIndex, const string& variableName);
|
||||||
|
template void DataReader::loadFile<double>(double* dataOut, size_t fileIndex, const string& variableName);
|
||||||
|
template void DataReader::loadFile<double>(double* dataOut, size_t fileIndex);
|
||||||
|
template void DataReader::loadFile<float>(float* dataOut, size_t fileIndex);
|
||||||
|
template void DataReader::loadFile<int>(int* dataOut, size_t fileIndex);
|
||||||
|
|
||||||
|
DataReader::~DataReader() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// template <typename T>
|
||||||
|
// void DataReader::readAndAllocateAxis(T** axis_ptr, size_t *size, const string &varName) {
|
||||||
|
// assert(var.getDimCount() == 1);
|
||||||
|
// netCDF::NcDim dim = var.getDim(0);
|
||||||
|
// *size = dim.getSize();
|
||||||
|
// cudaError_t status = cudaMallocManaged(axis_ptr, *size*sizeof(T));
|
||||||
|
// var.getVar(*axis_ptr);
|
||||||
|
// }
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
#ifndef DATAREADER_H
|
||||||
|
#define DATAREADER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <experimental/propagate_const>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "filepathmanager.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Simple wrapper around all the NetCDF functionality.
|
||||||
|
*/
|
||||||
|
class DataReader {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Constructor.
|
||||||
|
* @param path Path to the directory containing all the .nc4 files.
|
||||||
|
* @param variableName The variable we are interested in.
|
||||||
|
*/
|
||||||
|
DataReader(const std::string &path, std::string variableName);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The length of the flat data in variableName.
|
||||||
|
* Used for allocating the right amount of memory.
|
||||||
|
*/
|
||||||
|
size_t fileLength(size_t fileIndex);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Write all the data in file fileIndex of variable we re interested in into the dataOut.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
void loadFile(T* dataOut, size_t fileIndex);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Write all the data in file fileIndex of variable variableName into the dataOut.
|
||||||
|
* @param dataOut pointer to memory that should be written to.
|
||||||
|
* @param fileIndex the index of the file we want to load into memory.
|
||||||
|
* @param variableName the name of the variable
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
void loadFile(T* dataOut, size_t fileIndex, const std::string& variableName);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get size of a variable.
|
||||||
|
*/
|
||||||
|
size_t axisLength(size_t fileIndex, const std::string& axisName);
|
||||||
|
|
||||||
|
~DataReader();
|
||||||
|
private:
|
||||||
|
FilePathManager filePathManager;
|
||||||
|
std::string variableName;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif //DATAREADER_H
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
#include "fielddata.h"
|
||||||
|
|
||||||
|
__device__ float getVal(
|
||||||
|
const FieldMetadata &md,
|
||||||
|
const FieldData &d,
|
||||||
|
const size_t &timeInd,
|
||||||
|
const size_t &levInd,
|
||||||
|
const size_t &latInd,
|
||||||
|
const size_t &lonInd
|
||||||
|
) {
|
||||||
|
size_t valArrInd = (d.fieldInd + timeInd) / md.numberOfTimeStepsPerFile;
|
||||||
|
size_t trueTimeInd = (d.fieldInd + timeInd) % md.numberOfTimeStepsPerFile;
|
||||||
|
size_t sizeSpatialData = md.widthSize*md.heightSize*md.depthSize;
|
||||||
|
size_t size2DMapData = md.widthSize*md.heightSize;
|
||||||
|
return d.valArrays[valArrInd][
|
||||||
|
trueTimeInd*sizeSpatialData
|
||||||
|
+ levInd*size2DMapData
|
||||||
|
+ latInd*md.widthSize
|
||||||
|
+ lonInd
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
#ifndef FIELDDATA_H
|
||||||
|
#define FIELDDATA_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
struct FieldMetadata {
|
||||||
|
size_t widthSize; // Number of different longitudes
|
||||||
|
size_t heightSize; // Number of different latitudes
|
||||||
|
size_t depthSize; // Number of different levels
|
||||||
|
size_t timeSize; // Number of different times
|
||||||
|
|
||||||
|
// lons is a managed Unified Memory array of size widthCount that indicates
|
||||||
|
// that getVal(t, i, j, k) is a value with longitude of lons[i].
|
||||||
|
// The other such arrays are similarly defined.
|
||||||
|
double *lons;
|
||||||
|
double *lats;
|
||||||
|
double *levs;
|
||||||
|
int *times;
|
||||||
|
|
||||||
|
size_t numberOfTimeStepsPerFile;
|
||||||
|
};
|
||||||
|
|
||||||
|
using FieldMetadata = FieldMetadata;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Allows for accessing data of a time-slice of a scalar field
|
||||||
|
* that may start in the middle of a file or go range over multiple files
|
||||||
|
* by holding references to multiple files at a time.
|
||||||
|
*
|
||||||
|
* @note Use the getVal method to index into it and get values.
|
||||||
|
*/
|
||||||
|
struct FieldData {
|
||||||
|
static constexpr size_t FILESNUM = 2; // Number of files stored in a FieldData struct.
|
||||||
|
static constexpr size_t numberOfTimeStepsPerField = 2;
|
||||||
|
|
||||||
|
size_t fieldInd; // Indicates
|
||||||
|
|
||||||
|
// A uniform array of length FILESNUM storing pointers to 4D arrays stored in device memory.
|
||||||
|
float **valArrays;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the scalar field value at a particular index (timeInd, levInd, latInd, lonInd).
|
||||||
|
* Note that the timeInd may be counter-intuitive. See explanation in gpubufferhandler.h.
|
||||||
|
*
|
||||||
|
* @return scalar field float value.
|
||||||
|
*/
|
||||||
|
extern __device__ float getVal(
|
||||||
|
const FieldMetadata &md,
|
||||||
|
const FieldData &d,
|
||||||
|
const size_t &timeInd,
|
||||||
|
const size_t &levInd,
|
||||||
|
const size_t &latInd,
|
||||||
|
const size_t &lonInd
|
||||||
|
);
|
||||||
|
|
||||||
|
#endif //FIELDDATA_H
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "filepathmanager.h"
|
||||||
|
|
||||||
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
|
FilePathManager::FilePathManager(const std::string& path) {
|
||||||
|
// Check if the directory exists
|
||||||
|
if (!fs::exists(path) || !fs::is_directory(path)) {
|
||||||
|
std::cerr << "Error: Directory does not exist or is not valid." << std::endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over directory entries
|
||||||
|
for (const auto& entry : fs::directory_iterator(path)) {
|
||||||
|
if (entry.is_regular_file() && entry.path().extension() == ".nc4") {
|
||||||
|
fileNames.push_back(entry.path().string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort file names lexicographically
|
||||||
|
std::sort(fileNames.begin(), fileNames.end());
|
||||||
|
|
||||||
|
// Print sorted file names
|
||||||
|
std::cout << "Files in directory '" << path << "' sorted lexicographically:\n";
|
||||||
|
for (const auto& fileName : fileNames) {
|
||||||
|
std::cout << fileName << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* FilePathManager::getPath(size_t index) const {
|
||||||
|
return fileNames.at(index).c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
FilePathManager::~FilePathManager() { }
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
#ifndef FILEPATHMANAGER_H
|
||||||
|
#define FILEPATHMANAGER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Simple class that is responsible for mapping a file index to a file path.
|
||||||
|
* So for example:
|
||||||
|
* index = 0 -> MERRA2_400.inst6_3d_ana_Np.20120101.nc4
|
||||||
|
* index = 1 -> MERRA2_400.inst6_3d_ana_Np.20120102.nc4
|
||||||
|
* index = 2 -> MERRA2_400.inst6_3d_ana_Np.20120103.nc4
|
||||||
|
* index = 3 -> MERRA2_400.inst6_3d_ana_Np.20120104.nc4
|
||||||
|
* etc...
|
||||||
|
*/
|
||||||
|
class FilePathManager {
|
||||||
|
public:
|
||||||
|
FilePathManager(const std::string& path);
|
||||||
|
size_t getNumberOfFiles();
|
||||||
|
const char* getPath(size_t index) const;
|
||||||
|
|
||||||
|
~FilePathManager();
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<std::string> fileNames;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif //FILEPATHMANAGER_H
|
||||||
|
|
@ -0,0 +1,277 @@
|
||||||
|
#include "gpubuffer.h"
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
#include <thread>
|
||||||
|
#include <queue>
|
||||||
|
#include <cassert>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <iostream>
|
||||||
|
#include <future>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
struct File {
|
||||||
|
condition_variable cv;
|
||||||
|
mutex m;
|
||||||
|
bool valid;
|
||||||
|
size_t size;
|
||||||
|
float *h_data; // host data
|
||||||
|
float *d_data; // device data
|
||||||
|
};
|
||||||
|
|
||||||
|
struct LoadFileJob {
|
||||||
|
size_t fileIndex;
|
||||||
|
size_t bufferIndex;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GetSizeJob {
|
||||||
|
size_t fileIndex;
|
||||||
|
promise<size_t> size;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GetAxisDoubleJob {
|
||||||
|
size_t fileIndex;
|
||||||
|
string axisName;
|
||||||
|
promise<pair<size_t, double *>> axis;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GetAxisIntJob {
|
||||||
|
size_t fileIndex;
|
||||||
|
string axisName;
|
||||||
|
promise<pair<size_t, int *>> axis;
|
||||||
|
};
|
||||||
|
|
||||||
|
using Job = variant<LoadFileJob, GetSizeJob, GetAxisIntJob, GetAxisDoubleJob>;
|
||||||
|
|
||||||
|
class GPUBuffer::impl {
|
||||||
|
public:
|
||||||
|
impl(DataReader& dataReader);
|
||||||
|
void loadFile(LoadFileJob job);
|
||||||
|
void loadFile(size_t fileIndex, size_t bufferIndex);
|
||||||
|
void getSize(GetSizeJob job);
|
||||||
|
size_t getSize(size_t fileIndex); // Most probably blocking
|
||||||
|
void getAxis(GetAxisDoubleJob job);
|
||||||
|
void getAxis(GetAxisIntJob job);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::pair<size_t, T *> getAxis(size_t fileIndex, const std::string& axisName); // Most probably blocking
|
||||||
|
~impl();
|
||||||
|
|
||||||
|
File buffer[numBufferedFiles];
|
||||||
|
|
||||||
|
// Thread worker things
|
||||||
|
void worker();
|
||||||
|
queue<Job> jobs;
|
||||||
|
unique_ptr<thread> ioworker;
|
||||||
|
cudaStream_t iostream;
|
||||||
|
DataReader& dataReader;
|
||||||
|
condition_variable queuecv;
|
||||||
|
mutex queuecv_m;
|
||||||
|
bool workerRunning = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
GPUBuffer::GPUBuffer(DataReader& dataReader): pImpl(make_unique<impl>(dataReader)) { }
|
||||||
|
|
||||||
|
size_t GPUBuffer::impl::getSize(size_t fileIndex) {
|
||||||
|
promise<size_t> promise;
|
||||||
|
future<size_t> future = promise.get_future();
|
||||||
|
{
|
||||||
|
lock_guard<mutex> lk(queuecv_m);
|
||||||
|
jobs.push(GetSizeJob{fileIndex, move(promise)});
|
||||||
|
}
|
||||||
|
queuecv.notify_all();
|
||||||
|
|
||||||
|
future.wait();
|
||||||
|
|
||||||
|
return future.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
pair<size_t, T *> GPUBuffer::getAxis(size_t fileIndex, const string& axisName) {
|
||||||
|
return pImpl->getAxis<T>(fileIndex, axisName);
|
||||||
|
}
|
||||||
|
template pair<size_t, int *> GPUBuffer::getAxis<int>(size_t fileIndex, const string& axisName);
|
||||||
|
template pair<size_t, double *> GPUBuffer::getAxis<double>(size_t fileIndex, const string& axisName);
|
||||||
|
|
||||||
|
template <>
|
||||||
|
pair<size_t, double *> GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) {
|
||||||
|
promise<pair<size_t, double *>> promise;
|
||||||
|
future<pair<size_t, double *>> future = promise.get_future();
|
||||||
|
{
|
||||||
|
lock_guard<mutex> lk(queuecv_m);
|
||||||
|
jobs.push(GetAxisDoubleJob{fileIndex, axisName, move(promise)});
|
||||||
|
}
|
||||||
|
queuecv.notify_all();
|
||||||
|
|
||||||
|
future.wait();
|
||||||
|
|
||||||
|
return future.get();
|
||||||
|
}
|
||||||
|
template pair<size_t, double *> GPUBuffer::impl::getAxis<double>(size_t fileIndex, const string& axisName);
|
||||||
|
|
||||||
|
template <>
|
||||||
|
pair<size_t, int *> GPUBuffer::impl::getAxis(size_t fileIndex, const string& axisName) {
|
||||||
|
promise<pair<size_t, int *>> promise;
|
||||||
|
future<pair<size_t, int *>> future = promise.get_future();
|
||||||
|
{
|
||||||
|
lock_guard<mutex> lk(queuecv_m);
|
||||||
|
jobs.push(GetAxisIntJob{fileIndex, axisName, move(promise)});
|
||||||
|
}
|
||||||
|
queuecv.notify_all();
|
||||||
|
|
||||||
|
future.wait();
|
||||||
|
|
||||||
|
return future.get();
|
||||||
|
}
|
||||||
|
template pair<size_t, int *> GPUBuffer::impl::getAxis<int>(size_t fileIndex, const string& axisName);
|
||||||
|
|
||||||
|
GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) {
|
||||||
|
cudaStreamCreate(&iostream);
|
||||||
|
|
||||||
|
ioworker = make_unique<thread>([this]() { worker(); });
|
||||||
|
|
||||||
|
size_t size = getSize(0);
|
||||||
|
auto x = getAxis<int>(0, "time");
|
||||||
|
size_t sizeTime = x.first;
|
||||||
|
cudaFree(x.second);
|
||||||
|
for (size_t i = 0; i < numBufferedFiles; i++) {
|
||||||
|
{
|
||||||
|
File &file = buffer[i];
|
||||||
|
lock_guard<mutex> lk(file.m);
|
||||||
|
cudaMallocHost(&file.h_data, sizeof(float)*size);
|
||||||
|
cudaMalloc(&file.d_data, sizeof(float)*size);
|
||||||
|
file.size = size;
|
||||||
|
file.valid = false;
|
||||||
|
}
|
||||||
|
// loadFile(i, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GPUBuffer::~GPUBuffer() {
|
||||||
|
}
|
||||||
|
|
||||||
|
GPUBuffer::impl::~impl() {
|
||||||
|
{
|
||||||
|
lock_guard<mutex> lk(queuecv_m);
|
||||||
|
workerRunning = false;
|
||||||
|
}
|
||||||
|
queuecv.notify_all();
|
||||||
|
ioworker->join();
|
||||||
|
for (size_t i = 0; i < numBufferedFiles; i++) {
|
||||||
|
File &file = buffer[i];
|
||||||
|
cudaFree(file.d_data);
|
||||||
|
cudaFreeHost(file.h_data);
|
||||||
|
}
|
||||||
|
cudaStreamDestroy(iostream);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBuffer::impl::loadFile(LoadFileJob job) {
|
||||||
|
File &file = buffer[job.bufferIndex];
|
||||||
|
{
|
||||||
|
lock_guard<mutex> lk(file.m);
|
||||||
|
assert(!file.valid);
|
||||||
|
cout << "loading file with index " << job.fileIndex << "\n";
|
||||||
|
dataReader.loadFile<float>(file.h_data, job.fileIndex);
|
||||||
|
cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream);
|
||||||
|
cudaStreamSynchronize(iostream);
|
||||||
|
buffer[job.bufferIndex].valid = true;
|
||||||
|
}
|
||||||
|
file.cv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBuffer::impl::getSize(GetSizeJob job) {
|
||||||
|
size_t size = dataReader.fileLength(job.fileIndex);
|
||||||
|
job.size.set_value(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBuffer::impl::getAxis(GetAxisDoubleJob job) {
|
||||||
|
pair<size_t, double *> array;
|
||||||
|
array.first = dataReader.axisLength(job.fileIndex, job.axisName);
|
||||||
|
cudaError_t status = cudaMallocManaged(&array.second, array.first*sizeof(double));
|
||||||
|
dataReader.loadFile<double>(array.second, job.fileIndex, job.axisName);
|
||||||
|
job.axis.set_value(array);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBuffer::impl::getAxis(GetAxisIntJob job) {
|
||||||
|
pair<size_t, int *> array;
|
||||||
|
array.first = dataReader.axisLength(job.fileIndex, job.axisName);
|
||||||
|
cudaError_t status = cudaMallocManaged(&array.second, array.first*sizeof(int));
|
||||||
|
dataReader.loadFile<int>(array.second, job.fileIndex, job.axisName);
|
||||||
|
job.axis.set_value(array);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBuffer::loadFile(size_t fileIndex, size_t bufferIndex) {
|
||||||
|
pImpl->loadFile(fileIndex, bufferIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBuffer::impl::loadFile(size_t fileIndex, size_t bufferIndex) {
|
||||||
|
LoadFileJob job = {
|
||||||
|
.fileIndex = fileIndex,
|
||||||
|
.bufferIndex = bufferIndex
|
||||||
|
};
|
||||||
|
|
||||||
|
// Main thread theoretically blocks on 2 mutexes here
|
||||||
|
// but it _should_ never have to wait for them.
|
||||||
|
{
|
||||||
|
File &file = buffer[bufferIndex];
|
||||||
|
|
||||||
|
std::unique_lock<std::mutex> lk(file.m, std::defer_lock);
|
||||||
|
bool lockval = lk.try_lock();
|
||||||
|
if (!lockval) {
|
||||||
|
cout << "waited on GPUBuffer during loadFile orchestration :(\n";
|
||||||
|
lk.lock();
|
||||||
|
}
|
||||||
|
file.valid = false;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lk(queuecv_m, std::defer_lock);
|
||||||
|
bool lockval = lk.try_lock();
|
||||||
|
if (!lockval) {
|
||||||
|
cout << "waited on IOworker queue during loadFile orchestration :(\n";
|
||||||
|
lk.lock();
|
||||||
|
}
|
||||||
|
jobs.push(job);
|
||||||
|
}
|
||||||
|
queuecv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
|
void GPUBuffer::impl::worker() {
|
||||||
|
while(workerRunning) {
|
||||||
|
Job job;
|
||||||
|
{
|
||||||
|
unique_lock<mutex> lk(queuecv_m);
|
||||||
|
queuecv.wait(lk, [this]{ return !workerRunning || !jobs.empty(); });
|
||||||
|
if (!workerRunning) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
job = move(jobs.front());
|
||||||
|
jobs.pop();
|
||||||
|
}
|
||||||
|
if(holds_alternative<LoadFileJob>(job)) {
|
||||||
|
loadFile(get<LoadFileJob>(job));
|
||||||
|
} else if(holds_alternative<GetSizeJob>(job)) {
|
||||||
|
getSize(move(get<GetSizeJob>(job)));
|
||||||
|
} else if(holds_alternative<GetAxisDoubleJob>(job)) {
|
||||||
|
getAxis(move(get<GetAxisDoubleJob>(job)));
|
||||||
|
} else if(holds_alternative<GetAxisIntJob>(job)) {
|
||||||
|
getAxis(move(get<GetAxisIntJob>(job)));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) {
|
||||||
|
File &file = pImpl->buffer[bufferIndex];
|
||||||
|
|
||||||
|
// TODO: Might be nice to measure the blocking time here.
|
||||||
|
unique_lock<mutex> lk(file.m);
|
||||||
|
file.cv.wait(lk, [this, bufferIndex]{ return pImpl->buffer[bufferIndex].valid; });
|
||||||
|
|
||||||
|
DataHandle dh = {
|
||||||
|
.d_data = file.d_data,
|
||||||
|
.size = file.size
|
||||||
|
};
|
||||||
|
return dh;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
#ifndef GPUBUFFER_H
|
||||||
|
#define GPUBUFFER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <experimental/propagate_const>
|
||||||
|
|
||||||
|
#include "datareader.h"
|
||||||
|
|
||||||
|
struct DataHandle {
|
||||||
|
float *d_data; // Device memory
|
||||||
|
size_t size;
|
||||||
|
// Could include the data stored in host memory h_data in this handle if it were needed.
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Handles the asynchronous (un)loading data to the GPU. The rest of the
|
||||||
|
* application should not have to interface directly with this class. Getting data
|
||||||
|
* should go over the GPUBufferHandler class.
|
||||||
|
*
|
||||||
|
* @note This class uses a queue to give loading jobs to the worker thread.
|
||||||
|
* NetCDF-C is not thread safe so you may never read data using netCDF directly
|
||||||
|
* on any other thread than this worker thread.
|
||||||
|
*
|
||||||
|
* Assumes all the data in the nc4 files is the same size.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class GPUBuffer {
|
||||||
|
public:
|
||||||
|
static constexpr size_t numBufferedFiles = 3; // Number of files stored in memory at one time.
|
||||||
|
|
||||||
|
GPUBuffer(DataReader& dataReader);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Asynchronously tells the GPUBuffer to eventually load a particular file index
|
||||||
|
* into a buffer index (in which part of the buffer the file should be stored).
|
||||||
|
*/
|
||||||
|
void loadFile(size_t fileIndex, size_t bufferIndex); // No blocking
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the values stored in a particular buffer index
|
||||||
|
* @return A struct DataHandle that points to the memory and gives its size.
|
||||||
|
*/
|
||||||
|
DataHandle getDataHandle(size_t bufferIndex); // Potentially blocking
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the data from an axis (e.g. longitude) and its size.
|
||||||
|
* @note This is a blocking operation, so it makes a job for the worker
|
||||||
|
* to read the data and then waits untill the job is completed.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
std::pair<size_t, T *> getAxis(size_t fileIndex, const std::string& axisName); // Most probably blocking
|
||||||
|
|
||||||
|
~GPUBuffer();
|
||||||
|
private:
|
||||||
|
class impl;
|
||||||
|
std::experimental::propagate_const<std::unique_ptr<impl>> pImpl;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif //GPUBUFFER_H
|
||||||
|
|
@ -0,0 +1,93 @@
|
||||||
|
#include "gpubufferhandler.h"
|
||||||
|
#include "fielddata.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
GPUBufferHandler::GPUBufferHandler(GPUBuffer& gpuBuffer):
|
||||||
|
gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) {
|
||||||
|
cudaMallocManaged(&fmd, sizeof(FieldMetadata));
|
||||||
|
|
||||||
|
auto [widthSize, lons] = gpuBuffer.getAxis<double>(0, "lon");
|
||||||
|
fmd->widthSize = widthSize;
|
||||||
|
fmd->lons = lons;
|
||||||
|
|
||||||
|
auto [heightSize, lats] = gpuBuffer.getAxis<double>(0, "lat");
|
||||||
|
fmd->heightSize = heightSize;
|
||||||
|
fmd->lats = lats;
|
||||||
|
|
||||||
|
auto [depthSize, levs] = gpuBuffer.getAxis<double>(0, "lev");
|
||||||
|
fmd->depthSize = depthSize;
|
||||||
|
fmd->levs = levs;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) {
|
||||||
|
gpuBuffer.loadFile(fileInd,fileInd);
|
||||||
|
fileInd++;
|
||||||
|
}
|
||||||
|
|
||||||
|
fmd->timeSize = FieldData::numberOfTimeStepsPerField;
|
||||||
|
|
||||||
|
cudaMallocManaged(&fmd->times, sizeof(fmd->numberOfTimeStepsPerFile*sizeof(int)));
|
||||||
|
|
||||||
|
auto [numberOfTimeStepsPerFile, times] = gpuBuffer.getAxis<int>(0, "time");
|
||||||
|
fmd->numberOfTimeStepsPerFile = numberOfTimeStepsPerFile;
|
||||||
|
fmd->times = times;
|
||||||
|
|
||||||
|
cudaMallocManaged(&valArrays, sizeof(float *)*FieldData::FILESNUM);
|
||||||
|
}
|
||||||
|
|
||||||
|
FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
|
||||||
|
|
||||||
|
FieldData fd;
|
||||||
|
size_t fieldDataInd = 0;
|
||||||
|
fd.valArrays = valArrays;
|
||||||
|
cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << " with a field index of " << fieldInd << "\n";
|
||||||
|
for (int i = bufferInd; i <= newEndBufferInd; i++) {
|
||||||
|
DataHandle x = gpuBuffer.getDataHandle(i);
|
||||||
|
fd.valArrays[fieldDataInd] = x.d_data;
|
||||||
|
fieldDataInd++;
|
||||||
|
}
|
||||||
|
fd.fieldInd = fieldInd;
|
||||||
|
|
||||||
|
return fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
FieldData GPUBufferHandler::nextFieldData() {
|
||||||
|
DataHandle x = gpuBuffer.getDataHandle(bufferInd);
|
||||||
|
size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile;
|
||||||
|
size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
|
||||||
|
|
||||||
|
size_t endFieldInd = (fieldInd + FieldData::numberOfTimeStepsPerField - 1) % fmd->numberOfTimeStepsPerFile;
|
||||||
|
size_t endBufferInd = (bufferInd + (fieldInd + FieldData::numberOfTimeStepsPerField - 1)/fmd->numberOfTimeStepsPerFile) % GPUBuffer::numBufferedFiles;
|
||||||
|
|
||||||
|
size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile;
|
||||||
|
size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
|
||||||
|
|
||||||
|
if(firstTimeStep) {
|
||||||
|
firstTimeStep = false;
|
||||||
|
return setupField(endBufferInd);
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldInd = newFieldInd;
|
||||||
|
|
||||||
|
if (newBufferInd != bufferInd) {
|
||||||
|
gpuBuffer.loadFile(fileInd, bufferInd);
|
||||||
|
fileInd++;
|
||||||
|
bufferInd = newBufferInd;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newEndBufferInd != endBufferInd) {
|
||||||
|
// maybe dont do things?
|
||||||
|
}
|
||||||
|
|
||||||
|
return setupField(newEndBufferInd);
|
||||||
|
}
|
||||||
|
|
||||||
|
GPUBufferHandler::~GPUBufferHandler() {
|
||||||
|
cudaFree(fmd->lons);
|
||||||
|
cudaFree(fmd->lats);
|
||||||
|
cudaFree(fmd->levs);
|
||||||
|
cudaFree(valArrays);
|
||||||
|
cudaFree(fmd);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
#ifndef GPUBUFFERHANDLER_H
|
||||||
|
#define GPUBUFFERHANDLER_H
|
||||||
|
|
||||||
|
#include "fielddata.h"
|
||||||
|
#include "gpubuffer.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Responsible for deciding when the GPUBuffer should load and unload files.
|
||||||
|
* Also assembles and gives access to FieldData.
|
||||||
|
*
|
||||||
|
* You will need to interface with this class.
|
||||||
|
*/
|
||||||
|
class GPUBufferHandler {
|
||||||
|
public:
|
||||||
|
GPUBufferHandler(GPUBuffer& gpuBuffer);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Produces a FieldData which can be used to retrieve values for a time-slice
|
||||||
|
* into a scalar field with a width of FieldData::numberOfTimeStepsPerField.
|
||||||
|
*
|
||||||
|
* @details This method always increments the start point of the time-slice
|
||||||
|
* by 1. See below:
|
||||||
|
*
|
||||||
|
* time steps = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ...
|
||||||
|
* files (4 time steps per file) = [0 1 2 3][4 5 6 7][8 9 10 11][12 13 ...
|
||||||
|
* nextFieldData() (1st call) = [0 1]
|
||||||
|
* nextFieldData() (2nd call) = [1 2]
|
||||||
|
* nextFieldData() (3rd call) = [2 3]
|
||||||
|
* nextFieldData() (4th call) = [3 4]
|
||||||
|
* nextFieldData() (5th call) = [4 5]
|
||||||
|
* etc...
|
||||||
|
*
|
||||||
|
* When getting values from a FieldData using the getVal method,
|
||||||
|
* the time index is relative to the start of the time-slice.
|
||||||
|
*
|
||||||
|
* This means that for d = nextFieldData() (4th call),
|
||||||
|
* getVal(.., fieldData=d, ..., timeInd = 1, ...) gives a value at
|
||||||
|
* absolute time step 5 as seen above.
|
||||||
|
*/
|
||||||
|
FieldData nextFieldData();
|
||||||
|
|
||||||
|
~GPUBufferHandler();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* You can get the FieldMetaData from here.
|
||||||
|
*/
|
||||||
|
FieldMetadata *fmd;
|
||||||
|
|
||||||
|
static void freeFieldData();
|
||||||
|
|
||||||
|
private:
|
||||||
|
FieldData setupField(size_t endBufferInd);
|
||||||
|
GPUBuffer& gpuBuffer;
|
||||||
|
size_t fileInd;
|
||||||
|
size_t bufferInd;
|
||||||
|
size_t fieldInd;
|
||||||
|
bool firstTimeStep = true;
|
||||||
|
|
||||||
|
float **valArrays;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif //GPUBUFFERHANDLER_H
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
#ifndef ILLUMINATION_H
|
||||||
|
#define ILLUMINATION_H
|
||||||
|
|
||||||
|
#include "raycaster.h"
|
||||||
|
#include "shading.h"
|
||||||
|
|
||||||
|
#endif // ILLUMINATION_H
|
||||||
|
|
@ -0,0 +1,126 @@
|
||||||
|
#ifndef RAYCASTER_H
|
||||||
|
#define RAYCASTER_H
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include "linalg/linalg.h"
|
||||||
|
#include "consts.h"
|
||||||
|
#include "shading.h"
|
||||||
|
|
||||||
|
|
||||||
|
// Raycast + phong, TODO: Consider wrapping in a class
|
||||||
|
__global__ void raycastKernel(float* volumeData, unsigned char* framebuffer) {
|
||||||
|
int px = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
int py = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
if (px >= IMAGE_WIDTH || py >= IMAGE_HEIGHT) return;
|
||||||
|
|
||||||
|
float accumR = 0.0f;
|
||||||
|
float accumG = 0.0f;
|
||||||
|
float accumB = 0.0f;
|
||||||
|
|
||||||
|
// Multiple samples per pixel
|
||||||
|
for (int s = 0; s < SAMPLES_PER_PIXEL; s++) {
|
||||||
|
// Map to [-1, 1]
|
||||||
|
float u = ((px + 0.5f) / IMAGE_WIDTH ) * 2.0f - 1.0f;
|
||||||
|
float v = ((py + 0.5f) / IMAGE_HEIGHT) * 2.0f - 1.0f;
|
||||||
|
|
||||||
|
// TODO: Move this (and all similar transformation code) to its own separate file
|
||||||
|
float tanHalfFov = tanf(fov * 0.5f);
|
||||||
|
u *= tanHalfFov;
|
||||||
|
v *= tanHalfFov;
|
||||||
|
|
||||||
|
// Find ray direction
|
||||||
|
Vec3 cameraRight = (d_cameraDir.cross(d_cameraUp)).normalize();
|
||||||
|
d_cameraUp = (cameraRight.cross(d_cameraDir)).normalize();
|
||||||
|
Vec3 rayDir = (d_cameraDir + cameraRight*u + d_cameraUp*v).normalize();
|
||||||
|
|
||||||
|
// Intersect (for simplicity just a 3D box from 0 to 1 in all dimensions) - TODO: Think about whether this is the best way to do this
|
||||||
|
float tNear = 0.0f;
|
||||||
|
float tFar = 1e6f;
|
||||||
|
auto intersectAxis = [&](float start, float dirVal) {
|
||||||
|
if (fabsf(dirVal) < epsilon) {
|
||||||
|
if (start < 0.f || start > 1.f) {
|
||||||
|
tNear = 1e9f;
|
||||||
|
tFar = -1e9f;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
float t0 = (0.0f - start) / dirVal;
|
||||||
|
float t1 = (1.0f - start) / dirVal;
|
||||||
|
if (t0>t1) {
|
||||||
|
float tmp=t0;
|
||||||
|
t0=t1;
|
||||||
|
t1=tmp;
|
||||||
|
}
|
||||||
|
if (t0>tNear) tNear = t0;
|
||||||
|
if (t1<tFar ) tFar = t1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
intersectAxis(d_cameraPos.x, rayDir.x);
|
||||||
|
intersectAxis(d_cameraPos.y, rayDir.y);
|
||||||
|
intersectAxis(d_cameraPos.z, rayDir.z);
|
||||||
|
|
||||||
|
if (tNear > tFar) continue; // No intersectionn
|
||||||
|
if (tNear < 0.0f) tNear = 0.0f;
|
||||||
|
|
||||||
|
float colorR = 0.0f, colorG = 0.0f, colorB = 0.0f;
|
||||||
|
float alphaAccum = 0.0f;
|
||||||
|
|
||||||
|
float tCurrent = tNear;
|
||||||
|
while (tCurrent < tFar && alphaAccum < alphaAcumLimit) {
|
||||||
|
Point3 pos = d_cameraPos + rayDir * tCurrent;
|
||||||
|
|
||||||
|
// Convert to volume indices
|
||||||
|
float fx = pos.x * (VOLUME_WIDTH - 1);
|
||||||
|
float fy = pos.y * (VOLUME_HEIGHT - 1);
|
||||||
|
float fz = pos.z * (VOLUME_DEPTH - 1);
|
||||||
|
int ix = (int)roundf(fx);
|
||||||
|
int iy = (int)roundf(fy);
|
||||||
|
int iz = (int)roundf(fz);
|
||||||
|
|
||||||
|
// Sample
|
||||||
|
float density = sampleVolumeNearest(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz);
|
||||||
|
|
||||||
|
// Basic transfer function. TODO: Move to a separate file, and then improve
|
||||||
|
float alphaSample = density * 0.1f;
|
||||||
|
// float alphaSample = 1.0f - expf(-density * 0.1f);
|
||||||
|
Color3 baseColor = Color3::init(density, 0.1f*density, 1.f - density); // TODO: Implement a proper transfer function
|
||||||
|
|
||||||
|
// If density ~ 0, skip shading
|
||||||
|
if (density > minAllowedDensity) {
|
||||||
|
Vec3 grad = computeGradient(volumeData, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH, ix, iy, iz);
|
||||||
|
Vec3 normal = -grad.normalize();
|
||||||
|
|
||||||
|
Vec3 lightDir = (d_lightPos - pos).normalize();
|
||||||
|
Vec3 viewDir = -rayDir.normalize();
|
||||||
|
|
||||||
|
// Apply Phong
|
||||||
|
Vec3 shadedColor = phongShading(normal, lightDir, viewDir, baseColor);
|
||||||
|
|
||||||
|
// Compose
|
||||||
|
colorR += (1.0f - alphaAccum) * shadedColor.x * alphaSample;
|
||||||
|
colorG += (1.0f - alphaAccum) * shadedColor.y * alphaSample;
|
||||||
|
colorB += (1.0f - alphaAccum) * shadedColor.z * alphaSample;
|
||||||
|
alphaAccum += (1.0f - alphaAccum) * alphaSample;
|
||||||
|
}
|
||||||
|
|
||||||
|
tCurrent += stepSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
accumR += colorR;
|
||||||
|
accumG += colorG;
|
||||||
|
accumB += colorB;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Average samples
|
||||||
|
accumR /= (float)SAMPLES_PER_PIXEL;
|
||||||
|
accumG /= (float)SAMPLES_PER_PIXEL;
|
||||||
|
accumB /= (float)SAMPLES_PER_PIXEL;
|
||||||
|
|
||||||
|
// Final colour
|
||||||
|
int fbIndex = (py * IMAGE_WIDTH + px) * 3;
|
||||||
|
framebuffer[fbIndex + 0] = (unsigned char)(fminf(accumR, 1.f) * 255);
|
||||||
|
framebuffer[fbIndex + 1] = (unsigned char)(fminf(accumG, 1.f) * 255);
|
||||||
|
framebuffer[fbIndex + 2] = (unsigned char)(fminf(accumB, 1.f) * 255);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // RAYCASTER_H
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
#ifndef SHADING_H
|
||||||
|
#define SHADING_H
|
||||||
|
|
||||||
|
#include "linalg/linalg.h"
|
||||||
|
#include "consts.h"
|
||||||
|
|
||||||
|
// TODO: Consider wrapping this in a class (?)
|
||||||
|
__device__ Vec3 phongShading(const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& baseColor) {
|
||||||
|
Vec3 ambient = baseColor * ambientStrength;
|
||||||
|
double diff = fmax(normal.dot(lightDir), 0.0);
|
||||||
|
Vec3 diffuse = baseColor * (diffuseStrength * diff);
|
||||||
|
|
||||||
|
Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize();
|
||||||
|
double spec = pow(fmax(viewDir.dot(reflectDir), 0.0), shininess);
|
||||||
|
Vec3 specular = Vec3::init(1.0, 1.0, 1.0) * (specularStrength * spec);
|
||||||
|
|
||||||
|
return ambient + diffuse + specular;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // SHADING_H
|
||||||
|
|
@ -1 +1,24 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
__device__ Vec3 computeGradient(float* volumeData, const int volW, const int volH, const int volD, int x, int y, int z) {
|
||||||
|
// Finite difference for partial derivatives.
|
||||||
|
// For boundary voxels - clamp to the boundary.
|
||||||
|
// Normal should point from higher to lower intensities
|
||||||
|
|
||||||
|
int xm = max(x - 1, 0);
|
||||||
|
int xp = min(x + 1, volW - 1);
|
||||||
|
int ym = max(y - 1, 0);
|
||||||
|
int yp = min(y + 1, volH - 1);
|
||||||
|
int zm = max(z - 1, 0);
|
||||||
|
int zp = min(z + 1, volD - 1);
|
||||||
|
|
||||||
|
// Note: Assuming data is linearized (idx = z*w*h + y*w + x) TODO: Unlinearize if data not linear
|
||||||
|
float gx = volumeData[z * volW * volH + y * volW + xp]
|
||||||
|
- volumeData[z * volW * volH + y * volW + xm];
|
||||||
|
float gy = volumeData[z * volW * volH + yp * volW + x ]
|
||||||
|
- volumeData[z * volW * volH + ym * volW + x ];
|
||||||
|
float gz = volumeData[zp * volW * volH + y * volW + x ]
|
||||||
|
- volumeData[zm * volW * volH + y * volW + x ];
|
||||||
|
|
||||||
|
return Vec3::init(gx, gy, gz);
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,16 +3,28 @@
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
struct Vec3 { // TODO: Maybe make this into a class
|
struct Vec3 { // TODO: Maybe make this into a class ... maybe
|
||||||
double x, y, z;
|
double x, y, z;
|
||||||
|
|
||||||
__host__ __device__ Vec3() : x(0), y(0), z(0) {}
|
static __host__ __device__ Vec3 init(double x, double y, double z) {Vec3 v = {x, y, z}; return v;}
|
||||||
__host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {}
|
static __host__ __device__ Vec3 zero() { return Vec3::init(0, 0, 0); }
|
||||||
|
static __host__ __device__ Vec3 init() { return zero(); }
|
||||||
|
static __host__ __device__ Vec3 init(const double (&arr)[3]) { return Vec3::init(arr[0], arr[1], arr[2]); }
|
||||||
|
|
||||||
|
__host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3::init(x + b.x, y + b.y, z + b.z); }
|
||||||
|
__host__ __device__ Vec3& operator+=(const Vec3& b) { x += b.x; y += b.y; z += b.z; return *this; }
|
||||||
|
|
||||||
|
__host__ __device__ Vec3 operator-() const { return Vec3::init(-x, -y, -z); }
|
||||||
|
__host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3::init(x - b.x, y - b.y, z - b.z); }
|
||||||
|
__host__ __device__ Vec3& operator-=(const Vec3& b) { x -= b.x; y -= b.y; z -= b.z; return *this; }
|
||||||
|
|
||||||
|
__host__ __device__ Vec3 operator*(double b) const { return Vec3::init(x * b, y * b, z * b); }
|
||||||
|
__host__ __device__ Vec3& operator*=(double b) { x *= b; y *= b; z *= b; return *this; }
|
||||||
|
|
||||||
__host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3(x + b.x, y + b.y, z + b.z); }
|
|
||||||
__host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3(x - b.x, y - b.y, z - b.z); }
|
|
||||||
__host__ __device__ Vec3 operator*(double b) const { return Vec3(x * b, y * b, z * b); }
|
|
||||||
__host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); }
|
|
||||||
__host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; }
|
__host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; }
|
||||||
__host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3(x / len, y / len, z / len); }
|
__host__ __device__ Vec3 cross(const Vec3& b) const { return Vec3::init(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x); }
|
||||||
|
__host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3::init(x / len, y / len, z / len); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef Vec3 Point3;
|
||||||
|
typedef Vec3 Color3;
|
||||||
150
src/main.cu
150
src/main.cu
|
|
@ -1,102 +1,112 @@
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include <device_launch_parameters.h>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "hurricanedata/datareader.h"
|
||||||
#include "linalg/linalg.h"
|
#include "linalg/linalg.h"
|
||||||
#include "objs/sphere.h"
|
#include "objs/sphere.h"
|
||||||
#include "img/handler.h"
|
#include "img/handler.h"
|
||||||
|
#include "consts.h"
|
||||||
#define WIDTH 3840
|
#include "illumination/illumination.h"
|
||||||
#define HEIGHT 2160
|
|
||||||
#define SAMPLES_PER_PIXEL 8
|
|
||||||
|
|
||||||
|
|
||||||
__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) {
|
static float* d_volume = nullptr;
|
||||||
double ambientStrength = 0.1;
|
|
||||||
double diffuseStrength = 0.8;
|
|
||||||
double specularStrength = 0.5;
|
|
||||||
int shininess = 64;
|
|
||||||
|
|
||||||
Vec3 ambient = color * ambientStrength;
|
|
||||||
double diff = max(normal.dot(lightDir), 0.0);
|
|
||||||
Vec3 diffuse = color * (diffuseStrength * diff);
|
|
||||||
|
|
||||||
Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize();
|
void getTemperature(std::vector<float>& temperatureData, int idx = 0) {
|
||||||
double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess);
|
std::string path = "data/trimmed";
|
||||||
Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec);
|
std::string variable = "T";
|
||||||
|
DataReader dataReader(path, variable);
|
||||||
return ambient + diffuse + specular;
|
size_t dataLength = dataReader.fileLength(idx);
|
||||||
|
temperatureData.resize(dataLength);
|
||||||
|
dataReader.loadFile(temperatureData.data(), idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) {
|
void getSpeed(std::vector<float>& speedData, int idx = 0) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
std::string path = "data/trimmed";
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
std::string varU = "U";
|
||||||
if (x >= WIDTH || y >= HEIGHT) return;
|
std::string varV = "V";
|
||||||
|
|
||||||
int pixelIndex = (y * WIDTH + x) * 3;
|
DataReader dataReaderU(path, varU);
|
||||||
Vec3 rayOrigin(0, 0, 0);
|
DataReader dataReaderV(path, varV);
|
||||||
Vec3 colCum(0, 0, 0);
|
|
||||||
|
|
||||||
double spp = static_cast<double>(SAMPLES_PER_PIXEL);
|
size_t dataLength = dataReaderU.fileLength(idx);
|
||||||
for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) {
|
speedData.resize(dataLength);
|
||||||
double u = (x + (sample / spp) - WIDTH / 2.0) / WIDTH;
|
std::vector<float> uData(dataLength);
|
||||||
double v = (y + (sample / spp) - HEIGHT / 2.0) / HEIGHT;
|
std::vector<float> vData(dataLength);
|
||||||
Vec3 rayDir(u, v, 1.0);
|
|
||||||
rayDir = rayDir.normalize();
|
|
||||||
|
|
||||||
for (int i = 0; i < numSpheres; ++i) {
|
dataReaderU.loadFile(uData.data(), idx);
|
||||||
double t;
|
dataReaderV.loadFile(vData.data(), idx);
|
||||||
if (spheres[i].intersect(rayOrigin, rayDir, t)) {
|
|
||||||
Vec3 hitPoint = rayOrigin + rayDir * t;
|
|
||||||
Vec3 normal = (hitPoint - spheres[i].center).normalize();
|
|
||||||
Vec3 lightDir = (lightPos - hitPoint).normalize();
|
|
||||||
Vec3 viewDir = -rayDir;
|
|
||||||
|
|
||||||
colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color);
|
for (int i = 0; i < dataLength; i++) {
|
||||||
|
speedData[i] = sqrt(uData[i]*uData[i] + vData[i]*vData[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
std::vector<float> data;
|
||||||
|
// getTemperature(data);
|
||||||
|
getSpeed(data);
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: Eveontually remove debug below (i.e., eliminate for-loop etc.)
|
||||||
|
// Generate debug volume data
|
||||||
|
float* hostVolume = new float[VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH];
|
||||||
|
// generateVolume(hostVolume, VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH);
|
||||||
|
for (int i = 0; i < VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; i++) { // TODO: This is technically an unnecessary artifact of the old code taking in a float* instead of a std::vector
|
||||||
|
// Discard temperatures above a small star (supposedly, missing temperature values)
|
||||||
|
hostVolume[i] = data[i];
|
||||||
|
if (data[i] + epsilon >= infty) hostVolume[i] = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Average color across all samples
|
// Min-max normalization
|
||||||
Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL);
|
float minVal = *std::min_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH);
|
||||||
|
float maxVal = *std::max_element(hostVolume, hostVolume + VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH);
|
||||||
framebuffer[pixelIndex] = static_cast<unsigned char>(fmin(color.x, 1.0) * 255);
|
for (int i = 0; i < VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH; i++) {
|
||||||
framebuffer[pixelIndex + 1] = static_cast<unsigned char>(fmin(color.y, 1.0) * 255);
|
hostVolume[i] = (hostVolume[i] - minVal) / (maxVal - minVal);
|
||||||
framebuffer[pixelIndex + 2] = static_cast<unsigned char>(fmin(color.z, 1.0) * 255);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allocate + copy data to GPU
|
||||||
|
size_t volumeSize = sizeof(float) * VOLUME_WIDTH * VOLUME_HEIGHT * VOLUME_DEPTH;
|
||||||
|
cudaMalloc((void**)&d_volume, volumeSize);
|
||||||
|
cudaMemcpy(d_volume, hostVolume, volumeSize, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
// Allocate framebuffer
|
||||||
int main() {
|
|
||||||
Sphere spheres[] = {
|
|
||||||
{ Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere
|
|
||||||
{ Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere
|
|
||||||
{ Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere
|
|
||||||
};
|
|
||||||
int numSpheres = sizeof(spheres) / sizeof(Sphere);
|
|
||||||
Vec3 lightPos(5, 5, 0);
|
|
||||||
|
|
||||||
unsigned char* d_framebuffer;
|
unsigned char* d_framebuffer;
|
||||||
unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3];
|
size_t fbSize = IMAGE_WIDTH * IMAGE_HEIGHT * 3 * sizeof(unsigned char);
|
||||||
Sphere* d_spheres;
|
cudaMalloc((void**)&d_framebuffer, fbSize);
|
||||||
cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3);
|
cudaMemset(d_framebuffer, 0, fbSize);
|
||||||
cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere));
|
|
||||||
cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice);
|
|
||||||
|
|
||||||
dim3 threadsPerBlock(16, 16);
|
// Copy external constants from consts.h to cuda
|
||||||
dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x,
|
copyConstantsToDevice();
|
||||||
(HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y);
|
|
||||||
renderKernel<<<numBlocks, threadsPerBlock>>>(d_framebuffer, d_spheres, numSpheres, lightPos);
|
// Launch kernel
|
||||||
|
dim3 blockSize(16, 16); // TODO: Figure out a good size for parallelization
|
||||||
|
dim3 gridSize((IMAGE_WIDTH + blockSize.x - 1)/blockSize.x,
|
||||||
|
(IMAGE_HEIGHT + blockSize.y - 1)/blockSize.y);
|
||||||
|
|
||||||
|
raycastKernel<<<gridSize, blockSize>>>(
|
||||||
|
d_volume,
|
||||||
|
d_framebuffer
|
||||||
|
);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost);
|
// Copy framebuffer back to CPU
|
||||||
saveImage("output.ppm", h_framebuffer, WIDTH, HEIGHT);
|
unsigned char* hostFramebuffer = new unsigned char[IMAGE_WIDTH * IMAGE_HEIGHT * 3];
|
||||||
|
cudaMemcpy(hostFramebuffer, d_framebuffer, fbSize, cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
// Export image
|
||||||
|
saveImage("output.ppm", hostFramebuffer, IMAGE_WIDTH, IMAGE_HEIGHT);
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
|
delete[] hostVolume;
|
||||||
|
delete[] hostFramebuffer;
|
||||||
|
cudaFree(d_volume);
|
||||||
cudaFree(d_framebuffer);
|
cudaFree(d_framebuffer);
|
||||||
cudaFree(d_spheres);
|
|
||||||
delete[] h_framebuffer;
|
|
||||||
|
|
||||||
std::cout << "High-resolution image saved as output.ppm" << std::endl;
|
std::cout << "Phong-DVR rendering done. Image saved to output.ppm" << std::endl;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
#include "linalg/linalg.h"
|
#include "linalg/linalg.h"
|
||||||
|
|
||||||
|
// TODO: This is technically just for debugging, but if it is to be used outside of that, it should be a made into a proper class (I mean, just look at those functions below, it screams "add class attributes")
|
||||||
struct Sphere {
|
struct Sphere {
|
||||||
Vec3 center;
|
Vec3 center;
|
||||||
double radius;
|
double radius;
|
||||||
|
|
@ -21,3 +22,73 @@ struct Sphere {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// A function to generate two concentric spherical shells
|
||||||
|
__host__ void generateVolume(float* volumeData, int volW, int volH, int volD) {
|
||||||
|
int cx = volW / 2;
|
||||||
|
int cy = volH / 2;
|
||||||
|
int cz = volD / 2;
|
||||||
|
float maxRadius = static_cast<float>(volW) * 0.5f;
|
||||||
|
|
||||||
|
// Two shells
|
||||||
|
float shell1Inner = 0.2f * maxRadius;
|
||||||
|
float shell1Outer = 0.3f * maxRadius;
|
||||||
|
float shell2Inner = 0.4f * maxRadius;
|
||||||
|
float shell2Outer = 0.5f * maxRadius;
|
||||||
|
|
||||||
|
float shell1Intensity = 0.8f;
|
||||||
|
float shell2Intensity = 0.6f;
|
||||||
|
|
||||||
|
for (int z = 0; z < volD; ++z) {
|
||||||
|
for (int y = 0; y < volH; ++y) {
|
||||||
|
for (int x = 0; x < volW; ++x) {
|
||||||
|
float dx = (float)(x - cx);
|
||||||
|
float dy = (float)(y - cy);
|
||||||
|
float dz = (float)(z - cz);
|
||||||
|
float dist = sqrtf(dx*dx + dy*dy + dz*dz);
|
||||||
|
|
||||||
|
float intensity = 0.0f;
|
||||||
|
|
||||||
|
// Shell 1
|
||||||
|
if (dist >= shell1Inner && dist <= shell1Outer) {
|
||||||
|
float mid = 0.5f * (shell1Inner + shell1Outer);
|
||||||
|
if (dist < mid) {
|
||||||
|
float inFactor = (dist - shell1Inner) / (mid - shell1Inner);
|
||||||
|
intensity += shell1Intensity * inFactor;
|
||||||
|
} else {
|
||||||
|
float outFactor = (shell1Outer - dist) / (shell1Outer - mid);
|
||||||
|
intensity += shell1Intensity * outFactor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shell 2
|
||||||
|
if (dist >= shell2Inner && dist <= shell2Outer) {
|
||||||
|
float mid = 0.5f * (shell2Inner + shell2Outer);
|
||||||
|
if (dist < mid) {
|
||||||
|
float inFactor = (dist - shell2Inner) / (mid - shell2Inner);
|
||||||
|
intensity += shell2Intensity * inFactor;
|
||||||
|
} else {
|
||||||
|
float outFactor = (shell2Outer - dist) / (shell2Outer - mid);
|
||||||
|
intensity += shell2Intensity * outFactor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intensity > 1.0f) intensity = 1.0f;
|
||||||
|
volumeData[z * volW * volH + y * volW + x] = intensity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Samples the voxel nearest to the given coordinates. TODO: Can be re-used in other places so move
|
||||||
|
__device__ float sampleVolumeNearest(float* volumeData, const int volW, const int volH, const int volD, int vx, int vy, int vz) {
|
||||||
|
if (vx < 0) vx = 0;
|
||||||
|
if (vy < 0) vy = 0;
|
||||||
|
if (vz < 0) vz = 0;
|
||||||
|
if (vx >= volW) vx = volW - 1;
|
||||||
|
if (vy >= volH) vy = volH - 1;
|
||||||
|
if (vz >= volD) vz = volD - 1;
|
||||||
|
|
||||||
|
int idx = vz * volW * volH + vy * volD + vx;
|
||||||
|
return volumeData[idx];
|
||||||
|
}
|
||||||
BIN
src/tests/a.out
BIN
src/tests/a.out
Binary file not shown.
|
|
@ -1,128 +0,0 @@
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include <device_launch_parameters.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
|
|
||||||
#define WIDTH 800
|
|
||||||
#define HEIGHT 600
|
|
||||||
|
|
||||||
struct Vec3 {
|
|
||||||
double x, y, z;
|
|
||||||
|
|
||||||
__host__ __device__ Vec3() : x(0), y(0), z(0) {}
|
|
||||||
__host__ __device__ Vec3(double x, double y, double z) : x(x), y(y), z(z) {}
|
|
||||||
|
|
||||||
__host__ __device__ Vec3 operator+(const Vec3& b) const { return Vec3(x + b.x, y + b.y, z + b.z); }
|
|
||||||
__host__ __device__ Vec3 operator-(const Vec3& b) const { return Vec3(x - b.x, y - b.y, z - b.z); }
|
|
||||||
__host__ __device__ Vec3 operator*(double b) const { return Vec3(x * b, y * b, z * b); }
|
|
||||||
__host__ __device__ Vec3 operator-() const { return Vec3(-x, -y, -z); }
|
|
||||||
__host__ __device__ double dot(const Vec3& b) const { return x * b.x + y * b.y + z * b.z; }
|
|
||||||
__host__ __device__ Vec3 normalize() const { double len = sqrt(x * x + y * y + z * z); return Vec3(x / len, y / len, z / len); }
|
|
||||||
};
|
|
||||||
|
|
||||||
// Simple Phong lighting components
|
|
||||||
struct Sphere {
|
|
||||||
Vec3 center;
|
|
||||||
double radius;
|
|
||||||
Vec3 color;
|
|
||||||
|
|
||||||
__device__ bool intersect(const Vec3& rayOrigin, const Vec3& rayDir, double& t) const {
|
|
||||||
Vec3 oc = rayOrigin - center;
|
|
||||||
double b = oc.dot(rayDir);
|
|
||||||
double c = oc.dot(oc) - radius * radius;
|
|
||||||
double h = b * b - c;
|
|
||||||
if (h < 0.0) return false;
|
|
||||||
h = sqrt(h);
|
|
||||||
t = -b - h;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) {
|
|
||||||
double ambientStrength = 0.1;
|
|
||||||
double diffuseStrength = 0.8;
|
|
||||||
double specularStrength = 0.5;
|
|
||||||
int shininess = 32;
|
|
||||||
|
|
||||||
// Ambient
|
|
||||||
Vec3 ambient = color * ambientStrength;
|
|
||||||
|
|
||||||
// Diffuse
|
|
||||||
double diff = max(normal.dot(lightDir), 0.0);
|
|
||||||
Vec3 diffuse = color * (diffuseStrength * diff);
|
|
||||||
|
|
||||||
// Specular
|
|
||||||
Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize();
|
|
||||||
double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess);
|
|
||||||
Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec);
|
|
||||||
|
|
||||||
return ambient + diffuse + specular;
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void renderKernel(unsigned char* framebuffer, Sphere sphere, Vec3 lightPos) {
|
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
if (x >= WIDTH || y >= HEIGHT) return;
|
|
||||||
|
|
||||||
int pixelIndex = (y * WIDTH + x) * 3;
|
|
||||||
|
|
||||||
Vec3 rayOrigin(0, 0, 0);
|
|
||||||
Vec3 rayDir((x - WIDTH / 2.0) / WIDTH, (y - HEIGHT / 2.0) / HEIGHT, 1.0);
|
|
||||||
rayDir = rayDir.normalize();
|
|
||||||
|
|
||||||
double t;
|
|
||||||
if (sphere.intersect(rayOrigin, rayDir, t)) {
|
|
||||||
Vec3 hitPoint = rayOrigin + rayDir * t;
|
|
||||||
Vec3 normal = (hitPoint - sphere.center).normalize();
|
|
||||||
Vec3 lightDir = (lightPos - hitPoint).normalize();
|
|
||||||
Vec3 viewDir = -rayDir;
|
|
||||||
|
|
||||||
Vec3 color = phongShading(hitPoint, normal, lightDir, viewDir, sphere.color);
|
|
||||||
|
|
||||||
framebuffer[pixelIndex] = static_cast<unsigned char>(fmin(color.x, 1.0) * 255);
|
|
||||||
framebuffer[pixelIndex + 1] = static_cast<unsigned char>(fmin(color.y, 1.0) * 255);
|
|
||||||
framebuffer[pixelIndex + 2] = static_cast<unsigned char>(fmin(color.z, 1.0) * 255);
|
|
||||||
} else {
|
|
||||||
framebuffer[pixelIndex] = 0;
|
|
||||||
framebuffer[pixelIndex + 1] = 0;
|
|
||||||
framebuffer[pixelIndex + 2] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void saveImage(const char* filename, unsigned char* framebuffer) {
|
|
||||||
std::ofstream imageFile(filename, std::ios::out | std::ios::binary);
|
|
||||||
imageFile << "P6\n" << WIDTH << " " << HEIGHT << "\n255\n";
|
|
||||||
for (int i = 0; i < WIDTH * HEIGHT * 3; i++) {
|
|
||||||
imageFile << framebuffer[i];
|
|
||||||
}
|
|
||||||
imageFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
// Initialize sphere and light source
|
|
||||||
Sphere sphere = { Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }; // Red sphere
|
|
||||||
Vec3 lightPos(5, 5, 0);
|
|
||||||
|
|
||||||
// Allocate framebuffer on device and host
|
|
||||||
unsigned char* d_framebuffer;
|
|
||||||
unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3];
|
|
||||||
cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3);
|
|
||||||
|
|
||||||
// Launch
|
|
||||||
dim3 threadsPerBlock(16, 16);
|
|
||||||
dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x,
|
|
||||||
(HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y);
|
|
||||||
renderKernel<<<numBlocks, threadsPerBlock>>>(d_framebuffer, sphere, lightPos);
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
|
|
||||||
// Copy result back to host and save
|
|
||||||
cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost);
|
|
||||||
saveImage("output.ppm", h_framebuffer);
|
|
||||||
|
|
||||||
// Clean up
|
|
||||||
cudaFree(d_framebuffer);
|
|
||||||
delete[] h_framebuffer;
|
|
||||||
|
|
||||||
std::cout << "Image saved as output.ppm" << std::endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
Binary file not shown.
|
|
@ -1,16 +0,0 @@
|
||||||
#include <iostream>
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
|
|
||||||
__global__ void hello_from_gpu() {
|
|
||||||
printf("Hello from GPU!\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
hello_from_gpu<<<1, 1>>>();
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
|
|
||||||
// Reset device
|
|
||||||
cudaDeviceReset();
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
Binary file not shown.
|
|
@ -1,102 +0,0 @@
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include <device_launch_parameters.h>
|
|
||||||
#include <iostream>
|
|
||||||
#include <cmath>
|
|
||||||
|
|
||||||
#include "linalg/linalg.h"
|
|
||||||
#include "sphere.h"
|
|
||||||
#include "img/handler.h"
|
|
||||||
|
|
||||||
#define WIDTH 3840
|
|
||||||
#define HEIGHT 2160
|
|
||||||
#define SAMPLES_PER_PIXEL 8
|
|
||||||
|
|
||||||
|
|
||||||
__device__ Vec3 phongShading(const Vec3& point, const Vec3& normal, const Vec3& lightDir, const Vec3& viewDir, const Vec3& color) {
|
|
||||||
double ambientStrength = 0.1;
|
|
||||||
double diffuseStrength = 0.8;
|
|
||||||
double specularStrength = 0.5;
|
|
||||||
int shininess = 64;
|
|
||||||
|
|
||||||
Vec3 ambient = color * ambientStrength;
|
|
||||||
double diff = max(normal.dot(lightDir), 0.0);
|
|
||||||
Vec3 diffuse = color * (diffuseStrength * diff);
|
|
||||||
|
|
||||||
Vec3 reflectDir = (normal * (2.0 * normal.dot(lightDir)) - lightDir).normalize();
|
|
||||||
double spec = pow(max(viewDir.dot(reflectDir), 0.0), shininess);
|
|
||||||
Vec3 specular = Vec3(1.0, 1.0, 1.0) * (specularStrength * spec);
|
|
||||||
|
|
||||||
return ambient + diffuse + specular;
|
|
||||||
}
|
|
||||||
|
|
||||||
__global__ void renderKernel(unsigned char* framebuffer, Sphere* spheres, int numSpheres, Vec3 lightPos) {
|
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
if (x >= WIDTH || y >= HEIGHT) return;
|
|
||||||
|
|
||||||
int pixelIndex = (y * WIDTH + x) * 3;
|
|
||||||
Vec3 rayOrigin(0, 0, 0);
|
|
||||||
Vec3 colCum(0, 0, 0);
|
|
||||||
|
|
||||||
double spp = static_cast<double>(SAMPLES_PER_PIXEL);
|
|
||||||
for (int sample = 0; sample < SAMPLES_PER_PIXEL; sample++) {
|
|
||||||
double u = (x + (sample / spp) - WIDTH / 2.0) / WIDTH;
|
|
||||||
double v = (y + (sample / spp) - HEIGHT / 2.0) / HEIGHT;
|
|
||||||
Vec3 rayDir(u, v, 1.0);
|
|
||||||
rayDir = rayDir.normalize();
|
|
||||||
|
|
||||||
for (int i = 0; i < numSpheres; ++i) {
|
|
||||||
double t;
|
|
||||||
if (spheres[i].intersect(rayOrigin, rayDir, t)) {
|
|
||||||
Vec3 hitPoint = rayOrigin + rayDir * t;
|
|
||||||
Vec3 normal = (hitPoint - spheres[i].center).normalize();
|
|
||||||
Vec3 lightDir = (lightPos - hitPoint).normalize();
|
|
||||||
Vec3 viewDir = -rayDir;
|
|
||||||
|
|
||||||
colCum = colCum + phongShading(hitPoint, normal, lightDir, viewDir, spheres[i].color);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Average color across all samples
|
|
||||||
Vec3 color = colCum * (1.0 / SAMPLES_PER_PIXEL);
|
|
||||||
|
|
||||||
framebuffer[pixelIndex] = static_cast<unsigned char>(fmin(color.x, 1.0) * 255);
|
|
||||||
framebuffer[pixelIndex + 1] = static_cast<unsigned char>(fmin(color.y, 1.0) * 255);
|
|
||||||
framebuffer[pixelIndex + 2] = static_cast<unsigned char>(fmin(color.z, 1.0) * 255);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
Sphere spheres[] = {
|
|
||||||
{ Vec3(0, 0, 5), 1.0, Vec3(1.0, 0.0, 0.0) }, // Red sphere
|
|
||||||
{ Vec3(-2, 1, 7), 1.0, Vec3(0.0, 1.0, 0.0) }, // Green sphere
|
|
||||||
{ Vec3(2, -1, 6), 1.0, Vec3(0.0, 0.0, 1.0) } // Blue sphere
|
|
||||||
};
|
|
||||||
int numSpheres = sizeof(spheres) / sizeof(Sphere);
|
|
||||||
Vec3 lightPos(5, 5, 0);
|
|
||||||
|
|
||||||
unsigned char* d_framebuffer;
|
|
||||||
unsigned char* h_framebuffer = new unsigned char[WIDTH * HEIGHT * 3];
|
|
||||||
Sphere* d_spheres;
|
|
||||||
cudaMalloc(&d_framebuffer, WIDTH * HEIGHT * 3);
|
|
||||||
cudaMalloc(&d_spheres, numSpheres * sizeof(Sphere));
|
|
||||||
cudaMemcpy(d_spheres, spheres, numSpheres * sizeof(Sphere), cudaMemcpyHostToDevice);
|
|
||||||
|
|
||||||
dim3 threadsPerBlock(16, 16);
|
|
||||||
dim3 numBlocks((WIDTH + threadsPerBlock.x - 1) / threadsPerBlock.x,
|
|
||||||
(HEIGHT + threadsPerBlock.y - 1) / threadsPerBlock.y);
|
|
||||||
renderKernel<<<numBlocks, threadsPerBlock>>>(d_framebuffer, d_spheres, numSpheres, lightPos);
|
|
||||||
cudaDeviceSynchronize();
|
|
||||||
|
|
||||||
cudaMemcpy(h_framebuffer, d_framebuffer, WIDTH * HEIGHT * 3, cudaMemcpyDeviceToHost);
|
|
||||||
saveImage("output.ppm", h_framebuffer, WIDTH, HEIGHT);
|
|
||||||
|
|
||||||
cudaFree(d_framebuffer);
|
|
||||||
cudaFree(d_spheres);
|
|
||||||
delete[] h_framebuffer;
|
|
||||||
|
|
||||||
std::cout << "High-resolution image saved as output.ppm" << std::endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue