diff --git a/Makefile b/Makefile index 2123394..b728fc5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ # Compiler and flags NVCC = nvcc CXXFLAGS = -I./src -I./hurricanedata -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs) +COMPILE_OBJ_FLAGS = --device-c # Directories SRC_DIR = src @@ -16,12 +17,12 @@ all: $(TARGET) # Build the main target $(TARGET): $(OBJ_FILES) | $(BUILD_DIR) - $(NVCC) $(CXXFLAGS) -o $@ $^ + $(NVCC) $(CXXFLAGS) $^ -o $@ # Compile object files $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cu @mkdir -p $(dir $@) - $(NVCC) $(CXXFLAGS) -c $< -o $@ + $(NVCC) $(CXXFLAGS) $(COMPILE_OBJ_FLAGS) -c $< -o $@ # Debug build debug: CXXFLAGS += -g diff --git a/src/hurricanedata/fielddata.cu b/src/hurricanedata/fielddata.cu index 3683275..c53adc4 100644 --- a/src/hurricanedata/fielddata.cu +++ b/src/hurricanedata/fielddata.cu @@ -1,12 +1,19 @@ #include "fielddata.h" -// __device__ float getVal( -// const FieldMetadata &md, -// const FieldData &d, -// const size_t &timeInd, -// const size_t &lonInd, -// const size_t &latInd, -// const size_t &levInd -// ) { -// return d.valArrays[0][timeInd]; -// } \ No newline at end of file +__device__ float getVal( + const FieldMetadata &md, + const FieldData &d, + const size_t &timeInd, + const size_t &levInd, + const size_t &latInd, + const size_t &lonInd +) { + size_t sizeSpatialData = md.widthSize*md.heightSize*md.depthSize; + size_t size2DMapData = md.widthSize*md.heightSize; + return d.valArrays[0][ + timeInd*sizeSpatialData + + levInd*size2DMapData + + latInd*md.widthSize + + lonInd + ]; +} \ No newline at end of file diff --git a/src/hurricanedata/fielddata.h b/src/hurricanedata/fielddata.h index fac8713..4633784 100644 --- a/src/hurricanedata/fielddata.h +++ b/src/hurricanedata/fielddata.h @@ -32,13 +32,13 @@ struct FieldData { using FieldData = FieldData; -// __device__ float getVal( -// const FieldMetadata &md, -// const FieldData &d, -// const size_t &timeInd, -// const size_t &lonInd, -// const size_t &latInd, -// const size_t &levInd -// ); +extern __device__ float getVal( + const FieldMetadata &md, + const FieldData &d, + const size_t &timeInd, + const size_t &levInd, + const size_t &latInd, + const size_t &lonInd +); #endif //FIELDDATA_H diff --git a/src/hurricanedata/gpubuffer.cu b/src/hurricanedata/gpubuffer.cu index d0a8946..40ab5b7 100644 --- a/src/hurricanedata/gpubuffer.cu +++ b/src/hurricanedata/gpubuffer.cu @@ -21,8 +21,6 @@ path(path), variableName(variableName) { readAndAllocateAxis(&fmd->levs, &fmd->depthSize, vars.find("lev")->second); } - - FieldData GPUBuffer::nextFieldData() { NcFile data(path, NcFile::read); @@ -47,8 +45,6 @@ FieldData GPUBuffer::nextFieldData() { var.getVar(h_array); // Copy data to device - // float *d_array; - cudaError_t status = cudaMalloc(&fd.valArrays[0], sizeof(float)*length); if (status != cudaSuccess) cout << "Error allocating memory: " << status << "\n"; diff --git a/src/main.cu b/src/main.cu index fcb1cf3..5e81a5c 100644 --- a/src/main.cu +++ b/src/main.cu @@ -1,38 +1,25 @@ +#include "hurricanedata/fielddata.h" #include "hurricanedata/gpubuffer.h" #include #include #include #include +#include -__device__ float getVal( - const FieldMetadata &md, - const FieldData &d, - const size_t &timeInd, - const size_t &lonInd, - const size_t &latInd, - const size_t &levInd -) { - // TODO: Actaully implement function - return d.valArrays[0][timeInd]; -} // Not parallel computation -__global__ void computeMean(float *ans, size_t *masked_vals, const FieldMetadata &fmd, FieldData fd) { +__global__ void computeMean(float *ans, const FieldMetadata &fmd, FieldData fd) { float sum = 0; size_t num_not_masked_values = 0; - size_t num_masked_values = 0; - for (int i = 0; i < fmd.widthSize*fmd.heightSize*fmd.depthSize*fd.timeSize; i++) { - double xi = getVal(fmd, fd, i, 0, 0, 0); + for (int i = 0; i < fmd.widthSize; i++) { + double xi = getVal(fmd, fd, 2, 20, 100, i); if (xi < 1E14) { /* If x is not missing value */ num_not_masked_values++; sum += xi; - } else { - num_masked_values++; } } *ans = sum/num_not_masked_values; - *masked_vals = num_masked_values; } int main() { @@ -45,17 +32,13 @@ int main() { float *ptr_mean; cudaMallocManaged(&ptr_mean, sizeof(float)); - size_t *ptr_masked; - cudaMallocManaged(&ptr_masked, sizeof(size_t)); - - computeMean<<<1, 1>>>(ptr_mean, ptr_masked, *buffer.fmd, fd); + computeMean<<<1, 1>>>(ptr_mean, *buffer.fmd, fd); cudaDeviceSynchronize(); - std::cout << "Mean = " << *ptr_mean << " values where " << *ptr_masked << " are masked values.\n"; + std::cout << "Mean = " << std::fixed << std::setprecision(6) << *ptr_mean << "\n"; cudaFree(fd.valArrays[0]); cudaFree(ptr_mean); - cudaFree(ptr_masked); return 0; } diff --git a/test_read.py b/test_read.py index 19bbcfc..8a2806b 100644 --- a/test_read.py +++ b/test_read.py @@ -23,11 +23,13 @@ print("Mean of U:", U_mean) print("Sum of U:", U_sum) sumval = 0 -row = U[0,0,100] +row = U[2,20,100] +n = 0 for val in row: if not np.ma.is_masked(val): + n+=1 sumval += val -print(f"Why does {np.sum(row)=} not equal {sumval=} ?!") +print(f"Why does {np.mean(row)=} not equal {sumval/n=} ?!") # Close the NetCDF file ncfile.close() \ No newline at end of file