getVal seems to work

2025-06-07 02:13:10 +02:00 · 2024-12-21 14:17:39 +01:00
parent 4b06f289b8
commit a7ad248c38
6 changed files with 39 additions and 50 deletions
--- a/5
+++ b/5
@@ -1,6 +1,7 @@
 # Compiler and flags
 NVCC = nvcc
 CXXFLAGS = -I./src -I./hurricanedata -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs)
 COMPILE_OBJ_FLAGS = --device-c
 # Directories
 SRC_DIR = src
@@ -16,12 +17,12 @@ all: $(TARGET)
 # Build the main target
 $(TARGET): $(OBJ_FILES) | $(BUILD_DIR)
-	$(NVCC) $(CXXFLAGS) -o $@ $^
+	$(NVCC) $(CXXFLAGS) $^ -o $@
 # Compile object files
 $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cu
 	@mkdir -p $(dir $@)
-	$(NVCC) $(CXXFLAGS) -c $< -o $@
+	$(NVCC) $(CXXFLAGS) $(COMPILE_OBJ_FLAGS) -c $< -o $@
 # Debug build
 debug: CXXFLAGS += -g
--- a/src/hurricanedata/fielddata.cu
+++ b/src/hurricanedata/fielddata.cu
@@ -1,12 +1,19 @@
 #include "fielddata.h"
-// __device__ float getVal(
+__device__ float getVal(
-//     const FieldMetadata &md,
+    const FieldMetadata &md,
-//     const FieldData &d,
+    const FieldData &d,
-//     const size_t &timeInd,
+    const size_t &timeInd,
-//     const size_t &lonInd,
+    const size_t &levInd,
-//     const size_t &latInd,
+    const size_t &latInd,
-//     const size_t &levInd
+    const size_t &lonInd
-// ) {
+) {
-//     return d.valArrays[0][timeInd]; 
+    size_t sizeSpatialData = md.widthSize*md.heightSize*md.depthSize;
-// }
+    size_t size2DMapData = md.widthSize*md.heightSize;
    return d.valArrays[0][
        timeInd*sizeSpatialData
        + levInd*size2DMapData
        + latInd*md.widthSize
        + lonInd
    ]; 
 }
--- a/src/hurricanedata/fielddata.h
+++ b/src/hurricanedata/fielddata.h
@@ -32,13 +32,13 @@ struct FieldData {
 using FieldData = FieldData;
-// __device__ float getVal(
+extern __device__ float getVal(
-//     const FieldMetadata &md,
+    const FieldMetadata &md,
-//     const FieldData &d,
+    const FieldData &d,
-//     const size_t &timeInd,
+    const size_t &timeInd,
-//     const size_t &lonInd,
+    const size_t &levInd,
-//     const size_t &latInd,
+    const size_t &latInd,
-//     const size_t &levInd
+    const size_t &lonInd
-// );
+);
 #endif //FIELDDATA_H
--- a/src/hurricanedata/gpubuffer.cu
+++ b/src/hurricanedata/gpubuffer.cu
@@ -21,8 +21,6 @@ path(path), variableName(variableName) {
    readAndAllocateAxis<double>(&fmd->levs, &fmd->depthSize, vars.find("lev")->second);
 }
 FieldData GPUBuffer::nextFieldData() {
    NcFile data(path, NcFile::read);
@@ -47,8 +45,6 @@ FieldData GPUBuffer::nextFieldData() {
    var.getVar(h_array);
    // Copy data to device
    // float *d_array;
    cudaError_t status = cudaMalloc(&fd.valArrays[0], sizeof(float)*length);
    if (status != cudaSuccess)
        cout << "Error allocating memory: " << status << "\n";
--- a/src/main.cu
+++ b/src/main.cu
@@ -1,38 +1,25 @@
 #include "hurricanedata/fielddata.h"
 #include "hurricanedata/gpubuffer.h"
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #include <iostream>
 #include <cmath>
 #include <iomanip> 
 // Placeholder device-side accessor for a single value of a field.
 //
 // As written, this returns element `timeInd` of d.valArrays[0]; the metadata
 // `md` and the indices `lonInd`, `latInd` and `levInd` are accepted but never
 // read, so the result does not depend on them.
 //
 // NOTE(review): the index parameters here are ordered (time, lon, lat, lev),
 // whereas the getVal in src/hurricanedata/fielddata.cu shown in this same diff
 // takes them as (time, lev, lat, lon) - confirm which order callers expect.
 __device__ float getVal(
    const FieldMetadata &md,
    const FieldData &d,
    const size_t &timeInd,
    const size_t &lonInd,
    const size_t &latInd,
    const size_t &levInd
 ) {
    // TODO: Actually implement function
    // Until then only timeInd selects the element; no spatial offset is applied.
    return d.valArrays[0][timeInd]; 
 }
 // Not a parallel computation: the kernel loops serially and is launched as <<<1, 1>>>
-__global__ void computeMean(float *ans, size_t *masked_vals, const FieldMetadata &fmd, FieldData fd) {
+__global__ void computeMean(float *ans, const FieldMetadata &fmd, FieldData fd) {
    float sum = 0;
    size_t num_not_masked_values = 0;
-    size_t num_masked_values = 0;
+    for (int i = 0; i < fmd.widthSize; i++) {
-    for (int i = 0; i < fmd.widthSize*fmd.heightSize*fmd.depthSize*fd.timeSize; i++) {
+        double xi = getVal(fmd, fd, 2, 20, 100, i);
        double xi = getVal(fmd, fd, i, 0, 0, 0);
        if (xi < 1E14) { /* If x is not missing value */
            num_not_masked_values++;
            sum += xi;
        } else {
            num_masked_values++;
        }
    }
    *ans = sum/num_not_masked_values;
    *masked_vals = num_masked_values;
 }
 int main() {
@@ -45,17 +32,13 @@ int main() {
    float *ptr_mean;
    cudaMallocManaged(&ptr_mean, sizeof(float));
-    size_t *ptr_masked;
+    computeMean<<<1, 1>>>(ptr_mean, *buffer.fmd, fd);
    cudaMallocManaged(&ptr_masked, sizeof(size_t));
    computeMean<<<1, 1>>>(ptr_mean, ptr_masked, *buffer.fmd, fd);
    cudaDeviceSynchronize();
-    std::cout << "Mean = " << *ptr_mean << " values where " << *ptr_masked << " are masked values.\n";
+    std::cout << "Mean = " << std::fixed << std::setprecision(6) << *ptr_mean << "\n";
    cudaFree(fd.valArrays[0]);
    cudaFree(ptr_mean);
    cudaFree(ptr_masked);
    return 0;
 }
--- a/test_read.py
+++ b/test_read.py
@@ -23,11 +23,13 @@ print("Mean of U:", U_mean)
 print("Sum of U:", U_sum)
 sumval = 0
-row = U[0,0,100]
+row = U[2,20,100]
 n = 0
 for val in row:
    if not np.ma.is_masked(val):
        n+=1
        sumval += val
-print(f"Why does {np.sum(row)=} not equal {sumval=} ?!")
+print(f"Why does {np.mean(row)=} not equal {sumval/n=} ?!")
 # Close the NetCDF file
 ncfile.close()