From ac63abe93afd514c4efbea5cde8fd289481c8ed5 Mon Sep 17 00:00:00 2001 From: Robin Date: Fri, 20 Dec 2024 12:24:44 +0100 Subject: [PATCH] load to gpu seems to work but the values are different than from python? --- load-modules.sh | 6 +++++ src/hurricanedata/datareader.cu | 42 +++++++++++++++++++-------------- src/hurricanedata/datareader.h | 2 +- src/main.cu | 36 ++++++++++++++++++++++++++-- test_read.py | 11 +++++++-- 5 files changed, 74 insertions(+), 23 deletions(-) create mode 100755 load-modules.sh diff --git a/load-modules.sh b/load-modules.sh new file mode 100755 index 0000000..da7a5c5 --- /dev/null +++ b/load-modules.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +# Load the needed modules in Habrok + +ml CUDA +ml netCDF-C++4 \ No newline at end of file diff --git a/src/hurricanedata/datareader.cu b/src/hurricanedata/datareader.cu index ed0c5ce..62dc72b 100644 --- a/src/hurricanedata/datareader.cu +++ b/src/hurricanedata/datareader.cu @@ -24,31 +24,37 @@ std::vector readData(std::string path, std::string variableName) { return vec; } -struct cudaArray* loadDataToDevice(std::string path, std::string variableName) { +std::pair loadDataToDevice(std::string path, std::string variableName) { netCDF::NcFile data(path, netCDF::NcFile::read); multimap vars = data.getVars(); NcVar var = vars.find(variableName)->second; - struct cudaChannelFormatDesc arrayType = { - .x = 32, - .y = 0, - .z = 0, - .w = 0, - .f = cudaChannelFormatKindFloat - }; // Float-32 - - struct cudaExtent extent = { - .width = var.getDim(3).getSize(), // longitude - .height = var.getDim(2).getSize(), // latitude - .depth = var.getDim(1).getSize(), // level - }; + int length = 1; + for (NcDim dim: var.getDims()) { + length *= dim.getSize(); + } - struct cudaArray *array; + // Store NetCDF variable in pinned memory on host + float *h_array; - cudaError_t error = cudaMalloc3DArray(&array, &arrayType, extent, 0); - cout << "cuda error: " << error << "\n"; + cudaMallocHost(&h_array, sizeof(float)*length); - return array; + var.getVar(h_array); + + // Copy data to device + float *d_array; + + cudaError_t status = cudaMalloc(&d_array, sizeof(float)*length); + if (status != cudaSuccess) + cout << "Error allocating memory: " << status << "\n"; + + cudaMemcpyAsync(d_array, h_array, sizeof(float)*length, cudaMemcpyHostToDevice); + + cudaDeviceSynchronize(); // Heavy hammer synchronisation // TODO: Use streams + + cudaFreeHost(h_array); + + return std::pair(d_array, length); } \ No newline at end of file diff --git a/src/hurricanedata/datareader.h b/src/hurricanedata/datareader.h index d7e3733..75a7e53 100644 --- a/src/hurricanedata/datareader.h +++ b/src/hurricanedata/datareader.h @@ -5,6 +5,6 @@ #include std::vector readData(std::string path, std::string variableName); -struct cudaArray* loadDataToDevice(std::string path, std::string variableName); +std::pair loadDataToDevice(std::string path, std::string variableName); #endif //DATAREADER_H diff --git a/src/main.cu b/src/main.cu index f472f37..1f5e837 100644 --- a/src/main.cu +++ b/src/main.cu @@ -5,11 +5,43 @@ #include #include +// Not parallel computation +__global__ void computeMean(float *ans, size_t *masked_vals, size_t n, float *x) { + float sum = 0; + size_t num_not_masked_values = 0; + size_t num_masked_values = 0; + for (int i = 0; i < n; i++) { + if (x[i] < 1E14) { /* If x is not missing value */ + num_not_masked_values++; + sum += x[i]; + } else { + num_masked_values++; + } + } + *ans = sum/num_not_masked_values; + *masked_vals = num_masked_values; +} + int main() { std::string path = "data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4"; - std::string variable = "U"; + std::string variable = "T"; auto arr = loadDataToDevice(path, variable); - cudaFreeArray(arr); + + float *ptr_mean; + cudaMallocManaged(&ptr_mean, sizeof(float)); + + size_t *ptr_masked; + cudaMallocManaged(&ptr_masked, sizeof(size_t)); + + computeMean<<<1, 1>>>(ptr_mean, ptr_masked, arr.second, arr.first); + + cudaDeviceSynchronize(); + + std::cout << "Mean = " << *ptr_mean << " calculated from " << arr.second << " values where " << *ptr_masked << " are masked values.\n"; + + cudaFree(arr.first); + cudaFree(ptr_mean); + cudaFree(ptr_masked); return 0; } diff --git a/test_read.py b/test_read.py index bcee40c..90eaa69 100644 --- a/test_read.py +++ b/test_read.py @@ -1,5 +1,6 @@ import numpy as np from netCDF4 import Dataset +from math import prod # Load the NetCDF file file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4' @@ -10,8 +11,8 @@ print(ncfile.variables.keys()) U = ncfile.variables['T'][:] -# Check the shape of the variable (it should be 3D) -print("Shape of U:", U.shape) +# Check the shape of the variable +print(f"Shape of U: {U.shape} and total length is {prod(U.shape)}") # Compute the mean of the variable across all axes (for all elements in U) U_mean = np.mean(U) @@ -19,5 +20,11 @@ U_mean = np.mean(U) # Print the mean print("Mean of U:", U_mean) +print(f"{U[0,0,0,1]=}") +is_masked = np.ma.isMaskedArray(U) +print(f"Is U a masked array? {is_masked}") +masked_count = np.ma.count_masked(U) +print("Number of masked values in U:", masked_count) + # Close the NetCDF file ncfile.close()