Loading the data to the GPU seems to work, but the computed values differ from the ones computed in Python?

This commit is contained in:
Robin 2024-12-20 12:24:44 +01:00
parent 2719b93fa6
commit ac63abe93a
5 changed files with 74 additions and 23 deletions

6
load-modules.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Load the modules needed on Habrok: CUDA and netCDF-C++4.
# NOTE(review): `ml` presumably only changes the environment of the shell that
# runs it, so this file likely has to be sourced (`source load-modules.sh`)
# rather than executed for the modules to stay loaded in the caller -- confirm.
ml CUDA
ml netCDF-C++4

View File

@ -24,31 +24,37 @@ std::vector<float> readData(std::string path, std::string variableName) {
return vec; return vec;
} }
struct cudaArray* loadDataToDevice(std::string path, std::string variableName) { std::pair<float*, size_t> loadDataToDevice(std::string path, std::string variableName) {
netCDF::NcFile data(path, netCDF::NcFile::read); netCDF::NcFile data(path, netCDF::NcFile::read);
multimap<string, NcVar> vars = data.getVars(); multimap<string, NcVar> vars = data.getVars();
NcVar var = vars.find(variableName)->second; NcVar var = vars.find(variableName)->second;
struct cudaChannelFormatDesc arrayType = { int length = 1;
.x = 32, for (NcDim dim: var.getDims()) {
.y = 0, length *= dim.getSize();
.z = 0, }
.w = 0,
.f = cudaChannelFormatKindFloat
}; // Float-32
struct cudaExtent extent = { // Store NetCDF variable in pinned memory on host
.width = var.getDim(3).getSize(), // longitude float *h_array;
.height = var.getDim(2).getSize(), // latitude
.depth = var.getDim(1).getSize(), // level
};
struct cudaArray *array; cudaMallocHost(&h_array, sizeof(float)*length);
cudaError_t error = cudaMalloc3DArray(&array, &arrayType, extent, 0); var.getVar(h_array);
cout << "cuda error: " << error << "\n";
return array; // Copy data to device
float *d_array;
cudaError_t status = cudaMalloc(&d_array, sizeof(float)*length);
if (status != cudaSuccess)
cout << "Error allocating memory: " << status << "\n";
cudaMemcpyAsync(d_array, h_array, sizeof(float)*length, cudaMemcpyHostToDevice);
cudaDeviceSynchronize(); // Heavy hammer synchronisation // TODO: Use streams
cudaFreeHost(h_array);
return std::pair(d_array, length);
} }

View File

@ -5,6 +5,6 @@
#include <string> #include <string>
std::vector<float> readData(std::string path, std::string variableName); std::vector<float> readData(std::string path, std::string variableName);
struct cudaArray* loadDataToDevice(std::string path, std::string variableName); std::pair<float*, size_t> loadDataToDevice(std::string path, std::string variableName);
#endif //DATAREADER_H #endif //DATAREADER_H

View File

@ -5,11 +5,43 @@
#include <iostream> #include <iostream>
#include <cmath> #include <cmath>
// Serial (non-parallel) mean of the non-missing entries of x.
//
// Launch layout: the whole array is walked by every thread that runs this
// kernel, so it is meant to be launched as <<<1, 1>>>; no shared memory is used.
//
// Parameters:
//   ans          out: mean of all x[i] below the missing-value threshold
//                (NaN when no such entry exists, because of the 0/0 division)
//   masked_vals  out: number of entries counted as missing
//   n            number of elements readable through x
//   x            input array of n floats, accessible from the device
__global__ void computeMean(float *ans, size_t *masked_vals, size_t n, float *x) {
    // Entries at or above this value are counted as missing. NaN also fails
    // the `<` comparison below and is therefore counted as missing too.
    const float missing_threshold = 1E14f;

    // Accumulate in double: with a float accumulator, once the running sum is
    // much larger than the individual addends their low-order bits (or the
    // whole addend) are rounded away, which skews the mean for large inputs.
    double sum = 0.0;
    size_t num_not_masked_values = 0;
    size_t num_masked_values = 0;

    // size_t index so the loop also terminates correctly when n > INT_MAX.
    for (size_t i = 0; i < n; i++) {
        const float value = x[i];
        if (value < missing_threshold) { /* value is not a missing value */
            num_not_masked_values++;
            sum += value;
        } else {
            num_masked_values++;
        }
    }

    *ans = static_cast<float>(sum / static_cast<double>(num_not_masked_values));
    *masked_vals = num_masked_values;
}
int main() { int main() {
std::string path = "data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4"; std::string path = "data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4";
std::string variable = "U"; std::string variable = "T";
auto arr = loadDataToDevice(path, variable); auto arr = loadDataToDevice(path, variable);
cudaFreeArray(arr);
float *ptr_mean;
cudaMallocManaged(&ptr_mean, sizeof(float));
size_t *ptr_masked;
cudaMallocManaged(&ptr_masked, sizeof(size_t));
computeMean<<<1, 1>>>(ptr_mean, ptr_masked, arr.second, arr.first);
cudaDeviceSynchronize();
std::cout << "Mean = " << *ptr_mean << " calculated from " << arr.second << " values where " << *ptr_masked << " are masked values.\n";
cudaFree(arr.first);
cudaFree(ptr_mean);
cudaFree(ptr_masked);
return 0; return 0;
} }

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
from netCDF4 import Dataset from netCDF4 import Dataset
from math import prod
# Load the NetCDF file # Load the NetCDF file
file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4' file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4'
@ -10,8 +11,8 @@ print(ncfile.variables.keys())
U = ncfile.variables['T'][:] U = ncfile.variables['T'][:]
# Check the shape of the variable (it should be 3D) # Check the shape of the variable
print("Shape of U:", U.shape) print(f"Shape of U: {U.shape} and total length is {prod(U.shape)}")
# Compute the mean of the variable across all axes (for all elements in U) # Compute the mean of the variable across all axes (for all elements in U)
U_mean = np.mean(U) U_mean = np.mean(U)
@ -19,5 +20,11 @@ U_mean = np.mean(U)
# Print the mean # Print the mean
print("Mean of U:", U_mean) print("Mean of U:", U_mean)
print(f"{U[0,0,0,1]=}")
is_masked = np.ma.isMaskedArray(U)
print(f"Is U a masked array? {is_masked}")
masked_count = np.ma.count_masked(U)
print("Number of masked values in U:", masked_count)
# Close the NetCDF file # Close the NetCDF file
ncfile.close() ncfile.close()