load to gpu seems to work but the values are different than from python?

parent  2719b93fa6
commit  ac63abe93a

@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+# Load the needed modules in Habrok
+
+ml CUDA
+ml netCDF-C++4

@@ -24,31 +24,37 @@ std::vector<float> readData(std::string path, std::string variableName) {
     return vec;
 }
 
-struct cudaArray* loadDataToDevice(std::string path, std::string variableName) {
+std::pair<float*, size_t> loadDataToDevice(std::string path, std::string variableName) {
     netCDF::NcFile data(path, netCDF::NcFile::read);
 
     multimap<string, NcVar> vars = data.getVars();
 
     NcVar var = vars.find(variableName)->second;
 
-    struct cudaChannelFormatDesc arrayType = {
-        .x = 32,
-        .y = 0,
-        .z = 0,
-        .w = 0,
-        .f = cudaChannelFormatKindFloat
-    }; // Float-32
-
-    struct cudaExtent extent = {
-        .width = var.getDim(3).getSize(),  // longitude
-        .height = var.getDim(2).getSize(), // latitude
-        .depth = var.getDim(1).getSize(),  // level
-    };
-
-    struct cudaArray *array;
-
-    cudaError_t error = cudaMalloc3DArray(&array, &arrayType, extent, 0);
-    cout << "cuda error: " << error << "\n";
-
-    return array;
+    int length = 1;
+    for (NcDim dim: var.getDims()) {
+        length *= dim.getSize();
+    }
+
+    // Store NetCDF variable in pinned memory on host
+    float *h_array;
+    cudaMallocHost(&h_array, sizeof(float)*length);
+
+    var.getVar(h_array);
+
+    // Copy data to device
+    float *d_array;
+
+    cudaError_t status = cudaMalloc(&d_array, sizeof(float)*length);
+    if (status != cudaSuccess)
+        cout << "Error allocating memory: " << status << "\n";
+
+    cudaMemcpyAsync(d_array, h_array, sizeof(float)*length, cudaMemcpyHostToDevice);
+
+    cudaDeviceSynchronize(); // Heavy hammer synchronisation // TODO: Use streams
+
+    cudaFreeHost(h_array);
+
+    return std::pair(d_array, length);
 }
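
Note on the "TODO: Use streams" comment above: the new function already stages the data in pinned host memory, which is what an explicit stream needs to get any overlap. A minimal sketch of what that TODO could look like — illustrative only, not part of this commit; it reuses the d_array, h_array and length names from the function above:

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Enqueue the host-to-device copy on an explicit stream; because h_array
    // is pinned (cudaMallocHost), the copy can overlap with independent work
    // queued on other streams.
    cudaMemcpyAsync(d_array, h_array, sizeof(float)*length,
                    cudaMemcpyHostToDevice, stream);

    // Wait only for this stream instead of synchronising the whole device.
    cudaStreamSynchronize(stream);
    cudaStreamDestroy(stream);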

@@ -5,6 +5,6 @@
 #include <string>
 
 std::vector<float> readData(std::string path, std::string variableName);
-struct cudaArray* loadDataToDevice(std::string path, std::string variableName);
+std::pair<float*, size_t> loadDataToDevice(std::string path, std::string variableName);
 
 #endif //DATAREADER_H
36  src/main.cu

@@ -5,11 +5,43 @@
 #include <iostream>
 #include <cmath>
 
+// Not parallel computation
+__global__ void computeMean(float *ans, size_t *masked_vals, size_t n, float *x) {
+    float sum = 0;
+    size_t num_not_masked_values = 0;
+    size_t num_masked_values = 0;
+    for (int i = 0; i < n; i++) {
+        if (x[i] < 1E14) { /* If x is not missing value */
+            num_not_masked_values++;
+            sum += x[i];
+        } else {
+            num_masked_values++;
+        }
+    }
+    *ans = sum/num_not_masked_values;
+    *masked_vals = num_masked_values;
+}
+
 int main() {
     std::string path = "data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4";
-    std::string variable = "U";
+    std::string variable = "T";
     auto arr = loadDataToDevice(path, variable);
-    cudaFreeArray(arr);
+
+    float *ptr_mean;
+    cudaMallocManaged(&ptr_mean, sizeof(float));
+
+    size_t *ptr_masked;
+    cudaMallocManaged(&ptr_masked, sizeof(size_t));
+
+    computeMean<<<1, 1>>>(ptr_mean, ptr_masked, arr.second, arr.first);
+
+    cudaDeviceSynchronize();
+
+    std::cout << "Mean = " << *ptr_mean << " calculated from " << arr.second << " values where " << *ptr_masked << " are masked values.\n";
+
+    cudaFree(arr.first);
+    cudaFree(ptr_mean);
+    cudaFree(ptr_masked);
+
     return 0;
 }
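
On the question in the commit title (why the mean differs from the Python result): computeMean runs in a single thread and accumulates tens of millions of values into one float sum, so once the running sum dwarfs the individual values most of their low-order bits are rounded away, whereas numpy's mean uses a more accurate (pairwise) summation. The masking criteria also differ: the kernel uses a hard x[i] < 1E14 cutoff, while netCDF4 in test_read.py masks entries via the file's fill value. A minimal sketch of the same kernel with a double accumulator, which should bring the result much closer to numpy — illustrative only, not part of this commit, and the 1E14 threshold is carried over as an assumption about the file's fill value:

// Hedged sketch: identical logic to computeMean above, but the running sum is
// kept in double so adding ~10^7 float-sized values does not lose precision.
__global__ void computeMeanDouble(float *ans, size_t *masked_vals, size_t n, const float *x) {
    double sum = 0.0;                  // double accumulator instead of float
    size_t num_not_masked_values = 0;
    size_t num_masked_values = 0;
    for (size_t i = 0; i < n; i++) {
        if (x[i] < 1E14) {             // assumed fill-value threshold, as in computeMean
            num_not_masked_values++;
            sum += x[i];
        } else {
            num_masked_values++;
        }
    }
    *ans = static_cast<float>(sum / num_not_masked_values);
    *masked_vals = num_masked_values;
}

It would be launched exactly like the original, e.g. computeMeanDouble<<<1, 1>>>(ptr_mean, ptr_masked, arr.second, arr.first);.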
11  test_read.py

@@ -1,5 +1,6 @@
 import numpy as np
 from netCDF4 import Dataset
+from math import prod
 
 # Load the NetCDF file
 file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4'
@@ -10,8 +11,8 @@ print(ncfile.variables.keys())
 
 U = ncfile.variables['T'][:]
 
-# Check the shape of the variable (it should be 3D)
-print("Shape of U:", U.shape)
+# Check the shape of the variable
+print(f"Shape of U: {U.shape} and total length is {prod(U.shape)}")
 
 # Compute the mean of the variable across all axes (for all elements in U)
 U_mean = np.mean(U)
@@ -19,5 +20,11 @@ U_mean = np.mean(U)
 # Print the mean
 print("Mean of U:", U_mean)
 
+print(f"{U[0,0,0,1]=}")
+is_masked = np.ma.isMaskedArray(U)
+print(f"Is U a masked array? {is_masked}")
+masked_count = np.ma.count_masked(U)
+print("Number of masked values in U:", masked_count)
+
 # Close the NetCDF file
 ncfile.close()