FieldData structs implemented
parent ac63abe93a
commit 0db764c52f
datareader implementation (deleted)
@@ -1,60 +0,0 @@
-#include "datareader.h"
-
-#include <netcdf>
-
-using namespace std;
-using namespace netCDF;
-
-std::vector<float> readData(std::string path, std::string variableName) {
-    netCDF::NcFile data(path, netCDF::NcFile::read);
-
-    multimap<string, NcVar> vars = data.getVars();
-
-    NcVar var = vars.find(variableName)->second;
-
-    int length = 1;
-    for (NcDim dim: var.getDims()) {
-        length *= dim.getSize();
-    }
-
-    vector<float> vec(length);
-
-    var.getVar(vec.data());
-
-    return vec;
-}
-
-std::pair<float*, size_t> loadDataToDevice(std::string path, std::string variableName) {
-    netCDF::NcFile data(path, netCDF::NcFile::read);
-
-    multimap<string, NcVar> vars = data.getVars();
-
-    NcVar var = vars.find(variableName)->second;
-
-    int length = 1;
-    for (NcDim dim: var.getDims()) {
-        length *= dim.getSize();
-    }
-
-    // Store NetCDF variable in pinned memory on host
-    float *h_array;
-
-    cudaMallocHost(&h_array, sizeof(float)*length);
-
-    var.getVar(h_array);
-
-    // Copy data to device
-    float *d_array;
-
-    cudaError_t status = cudaMalloc(&d_array, sizeof(float)*length);
-    if (status != cudaSuccess)
-        cout << "Error allocating memory: " << status << "\n";
-
-    cudaMemcpyAsync(d_array, h_array, sizeof(float)*length, cudaMemcpyHostToDevice);
-
-    cudaDeviceSynchronize(); // Heavy hammer synchronisation // TODO: Use streams
-
-    cudaFreeHost(h_array);
-
-    return std::pair(d_array, length);
-}
datareader.h (deleted)
@@ -1,10 +0,0 @@
-#ifndef DATAREADER_H
-#define DATAREADER_H
-
-#include <vector>
-#include <string>
-
-std::vector<float> readData(std::string path, std::string variableName);
-std::pair<float*, size_t> loadDataToDevice(std::string path, std::string variableName);
-
-#endif //DATAREADER_H
fielddata implementation (new file)
@@ -0,0 +1,12 @@
+#include "fielddata.h"
+
+// __device__ float getVal(
+//     const FieldMetadata &md,
+//     const FieldData &d,
+//     const size_t &timeInd,
+//     const size_t &lonInd,
+//     const size_t &latInd,
+//     const size_t &levInd
+// ) {
+//     return d.valArrays[0][timeInd];
+// }
fielddata.h (new file)
@@ -0,0 +1,44 @@
+#ifndef FIELDDATA_H
+#define FIELDDATA_H
+
+#include <vector>
+
+struct FieldMetadata {
+    size_t widthSize; // Number of different longitudes
+    size_t heightSize; // Number of different latitudes
+    size_t depthSize; // Number of different levels
+
+    // lons is a managed Unified Memory array of size widthSize that indicates
+    // that getVal(t, i, j, k) is a value with longitude of lons[i].
+    // The other such arrays are similarly defined.
+    double *lons;
+    double *lats;
+    double *levs;
+};
+
+using FieldMetadata = FieldMetadata;
+
+struct FieldData {
+    static constexpr size_t FILESNUM = 2; // Number of files stored in a FieldData struct.
+
+    // An array of length FILESNUM storing pointers to 4D arrays stored in device memory.
+    float *valArrays[FILESNUM];
+
+    size_t timeSize; // Number of different times
+    // times is a managed Unified Memory array of size timeSize that indicates
+    // that getVal(md, d, t, i, j, k) is a value at time times[t].
+    int *times;
+};
+
+using FieldData = FieldData;
+
+// __device__ float getVal(
+//     const FieldMetadata &md,
+//     const FieldData &d,
+//     const size_t &timeInd,
+//     const size_t &lonInd,
+//     const size_t &latInd,
+//     const size_t &levInd
+// );
+
+#endif //FIELDDATA_H
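The getVal accessor these comments refer to is only declared in commented-out form here, and its first definition in main.cu below ignores everything but timeInd. For orientation, a minimal sketch of what a full lookup over the flattened 4D array could look like, assuming the variable is stored in the usual MERRA-2 [time][lev][lat][lon] order and lives in valArrays[0]; both the memory layout and the file index are assumptions, not something this commit fixes.

#include "fielddata.h"

// Sketch only: the [time][lev][lat][lon] ordering and the hard-coded file
// index 0 are assumptions about how valArrays[0] will be filled.
__device__ float getVal(
    const FieldMetadata &md,
    const FieldData &d,
    const size_t &timeInd,
    const size_t &lonInd,
    const size_t &latInd,
    const size_t &levInd
) {
    size_t flat = ((timeInd * md.depthSize + levInd) * md.heightSize + latInd)
                  * md.widthSize + lonInd;
    return d.valArrays[0][flat];
}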
gpubuffer implementation (new file)
@@ -0,0 +1,70 @@
+#include "gpubuffer.h"
+#include "fielddata.h"
+#include "gpubufferhelper.h"
+
+#include <netcdf>
+
+
+using namespace std;
+using namespace netCDF;
+
+GPUBuffer::GPUBuffer(std::string path, std::string variableName):
+    path(path), variableName(variableName) {
+    NcFile data(path, NcFile::read);
+
+    multimap<string, NcVar> vars = data.getVars();
+
+    cudaMallocManaged(&fmd, sizeof(FieldMetadata));
+
+    readAndAllocateAxis<double>(&fmd->lons, &fmd->widthSize, vars.find("lon")->second);
+    readAndAllocateAxis<double>(&fmd->lats, &fmd->heightSize, vars.find("lat")->second);
+    readAndAllocateAxis<double>(&fmd->levs, &fmd->depthSize, vars.find("lev")->second);
+}
+
+
+
+FieldData GPUBuffer::nextFieldData() {
+    NcFile data(path, NcFile::read);
+
+    multimap<string, NcVar> vars = data.getVars();
+
+    FieldData fd;
+    size_t timeSize;
+    readAndAllocateAxis(&fd.times, &fd.timeSize, vars.find("time")->second);
+
+    NcVar var = vars.find(variableName)->second;
+
+    int length = 1;
+    for (NcDim dim: var.getDims()) {
+        length *= dim.getSize();
+    }
+
+    // Store NetCDF variable in pinned memory on host
+    float *h_array;
+
+    cudaMallocHost(&h_array, sizeof(float)*length);
+
+    var.getVar(h_array);
+
+    // Copy data to device
+    // float *d_array;
+
+    cudaError_t status = cudaMalloc(&fd.valArrays[0], sizeof(float)*length);
+    if (status != cudaSuccess)
+        cout << "Error allocating memory: " << status << "\n";
+
+    cudaMemcpyAsync(fd.valArrays[0], h_array, sizeof(float)*length, cudaMemcpyHostToDevice);
+
+    cudaDeviceSynchronize(); // Heavy hammer synchronisation // TODO: Use streams
+
+    cudaFreeHost(h_array);
+
+    return fd;
+}
+
+GPUBuffer::~GPUBuffer() {
+    cudaFree(fmd->lons);
+    cudaFree(fmd->lats);
+    cudaFree(fmd->levs);
+    cudaFree(fmd);
+}
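Note that in nextFieldData() only the cudaMalloc status is inspected, and a failure is merely printed; cudaMallocHost, cudaMemcpyAsync and the managed allocations in the constructor are not checked at all. A minimal checking helper (hypothetical, not part of this commit) that those calls could be routed through:

#include <cuda_runtime.h>
#include <iostream>

// Hypothetical helper: report a failed CUDA call with a readable message
// and hand the status back to the caller.
inline cudaError_t checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        std::cerr << what << " failed: " << cudaGetErrorString(status) << "\n";
    }
    return status;
}

// Possible use inside nextFieldData():
//   checkCuda(cudaMallocHost(&h_array, sizeof(float)*length), "cudaMallocHost");
//   checkCuda(cudaMalloc(&fd.valArrays[0], sizeof(float)*length), "cudaMalloc");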
gpubuffer.h (new file)
@@ -0,0 +1,24 @@
+#ifndef GPUBUFFER_H
+#define GPUBUFFER_H
+
+#include "fielddata.h"
+
+#include <string>
+
+class GPUBuffer {
+public:
+    GPUBuffer(std::string path, std::string variableName);
+
+    FieldData nextFieldData();
+
+    ~GPUBuffer();
+
+    FieldMetadata *fmd;
+private:
+    // TODO: Implement GPUBuffer
+    std::string path;
+    std::string variableName;
+
+};
+
+#endif //GPUBUFFER_H
gpubufferhelper.h (new file)
@@ -0,0 +1,10 @@
+#include <netcdf>
+#include <cassert>
+template <typename T>
+void readAndAllocateAxis(T** axis_ptr, size_t *size, const netCDF::NcVar &var) {
+    assert(var.getDimCount() == 1);
+    netCDF::NcDim dim = var.getDim(0);
+    *size = dim.getSize();
+    cudaError_t status = cudaMallocManaged(axis_ptr, *size*sizeof(T));
+    var.getVar(*axis_ptr);
+}
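Unlike the other headers in this commit, gpubufferhelper.h has no include guard, and the cudaMallocManaged status it captures is never looked at. A guarded, checked variant is sketched below; the guard name and the error handling are assumptions, not taken from the repository.

#ifndef GPUBUFFERHELPER_H   // hypothetical guard name
#define GPUBUFFERHELPER_H

#include <cuda_runtime.h>
#include <netcdf>
#include <cassert>
#include <iostream>

// Read a 1D coordinate variable into a managed Unified Memory array and
// report its length; bail out if the managed allocation fails.
template <typename T>
void readAndAllocateAxis(T** axis_ptr, size_t *size, const netCDF::NcVar &var) {
    assert(var.getDimCount() == 1);
    netCDF::NcDim dim = var.getDim(0);
    *size = dim.getSize();
    cudaError_t status = cudaMallocManaged(axis_ptr, *size * sizeof(T));
    if (status != cudaSuccess) {
        std::cerr << "cudaMallocManaged failed: " << cudaGetErrorString(status) << "\n";
        return;
    }
    var.getVar(*axis_ptr);
}

#endif // GPUBUFFERHELPER_H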
src/main.cu (33 changed lines)
@@ -1,19 +1,31 @@
-#include "hurricanedata/datareader.h"
+#include "hurricanedata/gpubuffer.h"
 
 #include <cuda_runtime.h>
 #include <device_launch_parameters.h>
 #include <iostream>
 #include <cmath>
 
+__device__ float getVal(
+    const FieldMetadata &md,
+    const FieldData &d,
+    const size_t &timeInd,
+    const size_t &lonInd,
+    const size_t &latInd,
+    const size_t &levInd
+) {
+    return d.valArrays[0][timeInd];
+}
+
 // Not parallel computation
-__global__ void computeMean(float *ans, size_t *masked_vals, size_t n, float *x) {
+__global__ void computeMean(float *ans, size_t *masked_vals, const FieldMetadata &fmd, FieldData fd) {
     float sum = 0;
     size_t num_not_masked_values = 0;
     size_t num_masked_values = 0;
-    for (int i = 0; i < n; i++) {
-        if (x[i] < 1E14) { /* If x is not missing value */
+    for (int i = 0; i < fmd.widthSize*fmd.heightSize*fmd.depthSize*fd.timeSize; i++) {
+        double xi = getVal(fmd, fd, i, 0, 0, 0);
+        if (xi < 1E14) { /* If x is not missing value */
             num_not_masked_values++;
-            sum += x[i];
+            sum += xi;
         } else {
             num_masked_values++;
         }
@@ -25,7 +37,9 @@ __global__ void computeMean(float *ans, size_t *masked_vals, size_t n, float *x)
 int main() {
     std::string path = "data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4";
     std::string variable = "T";
-    auto arr = loadDataToDevice(path, variable);
+    GPUBuffer buffer{path, variable};
+
+    auto fd = buffer.nextFieldData();
 
     float *ptr_mean;
     cudaMallocManaged(&ptr_mean, sizeof(float));
@@ -33,15 +47,14 @@ int main() {
     size_t *ptr_masked;
     cudaMallocManaged(&ptr_masked, sizeof(size_t));
 
-    computeMean<<<1, 1>>>(ptr_mean, ptr_masked, arr.second, arr.first);
+    computeMean<<<1, 1>>>(ptr_mean, ptr_masked, *buffer.fmd, fd);
 
     cudaDeviceSynchronize();
 
-    std::cout << "Mean = " << *ptr_mean << " calculated from " << arr.second << " values where " << *ptr_masked << " are masked values.\n";
+    std::cout << "Mean = " << *ptr_mean << " values where " << *ptr_masked << " are masked values.\n";
 
-    cudaFree(arr.first);
+    cudaFree(fd.valArrays[0]);
     cudaFree(ptr_mean);
     cudaFree(ptr_masked);
 
     return 0;
 }
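Both the removed loadDataToDevice and the new nextFieldData still end the host-to-device copy with a blanket cudaDeviceSynchronize(), which the TODO flags. One possible shape for a stream-based version, sketched under the assumption that the copy stays blocking for the caller for now; the helper name is made up, not from the repository.

#include <cuda_runtime.h>

// Sketch only: copy a pinned host buffer to the device on a dedicated stream
// and wait for that stream alone, instead of synchronising the whole device.
void copyToDeviceOnStream(float *d_array, const float *h_array, size_t length) {
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    cudaMemcpyAsync(d_array, h_array, sizeof(float) * length,
                    cudaMemcpyHostToDevice, stream);

    cudaStreamSynchronize(stream); // blocks only until this copy has finished
    cudaStreamDestroy(stream);
}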
test_read.py (17 changed lines)
@@ -1,6 +1,5 @@
 import numpy as np
 from netCDF4 import Dataset
-from math import prod
 
 # Load the NetCDF file
 file_path = 'data/MERRA2_400.inst6_3d_ana_Np.20120101.nc4'
@@ -12,19 +11,23 @@ print(ncfile.variables.keys())
 U = ncfile.variables['T'][:]
 
 # Check the shape of the variable
-print(f"Shape of U: {U.shape} and total length is {prod(U.shape)}")
+print("Shape of U:", U.shape)
 
 # Compute the mean of the variable across all axes (for all elements in U)
 U_mean = np.mean(U)
+U_sum = np.sum(U)
 
 # Print the mean
 print("Mean of U:", U_mean)
 
-print(f"{U[0,0,0,1]=}")
-is_masked = np.ma.isMaskedArray(U)
-print(f"Is U a masked array? {is_masked}")
-masked_count = np.ma.count_masked(U)
-print("Number of masked values in U:", masked_count)
+print("Sum of U:", U_sum)
+
+sumval = 0
+row = U[0,0,100]
+for val in row:
+    if not np.ma.is_masked(val):
+        sumval += val
+print(f"Why does {np.sum(row)=} not equal {sumval=} ?!")
 
 # Close the NetCDF file
 ncfile.close()