getVal seems to work
This commit is contained in:
parent
4b06f289b8
commit
a7ad248c38
5
Makefile
5
Makefile
|
|
@ -1,6 +1,7 @@
|
|||
# Compiler and flags
|
||||
NVCC = nvcc
|
||||
CXXFLAGS = -I./src -I./hurricanedata -std=c++17 $(shell nc-config --cxx4flags) $(shell nc-config --cxx4libs)
|
||||
COMPILE_OBJ_FLAGS = --device-c
|
||||
|
||||
# Directories
|
||||
SRC_DIR = src
|
||||
|
|
@ -16,12 +17,12 @@ all: $(TARGET)
|
|||
|
||||
# Build the main target
|
||||
$(TARGET): $(OBJ_FILES) | $(BUILD_DIR)
|
||||
$(NVCC) $(CXXFLAGS) -o $@ $^
|
||||
$(NVCC) $(CXXFLAGS) $^ -o $@
|
||||
|
||||
# Compile object files
|
||||
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cu
|
||||
@mkdir -p $(dir $@)
|
||||
$(NVCC) $(CXXFLAGS) -c $< -o $@
|
||||
$(NVCC) $(CXXFLAGS) $(COMPILE_OBJ_FLAGS) -c $< -o $@
|
||||
|
||||
# Debug build
|
||||
debug: CXXFLAGS += -g
|
||||
|
|
|
|||
|
|
@ -1,12 +1,19 @@
|
|||
#include "fielddata.h"
|
||||
|
||||
// __device__ float getVal(
|
||||
// const FieldMetadata &md,
|
||||
// const FieldData &d,
|
||||
// const size_t &timeInd,
|
||||
// const size_t &lonInd,
|
||||
// const size_t &latInd,
|
||||
// const size_t &levInd
|
||||
// ) {
|
||||
// return d.valArrays[0][timeInd];
|
||||
// }
|
||||
// Reads one float from d.valArrays[0] at a flat offset built from four indices.
//
// The offset is
//   timeInd * (widthSize * heightSize * depthSize)
//   + levInd * (widthSize * heightSize)
//   + latInd * widthSize
//   + lonInd
// so lonInd has stride 1 and timeInd has the largest stride.
//
// Parameters:
//   md      - supplies widthSize, heightSize and depthSize used for the strides.
//   d       - supplies valArrays; only valArrays[0] is read here.
//   timeInd, levInd, latInd, lonInd - indices combined into the flat offset.
//
// Returns the float stored at that offset.
//
// No bounds checking is performed on any index.
// NOTE(review): presumably widthSize/heightSize/depthSize are the lon/lat/lev
// axis lengths, matching the index names - confirm against FieldMetadata.
__device__ float getVal(
|
||||
const FieldMetadata &md,
|
||||
const FieldData &d,
|
||||
const size_t &timeInd,
|
||||
const size_t &levInd,
|
||||
const size_t &latInd,
|
||||
const size_t &lonInd
|
||||
) {
|
||||
// Number of values in one time step (all levels).
size_t sizeSpatialData = md.widthSize*md.heightSize*md.depthSize;
|
||||
// Number of values in one level of one time step.
size_t size2DMapData = md.widthSize*md.heightSize;
|
||||
// Flat lookup: time stride + level stride + row stride + column.
return d.valArrays[0][
|
||||
timeInd*sizeSpatialData
|
||||
+ levInd*size2DMapData
|
||||
+ latInd*md.widthSize
|
||||
+ lonInd
|
||||
];
|
||||
}
|
||||
|
|
@ -32,13 +32,13 @@ struct FieldData {
|
|||
|
||||
using FieldData = FieldData;
|
||||
|
||||
// __device__ float getVal(
|
||||
// const FieldMetadata &md,
|
||||
// const FieldData &d,
|
||||
// const size_t &timeInd,
|
||||
// const size_t &lonInd,
|
||||
// const size_t &latInd,
|
||||
// const size_t &levInd
|
||||
// );
|
||||
// Declaration of the device-side accessor that returns a single float from a
// FieldData value array, addressed by (timeInd, levInd, latInd, lonInd).
//
// Note the argument order: time, level, latitude, longitude. The commented-out
// prototype just above used (time, lon, lat, lev), so call sites written
// against that earlier order must be updated.
//
// NOTE(review): declared `extern`, so the definition presumably lives in a
// separate .cu translation unit and needs relocatable device code
// (e.g. nvcc --device-c) to link - confirm against the build flags.
extern __device__ float getVal(
|
||||
const FieldMetadata &md,
|
||||
const FieldData &d,
|
||||
const size_t &timeInd,
|
||||
const size_t &levInd,
|
||||
const size_t &latInd,
|
||||
const size_t &lonInd
|
||||
);
|
||||
|
||||
#endif //FIELDDATA_H
|
||||
|
|
|
|||
|
|
@ -21,8 +21,6 @@ path(path), variableName(variableName) {
|
|||
readAndAllocateAxis<double>(&fmd->levs, &fmd->depthSize, vars.find("lev")->second);
|
||||
}
|
||||
|
||||
|
||||
|
||||
FieldData GPUBuffer::nextFieldData() {
|
||||
NcFile data(path, NcFile::read);
|
||||
|
||||
|
|
@ -47,8 +45,6 @@ FieldData GPUBuffer::nextFieldData() {
|
|||
var.getVar(h_array);
|
||||
|
||||
// Copy data to device
|
||||
// float *d_array;
|
||||
|
||||
cudaError_t status = cudaMalloc(&fd.valArrays[0], sizeof(float)*length);
|
||||
if (status != cudaSuccess)
|
||||
cout << "Error allocating memory: " << status << "\n";
|
||||
|
|
|
|||
31
src/main.cu
31
src/main.cu
|
|
@ -1,38 +1,25 @@
|
|||
#include "hurricanedata/fielddata.h"
|
||||
#include "hurricanedata/gpubuffer.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <device_launch_parameters.h>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <iomanip>
|
||||
|
||||
// Placeholder accessor: returns d.valArrays[0][timeInd] and nothing else.
//
// md, lonInd, latInd and levInd are accepted but never used, so timeInd is
// effectively a raw flat offset into valArrays[0].
// Argument order here is (time, lon, lat, lev).
__device__ float getVal(
|
||||
const FieldMetadata &md,
|
||||
const FieldData &d,
|
||||
const size_t &timeInd,
|
||||
const size_t &lonInd,
|
||||
const size_t &latInd,
|
||||
const size_t &levInd
|
||||
) {
|
||||
// TODO: Actually implement function
|
||||
return d.valArrays[0][timeInd];
|
||||
}
|
||||
|
||||
// Not parallel computation
|
||||
// Kernel that averages values obtained through getVal, skipping missing ones.
//
// A value counts as present when it is < 1E14; present values are summed and
// counted, all others are only counted as masked. The mean (sum divided by the
// number of present values) is written to *ans.
//
// The body uses no thread or block indices: every launched thread runs the
// whole loop and writes the same outputs.
//
// NOTE(review): this listing contains two signatures and two loop headers back
// to back - it appears to show both the pre-commit and post-commit form of the
// changed lines without +/- markers. Presumably the first of each pair is the
// old line; confirm against the repository before reading it as one function.
// NOTE(review): `fmd` is a reference parameter of a __global__ function; the
// referenced object must be readable from device code - confirm how the
// caller allocates it.
// NOTE(review): if no value passes the < 1E14 test, num_not_masked_values is 0
// and the final division is 0/0.
__global__ void computeMean(float *ans, size_t *masked_vals, const FieldMetadata &fmd, FieldData fd) {
|
||||
__global__ void computeMean(float *ans, const FieldMetadata &fmd, FieldData fd) {
|
||||
float sum = 0;
|
||||
size_t num_not_masked_values = 0;
|
||||
size_t num_masked_values = 0;
|
||||
// Variant 1: walk every element, passing i as the first (time) index.
// NOTE(review): `int i` is compared against a product of size fields; the
// signedness/width of those fields is not visible here - check for overflow.
for (int i = 0; i < fmd.widthSize*fmd.heightSize*fmd.depthSize*fd.timeSize; i++) {
|
||||
double xi = getVal(fmd, fd, i, 0, 0, 0);
|
||||
// Variant 2: walk one row only - indices 2, 20, 100 are fixed and i is the
// last index.
for (int i = 0; i < fmd.widthSize; i++) {
|
||||
double xi = getVal(fmd, fd, 2, 20, 100, i);
|
||||
if (xi < 1E14) { /* If x is not missing value */
|
||||
num_not_masked_values++;
|
||||
sum += xi;
|
||||
} else {
|
||||
num_masked_values++;
|
||||
}
|
||||
}
|
||||
// Mean over the present values only.
*ans = sum/num_not_masked_values;
|
||||
// Only valid with the signature that declares masked_vals.
*masked_vals = num_masked_values;
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
|
@ -45,17 +32,13 @@ int main() {
|
|||
float *ptr_mean;
|
||||
cudaMallocManaged(&ptr_mean, sizeof(float));
|
||||
|
||||
size_t *ptr_masked;
|
||||
cudaMallocManaged(&ptr_masked, sizeof(size_t));
|
||||
|
||||
computeMean<<<1, 1>>>(ptr_mean, ptr_masked, *buffer.fmd, fd);
|
||||
computeMean<<<1, 1>>>(ptr_mean, *buffer.fmd, fd);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
std::cout << "Mean = " << *ptr_mean << " values where " << *ptr_masked << " are masked values.\n";
|
||||
std::cout << "Mean = " << std::fixed << std::setprecision(6) << *ptr_mean << "\n";
|
||||
|
||||
cudaFree(fd.valArrays[0]);
|
||||
cudaFree(ptr_mean);
|
||||
cudaFree(ptr_masked);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,11 +23,13 @@ print("Mean of U:", U_mean)
|
|||
print("Sum of U:", U_sum)
|
||||
|
||||
sumval = 0
|
||||
row = U[0,0,100]
|
||||
row = U[2,20,100]
|
||||
n = 0
|
||||
for val in row:
|
||||
if not np.ma.is_masked(val):
|
||||
n+=1
|
||||
sumval += val
|
||||
print(f"Why does {np.sum(row)=} not equal {sumval=} ?!")
|
||||
print(f"Why does {np.mean(row)=} not equal {sumval/n=} ?!")
|
||||
|
||||
# Close the NetCDF file
|
||||
ncfile.close()
|
||||
Loading…
Reference in New Issue