moved times to metadata, improved freeing, created example

This commit is contained in:
Robin 2024-12-26 17:23:07 +01:00
parent c4a2ce1b44
commit 3c8e68ac1d
7 changed files with 33 additions and 48 deletions

View File

@ -49,7 +49,6 @@ void DataReader::loadFile(T* dataOut, size_t fileIndex, const string& varName) {
multimap<string, NcVar> vars = data.getVars();
NcVar var = vars.find(varName)->second;
cout << "var = " << varName << "with size = " << var.getDim(0).getSize() << "\n";
var.getVar(dataOut);
}

View File

@ -17,6 +17,8 @@ struct FieldMetadata {
size_t timeSize; // Number of different times
// times is a managed Unified Memory array of size numberOfTimeStepsPerFile
int *times;
size_t numberOfTimeStepsPerFile;
};
@ -29,9 +31,6 @@ struct FieldData {
float **valArrays;
size_t fieldInd;
// times is a managed Unified Memory array of size (FILESNUM, numberOfTimeStepsPerFile)
int **times;
};

View File

@ -18,8 +18,6 @@ struct File {
size_t size;
float *h_data; // host data
float *d_data; // device data
int *times;
size_t timeSize;
};
struct LoadFileJob {
@ -143,10 +141,8 @@ GPUBuffer::impl::impl(DataReader& dataReader): dataReader(dataReader) {
lock_guard<mutex> lk(file.m);
cudaMallocHost(&file.h_data, sizeof(float)*size);
cudaMalloc(&file.d_data, sizeof(float)*size);
cudaMallocManaged(&file.times, sizeof(int)*sizeTime);
file.size = size;
file.valid = false;
file.timeSize = sizeTime;
}
// loadFile(i, i);
}
@ -165,20 +161,17 @@ GPUBuffer::impl::~impl() {
for (size_t i = 0; i < numBufferedFiles; i++) {
File &file = buffer[i];
cudaFree(file.d_data);
cudaFree(file.h_data);
cudaFree(file.times);
cudaFreeHost(file.h_data);
}
cudaStreamDestroy(iostream);
}
void GPUBuffer::impl::loadFile(LoadFileJob job) {
File &file = buffer[job.bufferIndex];
{
lock_guard<mutex> lk(file.m);
assert(!file.valid);
dataReader.loadFile<int>(file.times, job.fileIndex, "time"); // TODO: Times dont store anything useful :(
cout << "times[1] (inside inside) " << file.times[1] << " for file with fileindex = " << job.fileIndex << "\n";
cout << "loading file with index " << job.fileIndex << "\n";
dataReader.loadFile<float>(file.h_data, job.fileIndex);
cudaMemcpyAsync(file.d_data, file.h_data, sizeof(float)*file.size, cudaMemcpyHostToDevice, iostream);
cudaStreamSynchronize(iostream);
@ -278,8 +271,6 @@ DataHandle GPUBuffer::getDataHandle(size_t bufferIndex) {
DataHandle dh = {
.d_data = file.d_data,
.times = file.times,
.timeSize = file.timeSize,
.size = file.size
};
return dh;

View File

@ -9,8 +9,6 @@
// Lightweight view of one buffered file, as filled in by GPUBuffer::getDataHandle.
// NOTE(review): the pointers appear to alias buffers that GPUBuffer allocates and
// frees itself, so a DataHandle presumably must not free them — confirm.
struct DataHandle {
float *d_data; // Device memory holding the file's values
int* times; // Unified (managed) memory holding the file's time values
size_t timeSize; // Number of ints in times
size_t size; // Number of floats in d_data
};

View File

@ -21,28 +21,31 @@ gpuBuffer(gpuBuffer), fieldInd(0), bufferInd(0), fileInd(0) {
fmd->depthSize = depthSize;
fmd->levs = levs;
for (size_t i = 0; i < GPUBuffer::numBufferedFiles; i++) {
gpuBuffer.loadFile(fileInd,fileInd);
fileInd++;
}
fmd->numberOfTimeStepsPerFile = 4; // TODO: Maybe find a better way to do this.
fmd->timeSize = GPUBufferHandler::numberOfTimeStepsPerField;
cudaMallocManaged(&fmd->times, sizeof(fmd->numberOfTimeStepsPerFile*sizeof(int)));
auto [numberOfTimeStepsPerFile, times] = gpuBuffer.getAxis<int>(0, "time");
fmd->numberOfTimeStepsPerFile = numberOfTimeStepsPerFile;
fmd->times = times;
cudaMallocManaged(&valArrays, sizeof(float *)*FieldData::FILESNUM);
}
FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
FieldData fd;
cudaMallocManaged(&fd.valArrays, sizeof(sizeof(float *)*FieldData::FILESNUM));
cudaMallocManaged(&fd.times, sizeof(sizeof(int *)*FieldData::FILESNUM));
size_t fieldDataInd = 0;
cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << "\n";
fd.valArrays = valArrays;
cout << "getting field from files " << bufferInd << " to " << newEndBufferInd << " with a field index of " << fieldInd << "\n";
for (int i = bufferInd; i <= newEndBufferInd; i++) {
cout << "getting handle for " << i << "\n";
DataHandle x = gpuBuffer.getDataHandle(i);
fd.valArrays[fieldDataInd] = x.d_data;
fd.times[fieldDataInd] = x.times;
fieldDataInd++;
}
fd.fieldInd = fieldInd;
@ -51,7 +54,6 @@ FieldData GPUBufferHandler::setupField(size_t newEndBufferInd) {
}
FieldData GPUBufferHandler::nextFieldData() {
DataHandle x = gpuBuffer.getDataHandle(bufferInd);
size_t newFieldInd = (fieldInd + 1) % fmd->numberOfTimeStepsPerFile;
size_t newBufferInd = (bufferInd + ((fieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
@ -62,25 +64,17 @@ FieldData GPUBufferHandler::nextFieldData() {
size_t newEndFieldInd = (endFieldInd + 1) % fmd->numberOfTimeStepsPerFile;
size_t newEndBufferInd = (endBufferInd + ((endFieldInd + 1) / fmd->numberOfTimeStepsPerFile)) % GPUBuffer::numBufferedFiles;
// size_t newBufferInd = (bufferInd + 1) % GPUBuffer::numBufferedFiles;
// size_t newFieldInd = (fieldInd + ((bufferInd + 1) / 4)) % x.timeSize;
// size_t endBufferInd = (bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) % GPUBuffer::numBufferedFiles;
// size_t endFieldInd = (fieldInd + ((bufferInd + GPUBufferHandler::numberOfTimeStepsPerField) / 4)) % x.timeSize;
// size_t newEndBufferInd = (endBufferInd + 1) % GPUBuffer::numBufferedFiles;
// size_t newEndFieldInd = (endFieldInd + ((endBufferInd + 1) / 4)) % x.timeSize;
if(firstTimeStep) {
firstTimeStep = false;
return setupField(endFieldInd);
return setupField(endBufferInd);
}
fieldInd = newFieldInd;
if (newBufferInd != bufferInd) {
fileInd++;
gpuBuffer.loadFile(fileInd, bufferInd);
bufferInd = newBufferInd;
fieldInd = newFieldInd;
}
if (newEndBufferInd != endBufferInd) {
@ -94,5 +88,6 @@ GPUBufferHandler::~GPUBufferHandler() {
cudaFree(fmd->lons);
cudaFree(fmd->lats);
cudaFree(fmd->levs);
cudaFree(valArrays);
cudaFree(fmd);
}

View File

@ -18,6 +18,8 @@ public:
static constexpr size_t numberOfTimeStepsPerField = 2; // TODO: Move this to fielddata
static void freeFieldData();
private:
FieldData setupField(size_t endBufferInd);
GPUBuffer& gpuBuffer;
@ -25,6 +27,8 @@ private:
size_t bufferInd;
size_t fieldInd;
bool firstTimeStep = true;
float **valArrays;
};
#endif //GPUBUFFERHANDLER_H

View File

@ -16,7 +16,7 @@ __global__ void getSingleValue(float *ans, const FieldMetadata &fmd, FieldData f
}
int main() {
std::string path = "data";
std::string path = "data/atmosphere_MERRA-wind-speed[179253532]";
std::string variable = "T";
@ -31,24 +31,23 @@ int main() {
GPUBufferHandler bufferHandler(buffer);
std::cout << "created buffer handler\n";
auto fd = bufferHandler.nextFieldData();
std::cout << "aquired field\n";
float *ptr_test_read;
cudaMallocManaged(&ptr_test_read, sizeof(float));
getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
std::cout << "created buffer handler\n";
for (int i = 0; i < 10; i++) {
FieldData fd = bufferHandler.nextFieldData();
cudaDeviceSynchronize();
getSingleValue<<<1, 1>>>(ptr_test_read, *bufferHandler.fmd, fd);
std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
cudaDeviceSynchronize();
// TODO: Write a example loop using buffering and measure it.
std::cout << "ptr_test_read = " << std::fixed << std::setprecision(6) << *ptr_test_read << "\n";
}
// TODO: Write an example loop using buffering and measure it.
cudaFree(fd.valArrays[0]); // TODO: Free data properly in FieldData (maybe make an iterator)
// TODO: Free data properly in FieldData (maybe make an iterator)
cudaFree(ptr_test_read);
return 0;
}