Commit 3de88c6
fix bug in projector sub-chunking
kylechampley committed Nov 5, 2024
1 parent f884b4c
Showing 6 changed files with 69 additions and 18 deletions.
src/cuda_utils.cu (2 changes: 1 addition & 1 deletion)
@@ -650,7 +650,7 @@ float getAvailableGPUmemory(int whichGPU)
std::size_t free_byte;
std::size_t total_byte;
cudaMemGetInfo(&free_byte, &total_byte);
return float(double(free_byte) / pow(2.0, 30.0));
return float(double(free_byte) / pow(2.0, 30.0)) * GPU_MEMORY_SAFETY_MULTIPLIER;
}
else
return 0.0;
src/cuda_utils.h (2 changes: 2 additions & 0 deletions)
@@ -20,6 +20,8 @@
* This header and associated source file are for generic GPU-based functions that are used in LEAP
*/

#define GPU_MEMORY_SAFETY_MULTIPLIER 1.0

#ifndef __USE_CPU
#include "cuda_runtime.h"

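Note: the two hunks above route the reported free memory through a single GPU_MEMORY_SAFETY_MULTIPLIER (currently 1.0, so behavior is unchanged). Below is a minimal standalone sketch of the pattern; availableGPUmemoryGB is a hypothetical stand-in for LEAP's getAvailableGPUmemory, and a multiplier below 1.0 would be one way to leave headroom for allocator overhead.

#include <cstddef>
#include <cstdio>
#include <cmath>
#include <cuda_runtime.h>

#define GPU_MEMORY_SAFETY_MULTIPLIER 1.0

// Report free GPU memory in GiB, derated by the safety multiplier.
float availableGPUmemoryGB(int whichGPU)
{
    if (cudaSetDevice(whichGPU) != cudaSuccess)
        return 0.0f;
    std::size_t free_byte = 0;
    std::size_t total_byte = 0;
    if (cudaMemGetInfo(&free_byte, &total_byte) != cudaSuccess)
        return 0.0f;
    return float(double(free_byte) / std::pow(2.0, 30.0)) * GPU_MEMORY_SAFETY_MULTIPLIER;
}

int main()
{
    printf("available GPU memory: %f GB\n", availableGPUmemoryGB(0));
    return 0;
}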
src/filtered_backprojection.cpp (5 changes: 4 additions & 1 deletion)
@@ -514,8 +514,11 @@ bool filteredBackprojection::execute(float* g, float* f, parameters* params, boo
int numVolumeData = 1;
if (volume_on_cpu == false)
numVolumeData = 0;
int numProjectionData = 1;
//if (data_on_cpu == false)
// numProjectionData = 0;

if (params->hasSufficientGPUmemory(false, 0, 1, numVolumeData) == false)
if (params->hasSufficientGPUmemory(false, 0, numProjectionData, numVolumeData) == false)
{
printf("Error: insufficient GPU memory\n");
return false;
src/projectors.cpp (14 changes: 10 additions & 4 deletions)
@@ -58,10 +58,13 @@ bool projectors::project(float* g, float* f, parameters* params, bool data_on_cp
int numVolumeData = 1;
if (volume_on_cpu == false)
numVolumeData = 0;
int numProjectionData = 1;
if (data_on_cpu == false)
numProjectionData = 0;

if (data_on_cpu)
{
if (params->hasSufficientGPUmemory(false, 0, 1, numVolumeData) == false)
if (params->hasSufficientGPUmemory(false, 0, numProjectionData, numVolumeData) == false)
{
printf("Error: insufficient GPU memory\n");
return false;
@@ -73,9 +76,9 @@
else if (params->muSpecified())
return project_attenuated(g, f, params, data_on_cpu);
else if (params->geometry == parameters::MODULAR)
return project_Joseph_modular(g, f, params, data_on_cpu);
return project_Joseph_modular(g, f, params, data_on_cpu, volume_on_cpu, accumulate);
else
return project_SF(g, f, params, data_on_cpu);
return project_SF(g, f, params, data_on_cpu, volume_on_cpu, accumulate);
}
#endif
else
@@ -144,10 +147,13 @@ bool projectors::backproject(float* g, float* f, parameters* params, bool data_o
int numVolumeData = 1;
if (volume_on_cpu == false)
numVolumeData = 0;
int numProjectionData = 1;
if (data_on_cpu == false)
numProjectionData = 0;

if (data_on_cpu)
{
if (params->hasSufficientGPUmemory(false, 0, 1, numVolumeData) == false)
if (params->hasSufficientGPUmemory(false, 0, numProjectionData, numVolumeData) == false)
{
printf("Error: insufficient GPU memory\n");
return false;
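Note: as in the filtered_backprojection.cpp hunk above, project and backproject now count a projection-data copy toward the GPU memory check only when the data starts on the CPU, mirroring the existing volume-data count. A minimal sketch of that accounting follows; hasSufficientGPUmemory here is a simplified stand-in for the parameters-class method, and the sizes are hypothetical.

#include <cstdio>

// Simplified stand-in for params->hasSufficientGPUmemory(): count how many
// full copies of the projection and volume arrays must fit on the GPU at once.
bool hasSufficientGPUmemory(float memAvailableGB, float projectionSizeGB, float volumeSizeGB,
                            int numProjectionData, int numVolumeData)
{
    float memNeededGB = numProjectionData * projectionSizeGB + numVolumeData * volumeSizeGB;
    return memAvailableGB >= memNeededGB;
}

int main()
{
    // Arrays already resident on the GPU contribute zero copies, as in the hunks above.
    bool data_on_cpu = true, volume_on_cpu = false;
    int numProjectionData = data_on_cpu ? 1 : 0;
    int numVolumeData = volume_on_cpu ? 1 : 0;
    printf("fits: %d\n", hasSufficientGPUmemory(8.0f, 3.0f, 4.0f, numProjectionData, numVolumeData));
    return 0;
}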
src/tomographic_models.cpp (54 changes: 46 additions & 8 deletions)
@@ -749,6 +749,8 @@ bool tomographicModels::project_multiGPU(float* g, float* f)

float memAvailable = getAvailableGPUmemory(params.whichGPUs);

//memAvailable = 2.0; // FIXME!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

// Calculate the minimum number of rows one would like to calculate at a time before the volume must be divided further
// We want to make sure that this lower bound is set low enough to leave extra room for the volume data
// For now let's make it so it can't occupy more than half the memory
@@ -842,9 +844,13 @@ bool tomographicModels::project_multiGPU(float* g, float* f)
}
else
{
// Further reduce number of volume slices
// memAvailable => params.projectionDataSize() * float(numRows) / float(params.numRows) + params.volumeDataSize() * float(numSlices_stage2) / float(params.numZ)
int numSlices_stage2 = std::max(1, int(floor((memAvailable - params.projectionDataSize() * float(numRows) / float(params.numRows)) * float(params.numZ) / params.volumeDataSize())));
int numChunks_z = std::max(1, int(ceil(float(numSlices) / float(numSlices_stage2))));
numSlices_stage2 = std::max(1, int(ceil(float(numSlices) / float(numChunks_z)))); // make each chunk approximately equal in size

LOG(logDEBUG, className, "") << "GPU " << params.whichGPUs[omp_get_thread_num()] << ": numSlices = " << numSlices_stage2 << ", numRows = " << numRows << std::endl;

float* dev_g = 0;
cudaError_t cudaStatus;
@@ -879,7 +885,7 @@ bool tomographicModels::project_multiGPU(float* g, float* f)
chunk_params.whichGPU = params.whichGPUs[omp_get_thread_num()];
chunk_params.whichGPUs.clear();
if (params.mu != NULL)
chunk_params.mu = &params.mu[uint64(sliceRange[0]) * uint64(params.numX * params.numY)];
chunk_params.mu = &params.mu[uint64(sliceRange_stage2[0]) * uint64(params.numX * params.numY)];

LOG(logDEBUG, className, "") << "GPU " << chunk_params.whichGPU << ": volume: z-slices: (" << sliceRange_stage2[0] << ", " << sliceRange_stage2[1] << "), rows = (" << firstRow << ", " << lastRow << ")" << std::endl;
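Note: the commented inequality above drives the stage-2 sub-chunking: the projection rows assigned to this GPU and the volume sub-slab must fit in memAvailable together, and the resulting slice count is then rebalanced so the z-chunks come out roughly equal. A self-contained sketch of that arithmetic, with hypothetical sizes standing in for params.projectionDataSize() and params.volumeDataSize():

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    // Hypothetical sizes (GB) and counts standing in for the parameters class.
    float memAvailable = 2.0f, projectionDataSize = 1.5f, volumeDataSize = 3.0f;
    int totalRows = 512, totalSlices = 512;  // params.numRows, params.numZ
    int numRows = 256, numSlices = 256;      // the chunk handed to this GPU

    // Solve memAvailable >= projSize*numRows/totalRows + volSize*numSlices_stage2/totalSlices
    // for the largest slice count that still fits, then split into roughly equal chunks.
    int numSlices_stage2 = std::max(1, int(std::floor((memAvailable - projectionDataSize * float(numRows) / float(totalRows)) * float(totalSlices) / volumeDataSize)));
    int numChunks_z = std::max(1, int(std::ceil(float(numSlices) / float(numSlices_stage2))));
    numSlices_stage2 = std::max(1, int(std::ceil(float(numSlices) / float(numChunks_z))));  // equalize chunk sizes

    printf("z-chunks: %d, slices per chunk: %d\n", numChunks_z, numSlices_stage2);
    return 0;
}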

@@ -1072,13 +1078,18 @@ bool tomographicModels::backproject_FBP_multiGPU(float* g, float* f, bool doFBP)

int numProjectionData = 1;
int numVolumeData = 1;
if (params.mu != NULL)
numVolumeData += 1;
if (doFBP)
numProjectionData = 2; // need an extra for texture memory

int numViewsPerChunk = params.numAngles;
int numViewChunks = 1;
//int numViewsPerChunk = params.numAngles;
//int numViewChunks = 1;

float memAvailable = getAvailableGPUmemory(params.whichGPUs);
LOG(logDEBUG, className, "") << "GPU memory available: " << memAvailable << " GB" << std::endl;

//memAvailable = 0.5; // FIXME!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

// Calculate the minimum number of slices one would like to calculate at a time before jobs are broken across views
// We want to make sure that this lower bound is set low enough so that the volume will still fit into memory
@@ -1095,8 +1106,22 @@ bool tomographicModels::backproject_FBP_multiGPU(float* g, float* f, bool doFBP)
int numChunks = std::max(1, int(ceil(float(params.numZ) / float(numSlicesPerChunk))));
if (params.hasSufficientGPUmemory(true, extraCols, numProjectionData, numVolumeData) == false)
{
float memNeeded = backproject_memoryRequired(numSlicesPerChunk, extraCols, doFBP, numViewsPerChunk);
//*
float memNeeded = backproject_memoryRequired(numSlicesPerChunk, extraCols, doFBP);
while (memAvailable < memNeeded)
{
numSlicesPerChunk = numSlicesPerChunk / 2;
if (numSlicesPerChunk <= minSlicesForChunking_local)
{
numSlicesPerChunk = minSlicesForChunking_local;
break;
}
memNeeded = backproject_memoryRequired(numSlicesPerChunk, extraCols, doFBP);
}
//*/

/*
float memNeeded = backproject_memoryRequired(numSlicesPerChunk, extraCols, doFBP, numViewsPerChunk);
while (memAvailable < memNeeded)
{
if (numSlicesPerChunk <= minSlicesForChunking_local)
@@ -1107,8 +1132,9 @@ bool tomographicModels::backproject_FBP_multiGPU(float* g, float* f, bool doFBP)
return false;
memNeeded = backproject_memoryRequired(numSlicesPerChunk, extraCols, doFBP, numViewsPerChunk);
}
numChunks = std::max(1, int(ceil(float(params.numZ) / float(numSlicesPerChunk))));
numViewChunks = std::max(1, int(ceil(float(params.numAngles) / float(numViewsPerChunk))));
//*/
numChunks = std::max(1, int(ceil(float(params.numZ) / float(numSlicesPerChunk))));
}
else if (int(params.whichGPUs.size()) <= 1 || params.requiredGPUmemory(extraCols, numProjectionData, numVolumeData) <= params.chunkingMemorySizeThreshold)
return false;
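Note: the replacement loop above halves the slab size until the backproject_memoryRequired() estimate fits, clamping at the minimum slab size instead of failing; the older loop that also split over views at this stage is left commented out. A sketch of the halving strategy under a hypothetical linear memory model (memoryRequiredGB below is not LEAP's estimator):

#include <algorithm>
#include <cmath>
#include <cstdio>

// Hypothetical memory model: cost scales with the slice fraction plus a fixed overhead (GB).
float memoryRequiredGB(int numSlicesPerChunk, int totalSlices, float volumeSizeGB, float overheadGB)
{
    return volumeSizeGB * float(numSlicesPerChunk) / float(totalSlices) + overheadGB;
}

int main()
{
    float memAvailable = 2.0f;
    int totalSlices = 512, numSlicesPerChunk = 512, minSlicesForChunking = 16;

    // Halve the slab until it fits, but never go below the minimum slab size.
    float memNeeded = memoryRequiredGB(numSlicesPerChunk, totalSlices, 4.0f, 0.5f);
    while (memAvailable < memNeeded)
    {
        numSlicesPerChunk = numSlicesPerChunk / 2;
        if (numSlicesPerChunk <= minSlicesForChunking)
        {
            numSlicesPerChunk = minSlicesForChunking;
            break;
        }
        memNeeded = memoryRequiredGB(numSlicesPerChunk, totalSlices, 4.0f, 0.5f);
    }
    int numChunks = std::max(1, int(std::ceil(float(totalSlices) / float(numSlicesPerChunk))));
    printf("slab size: %d slices, %d chunks\n", numSlicesPerChunk, numChunks);
    return 0;
}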
@@ -1128,8 +1154,6 @@ bool tomographicModels::backproject_FBP_multiGPU(float* g, float* f, bool doFBP)
return false;
}

LOG(logDEBUG, className, "") << "using volume slab size of " << numSlicesPerChunk << " and number of angles: " << numViewsPerChunk << std::endl;

bool retVal = true;

omp_set_num_threads(std::min(int(params.whichGPUs.size()), omp_get_num_procs()));
@@ -1170,8 +1194,16 @@ bool tomographicModels::backproject_FBP_multiGPU(float* g, float* f, bool doFBP)
chunk_params.whichGPU = params.whichGPUs[omp_get_thread_num()];
chunk_params.whichGPUs.clear();

if (numViewChunks <= 1)
// Calculate the view chunking
float memForVolume = numVolumeData * params.volumeDataSize() * float(numSlices) / float(params.numZ);
float memForOneProjection = numProjectionData * params.projectionDataSize(extraCols) * (chunk_params.numRows / float(params.numRows)) / float(params.numAngles);
// memAvailable > memForVolume + numViewsPerChunk * memForOneProjection
int numViewsPerChunk = std::max(1, std::min(params.numAngles, int(floor((memAvailable - memForVolume) / memForOneProjection))));
int numViewChunks = std::max(1, int(ceil(float(params.numAngles) / float(numViewsPerChunk))));
if (numViewsPerChunk == params.numAngles)
{
LOG(logDEBUG, className, "") << "using volume slab size of " << numSlicesPerChunk << std::endl;

//*
float* g_chunk = NULL;
if (rowRange[0] == 0 && rowRange[1] == params.numRows - 1)
Expand All @@ -1197,6 +1229,12 @@ bool tomographicModels::backproject_FBP_multiGPU(float* g, float* f, bool doFBP)
}
else
{
numViewsPerChunk = int(ceil(float(params.numAngles) / float(numViewChunks)));
numViewChunks = std::max(1, int(ceil(float(params.numAngles) / float(numViewsPerChunk))));
LOG(logDEBUG, className, "") << "using volume slab size of " << numSlicesPerChunk << " and number of angles: " << numViewsPerChunk << std::endl;
//LOG(logDEBUG, className, "") << "memForVolume = " << memForVolume << std::endl;
//LOG(logDEBUG, className, "") << "memForOneProjection = " << memForOneProjection << std::endl;

cudaError_t cudaStatus;
cudaSetDevice(chunk_params.whichGPU);
float* dev_f = 0;
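Note: view chunking is now decided per GPU thread, inside the slab loop: the slab's volume footprint is subtracted from the available memory, the remainder determines how many views fit at once, and the view-chunked branch runs only when that count falls short of params.numAngles. A sketch of that calculation with hypothetical sizes (the factor of 2 projection copies mirrors the extra texture-memory copy used for FBP):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    // Hypothetical sizes (GB) and counts standing in for the parameters class.
    float memAvailable = 2.0f, volumeDataSize = 4.0f, projectionDataSize = 3.0f;
    int totalSlices = 512, totalRows = 512, numAngles = 720;
    int numSlices = 128, numRows = 256;            // this GPU's slab and its row footprint
    int numVolumeData = 1, numProjectionData = 2;  // FBP keeps an extra projection copy for texture memory

    float memForVolume = numVolumeData * volumeDataSize * float(numSlices) / float(totalSlices);
    float memForOneProjection = numProjectionData * projectionDataSize * (float(numRows) / float(totalRows)) / float(numAngles);

    // memAvailable > memForVolume + numViewsPerChunk * memForOneProjection
    int numViewsPerChunk = std::max(1, std::min(numAngles, int(std::floor((memAvailable - memForVolume) / memForOneProjection))));
    int numViewChunks = std::max(1, int(std::ceil(float(numAngles) / float(numViewsPerChunk))));
    numViewsPerChunk = int(std::ceil(float(numAngles) / float(numViewChunks)));  // equalize chunk sizes

    printf("view chunks: %d, views per chunk: %d\n", numViewChunks, numViewsPerChunk);
    return 0;
}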
unitTests/unit_tests.py (10 changes: 6 additions & 4 deletions)
@@ -10,8 +10,8 @@
pixelSize = 1.03*2.0*250.0/512 # 0.5*512*x=240
centerRow = 0.5*(numRows-1)
centerCol = 0.5*(numCols-1)
#sdd = 765
sdd = 1500
sdd = 765
#sdd = 1500
sod = 0.5*sdd

#centerCol += centerCol*0.1
@@ -24,6 +24,7 @@

includeEar = False

geometries = list(range(6))
voxelScales = [0.5, 1.0, 2.0]
angularRanges = [200.0, 360.0]
projection_methods = ['VD', 'SF']
@@ -34,9 +35,10 @@
#voxelScales = [1.0]
#voxelScales = [2.0]
#projection_methods = ['VD']
#geometries = [3]


for igeom in range(6):
for ii in range(len(geometries)):
igeom = geometries[ii]
leapct.reset()
#leapct.set_rampFilter(12)
geo = None
