from
poly2mask using GPU
by Jun Tan
poly2mask using GPU.
|
| poly2mask_gpu(x, y, m, n)
|
function mask = poly2mask_gpu(x, y, m, n)
%POLY2MASK_GPU Rasterize a polygon into an m-by-n logical mask on the GPU.
%   MASK = POLY2MASK_GPU(X, Y, M, N) runs the CUDA kernel loaded from
%   'poly2mask_cuda.ptx' / 'poly2mask_cuda.cu' over an M-by-N int32 buffer
%   and returns the gathered result converted to a logical array.
%
%   Inputs:
%     X, Y - polygon vertex coordinates. Both must have the same number of
%            elements; they are converted to single before the launch.
%     M, N - number of rows and columns of the mask.
%
%   Output:
%     MASK - M-by-N logical array on the host.
%
%   Errors:
%     poly2mask_gpu:sizeMismatch - X and Y have different element counts.
%
%   The kernel object is created once and cached in a persistent variable,
%   so only the first call pays the PTX loading cost.

    % The kernel is told only one vertex count, so x and y must agree;
    % otherwise it would index past the end of the shorter buffer.
    if numel(x) ~= numel(y)
        error('poly2mask_gpu:sizeMismatch', ...
            'x and y must have the same number of elements (got %d and %d).', ...
            numel(x), numel(y));
    end

    % Degenerate cases: an empty mask cannot be launched (the grid size
    % would be zero), and a polygon without vertices covers no pixels.
    % Answer both on the host without touching the GPU.
    if m <= 0 || n <= 0 || isempty(x)
        mask = false(m, n);
        return;
    end

    persistent k_poly2mask;
    if isempty(k_poly2mask)
        gpu = gpuDevice;
        k_poly2mask = parallel.gpu.CUDAKernel('poly2mask_cuda.ptx', 'poly2mask_cuda.cu');
        % Use only half of the maximum number of threads per block. The
        % original author observed that launches sometimes fail with more
        % than half; the cause is not known.
        k_poly2mask.ThreadBlockSize = gpu.MaxThreadsPerBlock / 2;
    end

    % Launch enough blocks to provide at least one thread per mask element.
    nMaskPoints = m * n;
    nBlocks = ceil(nMaskPoints / k_poly2mask.ThreadBlockSize(1));
    if nBlocks <= 1024
        k_poly2mask.GridSize = nBlocks;
    else
        % Use no more than 1024 blocks per grid row; spill the rest into
        % a second grid dimension.
        k_poly2mask.GridSize = [1024 ceil(nBlocks / 1024)];
    end

    % Build the zeroed output buffer on the host and upload it. This form
    % does not depend on a release-specific GPU "zeros" constructor.
    g_mask = gpuArray(zeros(m, n, 'int32'));
    g_nMaskPoints = gpuArray(int32(nMaskPoints));
    g_nPolygonPoints = gpuArray(int32(numel(x)));
    g_x = gpuArray(single(x));
    g_y = gpuArray(single(y));
    g_m = gpuArray(int32(m));
    g_n = gpuArray(int32(n));

    g_mask = feval(k_poly2mask, ...
        g_mask, g_nMaskPoints, g_nPolygonPoints, ...
        g_x, g_y, g_m, g_n);

    % Bring the int32 result back to the host; nonzero entries become true.
    mask = logical(gather(g_mask));
|
|
Contact us