% From "poly2mask using GPU" by Jun Tan.
% poly2mask using GPU.
%
% Usage: mask = poly2mask_gpu(x, y, m, n)
function mask = poly2mask_gpu(x, y, m, n)
%POLY2MASK_GPU Build an m-by-n logical mask from polygon vertices on the GPU.
%   MASK = POLY2MASK_GPU(X, Y, M, N) launches the CUDA kernel loaded from
%   poly2mask_cuda.ptx (prototype read from poly2mask_cuda.cu) with at least
%   one thread per mask element, then gathers the int32 result and converts
%   it to an M-by-N logical array.
%
%   Inputs:
%     x, y - polygon vertex coordinates; must contain the same number of
%            elements. They are passed to the kernel as single precision.
%            NOTE(review): the coordinate convention (presumably x = column,
%            y = row, as in poly2mask) is defined by the kernel - confirm.
%     m, n - number of rows and columns of the mask.
%
%   Output:
%     mask - M-by-N logical array; true wherever the kernel wrote a nonzero.
%
%   An empty mask (M*N == 0) or a polygon with no vertices returns
%   false(M, N) without touching the GPU.
%
%   Errors:
%     poly2mask_gpu:sizeMismatch - x and y have different element counts.

% The kernel object is created once and reused across calls.
persistent k_poly2mask;

% The kernel receives a single vertex count for both coordinate buffers, so
% a mismatch would make it read past the end of the shorter one.
if numel(x) ~= numel(y)
    error('poly2mask_gpu:sizeMismatch', ...
        'x and y must have the same number of elements.');
end

nMaskPoints = m * n;
nPolygonPoints = numel(x);

% Nothing to rasterize: avoid a zero-sized grid (invalid GridSize) and a
% kernel launch with empty vertex buffers.
if nMaskPoints == 0 || nPolygonPoints == 0
    mask = false(m, n);
    return;
end

if isempty(k_poly2mask)
    gpu = gpuDevice;
    k_poly2mask = parallel.gpu.CUDAKernel('poly2mask_cuda.ptx', 'poly2mask_cuda.cu');
    % The original author found that using more than half of the device's
    % thread limit was unreliable. Additionally cap by the kernel's own
    % limit, which can be lower than the device limit.
    k_poly2mask.ThreadBlockSize = min(floor(gpu.MaxThreadsPerBlock / 2), ...
        k_poly2mask.MaxThreadsPerBlock);
end

% Enough blocks to cover every mask element with one thread.
nBlocks = ceil(nMaskPoints / k_poly2mask.ThreadBlockSize(1));
if nBlocks <= 1024 % Use no more than 1024 blocks per grid row.
    k_poly2mask.GridSize = nBlocks;
else
    % Spill into a second grid dimension; this may launch surplus blocks.
    % NOTE(review): presumably the kernel uses nMaskPoints to ignore the
    % surplus threads - confirm in poly2mask_cuda.cu.
    k_poly2mask.GridSize = [1024 ceil(nBlocks/1024)];
end

% Stage all kernel arguments on the GPU with the types used by the original
% call: int32 counts/sizes and single-precision coordinates.
g_mask = zeros(m, n, 'int32', 'gpuArray');
g_nMaskPoints = gpuArray(int32(nMaskPoints));
g_nPolygonPoints = gpuArray(int32(nPolygonPoints));
g_x = gpuArray(single(x(:)));
g_y = gpuArray(single(y(:)));
g_m = gpuArray(int32(m));
g_n = gpuArray(int32(n));

% The kernel's first argument is returned as its output.
g_mask = feval(k_poly2mask, ...
    g_mask, g_nMaskPoints, g_nPolygonPoints, ...
    g_x, g_y, g_m, g_n);

% Bring the result back to host memory and convert nonzero -> true.
mask = logical(gather(g_mask));

% Contact us