Code covered by the BSD License

# A CUDA accelerated Beam Propagation Method [BPM] Solver using the Parallel Computing Toolbox

### Patrick Kano (view profile)

21 Oct 2010 (Updated )

A beam propagation method solver using the CUDA capabilities in the parallel computing toolbox.

CUDAparaxbpm(CUDAswitch, PLOTswitch, BetaSwitch, Nx, Ny, LengthX, LengthY, LengthZ, deltaZ, Lambda)
%CUDABEAMPROP A CUDA-MATLAB implementation of a simple 2D FFT based
% paraxial beam propagation method
%
% dE/dz = 1/(2ibeta)( (d/dx^2 + d/dy^2) - (beta^2 - kzero^2*n^2) )E
%
% Author: Patrick Kano, Paul Lundquist, Eric Nelson-Melby
% Applied Energetics, Inc.
% Modification Dates [M/D/Y]: 10/21/10 - Initial script work
% References:
% www.mathworks.com/discovery/matlab-gpu.html
% A. Weideman, Linear Dispersive Wave Equations,
% http://dip.sun.ac.za/~weideman/research/waves.html
% Okamoto, K.  Fundamentals of optical waveguides, Academic Press, 2000.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [RunTimeOut,Efield,VecposX,VecposY] = CUDAparaxbpm(CUDAswitch, PLOTswitch, BetaSwitch, Nx, Ny, LengthX, LengthY, LengthZ, deltaZ, Lambda)

if(nargin==0)
CUDAswitch = 1; %0-CPU fft2, 1-GPU fft2
PLOTswitch = 1; %0-No plots, 1-With plots
BetaSwitch = 1; %0-Constant beta, 1-Update beta

LengthX = 4*10^(-3);  %m
LengthY = 4*10^(-3);  %m
LengthZ = 5*10^(-3);     %m
deltaZ = LengthZ/1000; %m

Lambda = 1000*10^(-6); %m

Nx = 2^6; %Must be 2^N
Ny = 2^6; %Must be 2^N
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%DO NOT MODIFY BELOW THIS LINE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;

%Check for the parallel computing toolbox is installed
vcheck = ver;
Texists = 0;
for vidx=1:length(vcheck)
vname = getfield(vcheck(vidx),'Name');
vversion = str2num(getfield(vcheck(vidx),'Version'));
if(strcmp(vname,'Parallel Computing Toolbox')==1 && 5.0<=vversion), Texists=1; end
end

if(Texists==0)
disp('The parallel computing toolbox of version 5.0 or higher is not available.');
disp('Defaulting to a computation with standard MATLAB.');
CUDAswitch = 0;
else
%Check that a GPU is aviable.
%If it does not exist, compute with standard MATLAB.

%If a GPU does exist, check that it has sufficient capability.
%If does not have sufficient capability, return an error.

NumGPUs = gpuDeviceCount;
if NumGPUs==0
disp('No GPU detected.  Computation with standard MATLAB.');
CUDAswitch = 0;
else
gpuProperties = gpuDevice;

if(str2num(gpuProperties.ComputeCapability)<1.3), error(['Insufficient computing capability = ',str2num(gpuProperties.ComputeCapability)]); end
end
end

kzerolambda = 2*pi/Lambda;

%%Setup the grid and the wavenumbers
nXidx = -Nx:Nx-1;
deltaX = LengthX/Nx;
VecposX = nXidx*deltaX; %[-Lx,Lx]
VeckX = nXidx*pi/LengthX;

nYidx = -Ny:Ny-1;
deltaY = LengthY/Ny;
VecposY = nYidx*deltaY; %[-Ly,Ly]
VeckY = nYidx*pi/LengthY;

Nz = floor(LengthZ/deltaZ);
VecposZ = linspace(0,LengthZ,Nz);

[MeshX, MeshY] = meshgrid(VecposX,VecposY);
[MeshkX, MeshkY] = meshgrid(VeckX,VeckY);

MeshSquarekxy = MeshkX.*MeshkX + MeshkY.*MeshkY;
MeshSquarekxy = fftshift(MeshSquarekxy);

%%Intiailize the E-field to ones
Efield = FunEfieldinit(MeshX, MeshY, Nx, Ny, LengthX, LengthY);

nMatrix = FunRefractIdx(MeshX, MeshY, Nx, Ny, 0);

Beta = kzerolambda*sqrt( sum(sum(abs(nMatrix.*Efield).^2)) / sum(sum(abs(Efield).^2))); %Ignore spatial E-gradients
BetaStoreVec = kzerolambda*ones(Nz,1);

FreeSpaceProp = exp(1i*deltaZ*MeshSquarekxy/(2*Beta));
%if CUDAswitch==1, gFreeSpaceProp = gpuArray(FreeSpaceProp);  end %Put on the GPU

for zidx=1:Nz
PosZ = VecposZ(zidx);
nMatrix = FunRefractIdx(MeshX, MeshY, Nx, Ny, PosZ);

if BetaSwitch==1 %Updated beta methods
Beta = kzerolambda*sqrt( sum(sum(abs(nMatrix.*Efield).^2)) / sum(sum(abs(Efield).^2))); %Ignore spatial E-gradients
BetaStoreVec(zidx) = Beta;

FreeSpaceProp = exp(1i*deltaZ*MeshSquarekxy/(2*Beta));
%if CUDAswitch==1, gFreeSpaceProp = gpuArray(FreeSpaceProp);  end %Put on the GPU
end

PhaseScreen = exp((0.5*deltaZ/(-2i*Beta))*(Beta*Beta*ones(2*Nx,2*Ny) - kzerolambda*kzerolambda*nMatrix.^2));

%Strang-splitting
Efield = PhaseScreen.*Efield;
if CUDAswitch==0, Efield = ifft2( FreeSpaceProp.*fft2(Efield) );
else Efield = gather(ifft2( gpuArray(FreeSpaceProp).*fft2( gpuArray(Efield)) ));
end
Efield = PhaseScreen.*Efield;

if PLOTswitch==1, FunPlotSim(zidx, PosZ, MeshX, MeshY, nMatrix, Efield, MeshkX, MeshkY, MeshSquarekxy); end
end %z for loop

if(PLOTswitch==1)
figure(1004);
plot(VecposZ,BetaStoreVec,'-ko');
xlabel('z [m]');
ylabel('\beta');
end

RunTimeOut=toc;
end %function definition
%end of file
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%