Code covered by the BSD License  

Highlights from
A CUDA accelerated Beam Propagation Method [BPM] Solver using the Parallel Computing Toolbox

image thumbnail

A CUDA accelerated Beam Propagation Method [BPM] Solver using the Parallel Computing Toolbox

by

 

21 Oct 2010 (Updated )

A beam propagation method solver using the CUDA capabilities in the parallel computing toolbox.

CUDAparaxbpm(CUDAswitch, PLOTswitch, BetaSwitch, Nx, Ny, LengthX, LengthY, LengthZ, deltaZ, Lambda)
%CUDABEAMPROP A CUDA-MATLAB implementation of a simple 2D FFT based
% paraxial beam propagation method
%
% dE/dz = 1/(2ibeta)( (d/dx^2 + d/dy^2) - (beta^2 - kzero^2*n^2) )E
%
% Author: Patrick Kano, Paul Lundquist, Eric Nelson-Melby
% Applied Energetics, Inc.
% Modification Dates [M/D/Y]: 10/21/10 - Initial script work
% References: 
% www.mathworks.com/discovery/matlab-gpu.html
% A. Weideman, Linear Dispersive Wave Equations, 
% http://dip.sun.ac.za/~weideman/research/waves.html
% Okamoto, K.  Fundamentals of optical waveguides, Academic Press, 2000.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [RunTimeOut,Efield,VecposX,VecposY] = CUDAparaxbpm(CUDAswitch, PLOTswitch, BetaSwitch, Nx, Ny, LengthX, LengthY, LengthZ, deltaZ, Lambda)

if(nargin==0)
 CUDAswitch = 1; %0-CPU fft2, 1-GPU fft2
 PLOTswitch = 1; %0-No plots, 1-With plots
 BetaSwitch = 1; %0-Constant beta, 1-Update beta

 LengthX = 4*10^(-3);  %m
 LengthY = 4*10^(-3);  %m
 LengthZ = 5*10^(-3);     %m
 deltaZ = LengthZ/1000; %m

 Lambda = 1000*10^(-6); %m

 Nx = 2^6; %Must be 2^N
 Ny = 2^6; %Must be 2^N
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%DO NOT MODIFY BELOW THIS LINE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;

%Check for the parallel computing toolbox is installed
vcheck = ver;
Texists = 0;
for vidx=1:length(vcheck)
 vname = getfield(vcheck(vidx),'Name');
 vversion = str2num(getfield(vcheck(vidx),'Version'));
 if(strcmp(vname,'Parallel Computing Toolbox')==1 && 5.0<=vversion), Texists=1; end     
end

if(Texists==0)
 disp('The parallel computing toolbox of version 5.0 or higher is not available.');
 disp('Defaulting to a computation with standard MATLAB.');
 CUDAswitch = 0;
else
 %Check that a GPU is aviable.  
 %If it does not exist, compute with standard MATLAB.

 %If a GPU does exist, check that it has sufficient capability.
 %If does not have sufficient capability, return an error.

 NumGPUs = gpuDeviceCount;
 if NumGPUs==0
  disp('No GPU detected.  Computation with standard MATLAB.');
  CUDAswitch = 0;
 else
  gpuProperties = gpuDevice;

  if(str2num(gpuProperties.ComputeCapability)<1.3), error(['Insufficient computing capability = ',str2num(gpuProperties.ComputeCapability)]); end
 end
end

kzerolambda = 2*pi/Lambda;

%%Setup the grid and the wavenumbers
nXidx = -Nx:Nx-1;
deltaX = LengthX/Nx;
VecposX = nXidx*deltaX; %[-Lx,Lx]
VeckX = nXidx*pi/LengthX;

nYidx = -Ny:Ny-1;
deltaY = LengthY/Ny;
VecposY = nYidx*deltaY; %[-Ly,Ly]
VeckY = nYidx*pi/LengthY;

Nz = floor(LengthZ/deltaZ);
VecposZ = linspace(0,LengthZ,Nz);

[MeshX, MeshY] = meshgrid(VecposX,VecposY);
[MeshkX, MeshkY] = meshgrid(VeckX,VeckY);

MeshSquarekxy = MeshkX.*MeshkX + MeshkY.*MeshkY;
MeshSquarekxy = fftshift(MeshSquarekxy);

%%Intiailize the E-field to ones
Efield = FunEfieldinit(MeshX, MeshY, Nx, Ny, LengthX, LengthY);

nMatrix = FunRefractIdx(MeshX, MeshY, Nx, Ny, 0);

Beta = kzerolambda*sqrt( sum(sum(abs(nMatrix.*Efield).^2)) / sum(sum(abs(Efield).^2))); %Ignore spatial E-gradients
BetaStoreVec = kzerolambda*ones(Nz,1);

FreeSpaceProp = exp(1i*deltaZ*MeshSquarekxy/(2*Beta));
%if CUDAswitch==1, gFreeSpaceProp = gpuArray(FreeSpaceProp);  end %Put on the GPU

%%Z-updates
for zidx=1:Nz
 PosZ = VecposZ(zidx);
 nMatrix = FunRefractIdx(MeshX, MeshY, Nx, Ny, PosZ);

 if BetaSwitch==1 %Updated beta methods
  Beta = kzerolambda*sqrt( sum(sum(abs(nMatrix.*Efield).^2)) / sum(sum(abs(Efield).^2))); %Ignore spatial E-gradients
  BetaStoreVec(zidx) = Beta; 
   
  FreeSpaceProp = exp(1i*deltaZ*MeshSquarekxy/(2*Beta));
  %if CUDAswitch==1, gFreeSpaceProp = gpuArray(FreeSpaceProp);  end %Put on the GPU   
 end
  
 PhaseScreen = exp((0.5*deltaZ/(-2i*Beta))*(Beta*Beta*ones(2*Nx,2*Ny) - kzerolambda*kzerolambda*nMatrix.^2));
 
 %Strang-splitting
 Efield = PhaseScreen.*Efield;
 if CUDAswitch==0, Efield = ifft2( FreeSpaceProp.*fft2(Efield) );
 else Efield = gather(ifft2( gpuArray(FreeSpaceProp).*fft2( gpuArray(Efield)) ));     
 end
 Efield = PhaseScreen.*Efield;
 
 if PLOTswitch==1, FunPlotSim(zidx, PosZ, MeshX, MeshY, nMatrix, Efield, MeshkX, MeshkY, MeshSquarekxy); end   
end %z for loop 

if(PLOTswitch==1)
 figure(1004);
 plot(VecposZ,BetaStoreVec,'-ko');
 xlabel('z [m]');
 ylabel('\beta');
end 
 
RunTimeOut=toc;
end %function definition
%end of file
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Contact us