function data_out=fftGPU(data_in,sign)
% FFTGPU Compute the FFT of every column of data_in through the gpu_FFT MEX-file.
%
%   data_out = fftGPU(data_in, sign)
%
%   Assumes that the MEX-file gpu_FFT is compiled and reachable on the path.
%
%   Inputs
%     data_in : 2-D matrix holding one signal per column.
%               - The FFT length (number of rows) must be a power of 2,
%                 at least 16 for complex data and 32 for real data.
%               - The number of FFTs (number of columns) must be 1 or a
%                 power of 2.
%     sign    :  1 for FORWARD_FFT
%               -1 for BACKWARD_FFT
%
%   Output
%     data_out : same answer as the fft function of MATLAB, one spectrum
%                per column, same size as data_in.
%
%   Errors
%     fftGPU:invalidSign  - sign is not the scalar 1 or -1.
%     fftGPU:invalidInput - data_in is not a 2-D matrix.
%     fftGPU:invalidSize  - the row/column counts violate the rules above.
%
%   WARNING: the fft is done in single precision.
%
%
% (c) Simon Potvin and Jerome Genest, July 2006
% Centre d'optique, photonique et laser (COPL)
% Universite Laval
% Quebec, Canada
%
% Mail : simon.potvin.1@ulaval.ca

% ---- Validate the documented preconditions before touching the GPU ----
if ~(isscalar(sign) && (sign == 1 || sign == -1))
    error('fftGPU:invalidSign', ...
        'sign must be 1 (FORWARD_FFT) or -1 (BACKWARD_FFT).');
end
if ndims(data_in) ~= 2
    error('fftGPU:invalidInput', ...
        'data_in must be a 2-D matrix with one signal per column.');
end

[nRows, nCols] = size(data_in);
realInput = isreal(data_in);
if realInput
    minRows = 32;
else
    minRows = 16;
end
if nRows < minRows || nRows ~= 2^nextpow2(nRows)
    error('fftGPU:invalidSize', ...
        'FFT length (rows) must be a power of 2 and at least %d; got %d.', ...
        minRows, nRows);
end
if nCols < 1 || nCols ~= 2^nextpow2(nCols)
    error('fftGPU:invalidSize', ...
        'Number of FFTs (columns) must be 1 or a power of 2; got %d.', nCols);
end

% NOTE(review): gpu_FFT is called without an output argument and its result
% is read back from the variable that was passed in, i.e. the MEX-file
% appears to overwrite its first argument in place. Confirm that the buffer
% handed to it can never alias the caller's array.

if realInput
    half = nRows/2;

    % Pre-allocate as complex; the +1i only forces complex storage, every
    % element is overwritten in the loop below.
    data_out = zeros(nRows, nCols) + 1i;

    buf = single(transpose(data_in));
    gpu_FFT(buf, sign);

    % The result is read as pairs: row 1 is used as the real part and
    % row 2 as the imaginary part. Each signal owns 'half' consecutive
    % pairs; in the first pair of a signal, row 1 goes to bin 1 and row 2
    % goes to bin half+1.
    packed = reshape(buf, 2, []);

    forward = (sign == 1);
    for col = 1:nCols
        first = (col-1)*half + 1;   % pair holding bins 1 and half+1
        last  = col*half;           % last pair belonging to this signal

        rePart = packed(1, first+1:last);
        imPart = packed(2, first+1:last);

        conjugated = rePart - 1i*imPart;
        mirrored   = fliplr(rePart) + fliplr(1i*imPart);

        data_out(1, col)      = packed(1, first);
        data_out(half+1, col) = packed(2, first);

        % The two directions only differ in which half of the spectrum
        % receives the conjugated values and which the mirrored ones.
        if forward
            data_out(2:half, col)       = conjugated;
            data_out(half+2:end, col)   = mirrored;
        else
            data_out(2:half, col)       = mirrored;
            data_out(half+2:end, col)   = conjugated;
        end
    end
else
    % Complex input: the buffer returned by gpu_FFT is used directly,
    % reshaped back to nRows rows.
    data_out = single(transpose(data_in));
    gpu_FFT(data_out, sign);
    data_out = reshape(data_out, nRows, []);
end