%% Handle PCA in the Case of Large-Volume Datasets
% This code shows how to compute principal components for
% large-volume datasets. We load the samples one by one to compute
% the mean and the covariance. The covariance matrix is accumulated from
% per-sample vector (outer-product) operations. Then we use the MATLAB
% function eig to compute the eigenvalues and eigenvectors.
% Copyright Richard Hong.
% National University of Singapore
% $Revision: 1.1.6.7 $ $Date: 2008/12/13 15:53:31 $
function [EigValue,EigVec] = LPCA(strFileName,odim,ndim)
%LPCA  Streaming PCA for data sets that are read one sample at a time.
%   [EigValue,EigVec] = LPCA(strFileName,odim,ndim)
%
%   Inputs:
%     strFileName - path of a text file of whitespace-separated numbers.
%                   The file is read as a flat stream: every odim
%                   consecutive values form one sample (normally one
%                   sample per row, one feature per column).
%     odim        - the original feature dimension.
%     ndim        - the dimension to reduce to (must not exceed odim).
%
%   Outputs:
%     EigValue - odim-by-1 eigenvalues of the covariance matrix, sorted
%                in ascending order.
%     EigVec   - odim-by-odim matrix whose k-th column is the eigenvector
%                belonging to EigValue(k).
%     Both outputs are [] when odim < ndim.
%
%   Side effects:
%     Writes 'pcaReduced.txt' in the current folder: one line per sample
%     holding its ndim projections onto the eigenvectors with the largest
%     eigenvalues, formatted with '%7.4f' and separated by single spaces.
%     The raw (not mean-centred) samples are projected.
%
%   Notes:
%     The covariance is normalised by the number of samples N (not N-1).
%     The input file is scanned three times (mean, covariance, projection)
%     so that only one sample has to be held in memory at a time.

% Give the outputs a defined value so that the early return below cannot
% trigger an "output argument not assigned" error in the caller.
EigValue = [];
EigVec = [];

% check the dim
if odim < ndim
    disp('ndim should lower than odim');
    return;
end

% Open the input once; it is rewound between passes. The cleanup object
% closes the handle on normal exit as well as when an error is raised.
fid = LPCA_openFile(strFileName, 'r');
closeInput = onCleanup(@() fclose(fid)); %#ok<NASGU>

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Pass 1: mean of the samples
disp('................Start to compute the mean of the samples.....................')
m_sumVec = zeros(odim,1);
num = 0;
vec = LPCA_readSample(fid, odim, strFileName);
while ~isempty(vec)
    m_sumVec = m_sumVec + vec;
    num = num + 1;
    if mod(num,10000) == 1
        disp('Now processed:');disp(num);
    end
    vec = LPCA_readSample(fid, odim, strFileName);
end
if num == 0
    % Without this guard the mean and covariance would be divided by zero.
    error('LPCA:noSamples', 'No samples could be read from "%s".', strFileName);
end
m_Vec = m_sumVec / num;
disp('....................Compute the means of the samples over!!...................')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Pass 2: covariance of the samples
% Each sample is centred in memory. Round-tripping the centred data
% through a text file would truncate it to the precision of the format.
disp('................Start to compute the covariance of the samples.....................')
frewind(fid);
coV = zeros(odim,odim);
processed = 0;
vec = LPCA_readSample(fid, odim, strFileName);
while ~isempty(vec)
    centred = vec - m_Vec;
    coV = coV + centred * centred';
    processed = processed + 1;
    if mod(processed,10000) == 1
        disp('Now processed:');disp(processed);
    end
    vec = LPCA_readSample(fid, odim, strFileName);
end
% Mirror the upper triangle so the matrix handed to eig is exactly
% symmetric, then normalise by the sample count.
coV = triu(coV) + triu(coV,1)';
coV = coV / num;
disp('................Compute the covariance of the samples over!.....................')

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Eigen-decomposition of the covariance matrix
[V,D] = eig(coV);
% Do not rely on the order in which eig returns the eigenvalues: sort them
% ascending so the last ndim columns are the leading principal directions.
[EigValue, order] = sort(diag(D));
EigVec = V(:,order);
% produce new basis
v_basis = EigVec(:,odim-ndim+1:odim);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Pass 3: project every sample onto the new basis
disp('................Start to compute the projection after eigen decomposition....................')
frewind(fid);
fod = LPCA_openFile('pcaReduced.txt', 'w');
closeOutput = onCleanup(@() fclose(fod)); %#ok<NASGU>
processed = 0;
vec = LPCA_readSample(fid, odim, strFileName);
while ~isempty(vec)
    proj = vec' * v_basis;
    % Separate the values with a blank; '%7.4f' alone lets any value that
    % needs more than seven characters run into its neighbour.
    for k = 1:ndim
        if k > 1
            fprintf(fod,' ');
        end
        fprintf(fod,'%7.4f',proj(k));
    end
    fprintf(fod,'\n');
    processed = processed + 1;
    if mod(processed,10000) == 1
        disp('Now processed:');disp(processed);
    end
    vec = LPCA_readSample(fid, odim, strFileName);
end

function fid = LPCA_openFile(fileName, mode)
% Open fileName in the given mode; raise an error if it cannot be opened.
[fid, msg] = fopen(fileName, mode);
if fid < 0
    error('LPCA:fileOpen', 'Cannot open "%s": %s', fileName, msg);
end

function vec = LPCA_readSample(fid, odim, fileName)
% Read the next odim values as a column vector; return [] when no more
% values can be read. A partial trailing sample is reported as an error.
vec = fscanf(fid, '%f', odim);
if ~isempty(vec) && numel(vec) ~= odim
    error('LPCA:truncatedSample', ...
        'Incomplete sample in "%s": expected %d values but read %d.', ...
        fileName, odim, numel(vec));
end