%function INDFunc
%
%This function will calculate the value of the indicator function. These
%values are used to predict the optimal number of principal components one
%needs from a PCA-analysis. The indicator function has, according to
%Elbergali et al. [1], a better predictive value of this optimal number of
%principal components.
%
%Syntax:
% Ind = INDFunc (X, T, l, full)
% Ind = INDFunc (X, T, l)
% Ind = INDFunc (X, T)
%
%Input parameters:
% - X: your original X-data
% - T: the corresponding score matrix
% - l: the numbers of principal components of which the indicator
% function needs to be calculated (has no influence if full ==1),
% optional: if not given then full = 1
% - full: if 1, then the value of the indicator function will be
% calculated for all principal components, default 0
%
%Output parameters:
% - Ind: the value of the indicator function
%
%Literature:
% 1) Elbergali, A. et al. Analytica chimica acta, 379, 1999, 143-158
%The Biodata toolbox for MATLAB: a spectral database system for storing and
%processing spectra
%C 2003-2008, Kris De Gussem, Raman Spectroscopy Research Group, Department
%of analytical chemistry, Ghent University
%C 2009 Kris De Gussem
%
%This file is part of Biodata.
%
%Biodata is free software: you can redistribute it and/or modify
%it under the terms of the GNU General Public License as published by
%the Free Software Foundation, either version 3 of the License, or
%(at your option) any later version.
%
%Biodata is distributed in the hope that it will be useful,
%but WITHOUT ANY WARRANTY; without even the implied warranty of
%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%GNU General Public License for more details.
%
%You should have received a copy of the GNU General Public License
%along with Biodata. If not, see <http://www.gnu.org/licenses/>.
%Copyright (c) 2003-2009, Kris De Gussem
%All rights reserved.
%
%Redistribution and use in source and binary forms, with or without
%modification, are permitted provided that the following conditions are
%met:
%
% * Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
% * Redistributions in binary form must reproduce the above copyright
% notice, this list of conditions and the following disclaimer in
% the documentation and/or other materials provided with the distribution
% * Neither the name of Raman Spectroscopy Research Group, Department of
% analytical chemistry, Ghent University nor the names
% of its contributors may be used to endorse or promote products derived
% from this software without specific prior written permission.
%
%THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
%AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
%IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
%ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
%LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
%CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
%SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
%INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
%CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
%ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
%POSSIBILITY OF SUCH DAMAGE.
function Ind = INDFunc (X, T, l, full)
% Computes the indicator function (IND) for the requested numbers of
% principal components.
%
% Inputs:
%   X    - data matrix (m rows, n columns)
%   T    - score matrix, same number of rows as X
%   l    - row vector (or scalar) with the numbers of principal components
%          to evaluate; overwritten by 1:size(T,2) when full == 1
%   full - 1: evaluate for every column of T; defaults to 1 when only X and
%          T are given and to 0 when l is given
%
% Output:
%   Ind  - one value per entry of l. Entries for which min(m,n) - l <= 0
%          are NaN, because the expression divides by that quantity.
%
% Errors (identifier 'Biodata:msg'): wrong number of arguments, row
% mismatch between X and T, or l outside the valid range.

%check input
switch nargin
    case 2
        full = 1;
    case 3
        full = 0;
    case 4
        % both l and full were supplied by the caller
    otherwise
        error ('Biodata:msg', '2, 3 of 4 parameters needed; see help INDFunc ...');
end
if size (X,1) ~= size (T,1)
    error ('Biodata:msg', 'X and T komen do not correspond ...');
end

[m, n] = size(X);
nPC = size (T,2);
if full == 1
    l = 1:1:nPC; % calculate Ind for all principal components
end

% l may be a vector, so the element-wise comparisons have to be reduced
% with any() before they can be combined with the scalar-only || operator.
if (any (l(:) <= 0) || any (l(:) > m))
    disp (['l: ' mat2str(l)]);
    disp (['m: ' num2str(m)]);
    error ('Biodata:msg', 'Second parameter needs to be between 1 and the number of variables m (minus 1) ...');
end
% l is used below as a column index into the cumulative sums of T
if any (l(:) > nPC)
    error ('Biodata:msg', 'l cannot be larger than the number of columns of the score matrix T ...');
end

%calculate total variance
g_tot = sum(sum (X.*X));

%calculate lambda (column sums of squared scores; the explicit dimension
%keeps this a per-component sum even when T has a single row)
lambda = sum (T.*T, 1)';
q = min (n,m);
Sl = cumsum (lambda)';

% Residual sum of squares after l components. Round-off can push it
% marginally below zero, which would turn the square root complex.
resid = max (g_tot - Sl(1,l), 0);
dof = q - l;

% The expression divides by dof, so it is only evaluated where dof > 0;
% the remaining entries (e.g. the last PC of a full-dimensional
% decomposition) are reported as NaN.
Ind = NaN (size (l));
ok = dof > 0;
Ind (ok) = (1./dof(ok).^2) .* sqrt (resid(ok) ./ (n * dof(ok)));