%function mean_woK
%
%This function calculates the average spectrum of some spectra. Outliers
%are eliminated per spectral channel.
%
%syntax:
% meandata = mean_woK(inputdata, TitleGraph, RamanAxis)
%
%Parameters:
% - inputdata: the spectra, 1 spectrum is one row
% - TitleGraph: optional title for the graph in which the original
% spectra and the spectra without outliers are plotted. When TitleGraph
% has the value 'NoGraph', the plot is automatically closed at the end of
% CalculateMeans.
% - RamanAxis: the values of the x-xaxis
% - meandata: the average spectrum
%
%Example:
% data = rand (20,50); %generate data
% data(10,15) = data(10,15)*100; %generate outliers
% data(13,40) = data(13,40)*100;
% mdata = mean_woK(data, 'Example of the calculation of the average spectrum, without outliers');
%The Biodata toolbox for MATLAB: a spectral database system for storing and
%processing spectra
%C 2004-2008, Kris De Gussem, Raman Spectroscopy Research Group, Department
%of analytical chemistry, Ghent University
%C 2009 Kris De Gussem
%
%This file is part of Biodata.
%
%Biodata is free software: you can redistribute it and/or modify
%it under the terms of the GNU General Public License as published by
%the Free Software Foundation, either version 3 of the License, or
%(at your option) any later version.
%
%Biodata is distributed in the hope that it will be useful,
%but WITHOUT ANY WARRANTY; without even the implied warranty of
%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
%GNU General Public License for more details.
%
%You should have received a copy of the GNU General Public License
%along with Biodata. If not, see <http://www.gnu.org/licenses/>.
%Copyright (c) 2004-2009, Kris De Gussem
%All rights reserved.
%
%Redistribution and use in source and binary forms, with or without
%modification, are permitted provided that the following conditions are
%met:
%
% * Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
% * Redistributions in binary form must reproduce the above copyright
% notice, this list of conditions and the following disclaimer in
% the documentation and/or other materials provided with the distribution
% * Neither the name of Raman Spectroscopy Research Group, Department of
% analytical chemistry, Ghent University nor the names
% of its contributors may be used to endorse or promote products derived
% from this software without specific prior written permission.
%
%THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
%AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
%IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
%ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
%LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
%CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
%SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
%INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
%CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
%ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
%POSSIBILITY OF SUCH DAMAGE.
function meandata = mean_woK(inputdata, TitleGraph, RamanAxis)
switch nargin
case 2
RamanAxis = 1:1:size(inputdata,2);
xlab = 'Datapoint or pixel';
case 3
xlab = 'Raman shift (cm^{-1})';
otherwise
error ('Biodata:msg', 'Wrong number of input parameters');
end
ylab = 'Intensity (Arbitrary units)';
if size (inputdata, 1) == 1;
warning ('Biodata:msg', 'Tried to take the mean spectrum of only one spectrum...');
meandata = inputdata;
return;
end
if size (inputdata, 1) == 2
meandata = mean(inputdata);
if (nargin >= 2) && (strcmp (TitleGraph, 'NoGraph') == 0)
TitleGraph = strcat (TitleGraph, ' (2 spectra: no outlier detection, only taken mean)');
end
else
% search for outliers
meandata = SimpleOutDel (inputdata, 2.5); %last input value is the number of standard deviations
meandata = mean(meandata);
end
if strcmp (TitleGraph, 'NoGraph') == 0
figure, plot(RamanAxis, snv(inputdata), RamanAxis, snv(meandata)+20); %just to check the spectra, they are snv'd
xlabel (xlab);
ylabel (ylab);
%the following code labels the spectra per individual sample
%in this way you do not have lots of figures of which you do not know
%the original sample
if nargin >= 2
h = title (TitleGraph);
set (h, 'FontWeight', 'bold');
set (h, 'FontSize', 16);
set (gcf, 'PaperOrientation', 'landscape');
set (gcf, 'PaperPositionMode', 'auto ');
end
end
function data = SimpleOutDel (data, astdev)
%check for the presence of outliers while not taking into account the
%maximum and minimum value (because these extrema will influence the
%determination of whether the extremum is an outlier)
[ntot,mtot] = size (data);
if ntot < 3
warning ('Biodata:msg', 'Tried to do outlier detection on 2 spectra: no outlier detection, only taken mean...');
return; % 2 samples: not possible to check for the presence of outliers
end
for m = 1:mtot % for all spectral channels
oldpos = []; % to store the position of outliers
vec = data (:,m);
%check whether it is an outlier
% 1) check the maximum
% 2) remove this value if it is an outlier
% 3) check minimum and remove it if it is an outlier
% 4) check whether the maximum is an outlier (this has to be done
% again, because an outlier as maximum may be hidden by an
% outlier as minimum)
ma = max (vec);
posma = find (vec == ma);
vec_wo_ma = vec;
vec_wo_ma (posma) = [];
magrens = mean (vec_wo_ma) + astdev * std(vec_wo_ma);
if ma > magrens
%maximum is an outlier
%change value to the average value
tmpp = length (oldpos)+1;
oldpos (tmpp:tmpp+length (posma)-1,1) = posma;
vec(oldpos) = mean (vec_wo_ma);
ma_was_outlier = false;
else
ma_was_outlier = true;
end
%maximum is OK: however it is possible that we have less than three
%values in the spectral channel: if this is the case, we can not
%calculate the standard deviation
mi = min (vec);
posmi = find (vec == mi);
vec_wo_mi = vec;
vec_wo_mi ([oldpos; posmi]) = [];
if size (vec_wo_mi, 1) < 2
return;
end
migrens = mean (vec_wo_mi) - astdev * std(vec_wo_mi);
if mi < migrens
%max is outlier
%change value to the average value
tmpp = length (oldpos)+1;
oldpos (tmpp:tmpp+length (posmi)-1,1) = posmi;
vec(oldpos) = mean (vec_wo_mi);
if ma_was_outlier
ma = max (vec);
posma = find (vec == ma);
vec_wo_ma = vec;
vec_wo_ma ([oldpos; posma]) = [];
if size (vec_wo_mi, 1) < 2
return;
end
magrens = mean (vec_wo_ma) + astdev * std(vec_wo_ma);
if ma > magrens
tmpp = length (oldpos)+1;
oldpos (tmpp:tmpp+length (posma)-1,1) = posma;
vec(oldpos) = mean (vec_wo_ma);
end
end
end
data (:,m) = vec;
end