| [LMSout, blms, Rsq, error1]=LMTSregor(y, X, max_fits, max_points)
|
function [LMSout, blms, Rsq, error1]=LMTSregor(y, X, max_fits, max_points)
% % Syntax;
% % [LMSout,blms,Rsq,error1]=LMTSregor(y, X, max_fits, max_points);
% %
% % Least Median Trimmed Squares Through the Origin
% %
% % This program is a modification of LMSregor. It has been modified to
% % trim the input data sets and trim the number of combinations of
% % line fits that are processed. The trimming allows the program to
% % accommodate large data sets.
% %
% % This program performs the Least Median Trimmed Squares Robust
% % Regression through the origin for simple or multiple columns of
% % data and outputs the regression parameters.
% %
% % Breakdown has been observed to occur at 50%; however, the breakdown
% % point is not known for all problems.
% %
% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
% % Input Variable Description
% %
% % y is the column vector of the dependent variable.
% %
% % X is the matrix of the independent variable. If it is one dimensional,
% % then it should be a column vector. If X has more columns than
% % rows it is transposed. If X is omitted or an empty matrix,
% % then X is assumed to be the column of integers (1:n)', where n
% % is the number of elements of y.
% %
% % max_fits is the number of best fit pairs of data. The default value
% % does not exceed 1000. The maximum value is 10000. It is also
% % limited to floor(n/size(X,2)).
% %
% % max_points is the number of data points for curve fitting.
% % The default value does not exceed 100000 unless
% % max_fits*size(X,2) is larger. If omitted or empty the
% % default is used. It is raised to max_fits if smaller.
% %
% % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% %
% % Output Variable Description
% %
% % LMSout is the LMTS estimated values vector.
% %
% % blms is the LMTS slopes vector (the fit passes through the origin,
% % so there is no intercept term).
% %
% % Rsq is the R-squared error estimate of the fit.
% %
% % error1 is 1 if there is an input error or if no valid fit could be
% % found; otherwise it is 0.
% %
% % **********************************************************************
% %
% % This program is originally the work of
% %
% % Alexandros Leontitsis
% % Institute of Mathematics and Statistics
% % University of Kent at Canterbury
% % Canterbury
% % Kent, CT2 7NF
% % U.K.
% %
% % University e-mail: al10@ukc.ac.uk (until December 2002)
% % Lifetime e-mail: leoaleq@yahoo.com
% % Homepage: http://www.geocities.com/CapeCanaveral/Lab/1421
% %
% % Sep 3, 2001.
% %
% % **********************************************************************
% %
% % Reference:
% % Rousseeuw PJ, Leroy AM (1987):
% % Robust regression and outlier detection. Wiley.
% %
% % **********************************************************************
% %
% Example='';
% % Establish an exact solution (xe, ye)
%
% xe=1/100*(1:10000)';
% ye=10*xe;
%
% % Create a noisy data set with an outlier (X, y)
%
% X=1/100*randn(size(xe))+xe;
% y=10*(1/100*randn(size(xe))+xe);
%
% % Perform the robust median trimmed squares linear regression through the
% % origin.
% max_fits=100;
% max_points=500;
%
% % Outlier data points form a line with a different slope
% % randomly select 40% of the data points to be outliers
% [ndraw]=rand_int(1, length(xe), 0.40*length(X), 1, 1);
% y(ndraw)=50./100.*randn(size(ndraw))+50*ndraw;
% X(ndraw)=1./100.*randn(size(ndraw))+1./100*ndraw;
%
% [LMSout,blms,Rsq]=LMTSregor(y, X, max_fits, max_points);
% % plot the robust solution
% xr=xe;
% yr=polyval([blms(1) 0], xr);
% % plot the typical regression solution
% xp=xr;
% p=polyfit(X, y, 1);
% yp=polyval(p, xp);
%
% figure(1); plot(X, y, 'linestyle', 'none', 'marker', '.', 'markersize', 3, 'markeredgecolor', 'k');
% hold on; plot(xe, ye, 'g', 'linewidth', 1);
% plot(xr, yr, 'r', 'linewidth', 1);
% plot(xp, yp, 'b', 'linewidth', 1);
% legend({'Scattered Data', 'Exact Solution', 'Robust Solution', 'Regular Regression'});
% xlim([1 100]);
% ylim([1 100000]);
% title({'60% of the data is near the x-axis', '40% of the data is near the y-axis'}, 'fontsize', 20);
% xlabel('x-axis', 'fontsize', 18);
% ylabel('y-axis', 'fontsize', 18);
% set(gca, 'fontsize', 14);
%
% % ***********************************************************
% %
% % This program was modified by Edward L. Zechmann
% %
% % date 1 February 2008 updated comments
% % added rand_int code to randomly select
% % data points
% %
% % modified 11 February 2008 trimmed the input data arrays
% % updated comments
% %
% % modified 14 February 2008 trimmed the input data arrays
% % updated comments.
% % Improved the error handling and default
% % values.
% %
% % ***********************************************************
% %
% % Feel free to modify this code.
% %
% %

% flag marks invalid input; error1 is the error status returned to the caller
flag=0;
error1=0;

if nargin < 1 || isempty(y)
    warning('Not enough input arguments. Return empty array.');
    flag=1;
    error1=1;
    % placeholder values so the remaining input checks can still run
    n=1;
    y=1;
else
    % y must be a column vector
    y=y(:);
    % n is the length of the data set
    n=length(y);
end

if nargin < 2 || isempty(X)
    % if X is omitted give it the values 1:n
    X=(1:n)';
elseif ndims(X) > 2
    % The dimensionality is checked before any transpose because the
    % transpose operator is not defined for N-D arrays.
    warning('Invalid data set X. Return empty array.');
    flag=1;
    error1=1;
else
    % X must be a 2-dimensional matrix with observations along the rows
    [mx, nx]=size(X);
    if nx > mx
        X=X';
    end
    if n~=size(X,1)
        warning('The rows of X and y must have the same length');
        flag=1;
        error1=1;
    end
end

if isequal(flag, 1)
    % invalid input: return empty outputs, error1 is already 1
    LMSout=[];
    blms=[];
    Rsq=[];
    return;
end

% Placeholder outputs. error1 stays 1 until a valid fit is found below.
LMSout=1;
blms=1;
Rsq=1;
error1=1;

% number of independent variables (columns of X)
pp=size(X,2);

% If not input, set the maximum number of fits
if nargin < 3 || isempty(max_fits)
    % default value of max_fits is at most 1000
    max_fits=min([1000, floor(n/pp)]);
end

% make sure that max_fits does not exceed 10000 or floor(n/pp)
max_fits=min([max_fits, floor(n/pp), 10000]);

% If max_points is not an input (or is empty), set the maximum number of
% points for the input arrays X and y to a reasonable value.
if nargin < 4 || isempty(max_points)
    max_points=max([min([length(y), 100000]), max_fits*pp]);
end

if max_points < max_fits
    max_points=max_fits;
end

% Program Modified Here
% input data is trimmed
% best fit combinations are trimmed
[C, y, X, n, p]=LMS_trim(y, X, max_fits, max_points, 2);

% The "half" of the data points
h=floor(n/2)+floor((p+1)/2);

rmin=Inf;

for i=1:size(C,1)

    % Gather the p observations selected by row i of C
    idx=C(i,1:p);
    A=X(idx,:);
    b=y(idx);
    b=b(:);

    if rank(A')==p

        % Calculate the slopes. The backslash operator solves the system
        % without forming an explicit inverse of A'*A.
        c=A\b;

        % There is no intercept, so the estimation is straightforward
        est=X*c;
        r=y-est;
        r2=sort(r.^2);

        % h-th smallest squared residual is the criterion to minimize
        rlms=r2(h);

        if rlms<rmin
            rmin=rlms;
            blms=c;
            LMSout=est;
            % Chapter 2, eq. 3.12
            Rsq=1-(median(abs(r))/median(abs(y)))^2;
            % a valid fit has been found, so clear the error status
            error1=0;
        end

    end

end
|
|