MATLAB code for the nonparametric fitting video

by

 

Use localized regression, cross validation, and the bootstrap for nonparametric fitting.

MTT Nonparametric Fitting Demo.m
%% Copyright

%Copyright (c) 2010, The MathWorks, Inc.
%All rights reserved.

%Redistribution and use in source and binary forms, with or without 
%modification, are permitted provided that the following conditions are 
%met:

 %   * Redistributions of source code must retain the above copyright 
 %     notice, this list of conditions and the following disclaimer.
 %   * Redistributions in binary form must reproduce the above copyright 
 %     notice, this list of conditions and the following disclaimer in 
 %     the documentation and/or other materials provided with the distribution
 %   * Neither the name of the The MathWorks, Inc. nor the names 
 %     of its contributors may be used to endorse or promote products derived 
 %     from this software without specific prior written permission.
      
%THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
%AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
%IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
%ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
%LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
%CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
%SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
%INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
%CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
%ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
%POSSIBILITY OF SUCH DAMAGE.

%% Generate some data

s = RandStream('mt19937ar','seed',1971);
RandStream.setDefaultStream(s);

X = linspace(1,10,100); 
X = X';

w = .6067;
a0 = 1.6345;
a1 = -.6235;
b1 = -1.3501;
a2 = -1.1622;
b2 = -.9443;

Y = a0 + a1*cos(X*w) + b1*sin(X*w) + a2*cos(2*X*w) + b2*sin(2*X*w);

% Take a noisy sample from the Curve

K = max(Y) - min(Y);
noisy = Y +  .2*K*randn(100,1);

hold off
scatter(X,noisy,'k');

L2 = legend('Noisy Data Sample', 2);

%% LOWESS (Spanning parameter = too large)

hold on
lowess_large = smooth(X,noisy,.9, 'lowess');
LL_Plot = plot(X,lowess_large,'k','linestyle','-','linewidth',2);
legend('Data Point','Nonparametric fit (LOWESS: Span = .9',2)

%% LOWESS (Spanning parameter = too small)

lowess_small = smooth(X,noisy,.02, 'lowess');
plot(X,lowess_small,'r','linestyle','-','linewidth',2);
legend('Data Point','Nonparametric fit (LOWESS: Span = .9','Nonparametric fit (LOWESS: Span = .02',2)

%% Cross Validation: Partition into Training Set and Validation Set

hold off

index = 1:100;
index = index';
index = randsample(index, 100);

training = sort(index(11:100));
test = sort(index(1:10));

scatter(X(training), noisy(training), 'k', 'filled');

hold on
scatter(X(test),noisy(test), 'r', 'filled')

legend('Training Set', 'Validation Set',2)

%%  Cross Validation:  Fit a Curve to the Training Set

Lowess_Large = smooth(X(training),noisy(training),.99, 'lowess');
plot(X(training),Lowess_Large,'k','linestyle','-','linewidth',2);

legend('Training Set', 'Validation Set', 'LOWESS Fit',2)

%%  Evaluate the accuracy of the curve using the Validation Set

foo = fit(X(training),Lowess_Large, 'linearinterp');

for i = 1:10

plot([X(test(i)), X(test(i))], [noisy(test(i)), foo(X(test(i)))], 'r','linestyle','-','linewidth',2);

end

legend('Training Set', 'Validation Set','LOWESS Fit', 'Residual', 2)

%%  Rotate through the different combinations of Training / Validation sets

training = sort([index(1:10); index(21:100)]);
test = sort(index(11:20));

scatter(X(training), noisy(training), 'k', 'filled');

Lowess_Large = smooth(X(training),noisy(training),.99, 'lowess');
LS_Plot = plot(X(training),Lowess_Large,'k','linestyle','--','linewidth',2);


scatter(X(test),noisy(test), 'r', 'filled')
foo = fit(X(training),Lowess_Large, 'linearinterp');

for i = 1:10

plot([X(test(i)), X(test(i))], [noisy(test(i)), foo(X(test(i)))], 'r','linestyle','--','linewidth',2);

end



%% Use "cvpartition" and "crossval"

options = statset('UseParallel','always');

hold off
scatter(X, noisy)

num = 99;
spans = linspace(.01,.99,num);
sse = zeros(size(spans));
cp = cvpartition(100,'k',10);

for j=1:length(spans)
    f = @(train,test) norm(test(:,2) - mylowess(train,test(:,1),spans(j)))^2;
    sse(j) = sum(crossval(f,[X,noisy],'partition',cp));
end

[minsse,minj] = min(sse);
span = spans(minj);

x = linspace(min(X),max(X));
line(x,mylowess([X,noisy],x,span),'color','k','linestyle','-', 'linewidth',2)

legend('Noisy Data Sample','LOWESS: Optimal Span',2)

%%  Show where this data came from

figure('Numbertitle', 'off', 'name', 'Y = a0 + a1*cos(X*w) + b1*sin(X*w) + a2*cos(2*X*w) + b2*sin(2*X*w)')
hold off
actual = plot(X,Y,'color','b','linestyle','-', 'linewidth',2);
xlabel('X Data');
ylabel('Y Data');
legend('True Relationship Between X and Y',2);

%%
% Add in a noise vector
hold on
sample = scatter(X,noisy,'b');

legend('True Relationship Between X and Y', 'Noisy Data Sample',2);

%%
% Superimpose the LOWESS fit

lowess = line(x,mylowess([X,noisy],x,span),'color','k','linestyle','-', 'linewidth',2);
legend('True Relationship Between X and Y', 'Noisy Data Sample', 'LOWESS: Optimal Span',2);

%%
%  Show the output from a regression analysis

f = fit(X,noisy,'fourier2');
fourier = plot(f, 'r');

L1 = legend('True Relationship Between X and Y', 'Noisy Data Sample',... 
    'LOWESS: Optimal Span', 'Nonlinear Regression',2);

%%

hold off
scatter(X, noisy)

f = @(xy) mylowess(xy,X,span);
yboot2 = bootstrp(1000,f,[X,noisy])';
meanloess = mean(yboot2,2);
h1 = line(X, meanloess,'color','k','linestyle','-','linewidth',2);

stdloess = std(yboot2,0,2);
h2 = line(X, meanloess+2*stdloess,'color','r','linestyle','--','linewidth',2);
h3 = line(X, meanloess-2*stdloess,'color','r','linestyle','--','linewidth',2);

L5 = legend('Localized Regression','Confidence Intervals');


Contact us