Code covered by the BSD License  

Highlights from
Update PDF Estimation

image thumbnail
from Update PDF Estimation by Yi Cao
Update PDF estimation upon receiving new data

p=gkde(x,p)
function p=gkde(x,p)
% GKDE  Gaussian kernel density estimation and update.
% 
% Usage:
% p = gkde(d) returns an estmate of pdf of the given random data d in p,
%             where p.f is the pdf vector estimated at p.x locations, 
%             p.h and p.n are the bandwidth and number of samples used for
%             the estimation. 
% p = gkde(d,p) calculates (p.n=0) or updates (p.n>0) the pdf estimation
%             using locations at p.x, bandwidth p.h and previous estimation
%             p.f. For a fresh estimation, p.f=0. Specify p.hnew if a
%             change of bandwidth is required.
%
% Without any output, gkde(d) or gkde(d,p) will disply the estimated
% (updated) pdf plot.  
%
% See also hist, histc, ksdensity, ecdf, cdfplot, ecdfhist

% Example 1: Normal distribution
%{
gkde(randn(1e5,1));
%}
% Example 2: Uniform distribution and update
%{
p=gkde(rand(1e3,1));
plot(p.x,p.f,'b','linewidth',2)
hold on
p=gkde(rand(1e3,1),p);
plot(p.x,p.f,'r','linewidth',2)
p=gkde(rand(1e3,1),p);
plot(p.x,p.f,'g','linewidth',2)
p=gkde(rand(1e3,1),p);
plot(p.x,p.f,'c','linewidth',2)
hold off
%}
% Example 3: Two sets of Gaussian random data with different means
%{
X1=randn(1e3,1)-5;
X2=randn(1e3,1);
p1=gkde(X1);
p2=gkde(X2,p1);
p3=gkde([X1;X2]);
dx=mean(diff(p1.x));
subplot(211)
plot(p1.x,p1.f,'b',p2.x,p2.f,'r',p3.x,p3.f,'c','linewidth',2)
legend('orginal','updated','joint estimation')
ylabel('f(x)')
grid
title('Probability Density Function')
subplot(212)
plot(p1.x,cumsum(p1.f*dx),'b',p2.x,cumsum(p2.f*dx),'r',p3.x,cumsum(p3.f*mean(diff(p3.x))),'c','linewidth',2)
ylabel('F(x)')
title('Probability Function: F(x)=\int_-_\infty^xf(x)dx')
xlabel('x')
grid
meanx1 = sum(p1.x.*p1.f*dx);
varx1 = sum((p1.x-meanx1).^2.*p1.f*dx);
meanx2 = sum(p2.x.*p2.f*dx);
varx2 = sum((p2.x-meanx2).^2.*p2.f*dx);
meanx3 = sum(p3.x.*p3.f*mean(diff(p3.x)));
varx3 = sum((p3.x-meanx3).^2.*p3.f*mean(diff(p3.x)));
str1=sprintf('old mean(x) = %g\n old var(x) = %g\n',meanx1,varx1);
str2=sprintf('new mean(x) = %g\n new var(x) = %g\n',meanx2,varx2);
str3=sprintf('joint mean(x) = %g\n joint var(x) = %g\n',meanx3,varx3);
text(max(p1.x),0.3,[str1 str2 str3]) 
%}

% V2.0 by Yi Cao at Cranfield University on 11th March 2008
%

% Check input and output
error(nargchk(1,2,nargin));
error(nargoutchk(0,1,nargout));

% features of given data
x=x(:);
n=numel(x);

% Default parameters, optimal for Gaussian kernel
if nargin<2
    N=100;
    p.h=median(abs(x-median(x)))*1.5704/(n^0.2);
    dx=p.h*3;
    p.n=0;
    p.x=linspace(min(x)-dx,max(x)+dx,N);
    p.f=zeros(1,N);
end

% check the structue
checkp(p);

% scale back if update
if p.n>0
    p.f=p.f*p.n;
    dx=mean(diff(p.x));
    xl=p.x(1)-min(x)+3*p.h;
    nl=max(0,ceil(xl/dx));
    xh=max(x)+p.h*3-p.x(end);
    nh=max(0,ceil(xh/dx));
    p.x=[p.x(1)-nl*dx:dx:p.x(1)-dx p.x p.x(end)+dx:dx:p.x(end)+nh*dx];
    p.f=[zeros(1,nl) p.f zeros(1,nh)];
end
N=numel(p.x);

% Gaussian kernel function
kerf=@(z)exp(-z.*z/2)/sqrt(2*pi);

% density estimation or update
for k=1:N
    p.f(k)=p.f(k)+sum(kerf((p.x(k)-x)/p.h))/p.h;
end
p.n = p.n + n;
p.f = p.f / p.n;

% Plot
if ~nargout
    plot(p.x,p.f)
    set(gca,'ylim',[0 max(p.f)*1.1])
    ylabel('f(x)')
    xlabel('x')
    title('Estimated Probability Density Function');
end

function checkp(p)
%check structure p
if ~isstruct(p) || ~all(isfield(p,{'x','f','n','h'}))
    error('p is not a right structure.');
end
error(varchk(eps, inf, p.h, 'Bandwidth, p.h is not positive.')); 

function msg=varchk(low,high,n,msg)
% check if variable n is not between low and high, returns msg, otherwise
% empty matrix
if n>=low && n<=high
    msg=[];
end

Contact us at files@mathworks.com