Code covered by the BSD License  

Highlights from
Probability Density Function (PDF) Estimator (V3.2)

image thumbnail
from Probability Density Function (PDF) Estimator (V3.2) by Yi Cao
An estimate of probability density function of the given random data with bounded support.

p=gkdeb(x,p)
function p=gkdeb(x,p)
% GKDEB  Gaussian Kernel Density Estimation with Bounded Support
% 
% Usage:
% p = gkdeb(d) returns an estmate of pdf of the given random data d in p,
%             where p.pdf and p.cdf are the pdf and cdf vectors estimated at
%             p.x locations, respectively and p.h is the bandwidth used for
%             the estimation. 
% p = gkdeb(d,p) specifies optional parameters for the estimation:
%             p.h - bandwidth
%             p.x - locations to make estimation
%             p.uB - upper bound
%             p.lB - lower bound.
%             p.alpha - to calculate inverse cdfs at p.alpha locations
%
% Without output, gkdeb(d) and gkdeb(d,p) will disply the pdf and cdf
% (cumulative distribution function) plot.  
%
% See also: hist, histc, ksdensity, ecdf, cdfplot, ecdfhist

% Example 1: Normal distribution
%{
gkdeb(randn(1e4,1));
%}
% Example 2: Uniform distribution
%{
clear p
p.uB=1;
p.lB=0;
gkdeb(rand(1e3,1),p);
%}
% Example 3: Exponential distribution
%{
clear p
p.lB=0;
gkdeb(-log(1-rand(1,1000)),p);
%}
% Example 4: Rayleigh distribution
%{
clear p
p.lB=0;
gkdeb(sqrt(randn(1,1000).^2 + randn(1,1000).^2),p);
%}

% V3.2 by Yi Cao at Cranfield University on 7th April 2010
%

% Check input and output
error(nargchk(1,2,nargin));
error(nargoutchk(0,1,nargout));

n=length(x);
% Default parameters
if nargin<2
    N=100;
    h=median(abs(x-median(x)))/0.6745*(4/3/n)^0.2;
    xmax=max(x);
    xmin=min(x);
    xmax=xmax+3*h;
    xmin=xmin-3*h;
    dx=(xmax-xmin)/(N-1);
    p.x=xmin+(0:N-1)*dx;
    p.pdf=zeros(1,N);
    p.cdf=zeros(1,N);
    p.h=h;
    dxdz=ones(size(p.x));
    z=p.x;
else
    [p,x,dxdz,z]=checkp(x,p);
    N=numel(p.x);
    h=p.h;
end

% Gaussian kernel function
kerf=@(z)exp(-z.*z/2);
ckerf=@(z)(1+erf(z/sqrt(2)))/2;
nh=n*h*sqrt(2*pi);

for k=1:N
    p.pdf(k)=sum(kerf((p.x(k)-x)/h));
    p.cdf(k)=sum(ckerf((p.x(k)-x)/h));
end
p.x=z;
p.pdf=p.pdf.*dxdz/nh;
dx=[0 diff(p.x)];
p.cdf=p.cdf/n;
% p.cdf=cumsum(p.pdf.*dx);

if isfield(p,'alpha')
    n=numel(p.alpha);
    p.icdf=p.alpha;
    for k=1:n
        alpha=p.alpha(k);
        ix=find(p.cdf>alpha,1)-1;
        x1=p.x(ix);
        x2=p.x(ix+1);
        F1=p.cdf(ix);
        F2=p.cdf(ix+1);
        p.icdf(k)=x1+(alpha-F1)*(x2-x1)/(F2-F1);
    end
end

% Plot
if ~nargout
    subplot(211)
    plot(p.x,p.pdf,'linewidth',2)
    grid
%     set(gca,'ylim',[0 max(p.pdf)*1.1])
    ylabel('f(x)')
    title('Estimated Probability Density Function');
    subplot(212)
    plot(p.x,p.cdf,'linewidth',2)
    ylabel('F(x)')
    title('Cumulative Distribution Function')
    xlabel('x')
    grid
    meanx = sum(p.x.*p.pdf.*dx);
    varx = sum((p.x-meanx).^2.*p.pdf.*dx);
    text(min(p.x),0.6,sprintf('mean(x) = %g\n var(x) = %g\n',meanx,varx));
    if isfield(p,'alpha') && numel(p.alpha)==1
        text(min(p.x),0.85,sprintf('icdf at %g = %g',p.alpha,p.icdf));
    end
end

function [p,x,dxdz,z]=checkp(x,p)
n=numel(x);
%check structure p
if ~isstruct(p)
    error('p is not a structure.');
end
if ~isfield(p,'uB')
    p.uB=Inf;
end
if ~isfield(p,'lB')
    p.lB=-Inf;
end
if p.lB>-Inf || p.uB<Inf
    [p,x,dxdz,z]=bounded(x,p);
else
    if ~isfield(p,'h')
        p.h=median(abs(x-median(x)))/0.6745*(4/3/n)^0.2;
    end
    error(varchk(eps, inf, p.h, 'Bandwidth, p.h is not positive.'));
    if ~isfield(p,'x')
        N=100;
        xmax=max(x);
        xmin=min(x);
        xmax=xmax+3*p.h;
        xmin=xmin-3*p.h;
        dx=(xmax-xmin)/(N-1);
        p.x=xmin+(0:N-1)*dx;
    end
    dxdz=ones(N,1);
    z=p.x;
end
p.pdf=zeros(size(p.x));
p.cdf=zeros(size(p.x));

function [p,x,dxdz,z]=bounded(x,p)
if p.lB==-Inf
    dx=@(t)1./(p.uB-t);
    y=@(t)-log(p.uB-t);
    zf=@(t)(p.uB-exp(-t));
elseif p.uB==Inf
    dx=@(t)1./(t-p.lB);
    y=@(t)log(t-p.lB);
    zf=@(t)exp(t)+p.lB;
else
    dx=@(t)(p.uB-p.lB)./(t-p.lB)./(p.uB-t);
    y=@(t)log((t-p.lB)./(p.uB-t));
    zf=@(t)(exp(t)*p.uB+p.lB)./(exp(t)+1);
end
x=y(x);
n=numel(x);
if ~isfield(p,'h')
    p.h=median(abs(x-median(x)))/0.6745*(4/3/n)^0.2;
end
h=p.h;
if ~isfield(p,'x')
    N=100;
    xmax=max(x);
    xmin=min(x);
    xmax=xmax+3*h;
    xmin=xmin-3*h;
    p.x=xmin+(0:N-1)*(xmax-xmin)/(N-1);
    z=zf(p.x);
else
    z=p.x;
    p.x=y(p.x);
end
dxdz=dx(z);

function msg=varchk(low,high,n,msg)
% check if variable n is not between low and high, returns msg, otherwise
% empty matrix
if n>=low && n<=high
    msg=[];
end

Contact us at files@mathworks.com