Code covered by the BSD License  

Highlights from
Generation of Random Variates

image thumbnail

Generation of Random Variates

by

James Huntley (view profile)

 

generates random variates from over 870 univariate distributions

gen_distrib2(userin,rstart)
% gen_distrib2.m - generates random numbers from any of a wide variety of distributions
%               via numerical inversion of the CDF or other techniques.
%
% Created by:  Jim Huntley,  08/15/06, modified 10/22/08 to avoid
% repetitive calls to cdf-function for discrete distributions.
%
% Calls: 'cdf4d.m', 'pdf4d.m', + '*_cdf' (See "distrib_full" for list), 
%        '*_pdf' (See "distrib_full" for list), 'ProgressString.m',
%        'func.m', 'marcumq2.m', 'binomial_coef.m', 'dedomaind_enum.m',
%        'cgamma.m', 'gaus1_ppf.m', 'gaus2_ppf.m', 'LaguerrePoly.m',
%        'AssociatedLaguerrePoly.m', 'binomial.m', 'stable_rand.m',
%        'psi.m', 'psin.m', 'partition.m', 'genherm.m', 'mchgm_jmh.m',
%        'smooth_diff.m', 'qbinomial_coef.m', 'stirling1.m', 'stirling2.m',
%        'carlitz1.m', 'carlitz2.m', 'partition.m', partioni8.m'.
%
% Called by:  'distribsXX.m'.
%
% Inputs:  userin: input Data Structure.
%          rstart: seed for random number generator.
%
% Outputs:  xhold: noise-values, 
%           fhold: pdf-values, 
%           ghold: cdf-values, 
%           jhold: index-values, 
%           xsort: sorted noise-values,
%           fsort: sorted pdf-values, 
%           gsort: sorted cdf-values, 
%           chold: equi-spaced x-values.
%           isint = 1 => integer distribtion.
%

function [xhold,fhold,ghold,jhold,xsort,fsort,gsort,chold,isint] = gen_distrib2(userin,rstart)

global hObjectE16 hObjectE20

tic

% Retreive Input Data from Structure.
distrib = userin.distrib;
p_value = userin.p_value;
d_indx = userin.d_indx;
xmax_value = userin.xmax_value;
xmin_value = userin.xmin_value;
ftype = userin.ftype;
fnorm = userin.fnorm;
nsamples = userin.nsamples;
c_value = userin.c_value;

% Pre-Allocate.
xhold(1:nsamples) = 0;
fhold(1:nsamples) = 0;
ghold(1:nsamples) = 0;
jhold(1:nsamples) = 0;
xsort(1:nsamples) = 0;
fsort(1:nsamples) = 0;
gsort(1:nsamples) = 0;
chold(1:nsamples) = 0;
    
%*******************************************************************************************************************    

% Generate samples of selected distribution.  Four separate techniques are used here:
%
% 1. Integer (Discrete) Distributions - find CDF value closest to URD & use corresponding
%                                       domain value as output sample for this distribution.
%                                       Discrete distributions are further divided into those
%                                       having closed-form CDFs and those without (need normalization).
% 2. Real Distributions (PPF Inv.)    - use PPF(Inverse CDF) to find samples of
%                                       Tukey's Generalized Lambda, "G and H", Wakeby, or Davies
%                                       Distributions.
% 3. Real Distributions (Integration) - integrate PDF vector to estimate CDF vector, interpolate 
%                                       between CDF values closest to URD & use interpolated domain value
%                                       as output sample for this distribution. "Graphical Inversion".
% 4. Real Distributions (Inversion)   - invert CDF - URD = 0 to find corresponding domain value
%                                       for use as output sample for this distribution. This
%                                       is the standard technique for sampling most continuous distributions.
%
%*******************************************************************************************************************        
	
%Initialize the uniform random number generator.   
rand('state',rstart);
hbar = waitbar(0,'Please wait...');

% Determine type of Distribution.

% Integer (Discrete) Distributions.
% Note that PDFs and CDFs are scalars.
if(ftype == 1) 

    
    if(isempty(c_value))
            c_value = 0;
    end
    isint = 1;
    xmn = round(xmin_value);
    xmx = round(xmax_value);
    step = 1;
    if(strcmp(distrib,'invbinom') == 1 || strcmp(distrib,'otter') == 1 || ...
       strcmp(distrib,'felix') == 1 || strcmp(distrib,'teja') == 1 || ...
       strcmp(distrib,'sunil') == 1 || strcmp(distrib,'delotter') == 1 || ...
       strcmp(distrib,'delteja') == 1 || strcmp(distrib,'delfelix') == 1 || ...
       strcmp(distrib,'delsunil') == 1 || strcmp(distrib,'shenton') == 1 || ...
       strcmp(distrib,'modfelix') == 1 || strcmp(distrib,'hari') == 1)
       step = 2;
    end
    if(strcmp(distrib,'ved') == 1 || strcmp(distrib,'delved') == 1 || ...
       strcmp(distrib,'modved') == 1)
       step = 3;
    end
    if(strcmp(distrib,'poispear1_int') == 1 || strcmp(distrib,'poispear6_int') == 1)
        c_value = xmx;
    end
    y = (xmn:step:xmx);
    if(strcmp(distrib,'crommelin') == 1)
        fy = pdf4d(y, d_indx, p_value, distrib, c_value);
        gy = cdf4d(y, d_indx, p_value, distrib, c_value);
        jylim = size(gy,2);
    elseif(strcmp(distrib,'dirmultnom') == 1)
        fy = pdf4d(y(1), d_indx, p_value, distrib, c_value);
        jylim = size(fy,2);
        for jy = 1:jylim
            gy(jy) = sum(fy(1:jy));
            %gy(jy) = cdf4d(y(jy), d_indx, p_value, distrib, c_value);
        end
    else
        jylim = size(y,2);
        fy(1:jylim) = 0;
        for jy = 1:jylim
            fy(jy) = pdf4d(y(jy), d_indx, p_value, distrib, c_value);
        end

        for jy = 1:jylim
            gy(jy) = sum(fy(1:jy));
            %gy(jy) = cdf4d(y(jy), d_indx, p_value, distrib, c_value);
        end

    end
   
    
    % Normalize Distributions where CDF is just sum of PDFs.
    if(fnorm == 1)
        gy = gy./gy(end);
    end

    % Main Loop Over Samples for Integer (Discrete) Distributions.
    for js = 1:nsamples
        waitbar(js / nsamples);
        urd1 = rand(1);   			% uniform random deviate (0 : 1)
        %Find closest "y" to urd1.
        xm = 1;
        for jy = 1:jylim
            if(urd1 < gy(jy))
                xm = fix((y(jy)-xmn)/step) + 1;
                break;
            end
        end
       
        fm = fy(xm);
        gm = gy(xm);
        % Store Output for plotting.        
        xhold(js) = y(xm);                      % Samples
        fhold(js) = fm;                         % PDF
        ghold(js) = gm;                         % CDF            
        jhold(js) = js;                         % Sample Index
        % Special scaling for Harris Distribution.
        if(strcmp(distrib,'harris') == 1)
            xhold(js) = xm*p_value(1) + 1;
        end         
        
    end % Loop over Samples for Integer Distributions.
    close(hbar);  

else        

    
    % Real (Continuous) Distributions.
    isint = 0;
    
    % PPF Inverse Distributions - Generalized Lambda, "G and H", Wakeby, Davies, Schmeiser-Deutsch or Tadika.
    % Note that, for these distributions, samples are obtained directly from the PPFs.  The CDFs and PDFs 
    % calaculated for these distributions are strictly empirical.  They're provided as visual aids
    % for the user and they're only accurate iff both xmin and xmax can be specified as functions of
    % the disrtibution parameters.
    % Note that PDFs and CDFs are vectors.
    if(ftype == 2) 

        if(isempty(c_value))
            c_value = 0;
        end
        % Make input vectors.
        delta = 1 / (nsamples-1);
        xv = 0:delta:1;                                             % Tabulate at equally-spaced values of CDF.                     
                
        % Estimate CDF and random samples.
        name2 = char(strcat(distrib,'_cdf'));
        h = str2func(name2);
        [fintp, fhold] = h(xv,p_value);                             % Return tabulated CDF at equally-spaced values of PPF.
                
        % Store outputs for plotting.
        xhold = xv;
        ghold = fintp;
        [gsort,indx] = sort(ghold);
        xsort = xmin_value:(xmax_value-xmin_value)/(nsamples-1):xmax_value;
        jhold = 1:nsamples;
                
        % Estimate PDF.
        fsort = filter(-smooth_diff(4),1,fintp)/delta;        

        
    % Vector Quadrature Continuous CDFs.  
    % Samples are generated by numerically integrating analytical PDFs into tabulated CDFs 
    % and then "graphically" inverting these tabulated CDFs. 
    % Note that PDFs and CDFs are vectors.
    elseif(ftype == 3)            
        % Make input vectors.
        if(isempty(c_value))
            c_value = 0;
        end
        xv = xmin_value:(xmax_value-xmin_value)/(nsamples-1):xmax_value;
        fv = cdf4d(xv, d_indx, p_value, distrib, c_value);
        
        %Add noise to enforce single-valued interpolation.
        urd = rand(1,size(fv,2));
        fv = fv + urd.*0.000001;
        
        % Main Inversion Loop Over Samples for Vector Quadrature Distributions.
        for js = 1:nsamples
            waitbar(js / nsamples);
            urd1 = rand(1);   			% uniform random deviate (0 : 1)
            if(strcmp(distrib,'tweedie') == 0)
                % Find last "fv" < "urd1" and take corresponding interpolated "xm" as the sample.
                % This is the "Graphical Inversion" method with interpolation to estimate CDF^(-1).
                % NOTE: Interpolation may fail if multiple values for 'fv' become too small, 
                % i.e. more than one fv-value is assigned an xm-value of zero.  If this error occurs,
                % try tightening the domain limits so that 'fv' doesn't completely vanish.
                xm = interp1(fv,xv,urd1,'linear');
                %xm = max(xmin_value,xm) + rand(1)*1e-10;                    
                % NOTE: 'cubic' interpolation is more accurate but occasionally fails. 'Linear' is safer. 
            end
            % Special Case.
            if(strcmp(distrib,'tweedie') == 1)
                zp = exp(-p_value(1));
                if(urd1 <= zp)
                    xm = 0;                          
                elseif(urd1 > zp)
                    xm = interp1(fv,xv,urd1,'linear','extrap');                    
                end
            end 
            % Store output for plotting.
            xhold(js) = xm;
            ghold(js) = urd1;
            jhold(js) = js;
            
        end  % for loop over samples.

        % Store more outputs for plotting.
        dxv = (xmax_value-xmin_value)/(nsamples-1);
        xv = xmin_value:dxv:xmax_value;
        chold = xv;
        % Only CDFs (no PDFs) exist for the following distributions. Use
        % smooth differentiating filter to estimate PDFs.
        if(strcmp(distrib,'cnormlapl') == 1 || strcmp(distrib,'pear7laplp') == 1 || ...
            strcmp(distrib,'pear7lapld') == 1 || strcmp(distrib,'tdiff') == 1 || ...
            strcmp(distrib,'weibexp') == 1 || strcmp(distrib,'cantor') == 1 || ...
            strcmp(distrib,'rwalkrng') == 1 || strcmp(distrib,'dncbet') == 1 || ...
            strcmp(distrib,'ncbet2') == 1 || strcmp(distrib,'ncbet1') == 1) 
            fhold = filter(-smooth_diff(4),1,fv) ./ dxv;
        else
            fhold = pdf4d(xv, d_indx, p_value, distrib, c_value);
        end
        close(hbar);
        
    % Scalar Inverted Continuous CDFs.  
    % Samples are generates by numerical inversion of analytical CDFs. 
    % Note that PDFs and CDFs are scalars.
    elseif(ftype == 4)     
       
        ftol = 1e-8;  					% convergence criterion.
        itmax = 100;  					% iteration limit.

        % Scalar CDF requires normalization? 
        if(isempty(c_value))
            c_value = 0;
            flast = 1;
        else %(Crommelin)
            flast = cdf4d(xmax_value, d_indx, p_value, distrib, c_value);
        end

        % Main Loop Over Samples for Scalar Inverted Continuous Distributions.
        for js = 1:nsamples
            waitbar(js / nsamples);
            %js
            urd1 = rand(1);    			% uniform random deviate (0 : 1)
            % Correction for zero-truncated CDFs.
            f0 = 0;
            if(strcmp(distrib,'volodin') == 1)
                x0 = xmin_value;
                f0 = cdf4d(x0, d_indx, p_value, distrib, c_value)/flast;
                urd1 = f0 + urd1*(1-f0);
            end                
            % Sample the distribution's PDF by using MFP to numerically find the zeros of 
            % the function: CDF - URD.  CDF is evaluated explictly for each iteration.
            % This is the standard solution method for sampling most distributions.
            x0 = xmin_value;
            xn = xmax_value;  
            n = 0;
            f0 = cdf4d(x0, d_indx, p_value, distrib, c_value)/flast - urd1;
            xi = xn;
            fi = cdf4d(xn, d_indx, p_value, distrib, c_value)/flast - urd1;
            while (abs(fi) > ftol && n <= itmax)  
                xn = xi;
                fn = fi;
                xi = (x0*fn - xn*f0) / (fn - f0);
                fi = cdf4d(xi, d_indx, p_value, distrib, c_value)/flast - urd1;
                if(abs(fi) <= ftol)
                    break
                end
                n = n + 1;
                if(n > itmax)
                    out_string = char('WARNING n > itmax. Hit "Enter" to Continue.');
                    set(hObjectE16,'String',out_string);
                    set(hObjectE20,'Visible','on');
                    set(hObjectE20,'BackgroundColor',[1.0 0.969 0.922]);
                    pause
                    break 
                end
                if((fi * fn) > 0)
                    f0 = 0.5 * f0;
                else
                    x0 = xn;
                    f0 = fn;
                end
            end % while Loop
            
            % Store Output for plotting.
            fi = pdf4d(xi, d_indx, p_value, distrib, c_value);
            fhold(js) = fi;
            gi = cdf4d(xi, d_indx, p_value, distrib, c_value)/flast;
            xhold(js) = xi;
            ghold(js) = gi;
            jhold(js) = js;
                        
        end % for Loop over samples for Real Distributions.
        close(hbar);

    end  % if Choice among PPF Inverse, Special Cases, Vector Quadrature, and Scalar Inverted CDFs.     

end % Choice between Real or Integer (Discrete) Distributions.

toc
    
return

Contact us