Code covered by the BSD License  

Highlights from
Generation of Random Variates

image thumbnail

Generation of Random Variates

by

James Huntley (view profile)

 

generates random variates from over 870 univariate distributions

gen_distrib(userin,rstart)
% gen_distrib.m - generates random numbers from any of a wide variety of distributions
%               via numerical inversion of the CDF or other techniques.
%
% Created by:  Jim Huntley,  08/15/06.
%
% Calls: 'cdf2.m', 'cdf3.m', 'pdf2.m', + '*_cdf' (See "distrib_full" for list), 
%        '*_pdf' (See "distrib_full" for list), 'ProgressString.m',
%        'func.m', 'marcumq2.m', 'binomial_coef.m', 'dedomaind_enum.m',
%        'cgamma.m', 'gaus1_ppf.m', 'gaus2_ppf.m', 'LaguerrePoly.m',
%        'AssociatedLaguerrePoly.m', 'binomial.m', 'stable_rand.m',
%        'psi.m', 'psin.m', 'partition.m', 'genherm.m', 'mchgm_jmh.m',
%        'smooth_diff.m', 'qbinomial_coef.m', 'stirling1.m', 'stirling2.m',
%        'carlitz1.m', 'carlitz2.m', 'partition.m'.
%
% Called by:  'distribs.m'.
%
% Inputs:  'userin' Data Structure.
%
% Outputs: noise-values, cdf-values, pdf-values, index-values, sorted noise-values,
%           sorted pdf-values, sorted cdf-values.
%

function [xhold,fhold,ghold,jhold,xsort,fsort,gsort,chold,isint] = gen_distrib(userin,rstart)

tic

% Retreive Input Data from Structure.
distrib = userin.distrib;
p_value = userin.p_value;
d_indx = userin.d_indx;
xmax_value = userin.xmax_value;
xmin_value = userin.xmin_value;
ftype = userin.ftype;
fnorm = userin.fnorm;
nsamples = userin.nsamples;
c_value = userin.c_value;

% Pre-Allocate.
xhold(1:nsamples) = 0;
fhold(1:nsamples) = 0;
ghold(1:nsamples) = 0;
jhold(1:nsamples) = 0;
xsort(1:nsamples) = 0;
fsort(1:nsamples) = 0;
gsort(1:nsamples) = 0;
chold(1:nsamples) = 0;
    
%*******************************************************************************************************************    

% Generate samples of selected distribution.  Four separate techniques are used here:
%
% 1. Integer (Discrete) Distributions - find CDF value closest to URD & use corresponding
%                                       domain value as output sample for this distribution.
%                                       Discrete distributions are further divided into those
%                                       having closed-form CDFs and those without (need normalization).
% 2. Real Distributions (PPF Inv.)    - use PPF(Inverse CDF) to find samples of
%                                       Tukey's Generalized Lambda, "G and H", Wakeby, or Davies
%                                       Distributions.
% 3. Real Distributions (Integration) - integrate PDF vector to estimate CDF vector, interpolate 
%                                       between CDF values closest to URD & use interpolated domain value
%                                       as output sample for this distribution. "Graphical Inversion".
% 4. Real Distributions (Inversion)   - invert CDF - URD = 0 to find corresponding domain value
%                                       for use as output sample for this distribution. This
%                                       is the standard technique for sampling most continuous distributions.
%
%*******************************************************************************************************************        
	
%Initialize the uniform random number generator.   
rand('state',rstart);
jscreen = 25;                           % print msg. to screen every 'jscreen' samples. 
nx = 1000;
if(strcmp(distrib,'betawarn') == 1)
    nx = 5000;
end



% Determine type of Distribution.


% Integer (Discrete) Distributions.
if(ftype == 1) 

    isint = 1;
    xmn = fix(xmin_value);
    xmx = fix(xmax_value+1.0e-6);
    step = 1;
    if(strcmp(distrib,'invbinom') == 1)
        step = 2;
    end
    y = (xmn:step:xmx);        
    jylim = size(y,2);
    fy(1:jylim) = 0;
    for jy = 1:jylim
        yy = y(jy);
        fy(jy) = cdf2d(y(jy), d_indx, p_value, distrib);
    end

    % Normalize Distributions where CDF is just sum of PDFs.        
    if(fnorm == 1)
        fy = fy./fy(end);
    end

    % Main Loop Over Samples for Integer (Discrete) Distributions.
    for js = 1:nsamples    
        urd1 = rand(1);   			% uniform random deviate (0 : 1)
        %Find closest "y" to urd1.    
        xm = xmn;
        for jy = 1:jylim
            if(urd1 < fy(jy))
                xm = y(jy);
                break;
            end
        end
        % Store Output for plotting.
        fm = pdf2d(xm, d_indx, p_value, distrib);
        gm = cdf2d(xm, d_indx, p_value, distrib);
        xhold(js) = xm;                         % Samples
        fhold(js) = fm;                         % PDF
        ghold(js) = gm;                         % CDF            
        jhold(js) = js;                         % Sample Index
        % Special scaling for Harris Distribution.
        if(strcmp(distrib,'harris') == 1)
            xhold(js) = xm*p_value(1) + 1;
        end                
        % Output Counter.
        if(mod(js,jscreen) == 0)
            numprnt = num2str(js);
            PS2 = [' ',distrib,' samples have been generated.'];
            PS_string = ['  ',numprnt,PS2];
            ProgressString(PS_string);
        end            
    end % Loop over Samples for Integer Distributions.
    % Normalize Distributions where CDF is just sum of PDFs.
    if(fnorm == 1)
         ghold = ghold./cdf2d(y(end), d_indx, p_value, distrib);
    end

else        

    
    % Real (Continuous) Distributions.
    isint = 0;

    
    % PPF Inverse Distributions - Generalized Lambda, "G and H", Wakeby, Davies, Schmeiser-Deutsch or Tadika.
    % Note that, for these distributions, samples are obtained directly from the PPFs.  The CDFs and PDFs 
    % calaculated for these distributions are strictly empirical.  They're provided as visual aids
    % for the user and they're only accurate iff both xmin and xmax can be specified as functions of
    % the disrtibution parameters.
    if(ftype == 2) 

        % Make input vectors.
        delta = 1/(nsamples-1);
        xv = 0:delta:1;                                             % Tabulate at equally-spaced values of CDF.                     
        
        % Estimate CDF and random samples.
        name2 = char(strcat(distrib,'_cdf'));
        [fintp, fhold] = feval(name2,xv,p_value);                    % Return tabulated CDF at equally-spaced values of PPF.
                
        % Store outputs for plotting.
        xhold = xv;
        ghold = fintp;
        [gsort,indx] = sort(ghold);
        xsort = xmin_value:(xmax_value-xmin_value)/(nsamples-1):xmax_value;
        jhold = 1:nsamples;
        
        % Output counter.
        js = nsamples;
        if(mod(js,jscreen) == 0)                
            numprnt = num2str(js);
            PS2 = [' ',distrib,' samples have been generated.'];
            PS_string = ['  ',numprnt,PS2];
            ProgressString(PS_string);
        end
        
        % Estimate PDF.
        fsort = filter(-smooth_diff(4),1,fintp)/delta;        

        
    % Vector Quadrature Continuous CDFs.  
    % Samples are generated by numerically integrating analytical PDFs into CDFs 
    % and then "graphically" inverting those CDFs. Note that PDFs and CDFs are vectors.
    elseif(ftype == 3)            
        % Make input vectors.
        xv = xmin_value:(xmax_value-xmin_value)/(nx-1):xmax_value;
        fv = cdf2d(xv, d_indx, p_value, distrib); 
        urd = rand(1,size(fv,2));
        fv = fv + urd.*0.000001;

        % Main Inversion Loop Over Samples for Vector Quadrature Distributions.
        for js = 1:nsamples 
            urd1 = rand(1);   			% uniform random deviate (0 : 1)
            if(strcmp(distrib,'tweedie') == 0)
                % Find last "fv" < "urd1" and take corresponding interpolated "xm" as the sample.
                % This is the "Graphical Inversion" method with interpolation to estimate CDF^(-1).
                % NOTE: Interpolation may fail if multiple values for 'fv' become too small, 
                % i.e. more than one value is assigned a value of zero.  If this error occurs,
                % try tightening the domain limits so that 'fv' doesn't completely vanish.
                xm = interp1(fv,xv,urd1,'linear');
                %xm = max(xmin_value,xm) + rand(1)*1e-10;                    
                % NOTE: 'cubic' interpolation is more accurate but occasionally fails. 
                %'Linear' is safer. 
            end
            % Special Case.
            if(strcmp(distrib,'tweedie') == 1)
                zp = exp(-p_value(1));
                if(urd1 <= zp)
                    xm = 0;                          
                elseif(urd1 > zp)
                    xm = interp1(fv,xv,urd1,'linear','extrap');                    
                end
            end 
            % Store output for plotting.
            xhold(js) = xm;
            ghold(js) = urd1;
            jhold(js) = js;
            % Output Counter.
            if(mod(js,jscreen) == 0)
                numprnt = num2str(js);
                PS2 = [' ',distrib,' samples have been generated.'];
                PS_string = ['  ',numprnt,PS2];
                ProgressString(PS_string);
            end 
        end  % for loop over samples.

        % Store more outputs for plotting.
        xv = xmin_value:(xmax_value-xmin_value)/(nsamples-1):xmax_value;
        chold = xv;
        fhold = pdf2d(xv, d_indx, p_value, distrib);
        
        
    % Scalar Inverted Continuous CDFs.  
    % Samples are generates by numerical inversion of analytical CDFs. 
    % Note that PDFs and CDFs are scalars.
    elseif(ftype == 4)     

        ftol = 1e-8;  					% convergence criterion.
        itmax = 100;  					% iteration limit.

        % Scalar CDF requires normalization? 
        if(isempty(c_value))
            c_value = 0;
            flast = 1;
        else %(Crommelin)
            flast = cdf3d(xmax_value, d_indx, p_value, distrib, c_value);
        end

        % Main Loop Over Samples for Scalar Inverted Continuous Distributions.
        for js = 1:nsamples
            %js
            urd1 = rand(1);    			% uniform random deviate (0 : 1)
            % Correction for zero-truncated CDFs.
            f0 = 0;
            if(strcmp(distrib,'volodin') == 1)
                x0 = xmin_value;
                f0 = cdf3d(x0, d_indx, p_value, distrib, c_value)/flast;
                urd1 = f0 + urd1*(1-f0);
            end                
            % Sample the distribution's PDF by using MFP to numerically find the zeros of 
            % the function: CDF - URD.  CDF is evaluated explictly for each iteration.
            % This is the standard solution method for sampling most distributions.
            x0 = xmin_value;
            xn = xmax_value;  
            n = 0;
            f0 = cdf3d(x0, d_indx, p_value, distrib, c_value)/flast - urd1;
            xi = xn;
            fi = cdf3d(xn, d_indx, p_value, distrib, c_value)/flast - urd1;
            while (abs(fi) > ftol && n <= itmax)  
                xn = xi;
                fn = fi;
                xi = (x0*fn - xn*f0) / (fn - f0);
                fi = cdf3d(xi, d_indx, p_value, distrib, c_value)/flast - urd1;
                if(abs(fi) <= ftol)
                    break
                end
                n = n + 1;
                if(n > itmax)
                    WARNING = 'n > itmax,'
                    Distribution = distrib
                    break 
                end
                if((fi * fn) > 0)
                    f0 = 0.5 * f0;
                else
                    x0 = xn;
                    f0 = fn;
                end
            end % while Loop
            
            % Store Output for plotting.            
            fi = pdf3d(xi, d_indx, p_value, distrib, c_value);
            gi = cdf3d(xi, d_indx, p_value, distrib, c_value)/flast;
            xhold(js) = xi;
            fhold(js) = fi;
            ghold(js) = gi;
            jhold(js) = js;
            % Output counter.
            if(mod(js,jscreen) == 0)                
                numprnt = num2str(js);
                PS2 = [' ',distrib,' samples have been generated.'];
                PS_string = ['  ',numprnt,PS2];
                ProgressString(PS_string);
            end
            
        end % for Loop over samples for Real Distributions.

    end  % if Choice among PPF Inverse, Special Cases, Vector Quadrature, and Scalar Inverted CDFs.     

end % Choice between Real or Integer (Discrete) Distributions.

toc
    
return

Contact us