Code covered by the BSD License  

Highlights from
Whittle Surrogate

image thumbnail

Whittle Surrogate

by

 

05 Feb 2013 (Updated )

Generates a symbol sequence with a specified transition count.

whittle_surrogate_uncond(f,w,u,v)
function seq = whittle_surrogate_uncond(f,w,u,v)
% WHITTLE_SURROGATE 
%
% seq = whittle_surrogate_uncond(f,w,u,v);
%
% Given a transition matrix produces a random sequence 
% with exactly the same transition count. All possible 
% sequences occur with equal probability.
%
% INPUTS (from trans_count.m)
% f:            a transition count matrix
% w:          a list of words
% u:           the start word
% v:            the end word
%
% OUTPUTS
% seq:      a random sequence of words 
%               that have transition count f
%
% EXAMPLE:
% >> d = [0 1 1 0 1 0 1 1 1 0 0 1 0];
% >> [f w u v] = trans_count(d,1);
% >> seq = whittle_surrogate_uncond(f,w,u,v)
% seq = 0   1   0   1   1   1   0   1   0   1   1   0   0
% >> seq = whittle_surrogate_uncond(f,w,u,v)
% seq = 1   1   0   1   0   1   0   1   0   0   1   1   1
%
% Sucessive calls produce sequences sampled 
% randomly and uniformly from the set of all sequences
% with the same transition count.
%
% Shawn Pethel, 2012

% Fixed zeroth order case 10/8/2013

N = size(f,1);
sym = (1:N);
if size(f,2)==1 %if f is not a matrix then use random permutation
    dum = [];
    for i=1:N
        dum = [dum i*ones(1,f(i))];
    end
    indx = randperm(sum(f));
    seq = dum(indx);
    return
end
n = sum(sum(f))+1;
% If u ~= v, then the start and end words are determined.
% Otherwise, go through all u = v possibilities
if u == v
    whitnum = zeros(1,size(f,1));
    for i = 1:size(f,1)
        whitnum(i) = whittle_number(f,i,i);
    end
    ec = exp(whitnum-max(whitnum));
    ec = ec/sum(ec);
    rng = [0 cumsum(ec)];
    sr = rand(1);
    indx = (sr >= rng(1:end-1)) & (sr <= rng(2:end));
    u = sym(indx);
    v = u;
end
% Generate a LUT for stirling's formula
stlut = stirl((0:n));
% tf = f;   %Uncomment here and below for error checking if wanted
seye = sparse(1:N,1:N,1);
ytrial = zeros(1,n);
ytrial(end) = v;
% Precompute as much as possible before entering inner loop
rsum = sum(f,2);
term1 = sum(stlut(rsum+1))- sum(sum(stlut(nonzeros(f)+1))); %The +1 is for correct indexing in the stlut
rsum(rsum==0) = 1;
sd = sparse(1:N,1:N,1./rsum); %Much faster than spdiags!
rsum = sum(f,2);
for mm=1:n-2
    ytrial(mm) = u;
    % Update row sum
    rsum(u) = rsum(u) -1;
    if rsum(u) > 0
        sd(u,u) = 1/rsum(u);
    else
        sd(u,u) = 0;
    end
    rw = sym(f(u,:) > 0); %These are the remaining options
    numb = zeros(1,3);
    cnt = 1;
    % Loop through all options for the next number
    for i=1:length(rw)
        ut = rw(i);
        g = f;
        g(u,ut) = g(u,ut) - 1;
        fs = seye - sd*g;
        fs(v,:) = [];
        fs(:,ut) = [];
        cf = abs(det(fs)); % Possibility of accuracy issue here
        if cf > 0
            % g differs from f in only one element, so we take the
            % precomputed sum, remove the original element, and replace
            % with the corresponding one from g
            numb(cnt,3) = term1+stlut(f(u,ut)+1)-stlut(g(u,ut)+1);
            numb(cnt,2) = ut;
            numb(cnt,1) = numb(cnt,3) + log(cf);
            % computing log(cf) is safer, but slower with
            % sum(log(abs(diag(u)))), where [l u] = lu(fs)
            cnt = cnt + 1;
        end
    end
    % Compute relative probabilities of the different options
    ec = exp(numb(:,1) - max(numb(:,1)));
    ec = ec/sum(ec);
    rng = [0 cumsum(ec)'];
    % Choose one in accordance with their probabilites
    sr = rand(1);
    indx = (sr >= rng(1:end-1)) & (sr <= rng(2:end));
    ut = numb(indx,2);
    f(u,ut) = f(u,ut) - 1;
    u = ut;
    term1 = numb(indx,3);
end
ytrial(n-1) = numb(cnt-1,2);

% Uncomment  for error checking if wanted
% f = zeros(N,N);
% a = embed(ytrial,2);
% for i = 1:size(a,1)
%     f(a(i,1),a(i,2)) = f(a(i,1),a(i,2)) + 1;
% end
% if any(any(tf - f))
%     fprintf('Error!\n')
% end

% Convert number sequence back to symbol sequence
ls = size(w,2);
seq = zeros(1,n+ls-1);
seq(1:ls) = w(ytrial(1),:);
for i=2:n
    seq(ls+i-1) = w(ytrial(i),ls);
end

function s = stirl(x)
% Computes log(x!)
% Uses a Stirling series approximation for x > 16
i = (x <= 16);
j = (x > 16);
a = x(i);
b = x(j);
s1 = log(factorial(a));
s2 = b.*log(b) - b + 0.5*log(2*pi*b) + 1./(12*b)- 1./(360*b.^3) + 1./(1260*b.^5)-1./(1680*b.^7);
s = zeros(length(x),1);
s(i) = s1;
s(j) = s2;

Contact us