Code covered by the BSD License

# Causal State Modeller Toolbox

### David Kelly (view profile)

11 Oct 2011 (updated)

Allows users to infer minimal and optimal statistical predictors from time series data.

Function signature: `[StatComp, Var, EntRate, RelEntRate, RelEnt, numstates] = MetricCalculator(Tmatrix, freq, freq2, L_Max, numstates, DFName, options)`
```function [StatComp, Var, EntRate, RelEntRate, RelEnt, numstates]...
= MetricCalculator(Tmatrix, freq, freq2, L_Max, numstates, DFName, options)
%METRICCALCULATOR  Compute information-theoretic metrics for an inferred model.
%
% Inputs:
%   Tmatrix   - labelled transition matrix, indexed as
%               Tmatrix(symbol, from_state, to_state).
%   freq      - empirical word frequency vector over words up to length
%               L_Max, indexed via base2dec of '1'-prefixed word strings.
%   freq2     - word frequencies generated by the inferred model, same
%               indexing as freq.
%   L_Max     - maximum word length used in the distribution comparisons.
%   numstates - number of states (passed through unchanged to the output
%               and the metrics file).
%   DFName    - base name for output; metrics go to '<DFName>_Metrics.txt'.
%   options   - flag vector; options(3..8) select which metrics are
%               computed and printed (see the fprintf section below).
%
% Outputs (each only assigned when its option flag is set):
%   StatComp   - statistical complexity in bits.
%   EntRate    - entropy rate of the inferred model, bits per symbol.
%   Var        - summed absolute difference between the two length-L_Max
%                word distributions (see NOTE(review) below).
%   RelEnt     - Kullback-Leibler divergence (bits) from the empirical to
%                the inferred word distribution at length L_Max; Inf when
%                the model assigns zero probability to an observed word.
%   RelEntRate - approximate relative entropy rate (last per-symbol
%                increase in relative entropy); Inf if any word length hit
%                a zero-probability word, NaN when L_Max < 2.

NumStates = size(Tmatrix,2);
AlphabetLength = size(Tmatrix,1);

% Stationary distribution over the inferred states (project helper).
sd = CalculateSD(Tmatrix);

if options(4) == 1
    % Statistical complexity: Shannon entropy (bits) of the stationary
    % state distribution. Zero-probability states contribute nothing.
    StatComp = 0;
    for i = 1:length(sd)
        if sd(i) ~= 0
            StatComp = StatComp - sd(i)*log2(sd(i));
        end
    end
end

if options(5) == 1
    % Entropy rate of the inferred model:
    %   h = -sum_i mu(i) sum_{j,k} T(k,i,j) log2 T(k,i,j)
    EntRate = 0;
    for i = 1:NumStates
        mu = sd(i);
        for j = 1:NumStates
            for k = 1:AlphabetLength
                if Tmatrix(k,i,j) ~= 0
                    EntRate = EntRate - mu*Tmatrix(k,i,j)*log2(Tmatrix(k,i,j));
                end
            end
        end
    end
end

if sum(options(6:8)) > 0
    % Compare the empirical word distribution (freq) with the word
    % distribution generated by the inferred model (freq2); normalise both.
    freq = freq/sum(freq);
    freq2 = freq2/sum(freq2);

    % Words of length L occupy indices base2dec('1' + L digits). The
    % leading '1' disambiguates words with leading zero symbols.
    % BUGFIX: the largest digit is obtained with dec2base, which returns a
    % single character for any base up to 36 ('A', 'B', ...). The original
    % num2str(AlphabetLength-1) produced a multi-character string (e.g.
    % '10') for alphabets of more than 10 symbols, corrupting the index
    % strings fed to base2dec.
    maxdigit = dec2base(AlphabetLength-1, AlphabetLength);

    % NOTE(review): this sums the FULL absolute differences; the
    % conventional total variational distance carries a factor of 1/2.
    % Preserved as-is for backward compatibility with existing results.
    absdiff = abs(freq2-freq);
    firststring = ['1', repmat('0', 1, L_Max)];
    sumbegin = base2dec(firststring, AlphabetLength);
    laststring = ['1', repmat(maxdigit, 1, L_Max)];
    sumlimit = base2dec(laststring, AlphabetLength);
    Var = sum(absdiff(sumbegin:sumlimit));

    % Relative entropy (Kullback-Leibler divergence) from the empirical to
    % the inferred distribution, for each word length 1..L_Max. A word
    % observed empirically but assigned zero model probability makes the
    % divergence infinite; that is flagged with a -1 sentinel and converted
    % below.
    RelEnts = zeros(L_Max,1);
    for wordlength = 1:L_Max
        firststring = ['1', repmat('0', 1, wordlength)];
        sumbegin = base2dec(firststring, AlphabetLength);
        laststring = ['1', repmat(maxdigit, 1, wordlength)];
        sumlimit = base2dec(laststring, AlphabetLength);

        RelEnt = 0;
        for i = sumbegin:sumlimit
            if freq(i) ~= 0
                if freq2(i) ~= 0
                    RelEnt = RelEnt + freq(i)*log2(freq(i)/freq2(i));
                else
                    RelEnt = -1;   % sentinel: divergence is infinite
                    break
                end
            end
        end

        RelEnts(wordlength) = RelEnt;
    end

    % Report the sentinel as a proper infinity (was `1/0` in the original).
    if RelEnt == -1
        RelEnt = Inf;
    end

    % The relative entropy rate is the per-symbol increase in relative
    % entropy; approximate the infinite-word-length limit by the last
    % finite difference (length L_Max minus length L_Max-1).
    if L_Max >= 2
        RelEntDiffs = RelEnts(2:L_Max) - RelEnts(1:L_Max-1);
        RelEntRate = RelEntDiffs(L_Max-1);
    else
        % With a single word length there is no difference to take; the
        % original code errored here (index 0), so report NaN instead.
        RelEntRate = NaN;
    end

    % If any word length hit the zero-probability sentinel, the rate is
    % infinite as well.
    if any(RelEnts == -1)
        RelEntRate = Inf;
    end
end

% Print the selected metrics to '<DFName>_Metrics.txt'.
metricsfile = strcat(DFName,'_Metrics.txt');
fid = fopen(metricsfile, 'wt');
if fid == -1
    % Fail loudly instead of silently fprintf-ing to an invalid handle.
    error('MetricCalculator:fileOpen', ...
        'Could not open %s for writing.', metricsfile);
end
if options(4) == 1
    fprintf(fid, 'Statistical Complexity: %6f\n', StatComp);
end
if options(5) == 1
    fprintf(fid, 'Entropy Rate: %6f\n', EntRate);
end
if options(3) == 1
    fprintf(fid, 'Number of States: %i\n', numstates);
end
if options(6) == 1
    fprintf(fid, 'Relative Entropy: %6f\n', RelEnt);
end
if options(7) == 1
    fprintf(fid, 'Relative Entropy Rate: %6f\n', RelEntRate);
end
if options(8) == 1
    fprintf(fid, 'Total Variational Distance: %6f\n', Var);
end
fclose(fid);

```