Code covered by the BSD License  

Highlights from
Huffman Encoder

Huffman Encoder

by

 

Input-number of occurrences or probabilities in a row matrix form Output-avelength,codewords,probab

huffmankasan.m
function[codex tau]=huffmankasan(extension)
%Created by Jamil Kasan from Manila,Philippines
%This is a huffman encoder which you can either input probablities or number 
%of occurences
%Input should be in a row matrix form e.g. huffmankasan([11,22,111,33])
%The code outputs the probabilities, average length and the codewords
%This is faster than huffmandict and has lesser limitations

%Proof of speed
%aa=[0.1,0.2,0.3,0.4]
%aa =
%   0.1000    0.2000    0.3000    0.4000
%tic;
%huffmankasan(aa)
%toc
%      Probabilities        Length
%   0.400000000000000   1.000000000000000
%   0.300000000000000   2.000000000000000
%   0.200000000000000   3.000000000000000
%   0.100000000000000   3.000000000000000
%|-----------------------------------------------------------|
%         Average Codeword Length = 1.9 bits/symbol
%|-----------------------------------------------------------|
%
%ans = 
%
%    {1x1 cell}
%    {1x1 cell}
%    {1x1 cell}
%    {1x1 cell}
%Elapsed time is 0.006584 seconds.

%tic;
%huffmandict(1:4,aa)
%toc
%ans = 
%    [1]    [1x3 double]
%    [2]    [1x3 double]
%    [3]    [1x2 double]
%    [4]    [         1]
%Elapsed time is 0.019976 seconds.

%To try its limitations play the huffmancode file and type 2 for 2nd
%extension, it's  about 7569 probabilities
%huffmandict errors with that file

probs=sum(extension);
%total count
actualprobs=sort(extension./probs,'descend');
%count per char/total count, probability sorted from highest
lext=length(extension);
%number of probabilities or occurences
codeallot=zeros(1,lext);
x=sort(extension,'descend');
%occurence sorted from highest
xsort=x;
%copy of sorted extension made as basis
Hs=0;

for i=1:lext
   Hs=Hs+ xsort(i)*log2(1/xsort(i));
   %compute for the entropy
end

while length(x)~=1
    %adds all probabilities until there's only one value
    lx=length(x);
    q=lext; 
    %for this particular file, the first value of q=87 if "1" is chosen
    %a copy of the total chars used
    j=xsort(lext);
    %smallest probability value (first val: 87th)
    while x(lx)~=j
        %two matrices were compared, x(lx) denotes the smallest probability
        %of the given
        q=q-1;
        %decrements the count of total characters used i.e. q=87-1=86
        j=j+xsort(q); 
        %i.e. 87th + 86th value of the probabilities
        %the last probability value i.e the smallest probability value is
        %added to the probability before it (the next probability with the
        %smallest value)
    end
    for o=q:lext
        %i.e first val: 86:87
        %codeallot = code allotment matrix; 
        if codeallot(o)~=0
            %"7" denotes the code bit "1"
            codeallot(o)=codeallot(o)+7*10^ceil(log10(codeallot(o)));
            %if the present codeallot(o) line say, codeallot(86) already has a
            %value, for example "7", another bit will be added i.e.
            %previous val:7 multiplied to 10^ceil(log10(7))
            %7 + 70 = 77, "77" denotes the code "11"
        else
            codeallot(o)=7;
            %if there is no previous value, codeallot(o) automatically gives
            %a value of "7" 
        end
    end
    p=q-1;
    %i.e first val: p=86-1=85
    %decrements q again but with a different variable
    k=xsort(p);
    %first val: k=xsort(85)
    %k= pth value of the extension
    while k~=x(lx-1)
        %if the value of k say k=xsort(85) is not yet equal to the value of
        %x(lx-1) say x(86), k will be added to the decremented pth value say
        %xsort(84) and so on until the value of k matches the value of x(lx-1)
        p=p-1;
        k=k+xsort(p);
    end
    for o=p:q-1
        %for the grouped values xsort(p:q-1) i.e xsort(84) to
        %xsort(86) if their sum exceeded the preceeding probabilities,
        %say sum(xsort(p:q-1))>xsort(p-1), their corresponding xsort(p:q-1)
        %code allotment matrix will be given a value of "6" w/c denotes the
        %code bit"0"
        if codeallot(o)~=0
            codeallot(o)=codeallot(o)+6*10^ceil(log10(codeallot(o)));
            %i.e, 6+60 = 66 denotes "00"
        else
            codeallot(o)=6;
        end
    end
    xx=x(lx)+x(lx-1);
    %smallest probabilities added
    x(lx-1)=xx;
   
    h=1;
    x=sort(x(1:lx-1),'descend');
    while x(h)==xsort(h)
        %if basis (xsort = original probab matrix) is still equal to x
        %(matrix with added probabilities)
        h=h+1;    
    end
    if length(x)~=1
        if p~=h
            %probabilities move w/r to x position together with its alloted
            %codewords
            xsort=[xsort(1:h-1) xsort(p:lext) xsort(h:p-1)];
            codeallot=[codeallot(1:h-1) codeallot(p:lext) codeallot(h:p-1)];
        end
    end
 
end
codeallot=sort(codeallot);
tau=0;
lente=zeros(1,lext);
for t=1:lext
    lente(t)=ceil(log10(codeallot(t)));
    %length of each codeword
    tau=tau+ceil(log10(codeallot(t)))*actualprobs(t);
end
actprobs=transpose(actualprobs);
codexx=transpose(codeallot);
lcodex=transpose(lente);
c={'1','0'};
%It is only a creation of strings of 1's and 0's to correct the bits made
%e.g. 66 ==> 00 7677 ==> 1011
for i=1:length(actprobs)
    u=1;
   for t=lcodex(i):-1:1
       f=floor(codexx(i)/10^(t-1));
       codexx(i)=mod(codexx(i),10^(t-1));       
       if f==6
           if u==1
                d=c{2};
           else
                d=[d c{2}];
           end
       else
           if u==1
                d=c{1};
           else
                d=[d c{1}];
           end
       end
       codex{i,:}={d};
       u=u+1;
   end
end
format long
all=[actprobs, lcodex];
disp(['      Probabilities','        Length'])
disp(all)
format short
disp('|-----------------------------------------------------------|')
disp(['         Average Codeword Length = ',num2str(tau),' bits/symbol'])
disp('|-----------------------------------------------------------|')

Contact us