Code covered by the BSD License  

Highlights from
CAIM Discretization Algorithm

from CAIM Discretization Algorithm by Guangdi Li
CAIM (class-attribute interdependence maximization) is designed to discretize continuous data.

CAIM_Discretization( OriginalData, C )
function [ DiscreData,DiscretizationSet ] = CAIM_Discretization( OriginalData, C )
%CAIM Algorithm
%Given: M examples described by continuous attributes Fi, S classes
%		For every Fi do: 
%Step 1 
%	1.1 find maximum (dn) and minimum (do) values
%	1.2 sort all distinct values of Fi in ascending order and initialize all possible interval 	boundaries, B, with the minimum, maximum, and all the midpoints of all adjacent pairs in the set 
%	1.3 set the initial discretization scheme to D:{[do,dn]}, set variable GlobalCAIM=0 
%Step 2 
%	2.1 initialize k=1 
%	2.2 tentatively add an inner boundary, which is not already in D, from set B, and 	calculate the corresponding CAIM value 
%	2.3 after all tentative additions have been tried, accept the one with the highest 	corresponding value of CAIM
%	2.4 if (CAIM >GlobalCAIM  or  k<S) then update D with the accepted, in step 2.3, 	boundary and set the GlobalCAIM=CAIM, otherwise terminate 
%	2.5 set k=k+1 and go to 2.2
%Result:	Discretization scheme D 

%Paper: Kurgan, L. and Cios, K.J. (2002). CAIM Discretization Algorithm, IEEE Transactions of Knowledge and Data Engineering, 16(2): 145-153
% This code is implemented by Guangdi Li, 2009/06/04


% OriginalData is organized as F1,F2,...,Fm,C1,C2,...,Cn
F = size( OriginalData,2 ) - C ;
M = size( OriginalData,1 );
DiscreData = zeros( M,C+F ); 
DiscreData( :,F+1:F+C ) = OriginalData( :,F+1:F+C );
% Assume the maximum number of interval is M/(3*C)
MaxNumF = floor(M/(3*C));
% Save all the discretization intervals, which is saved in column
DiscretizationSet = zeros( MaxNumF,F );

for p = 1:F

    % Step 1
    %Dn = max( OriginalData( :,p )); % the maximum boundary 
    %Do = min( OriginalData( :,p )); % the minimum boundary   
    SortedInterval = unique( OriginalData( :,p ) );
    B = zeros( 1,length( SortedInterval )-1 );
    Len = length( B );
    for q = 1:Len
        B( q ) = ( SortedInterval( q ) + SortedInterval( q+1 ) )/ 2;
    end
    %B  
    D = zeros( 1,MaxNumF ); % D save all discretizations for variable Fi
    %D( 1 ) = Do; D( 2 ) = Dn; 
    GlobalCAIM = -Inf;
    
    %Step 2
    k=0; % save the number of discretizations in D, the initiate state is 2 
    while true
          CAIM = - Inf; Local = 0;
          for q = 1:Len
              if isempty( find( D( 1:k )==B(q), 1 ) ) == 1                  
                 DTemp = D;
                 DTemp( k+1 ) = B( q );
                 DTemp( 1:( k+1 ) ) = sort( DTemp( 1:( k+1 ) ) );
                 CAIMValue = CAIM_Evaluation( OriginalData,C,p,DTemp( 1:( k+1 ) ) );
                 if CAIM < CAIMValue 
                    CAIM = CAIMValue;
                    Local= q;
                 end           
              end        
          end
         % CAIM
         % GlobalCAIM
          if GlobalCAIM < CAIM && k < MaxNumF 
             GlobalCAIM = CAIM;
             k = k + 1;
             D( k ) = B( Local );
             D( 1:k ) = sort( D( 1:k ) );
          elseif  k <= MaxNumF && k <= C
             k = k + 1;
             D( k ) = B( Local );
             D( 1:k ) = sort( D( 1:k ) );              
          else
              break;
          end   
    end
    DiscretizationSet( 1:k,p )= D( 1:k )';
    % do the discretization process according to intervals in D. 
    DiscreData( :,p ) = DiscretWithInterval( OriginalData,C,p,D( 1:k ) );
    
end

end

Contact us at files@mathworks.com