Code covered by the BSD License  

Highlights from
Semi-supervised Affinity Propagation clustering

from Semi-supervised Affinity Propagation clustering by Kaijun Wang
embed Silhouette index into iterations of Affinity propagation clustering to supervise its running

test_APclustering.m
% Improved programs of Affinity Propagation clustering;
% original Affinity Propagation (see Frey & Dueck, Science, Feb. 2007)
% Note: Statistics Toolbox of Matlab needs to be installed
% Kaijun WANG: sunice9@yahoo.com, April 2007.

clear;
alg = 1;      % 1 --- Improved AP, 0 --- original AP
type = 1;   % 1 - using Euclidean distances for general data;
% 2 - Pearson correlation coefficients for gene data (see row 35)

% selecting a data set, rows - data points, columns - dimensions
id = 1;

switch id
%(1) general simulated data, Euclidean distances, true labels in 1st column
  case 1
     sw='4k2_far.txt';  % true number of clusters is 4
  case 2
     sw='yourdata.txt';

  case 11           % true labels being unknown (1st column is data too)
     sw='yourdata.txt'; 
     
    
%(2) real gene data, Pearson distances, true labels in 1st column
  case 21
     sw='leuk72_3k.txt';  % true number of clusters is 3
  case 22
     sw='yourdata.txt'; %

  case 31            % true labels being unknown (1st column is data too)
     sw='yourdata.txt'; 
end

% initialization
if id > 20
   type = 2;
end
data = load(sw);
[nrow, dim] = size(data);
truelabels = ones(nrow,1);
if id < 11 || (id > 20 && id < 30)  % when 1st column is class labels
   truelabels = data(:,1); 
   data = data(:,2:dim);
   dim = dim-1;
end

if type == 1
   [Dist, dmax] = similarity_euclid(data,2);
   R = 'euclidean';  
else
   Dist = 1-(1+similarity_pearson(data'))/2;
   dmax = 1;
   R = 'correlation';
end

nap = nrow*nrow-nrow;
M = zeros(nap,3);
j=1;
for i=1:nrow
   for k = [1:i-1,i+1:nrow]
     M(j,1) = i;
     M(j,2) = k; 
     M(j,3) = -Dist(i,k);
     j = j+1;
   end;
 end;
Dist = [];

p = median(M(:,3));                     % Set preference to median similarity
pmin = -dmax;
pstep = pmin*0.1;                       % decreasing step of p


disp(' '); disp('==> Clustering is running, please wait ...');
if alg
  [label,NC,Sil] = apcluster_imp(M, p, pstep, pmin, ...
    data, R, 'convits', 300,'maxits',1000); %'plot',
  fprintf('\n## Clustering solution by Improved Affinity Propagation:\n');

else
  [label,n,d,e,unconverged] = apcluster(M, p, 'convits', 300, 'maxits',1000);   
  if unconverged
  fprintf('\n*** Dampfact is increased to 0.7, and clustering is re-running ...');
  label = apcluster(M, p, 'convits', 300, 'maxits',1000', 'dampfact', 0.7);
  end
  C = unique(label);
  NC = length(C);
  [C, label] = ind2cluster(label);
  fprintf('$$ Clustering solution by original Affinity Propagation:\n');
end
M = [];

fprintf('Number of clusters is %d\n',NC);
C = valid_external(label, truelabels);
fprintf('Fowlkes-Mallows validity index: %f\n', C(4));
valid_errorate(label, truelabels);

Contact us at files@mathworks.com