Code covered by the BSD License  

Highlights from
VChooseKO

image thumbnail
from VChooseKO by Jan Simon
Choose K elements from a vector without repetitions and with order [MEX]

TestVChooseKO(doSpeed)
function TestVChooseKO(doSpeed)
% Automatic test: VChooseKO
% This is a routine for automatic testing. It is not needed for processing and
% can be deleted or moved to a folder, where it does not bother.
%
% TestVChooseKO(doSpeed)
% INPUT:
%   doSpeed: Optional logical flag to trigger time consuming speed tests.
%            Argument omitted: long test.
%            TRUE: short speed test, FALSE: no speed test.
% OUTPUT:
%   On failure the test stops with an error.
%   A table of speed measurements is written to the command window.
%
% The long speed test is really slow, if less than 512 MB free RAM is available.
% Either drink a cup of coffee, buy more RAM or start this function with the
% argument TRUE to reduce the speed testing. Exhausting the physical RAM can
% slow down even subsequent tests with small arrays.
%
% Speed and results of VChooseKO are compared with some other programs from the
% FEX, if they are found in the path. Some of them are not compatible with
% Matlab 6.5 and "crash" is displayed in this case.
%
% The local function VChooseKO_M is a Matlab version of the algorithm. It was
% created to debug the implementation in C and not efficient in Matlab.
% VChooseKO_M_shift was used to examine the method suggested by Knuth for
% calculating 1 permutation. It is neither used for processing nore for testing.
%
% Tested: Matlab 6.5, 7.7, 7.8, WinXP
% Author: Jan Simon, Heidelberg, (C) 2010 matlab.THISYEAR(a)nMINUSsimon.de
% License: BSD (use/copy/modify on own risk, but mention author)

% $JRev: R0l V:011 Sum:Fu0BAIcjLSSb Date:14-Jan-2010 17:26:30 $
% $File: Published\VChooseK_\TestVChooseKO.m $

% Initialize: ==================================================================
ErrID = ['JSim:', mfilename];
LF = char(10);

% Default for the input:
if nargin == 0
   % Include the local M version VChooseKO_M in the speed test:
   % (actually not needed, but might be interesting)
   testLocalMVersion = false;  % M-version for debugging/teaching only
   doSpeedTest       = true;
   longSpeedTest     = true;
else  % One input argument:
   testLocalMVersion = false;
   doSpeedTest       = any(doSpeed);
   longSpeedTest     = false;
end

% Times for testing:
randDelay = 5.0;       % [sec], time for random tests
if longSpeedTest
   SpeedTime = 1.0;    % [sec], time for speed tests
else
   SpeedTime = 0.25;
end

% Hello:
whichFunc = which('VChooseKO');
disp(['==== Test VChooseKO:  ', datestr(now, 0), LF, ...
      'Version: ', whichFunc, LF]);

% Start tests: -----------------------------------------------------------------
% Loop over accepted classes:
ClassList = {'double', 'single', 'uint32', 'int16', 'int8', 'char'};
for iClass = 1:length(ClassList)
   aClass = ClassList{iClass};
   disp(['== ', upper(aClass)]);
   
   % Standard tests - empty input, cell without populated elements, small
   % known answer test:
   S = VChooseKO(local_cast([], aClass), 0);
   if isempty([]) && isa(S, aClass)
      disp('  ok: empty matrix, K=0');
   else
      error(ErrID, 'Failed for empty matrix, K=0.');
   end
   
   S = VChooseKO(local_cast([], aClass), 2);
   if isempty([]) && isa(S, aClass)
      disp('  ok: empty matrix, K=2');
   else
      error(ErrID, 'Failed for empty matrix, K=2.');
   end

   S = VChooseKO(local_cast(1, aClass), 0);
   if isempty([]) && isa(S, aClass)
      disp('  ok: (1, 0)');
   else
      error(ErrID, 'Failed for (1, 0).');
   end

   S = VChooseKO(local_cast(1:4, aClass), 0);
   if isempty([]) && isa(S, aClass)
      disp('  ok: (1:4, 0)');
   else
      error(ErrID, 'Failed for (1:4, 0).');
   end
   
   S = VChooseKO(local_cast(1, aClass), 1);
   if isequal(S, local_cast(1, aClass)) && isa(S, aClass)
      disp('  ok: 1');
   else
      error(ErrID, 'Failed for 1.');
   end
   
   S = VChooseKO(local_cast(1:2, aClass), 1);
   if local_Test(S, 2, 1) && isa(S, aClass) ...
         && isequal(S, local_cast([1;2], aClass))
      disp('  ok: 1:2');
   else
      error(ErrID, 'Failed for 1:2.');
   end
   
   S = VChooseKO(local_cast(1:2, aClass), 2);
   if local_Test(S, 2, 2) && isa(S, aClass) && ...
         isequal(S, local_cast([1, 2; 2, 1], aClass))
      disp('  ok: 1:2');
   else
      error(ErrID, 'Failed for 1:2.');
   end

   S = VChooseKO(local_cast(1:3, aClass), 2);
   if local_Test(S, 3, 2) && isa(S, aClass) && ...
         isequal(S, VChooseKO_M(local_cast(1:3, aClass), 2))
      disp('  ok: (1:3, 2)');
   else
      error(ErrID, 'Failed for (1:3, 2).');
   end
   
   S = VChooseKO(local_cast(1:3, aClass), 3);
   if local_Test(S, 3, 3) && isa(S, aClass) && ...
         isequal(S, VChooseKO_M(local_cast(1:3, aClass), 3))
      disp('  ok: (1:3, 3)');
   else
      error(ErrID, 'Failed for (1:3, 3).');
   end

   for K = 1:6
      S = VChooseKO(local_cast(1:6, aClass), K);
      if local_Test(S, 6, K) && isa(S, aClass) && ...
            isequal(S, VChooseKO_M(local_cast(1:6, aClass), K))
         fprintf('  ok: (1:6, %d)\n', K);
      else
         error(ErrID, 'Failed for (1:6, %d).', K);
      end
   end
   
   S = VChooseKO(local_cast(0:127, aClass), 2);
   if local_Test(S, 128, 2) && isa(S, aClass) && ...
         isequal(S, VChooseKO_M(local_cast(0:127, aClass), 2))
      disp('  ok: (0:127, 2)');
   else
      error(ErrID, 'Failed for (0:127, 2).');
   end

end  % for list of classes

% Random tests: ----------------------------------------------------------------
% Catch cases I haven't thought of during the known answer tests above:
fprintf('\n== Random tests (%g sec):\n', randDelay);

iniTime = cputime;
nTest   = 0;
while cputime - iniTime < randDelay
   N = 3 + round((rand * 4) ^ 3);
   K = N;
   for iK = 3:N
      OutLen = prod(N - iK + 1:N);
      if OutLen > 1e6 || rand > 0.9
         K = iK - 1;
         break;
      end
   end
   fprintf('  N=%3d  K=%3d\n', N, K);
   
   X = 127 * rand(1, N);  % < 127 for INT8
   S = VChooseKO(X, K);
   if ~local_Test(S, N, K)
      error(ErrID, 'Failed for random test data.');
   end
   clear('S');
   nTest = nTest + 1;
end
fprintf('  ok: VChooseKO passed %d random tests arrays.\n', nTest);

% Invalid input: ---------------------------------------------------------------
fprintf('\n== Check rejection of bad input:\n');
tooLazy = false;

try  % Bad type of empty input:
   dummy   = VChooseKO({}, 1);  %#ok<*NASGU>
   tooLazy = true;
catch
   disp(['  ok: {} rejected: ', LF, '      ', strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, '{} not rejected.');
end

try  % Bad type of input:
   dummy   = VChooseKO({1, 2}, 1);
   tooLazy = true;
catch
   disp(['  ok: {1, 2} rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, '{1, 2} not rejected.');
end

try  % Output too large:
   dummy   = VChooseKO(zeros(1, 10), 15);
   tooLazy = true;
catch
   disp(['  ok: Too large output rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, 'Too large ouput not rejected.');
end

try  % Too few inputs:
   dummy   = VChooseKO(1);
   tooLazy = true;
catch
   disp(['  ok: No input rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, 'No input not rejected.');
end

try  % Too many inputs:
   dummy   = VChooseKO(1:2, 3, 4);
   tooLazy = true;
catch
   disp(['  ok: 3 inputs rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, '3 inputs not rejected.');
end

% Further ideas for senseful invalid inputs?

% Speed: -----------------------------------------------------------------------
if doSpeedTest
   SpeedTest(testLocalMVersion, longSpeedTest, SpeedTime);
end

% Ready:
fprintf('\n==> VChooseKO seems to work fine.\n');

return;

% ******************************************************************************
function SpeedTest(testLocalMVersion, longSpeedTest, SpeedTime)

ErrID   = ['JSim:', mfilename];
MatlabV = sscanf(version, '%f', 1);

fprintf('\n== Speed test (test time: %g sec):\n', SpeedTime);
      
% List of function to compare with:
FuncList = { ...
      'PERMS',       'Matlab toolbox (only K==N)'; ...
      'PICK',        ...
      'Stefan Stoll, 20-Oct-2006 (same speed for int/single/double)'; ...
      'COMBINATOR',  'Matt Fig, 30-May-2009'; ...
      'VChooseKO_M', 'Jan Simon, Matlab version for testing, 15-Jan-2009'; ...
      'VChooseKO',   'Jan Simon, Mex, 15-Jan-2009'};

% Loop over data size and input classes:
if longSpeedTest
   % No UINT(8/16/32) to support COMBINATOR:
   DataClass        = {'int8', 'int16', 'single', 'double'};
   DataClass_sizeof = [1, 2, 4, 8];  % In Bytes
   
   DataSize = { ...
         2, [2, 4, 8, 16, 32, 64, 128, 1024]; ...
         3, [3, 4, 8, 16, 32, 64, 128]; ...
         4, [4, 8, 16, 32]; ...
         5, [5, 8, 16]; ...
         6, [6, 8]; ...
         7, [7, 8]; ...
         8, 8; ...
         9, 9};
   
else  % Reduced tests only:
   DataClass        = {'int8', 'double'};
   DataClass_sizeof = 8;  % In Bytes
   
   DataSize = { ...
         2, [2, 4, 8, 16, 32, 64, 128, 256, 512]; ...
         4, [4, 8, 16, 32]; ...
         6, [6, 8]; ...
         8, 8};
   
   % Test only COMBINATOR and VChooseKO:
   tmpInd   = ismember(FuncList(:, 1), {'PERMS', 'COMBINATOR', 'VChooseKO'});
   FuncList = FuncList(tmpInd, :);
end

% Search for these tools:
Found = true(size(FuncList, 1), 1);
for iFunc = 1:size(FuncList, 1)
   Found(iFunc) = ~isempty(which(FuncList{iFunc, 1}));
end
FuncList = FuncList(Found, :);

% Remove local Matlab version from list of functions if wanted:
if ~testLocalMVersion
   tmpInd              = strcmp(FuncList(:, 1), 'VChooseKO_M');
   FuncList(tmpInd, :) = [];
end

% Show function names with authors:
FuncList_T = transpose(FuncList);
fprintf('%-12s:  %s\n', FuncList_T{:});

TooSlow = sprintf(' %8s', 'slow');
Crashed = sprintf(' %8s', 'crash');
Unknown = sprintf(' %8s', '-');
Omitted = sprintf(' %8s', 'omit');

fprintf('\n"slow": omitted, because previous data length took > 1 second\n');
fprintf('"-":    Not matching data type/size or Matlab version.\n'); 
fprintf('"omit": Caused a time-consuming crash on my computer ever.');
fprintf('\n');

for iSize = 1:length(DataSize)
   K       = DataSize{iSize, 1};
   DataLen = DataSize{iSize, 2};
   
   fprintf('\n%-22s', sprintf('K=%d, data length:', K));
   fprintf(' %8d', DataLen);
   fprintf('\n');
   for iFunc = 1:size(FuncList, 1)
      aFunc = FuncList{iFunc, 1};
      
      for iClass = 1:length(DataClass)
         aClass = DataClass{iClass};
         
         % Test PICK for one class only, because the speed does not change:
         if strcmpi(aFunc, 'pick') && ~strcmpi(aClass, 'double');
            continue;
         end
         
         % PERMS has same speed for all input types:
         if strcmpi(aFunc, 'perms') && ~strcmpi(aClass, 'double');
            continue;
         end
         
         % NPerSec stores the duration of the test for the last Data length. It
         % helps to avoid too slow computations to limit the duration of the
         % speed test:
         NPerSec = Inf;  % Set to arbitrary large value initially
         
         % Show the class andthe data lengths and start the loop:
         fprintf('  %-12s %-6s ', aFunc, aClass);
         for aDataLen = DataLen
            if NPerSec < 1  % Give up, if the former loop was too slow already:
               fprintf(TooSlow);
               continue;
            end
            
            % Create vector for specific class:
            aData = local_cast(mod(1:aDataLen, 127), aClass);
            N     = 0;
            try
               switch aFunc
                  case 'COMBINATOR'
                     aClass_aData = local_cast(aDataLen, aClass);
                     if double(aClass_aData) == aDataLen && (MatlabV > 7.0)
                        N = GetLoops('combinator', aClass_aData, K, 'p');
                        tic;
                        for iN = 1:N
                           S = combinator(aClass_aData, K, 'p');
                           clear('S');
                        end
                        NPerSec = N / (toc + eps);  % Loops per second
                        PrintLoop(NPerSec);
                     else  % The current DataLen exceeds INTMAX of the class:
                        fprintf(Unknown);
                     end
                     
                  case 'PICK'
                     % Stefan Stoll, ETH Zurich, 20 October 2006
                     % GetLoops fails for PICK sometimes on my computer (512MB
                     % RAM) most likely due to disk caching. Calling it twice
                     % reduces the risk of a wrong measurement, but it does not
                     % really solve the problem.
                     if K >= 8  % Time-consuming crash on my computer...
                        fprintf(Omitted)
                     else
                        N1 = GetLoops('pick', aData, K, 'o');
                        N2 = GetLoops('pick', aData, K, 'o');
                        N  = max(N1, N2);
                        
                        tic;
                        for iN = 1:N
                           S = pick(aData, K, 'o');  clear('S');
                        end
                        NPerSec = N / (toc + eps);  % Loops per second
                        PrintLoop(NPerSec);
                     end
                     
                  case 'VChooseKO'
                     N = GetLoops('VChooseKO', aData, K);
                     tic;
                     for iN = 1:N
                        S = VChooseKO(aData, K);  clear('S');
                     end
                     NPerSec = N / (toc + eps);  % Loops per second
                     PrintLoop(NPerSec);
                     
                  case 'VChooseKO_M'
                     % Matlab implementation as proof of concept:
                     % See flag: testLocalMVersion
                     N = GetLoops('VChooseKO_M', aData, K);
                     tic;
                     for iN = 1:N
                        S = VChooseKO_M(aData, K);  clear('S');
                     end
                     NPerSec = N / (toc + eps);  % Loops per second
                     PrintLoop(NPerSec);
                     
                  case 'PERMS'
                     if K == aDataLen
                        N = GetLoops('perms', aData);
                        tic;
                        for iN = 1:N
                           S = perms(aData);  clear('S');
                        end
                        NPerSec = N / (toc + eps);  % Loops per second
                        PrintLoop(NPerSec);
                     else
                        fprintf(Unknown);
                     end
                     
                  otherwise
                     error(ErrID, 'Unknown function.');
               end
               
            catch
               % Most likely the called function is not compatible to the used
               % Matlab version (happened for 6.5 only):
               fprintf(Crashed);
            end
            drawnow;  % Give CTRL-C a chance!
         end  % for aDataLen
         fprintf('  loops/s\n');
         
         % Extra pause to allow the virtual memory to relax:
         if NPerSec < 0.1
            pause(5);
         end
      end  % for DataLen
   end  % for iFunc
end  % for iData: DataList

return;

% ******************************************************************************
function PrintLoop(N)
if N > 10
   fprintf(' %8.0f', N);
else
   fprintf(' %8.1f', N);
end

return;

% ******************************************************************************
function A = local_cast(A, ClassName)
% Simulate CAST for Matlab 6
A = feval(ClassName, A);

return;

% ******************************************************************************
function Ok = local_Test(X, N, K)
% Test validity of permutations

Ok = true;
if K == 0 && isempty(X)
   return;
end

% 1. After sorting, all columns are equal. This implies equal column sums.
X  = double(X);  % No SINGLE and INT arithmetics in Matlab 6.5.
S  = sort(X, 1);
S1 = S - S(:, ones(1, size(S, 2)));
if any(S1(:))
   Ok = false;
end

% 2. Rows must be unique:
if 1
   if ~all(any(diff(X, 1, 1), 2))
      Ok = false;
   end
else  % Alternative method:
   C = max(max(X(:)), K);           %#ok<UNRCH>
   M = X * C .^ transpose(0:K - 1);
   if ~all(diff(M))
      Ok = false;
   end
end

% 3. Size must be [N*(N-1)*..*(N-K+1), K]
if ~isequal(size(X), [prod(N-K+1:N), K])
   Ok = false;
end

return;

% ******************************************************************************
function N = GetLoops(FcnName, Arg1, Arg2, Arg3, Arg4)
% Time to estimate the number of loops to get about 1 sec measurement:
TestTime = 0.2;  % [s]

% Call function through a handle:
Fcn = str2func(FcnName);

N     = 0;
iTime = cputime;
switch nargin
   case 2
      while cputime - iTime < TestTime
         S = feval(Fcn, Arg1);  clear('S');
         N = N + 1;
      end
      
   case 3
      while cputime - iTime < TestTime
         S = feval(Fcn, Arg1, Arg2);  clear('S');
         N = N + 1;
      end
      
   case 4
      while cputime - iTime < TestTime
         S = feval(Fcn, Arg1, Arg2, Arg3);  clear('S');
         N = N + 1;
      end
      
   case 5
      while cputime - iTime < TestTime
         S = feval(Fcn, Arg1, Arg2, Arg3, Arg4);  clear('S');
         N = N + 1;
      end
      
   otherwise
      error(['JSim:', mfilename, ':GetLoops'], 'Bad number of inputs.');
end

N = N / (cputime - iTime);  % Loops per second
N = max(1, round(N));

return;


% ******************************************************************************
function Y = VChooseKO_M_shift(V, k)  %#ok<DEFNU>
% Method suggested by Knuth.
% The result is not lexicographically ordered.
% Slower than the nested loop approach.
% This function is not called for the testing, but stored here for completeness.
%
% Tested: Matlab 6.5, 7.7, 7.8
% Author: Jan Simon, Heidelberg, (C) 2010 matlab.THISYEAR(a)nMINUSsimon.de

if k == 1
   Y = V(:);
   return;
end

V = reshape(V, 1, []);
n = numel(V);
km1 = k - 1;
km2 = k - 2;
kp1 = k + 1;

nk1  = n - k + 1;
nY   = prod(nk1:n);     % Size of output: N*(N-1)*..*(N-K+1)
Y    = zeros(nY, k);

Step  = prod(1:nk1);
T     = zeros(1, km1);
T2    = T;
Limit = n-1:-1:n-k+1;

general = 0;

S  = V;
S2 = S;
iY = 0;   % Index for output
while iY < nY
   if general
      for j = 1:km2
         tempJ = T2(j);
         if tempJ == 1
            jt    = j + 1;  % Simple SWAP:
            tempS = S(jt);
            S(jt) = S(j);
            S(j)  = tempS;
         elseif tempJ
            jt        = j + tempJ;  % Cyclical shift to the right: S(j:jt)
            tempS     = S(jt);
            S(j+1:jt) = S(j:jt-1);
            S(j)      = tempS;
         end
      end
      S2 = S;
   end
   
   tempJ = T2(km1);
   if tempJ == 1
      jt    = k;  % Simple SWAP:
      tempS = S(jt);
      S(jt) = S(km1);
      S(km1)  = tempS;
   elseif tempJ
      jt      = km1 + tempJ;  % Cyclical shift to the right: S(j:jt)
      tempS   = S(jt);
      S(k:jt) = S(km1:jt-1);
      S(km1)  = tempS;
   end
   
   iY       = iY + 1;  % Output of first K elements of S
   Y(iY, :) = S(1:k);
   
   for w = kp1:n       % Insert elements K+1:N of S as last element
      iY           = iY + 1;
      Y(iY, 1:km1) = S(1:km1);
      Y(iY, k)     = S(w);
   end
   
   z = km1;
   while z > 0
      if T(z) < Limit(z)
         T(z) = T(z) + 1;
         break;
      else
         T(z) = 0;
         z    = z - 1;
      end
   end
   
   % If the last element is changed, the former [S] can be used again to save
   % the time for the first K-2 permutations:
   if z == km1
      general = 0;
      T2 = T;
      T2(1:km1-1) = 0;
      S  = S2;
   else
      general = 1;
      T2 = T;
      S  = V;
   end
end

return;

% ******************************************************************************
function Y = VChooseKO_M(X, k)
% Nested loop approach.
% This Matlab implementation can compete with Matt Fig's COMBINATOR for small N
% and K, e.g. (3, 3), but not for real world problems.
% This is thought for testing, teaching and debugging only. The algorithm is
% equivalent to the implementation in VChooseKO.c.
%
% Tested: Matlab 6.5, 7.7, 7.8
% Author: Jan Simon, Heidelberg, (C) 2010 matlab.THISYEAR(a)nMINUSsimon.de

on  = true;
off = false;

if k == 1
   Y = X(:);
   return;
end

X = reshape(X, 1, []);
n = numel(X);

km1 = k - 1;
nY  = prod(n - k + 1:n);     % Size of output: N*(N-1)*..*(N-K+1)
Y   = zeros(nY, k);

iY  = 0;
Q   = true(1, n);
Q(1:km1) = off;

V = 1:k;   % List of indices: K for loops
S = X(1:k);
while iY < nY
   % Copy first K-1 values from current S, insert last N-K+1 values 
   % to the last position:
   for iN = 1:n
      if Q(iN)
         S(k)     = X(iN);
         iY       = iY + 1;
         Y(iY, :) = S;
      end
   end
   
   % Find right most loop which hasn't reached the limit:
   c     = km1;
   found = off;
   while 1
      cK    = V(c);
      Q(cK) = on;
      if cK < n
         for jK = cK + 1:n
            if Q(jK)
               V(c)  = jK;
               Q(jK) = off;
               S(c)  = X(jK);
               found = on;
               break;
            end
         end
         if found
            break;
         end
      end
      c = c - 1;
      if c == 0
         return;
      end
   end
   
   % Restart higher loops:
   for c = c + 1:km1
      for jK = 1:n
         if Q(jK)
            V(c)  = jK;
            S(c)  = X(jK);
            Q(jK) = off;
            break;
         end
      end
   end
end

return;

Contact us at files@mathworks.com