Code covered by the BSD License  

Highlights from
VChooseKR

image thumbnail
from VChooseKR by Jan Simon
Choose K elements from a vector with repetitions and without order [MEX]

TestVChooseKR(doSpeed)
function TestVChooseKR(doSpeed)
% Automatic test: VChooseKR
% This is a routine for automatic testing. It is not needed for processing and
% can be deleted or moved to a folder, where it does not bother.
%
% TestVChooseKR(doSpeed)
% INPUT:
%   doSpeed: Optional logical flag to trigger time consuming speed tests.
%            Argument omitted: long test.
%            TRUE: short speed test, FALSE: no speed test.
% OUTPUT:
%   On failure the test stops with an error.
%   A table of speed measurements is written to the command window.
%
% The long speed test is really slow, if less than 512 MB free RAM is available.
% Either drink a cup of coffee, buy more RAM or start this function with the
% argument TRUE to reduce the speed testing. Exhausting the physical RAM can
% slow down even subsequent tests with small arrays.
%
% Speed and results of VChooseKR are compared with some other programs from the
% FEX, if they are found in the path. Some of them are not compatible with
% Matlab 6.5 and "crash" is displayed in this case.
%
% The local function VChooseKR_M is a Matlab version of the algorithm. It was
% created to debug the implementation in C and not efficient in Matlab.
%
% Tested: Matlab 6.5, 7.7, 7.8, WinXP
% Author: Jan Simon, Heidelberg, (C) 2009-2010 matlab.THISYEAR(a)nMINUSsimonDOTde
% License: BSD (use/copy/modify on own risk, but mention author)

% $JRev: R0j V:009 Sum:4JCtaJNAqz28 Date:01-Jan-2009 02:48:51 $
% $File: User\JSim\Published\VChooseK_\TestVChooseKR.m $

% Initialize: ==================================================================
ErrID = ['JSim:', mfilename];
LF = char(10);

% Default for the input:
if nargin == 0
   % Include the local M version VChooseKR_M in the speed test:
   % (actually not needed, but might be interesting)
   testLocalMVersion = false;
   doSpeedTest       = true;
   longSpeedTest     = true;
else  % One input argument:
   testLocalMVersion = false;
   doSpeedTest       = any(doSpeed);
   longSpeedTest     = false;
end

% Times for testing:
randDelay = 5.0;       % [sec], time for random tests
if longSpeedTest
   SpeedTime = 1.0;    % [sec], time for speed tests
else
   SpeedTime = 0.25;
end

% Hello:
whichFunc = which('VChooseKR');
disp(['==== Test VChooseKR:  ', datestr(now, 0), LF, ...
      'Version: ', whichFunc, LF]);
[dum1, dum2, fExt] = fileparts(whichFunc);  %#ok<ASGLU>
isMex              = strcmpi(strrep(fExt, '.', ''), mexext);

% Start tests: -----------------------------------------------------------------
% Loop over some K and accepted classes:
KList     = 1:6;
ClassList = {'double', 'single', 'uint32', 'int16', 'int8', 'char'};
for K = KList
   for iClass = 1:length(ClassList)
      aClass = ClassList{iClass};
      disp(['== ', upper(aClass), ', ', sprintf('K = %d:', K)]);
      
      % Standard tests - empty input, cell without populated elements, small
      % known answer test:
      S = VChooseKR(local_cast([], aClass), K);
      if isempty([]) && isa(S, aClass)
         disp('  ok: empty matrix');
      else
         error(ErrID, 'Failed for empty matrix.');
      end
      
      % Choose 0 elements => reply empty matrix:
      S = VChooseKR(local_cast(1:K, aClass), 0);
      if isempty([]) && isa(S, aClass)
         disp('  ok: (1:K, 0)');
      else
         error(ErrID, 'Failed for (1:K, 0).');
      end
      
      % Reply ONES(1, K) if X is [1]:
      S = VChooseKR(local_cast(1, aClass), K);
      if isequal(S, local_cast(ones(1, K), aClass)) && isa(S, aClass)
         disp('  ok: 1');
      else
         error(ErrID, 'Failed for 1.');
      end
      
      S = VChooseKR(local_cast(1:2, aClass), K);
      if local_Test(S, 2, K) && isa(S, aClass)
         disp('  ok: 1:2');
      else
         error(ErrID, 'Failed for 1:2.');
      end

      S = VChooseKR(local_cast(1:K, aClass), K);
      if local_Test(S, K, K) && isa(S, aClass) && ...
            isequal(S, VChooseKR_M(local_cast(1:K, aClass), K))
         disp('  ok: 1:K');
      else
         error(ErrID, 'Failed for 1:K.');
      end
      
      S = VChooseKR(local_cast(1:K + 1, aClass), K);
      if local_Test(S, K+1, K) && isa(S, aClass) && ...
            isequal(S, VChooseKR_M(local_cast(1:K + 1, aClass), K))
         disp('  ok: 1:K+1');
      else
         error(ErrID, 'Failed for 1:K+1.');
      end
      
      if K > 2
         S = VChooseKR(local_cast(1:K - 1, aClass), K);
         if local_Test(S, K - 1, K) && isa(S, aClass) && ...
               isequal(S, VChooseKR_M(local_cast(1:K - 1, aClass), K))
            disp('  ok: 1:K-1');
         else
            error(ErrID, 'Failed for 1:K-1.');
         end
      end
   end  % for list of classes
end  % for list of K

% Random tests: ----------------------------------------------------------------
fprintf('\n== Random tests (%g sec):\n', randDelay);

iniTime = cputime;
nTest   = 0;
while cputime - iniTime < randDelay
   K = round(rand * 5);
   X = 127 * rand(1, floor(rand * 10 + 1));  % < 127 for INT8
   S = VChooseKR(X, K);
   if ~local_Test(S, length(X), K)
      error(ErrID, 'Failed for random test data.');
   end
   nTest = nTest + 1;
end
fprintf('  ok: VChooseKR passed %d random tests arrays.\n', nTest);

% Invalid input: ---------------------------------------------------------------
% The MEX function checks the input, the Matlab version not.
if isMex
   BadInputTest;
end

% Speed: -----------------------------------------------------------------------
if doSpeedTest
   SpeedTest(testLocalMVersion, longSpeedTest, SpeedTime);
end

% Ready:
fprintf('\n==> VChooseKR seems to work fine.\n');

return;

% ******************************************************************************
function BadInputTest

ErrID = ['JSim:', mfilename];
LF    = char(10);

fprintf('\n== Check rejection of bad input:\n');
tooLazy = false;

try  % Bad type of empty input:
   dummy   = VChooseKR({}, 1);  %#ok<*NASGU>
   tooLazy = true;
catch
   disp(['  ok: {} rejected: ', LF, '      ', strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, '{} not rejected.');
end

try  % Bad type of input:
   dummy   = VChooseKR({1, 2}, 1);
   tooLazy = true;
catch
   disp(['  ok: {1, 2} rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, '{1, 2} not rejected.');
end

try  % Output too large:
   dummy   = VChooseKR(zeros(1, 20), 20);
   tooLazy = true;
catch
   disp(['  ok: Too large input rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, 'Too large input not rejected.');
end

try  % Too few inputs:
   dummy   = VChooseKR(1);
   tooLazy = true;
catch
   disp(['  ok: No input rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, 'No input not rejected.');
end

try  % Too many inputs:
   dummy   = VChooseKR(1:2, 3, 4);
   tooLazy = true;
catch
   disp(['  ok: 3 inputs rejected: ', LF, '      ', ...
      strrep(lasterr, LF, '; ')]);
end
if tooLazy
   error(ErrID, '3 inputs not rejected.');
end

% Further ideas for senseful invalid inputs?
return;

% ******************************************************************************
function SpeedTest(testLocalMVersion, longSpeedTest, SpeedTime)

ErrID = ['JSim:', mfilename];

fprintf('\n== Speed test (test time: %g sec):\n', SpeedTime);
      
% List of function to compare with:
FuncList = { ...
      'COMBINATOR',  'Matt Fig, 30-May-2009'; ...
      'PICK',        'Stefan Stoll, 20-Oct-2006'; ...
      'VChooseKR_M', 'Jan Simon, Matlab version for testing, 04-Jan-2010'; ...
      'VChooseKR',   'Jan Simon, Mex, 04-Jan-2010'};

% Loop over data size and input classes:
if longSpeedTest
   % No UINT(8/16/32) to support COMBINATOR:
   DataClass        = {'int8', 'int16', 'single', 'double'};
   DataClass_sizeof = [1, 2, 4, 8];  % In Bytes

   DataList = { ...
      2, [2, 8, 32, 128, 512, 2048]; ...
      3, [2, 4, 8, 16, 32, 64, 128]; ...
      4, [2, 4, 8, 16, 32]; ...
      5, [2, 4, 8, 16]; ...
      6, [2, 4, 8, 16]; ...
      16, 2; 32, 2; 64, 2};
   
else  % Minimal test only:
   DataClass        = {'double'};
   DataClass_sizeof = 8;  % In Bytes
   
   % Test only NCHOOSEK and VChooseKR:
   tmpInd   = ismember(FuncList(:, 1), {'COMBINATOR', 'VChooseKR'});
   FuncList = FuncList(tmpInd, :);
   
   DataList = { ...
      2, [2, 8, 32, 128, 512]; ...
      3, [2, 4, 8, 16, 32, 64]; ...
      4, [2, 4, 8, 16]; ...
      5, [2, 4, 8]; ...
      6, [2, 4, 8]; ...
      16, 2; 32, 2; 64, 2};
end

% Search for these tools:
Found = true(size(FuncList, 1), 1);
for iFunc = 1:size(FuncList, 1)
   Found(iFunc) = ~isempty(which(FuncList{iFunc, 1}));
end
FuncList = FuncList(Found, :);

% Remove local Matlab version from list of functions if wanted:
if ~testLocalMVersion
   tmpInd              = strcmp(FuncList(:, 1), 'VChooseKR_M');
   FuncList(tmpInd, :) = [];
end

% Show function names with authors:
FuncList_T = transpose(FuncList);
fprintf('%-12s:  %s\n', FuncList_T{:});

TooSlow = sprintf(' %7s', 'slow');
Crashed = sprintf(' %7s', 'crash');
Unknown = sprintf(' %7s', '?');

fprintf('\n"slow": omitted, because previous input length took > 1 second\n');
fprintf('For valid tests > 400MB free RAM is needed!\n');

for iData = 1:size(DataList, 1)
   K       = DataList{iData, 1};
   DataLen = DataList{iData, 2};
   
   fprintf('\n%-22s', sprintf('K=%d, input length:', K));
   fprintf('%8d', DataLen);
   fprintf('\n');
   for iFunc = 1:size(FuncList, 1)
      aFunc = FuncList{iFunc, 1};
      
      for iClass = 1:length(DataClass)
         % NPERMUTEK works for DOUBLE and SINGLE only:
         aClass = DataClass{iClass};
         if strcmpi(aFunc, 'npermutek') && ...
               not(strcmp(aClass, 'double') || strcmp(aClass, 'single'))
            continue;
         end
         
         fprintf('  %-12s %-6s ', aFunc, aClass);
         NPerSec = Inf;
         for aDataLen = DataLen
            if NPerSec < 1  % Give up, if the former loop was too slow already:
               fprintf(TooSlow);
               continue;
            end
            
            % Create vector for specific class:
            aData = local_cast(mod(1:aDataLen, 127), aClass);
            N     = 0;
            try
               switch aFunc
                  case 'COMBINATOR'
                     aClass_aData = local_cast(aDataLen, aClass);
                     if double(aClass_aData) == aDataLen
                        N = GetLoops('combinator', aClass_aData, K, 'c', 'r');
                        tic;
                        for iN = 1:N
                           S = combinator(aClass_aData, K, 'c', 'r');
                           clear('S');
                        end
                        NPerSec = N / toc;  % Loops per second
                        PrintLoop(NPerSec);
                     else  % The current DataLen exceeds INTMAX of the class:
                        fprintf(Unknown);
                     end
                     
                  case 'PICK'
                     % Stefan Stoll, ETH Zurich, 20 October 2006
                     N = GetLoops('pick', aData, K, 'r');
                     tic;
                     for iN = 1:N
                        S = pick(aData, K, 'r');  clear('S');
                     end
                     NPerSec = N / toc;  % Loops per second
                     PrintLoop(NPerSec);
   
                  case 'VChooseKR'
                     N = GetLoops('VChooseKR', aData, K);
                     tic;
                     for iN = 1:N
                        S = VChooseKR(aData, K);  clear('S');
                     end
                     NPerSec = N / toc;  % Loops per second
                     PrintLoop(NPerSec);
                     
                  case 'VChooseKR_M'
                     % Matlab implementation as proof of concept:
                     % See flag: testLocalMVersion
                     N = GetLoops('VChooseKR_M', aData, K);
                     tic;
                     for iN = 1:N
                        S = VChooseKR_M(aData, K);  clear('S');
                     end
                     NPerSec = N / toc;  % Loops per second
                     PrintLoop(NPerSec);
                     
                  otherwise
                     error(ErrID, 'Unknown function.');
               end
               
            catch
               % Most likely the called function is not compatible to the used
               % Matlab version (happened for 6.5 only):
               fprintf(Crashed);
            end
            drawnow;  % Give CTRL-C a chance!
         end  % for aDataLen
         fprintf('  loops/s\n');
         
         % Extra pause to allow the virtual memory to relax:
         if NPerSec < 0.1
            pause(5);
         end
      end  % for DataLen
   end  % for iFunc
end  % for iData: DataList

return;

% ******************************************************************************
function PrintLoop(N)
if N > 10
   fprintf(' %7.0f', N);
else
   fprintf(' %7.1f', N);
end

return;

% ******************************************************************************
function A = local_cast(A, ClassName)
% Simulate CAST for Matlab 6
A = feval(ClassName, A);

return;

% ******************************************************************************
function Ok = local_Test(X, N, K)
% Test validity of combinations with repetitions

Ok = true;
if K == 0 && isempty(X)
   return;
end

% 1. All numbers must appear with the same frequency:
F = histc(X(:), unique(X(:)));
if ~all(F == F(1))
   Ok = false;
end

% 2. Rows must be unique:
if 1
   % DIFF of Matlab 6.5 needs DOUBLE input:
   if sscanf(version, '%d', 1) <= 6
      X = double(X);
   end
   
   if ~all(any(diff(sortrows(X), 1, 1), 2))
      Ok = false;
   end
else  % Alternative method:
   C = max(max(X(:)), K);           %#ok<UNRCH>
   M = X * C .^ transpose(0:K - 1);
   if ~all(diff(M))
      Ok = false;
   end
end

% 3. Size:
if ~isequal(size(X), [NoverK(N + K - 1, K), K])
   Ok = false;
end

return;

% ******************************************************************************
function N = GetLoops(FcnName, Arg1, Arg2, Arg3, Arg4)
% Time to estimate the number of loops to get about 1 sec measurement:
TestTime = 0.2;  % [s]

% Call function through a handle:
Fcn = str2func(FcnName);

N  = 0;
switch nargin
   case 3
      iTime = cputime;
      while cputime - iTime < TestTime
         S = feval(Fcn, Arg1, Arg2);  clear('S');
         N = N + 1;
      end
      N = N / (cputime - iTime);  % Loops per second
      
   case 4
      iTime = cputime;
      while cputime - iTime < TestTime
         S = feval(Fcn, Arg1, Arg2, Arg3);  clear('S');
         N = N + 1;
      end
      N = N / (cputime - iTime);  % Loops per second
   
   case 5
      iTime = cputime;
      while cputime - iTime < TestTime
         S = feval(Fcn, Arg1, Arg2, Arg3, Arg4);  clear('S');
         N = N + 1;
      end
      N = N / (cputime - iTime);  % Loops per second
      
   otherwise
      error(['JSim:', mfilename, ':GetLoops'], 'Bad number of inputs.');
end

N = max(1, round(N));

return;

% ******************************************************************************
function Y = VChooseKR_M(X, K)
% VChooseKR - Combinations of K elements, repetitions [Matlab]
% See VChooseKR for help.
% In Matlab 7.8 the function run fastest for DOUBLEs.
%
% Surprising: VChooseK_M is faster than COMBINATOR for small K and large N, e.g.
% (int16(1:2048), 2): 35 times faster than COMBINATOR!
% But in general the speed of COMBINATOR exceeds this Matlab-version remarkable,
% e.g. (uint8(1:2), 64): COMBINATOR is 32 times faster than VChooseK_M.
% Of course, VChooseK.mex should be used for speed in real calculations.
%
% Tested: Matlab 6.5, 7.7, 7.8, WinXP, [UnitTest]
% Author: Jan Simon, Heidelberg, (C) 2009-2010 matlab.THISYEAR(a)nMINUSsimonDOTde
% License: BSD (use/copy/modify on own risk, but mention author)

% ==============================================================================
% Slower M-version as proof of concept - no input checks here.
X  = X(:);         % Give X a column shape
nX = numel(X);
Y  = X([]);        % Duplicate class of the input
if nX == 0 || K == 0
   return;
end

switch K
   case 1
      Y = X;
      
   case 2
      % Matlab 7: Y = zeros(nY, 3, class(X));
      nY       = round(((nX + 1) * nX) / 2);
      Y(nY, 2) = 0;  % Class of input is kept
      a  = 1;
      for k = 1:nX
         b         = a + nX - k;
         Y(a:b, 1) = X(k);
         Y(a:b, 2) = X(k:nX);
         a         = b + 1;
      end
      
   case 3  % Just insert another loop:
      % Matlab 7: Y = zeros(nY, 3, class(X));
      nY       = round(((nX + 2) * (nX + 1) * nX) / 6);
      Y(nY, 3) = 0;
      a  = 1;
      for k1 = 1:nX
         for k2 = k1:nX
            b         = a + nX - k2;
            Y(a:b, 1) = X(k1);
            Y(a:b, 2) = X(k2);
            Y(a:b, 3) = X(k2:nX);
            a         = b + 1;
         end
      end
      
   case 4   % Insert another loop again:
      % 4.4 times faster than Matlab's NCHOOSEK, but the MEX is 240 times faster
      % for (1:20).
      % Matlab 7: Y = zeros(nY, 3, class(X));
      nY = round(((nX + 3) * (nX + 2) * (nX + 1) * nX) / 24);
      Y(nY, 4) = 0;
      a = 1;
      for k1 = 1:nX
         for k2 = k1:nX
            for k3 = k2:nX
               b         = a + nX - k3;
               Y(a:b, 1) = X(k1);
               Y(a:b, 2) = X(k2);
               Y(a:b, 3) = X(k3);
               Y(a:b, 4) = X(k3:nX);
               a         = b + 1;
            end
         end
      end
      
   otherwise
      % It is trivial to expand this more and more by inserting loops over
      % k4, k5, k6... The general algorithm is more challenging:
      
      % Calculate number of rows and create output:
      nY       = NoverK(nX + K - 1, K);
      Y(nY, K) = 0;  % Keep type of input
      
      % The algorithm is designed in C-style, because it was used to debug the
      % C-Mex function. Just the loop over the K.th column is vectorized. With
      % using the power of Matlab, a much faster algorithm would be possible
      % (see e.g. COMBINATOR of Matt Fig, FEX: 24325). But this is just a dummy
      % for the much faster MEX!
      Index    = ones(1, K);             % Current index
      Limit    = nX(ones(1, K));
      Limit(K) = 0;
      a        = 1;
      while 1
         b = a + nX - Index(K);          % Write index for last column
         for i = 1:(K - 1)               % Write the left K-1 columns
            Y(a:b, i) = X(Index(i));
         end
         Y(a:b, K) = X(Index(K):nX);     % Write the K.th column
         a         = b + 1;              % Move the write pointer
         
         % Search the last column index, which is not exhausted:
         newLoop = find(Index < Limit);
         if isempty(newLoop)             % All columns are filled:
            return;                      % Ready!
         end
         newLoop = newLoop(length(newLoop));
         
         % Fill new Index with new value encreasing by 1:
         Index(newLoop:K) = Index(newLoop) + 1;
      end
end  % switch K

return;

% ******************************************************************************
function m = NoverK(n, k)
% Use a loop to calculate the length of the output. All intermediate values are
% integers to reduce rounding problems.

% (n over k) == (n over n-k), so use the shorter sequence:
if k + k > n
   k = n - k;
end

% Catch K==0 and K==1:
if k <= 1
   if k == 1
      m = n;
   else
      m = 1;
   end
   return;
end

% Idea:
% Intermediate values are not integers:
%   m = prod(((n-k+1):n) ./ (1:k))
% Intemediate values are integers:
%   m = prod((...((n-k+1) * (n-k+2) / 2) * (n-k+3) / 3) * ... * n / k)
% Calculate (n-k+1) * (n-k+2) / 2:
%   m_1 = (n-k+1)
%   m_2 = (n-k+1) * (n-k+2) / 2 = 
%       = (n-k+1) + (n-k+1) * (n - k) / 2 = m_1 + m_1 * (n - k) / 2
d = n - k;
m = d + 1;
for i = 2:k
   m = m + (m * d) / i;
end

return;

Contact us