Code covered by the BSD License  

Highlights from
CStrAinBP

image thumbnail

CStrAinBP

by

 

09 Jun 2009 (Updated )

Overlapping elements of 2 cell strings. 10-20 times faster than INTERSECT/ISMEMBER/SETDIFF.

TestCStrAinBP(doSpeed)
function TestCStrAinBP(doSpeed)
% Test CStrAinBP
% Unit-test for CStrAinBP: Call the function with a variety of valid and invalid
% inputs and compare the results with the expectations.
% This function stops with an error if a problem occurs.
%
% TestCStrAinBP(doSpeed)
% INPUT:
%   doSpeed: Optional flag. If TRUE (the default when the input is omitted)
%            the speed comparison runs with a test time of 2.0 seconds,
%            otherwise with 0.1 seconds. The speed tests are executed in both
%            cases.
% OUTPUT:
%   None. Progress messages are written to the command window.
%
% Tested: Matlab 6.5, 7.7, 7.8
% Author: Jan Simon, Heidelberg, (C) 2009 J@n-Simon.De

% $JRev: R0i V:052 Sum:52B1CE4E Date:13-Sep-2009 02:09:06 $
% $File: Tools\UnitTests_\TestCStrAinBP.m $

% ==============================================================================
% Test speed if no argument used:
if nargin == 0
   doSpeed = true;
end

% Parameters to control, which tests are processed:
if doSpeed
   TestTime = 2.0;  % Seconds for testing sequence
else
   TestTime = 0.1;
end

% Hello:
disp(['==== Test CStrAinBP:  ', datestr(now, 0)]);
pause(0.001);  % Funny Matlab6.5 acceleration! Java sync problem?!

% Display which version (Matlab or MEX) is currently enabled:
[dum, fcnName, fcnExt] = fileparts(which('CStrAinBP')); %#ok<ASGLU>
if strcmpi(fcnExt, '.m')
   disp(['  Matlab version: ', fcnName, fcnExt]);
else
   disp(['  MEX version: ', fcnName, fcnExt]);
end
fcnType = strrep(fcnExt, '.', '');  % File extension without the dot

% ------------------------------------------------------------------------------
disp('== Specific tests:');
ai = CStrAinBP({}, {});
if length(ai)
   error([mfilename, ': Problem: ai = CStrAinBP({}, {})']);
end
[ai, bi] = CStrAinBP({}, {});
if length(ai) || length(bi)
   error([mfilename, ': Problem: [ai, bi] = CStrAinBP({}, {})']);
end

ai = CStrAinBP({'1', '2'}, {'1'});
if isequal(ai, 1) == 0
   error([mfilename, ': Problem: ai = CStrAinBP({1, 2}, {1})']);
end
[ai, bi] = CStrAinBP({'1', '2'}, {'1'});
if isequal(ai, 1) == 0 || isequal(bi, 1) == 0
   error([mfilename, ': Problem: [ai, bi] = CStrAinBP({1, 2}, {1})']);
end
ai = CStrAinBP({'1', '2'}, {'1'}, 'i');
if isequal(ai, 1) == 0
   error([mfilename, ': Problem: ai = CStrAinBP({1, 2}, {1}, i)']);
end
[ai, bi] = CStrAinBP({'1', '2'}, {'1'}, 'i');
if isequal(ai, 1) == 0 || isequal(bi, 1) == 0
   error([mfilename, ': Problem: [ai, bi] = CStrAinBP({1, 2}, {1}, i)']);
end

ai = CStrAinBP({'1', '2'}, {'2'});
if isequal(ai, 2) == 0
   error([mfilename, ': Problem: ai = CStrAinBP({1, 2}, {2})']);
end
[ai, bi] = CStrAinBP({'1', '2'}, {'2'});
if isequal(ai, 2) == 0 || isequal(bi, 1) == 0
   error([mfilename, ': Problem: [ai, bi] = CStrAinBP({1, 2}, {2})']);
end
ai = CStrAinBP({'1', '2'}, {'2'}, 'i');
if isequal(ai, 2) == 0
   error([mfilename, ': Problem: ai = CStrAinBP({1, 2}, {2}, i)']);
end
[ai, bi] = CStrAinBP({'1', '2'}, {'2'}, 'i');
if isequal(ai, 2) == 0 || isequal(bi, 1) == 0
   error([mfilename, ': Problem: [ai, bi] = CStrAinBP({1, 2}, {2}, i)']);
end

ai = CStrAinBP({'1', '2'; '3', '4'}, {'1', '4', '3', '2'});
if isequal(ai, 1:4) == 0
   error([mfilename, ': Problem: ai = CStrAinBP({1, 2; 3, 4}, {1, 4, 3, 2})']);
end
[ai, bi] = CStrAinBP({'1', '2'; '3', '4'}, {'1', '4', '3', '2'});
if isequal(ai, 1:4) == 0 || isequal(bi, [1, 3, 4, 2]) == 0
   error([mfilename, ': Problem: ', ...
         '[ai, bi] = CStrAinBP({1, 2; 3, 4}, {1, 4, 3, 2})']);
end
ai = CStrAinBP({'1', '2'; '3', '4'}, {'1', '4', '3', '2'}, 'i');
if isequal(ai, 1:4) == 0
   error([mfilename, ...
         ': Problem: ai = CStrAinBP({1, 2; 3, 4}, {1, 4, 3, 2}, i)']);
end
[ai, bi] = CStrAinBP({'1', '2'; '3', '4'}, {'1', '4', '3', '2'}, 'i');
if isequal(ai, 1:4) == 0 || isequal(bi, [1, 3, 4, 2]) == 0
   error([mfilename, ': Problem: ', ...
         '[ai, bi] = CStrAinBP({1, 2; 3, 4}, {1, 4, 3, 2}, i)']);
end

ai = CStrAinBP({'1'; '2'; '3'; '4'}, {'4', '1', '3', '2'});
if isequal(ai, 1:4) == 0
   error([mfilename, ': Problem: ', ...
         'ai = CStrAinBP({1; 2; 3; 4}, {4, 1, 3, 2})']);
end
[ai, bi] = CStrAinBP({'1'; '2'; '3'; '4'}, {'4', '1', '3', '2'});
if isequal(ai, 1:4) == 0 || isequal(bi, [2, 4, 3, 1]) == 0
   error([mfilename, ': Problem: ', ...
         '[ai, bi] = CStrAinBP({1; 2; 3; 4}, {4, 1, 3, 2})']);
end
ai = CStrAinBP({'1'; '2'; '3'; '4'}, {'4', '1', '3', '2'}, 'i');
if isequal(ai, 1:4) == 0
   error([mfilename, ': Problem: ', ...
         'ai = CStrAinBP({1; 2; 3; 4}, {4, 1, 3, 2}, i)']);
end
[ai, bi] = CStrAinBP({'1'; '2'; '3'; '4'}, {'4', '1', '3', '2'}, 'i');
if isequal(ai, 1:4) == 0 || isequal(bi, [2, 4, 3, 1]) == 0
   error([mfilename, ': Problem: ', ...
         '[ai, bi] = CStrAinBP({1; 2; 3; 4}, {4, 1, 3, 2}, i)']);
end

% Special tests for case-insensitive comparison:
[ai, bi] = CStrAinBP({'a', 'b', 'c', 'd'}, {'C', 'A', 'B', 'D'}, 'i');
if isequal(ai, 1:4) == 0 || isequal(bi, [2, 3, 1, 4]) == 0
   error([mfilename, ': Problem: ', ...
         '[ai, bi] = CStrAinBP({a, b, c, d}, {C, A, B, D}, i)']);
end

[ai, bi] = CStrAinBP({'a', 'bB', 'Cc', 'd'}, ...
   {'aC', 'bb', 'B', 'D', 'cC', 'd'}, 'i');
if isequal(ai, 2:4) == 0 || isequal(bi, [2, 5, 4]) == 0
   error([mfilename, ': Problem: ', ...
         '[ai, bi] = CStrAinBP({a, bB, Cc, d}, {aC, bb, B, D, cC, d}, i)']);
end
disp('  ok: specific tests passed');

% Call function with test data: ------------------------------------------------
% Two arbitrary cell strings, partially overlapping strings:
%   Adjust to your typical demands freely!
%   Let them be not unique for real testing!
Ac = {'asd', 'a1', 'a1', 'a2', 'a3', 'a5', 'a6', 'a7', 'a8', ...
      'd1', 'a8', 'a9', 'b1', 'b2', 'b3', ...
      '%NAVMedDumTib', '%LCLLatDumTib', '%CCLLatDumTib', ...
      '%P5TLatDumTib', '%NAVLatDumTib'};

Bc = {'asd', 'sdf', 'dfg', 'ert', 'tzu', 'fgh', 'hjk', 'nhz', ...
      'cde', 'xsw', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', ...
      'x8', 'x9', 'a9', 'a1', 'a5', 'a6', 'b1', 'b3', 'b2', ...
      'c1', 'a7', 'a2', 'a3', 'a4', ...
      'LEP', 'MEP', 'TTU', 'SH1', 'SH2', 'LML', 'MML', ...
      'LEP', 'MEP', 'TTU', 'SH1', 'SH2', 'LML', 'MML', ...
      'LCL', 'MCL', 'CCL', 'P1T', 'D1T', 'P5T', 'D5T', ...
      'HLX', 'NAV', 'TibFlexA', 'TibAbdA', 'TibRotA', 'MalDisA', ...
      'oldAnkA', '%LCLMedDumTib', '%CCLMedDumTib', ...
      '%P5TMedDumTib', '%NAVMedDumTib', '%LCLLatDumTib', ...
      '%CCLLatDumTib', '%P5TLatDumTib', '%NAVLatDumTib', ...
      'TibTalA', 'TalGlobA', 'CalcA', 'ArchA', 'ArchTiltA', ...
      'MalDisRot', 'CalDisRot', 'HlxA', 'CCC', '%NavCal', ...
      '%P5TCal', '%MCLMedDumTib', '%MCLLatDumTib', 'WalkDir', ...
      'TibGlobA'};

Cc = {'asd'};
Dc = {'QQQ'};

% Test {1 x N} cells:
disp('== Apply test data:');
disp(['  A: {1 x ', sprintf('%d', length(Ac)), ' strings}   ', ...
      'B: {1 x ', sprintf('%d', length(Bc)), ' strings}']);
SequenceTest(Ac, Bc);
fprintf(1, '\n');

% Test in different order:
disp(['  A: {', sprintf('%d', length(Bc)), ' strings}   ', ...
      'B: {', sprintf('%d', length(Ac)), ' strings}']);
SequenceTest(Bc, Ac);
fprintf(1, '\n');

% Test {N x 1} cells:
disp(['  A: {', sprintf('%d', length(Ac)), ' x 1 strings}   ', ...
      'B: {', sprintf('%d', length(Bc)), ' x 1 strings}']);
SequenceTest(transpose(Ac), transpose(Bc));
fprintf(1, '\n');

% Test mixed orientation:
disp(['  A: {1 x ', sprintf('%d', length(Ac)), ' strings}   ', ...
      'B: {', sprintf('%d', length(Bc)), ' x 1 strings}']);
SequenceTest(Ac, transpose(Bc));
fprintf(1, '\n');

disp(['  A: {', sprintf('%d', length(Ac)), ' x 1 strings}   ', ...
      'B: {1 x ', sprintf('%d', length(Bc)), ' strings}']);
SequenceTest(transpose(Ac), Bc);
fprintf(1, '\n');

% Test with empty 2nd cell:
disp(['  A: {', sprintf('%d', length(Ac)), ' strings}   B: {} empty']);
SequenceTest(Ac, {});
fprintf(1, '\n');

% Test with empty 1st cell:
disp(['  A: {} empty   B: {', sprintf('%d', length(Bc)), ' strings}']);
SequenceTest({}, Bc);
fprintf(1, '\n');

% Test with both cells empty:
disp('  A: {} empty   B: {} empty');
SequenceTest({}, {});
fprintf(1, '\n');

% Test with 1 cell with 1 element:
disp('  A: {1 string}  B: {} empty');
SequenceTest(Cc, {});
fprintf(1, '\n');

% Test with cell and 1 cell with 1 element:
disp(['  A: {1 string}   B: {', sprintf('%d', length(Bc)), ' strings}']);
SequenceTest(Cc, Bc);
fprintf(1, '\n');

disp(['  A: {', sprintf('%d', length(Bc)), ' strings}   B: {1 string}']);
SequenceTest(Bc, Cc);
fprintf(1, '\n');

% Test both cells with the same element:
disp('  A: {1 string}   B: {1 string} (equal)');
SequenceTest(Cc, Cc);
fprintf(1, '\n');

% Test both cells with different elements:
disp('  A: {1 string}   B: {1 string} (different)');
SequenceTest(Cc, Dc);
fprintf(1, '\n');

% Not overlapping data:
disp('  A: {1, 2}  B: {3, 4, 5}');
SequenceTest({'1', '2'}, {'3', '4', '5'});
fprintf(1, '\n');

disp('  A: {1; 2}  B: {3, 4, 5}');
SequenceTest({'1'; '2'}, {'3', '4', '5'});
fprintf(1, '\n');

disp('  A: {1, 2}  B: {3; 4; 5}');
SequenceTest({'1', '2'}, {'3'; '4'; '5'});
fprintf(1, '\n');

disp('  ok: Test data sets passed');

% Test bad inputs: -------------------------------------------------------------
% Each check calls CStrAinBP with invalid input inside a TRY block. The flag
% [accepted] is set only if the call returns without an error. The failure is
% reported *after* the TRY/CATCH block: an ERROR raised inside the TRY part would
% be swallowed by the CATCH of the same block and the test could never fail.
disp([char(10), '== Provoke errors:']);
errBak   = lasterr;  % The caught errors overwrite LASTERR, restore it later

for CaseType = 1:2
   % Test with and without sensitivity for case (although I really do not
   % expect, that this has any effects!):
   if CaseType == 1
      CaseSen = 's';
      disp('  Case-sensitive:');
   else
      CaseSen = 'i';
      disp('  Not case-sensitive:');
   end
   
   accepted = false;
   try
      V = CStrAinBP({'A'}, {'B'}, CaseSen, 0);  %#ok<*NASGU>
      accepted = true;
   catch  % Expected: the call must fail
   end
   if accepted
      error(['*** ', mfilename, ': 4 inputs accepted?!']);
   end
   disp('  Ok: 4 inputs rejected.');
   
   accepted = false;
   try
      V = CStrAinBP('string', {'cell'}, CaseSen);  %#ok<*NASGU>
      accepted = true;
   catch  % Expected: the call must fail
   end
   if accepted
      error(['*** ', mfilename, ': String accepted as input?!']);
   end
   disp('  Ok: (string, {cell}) rejected.');
   
   accepted = false;
   try
      V = CStrAinBP([], {'cell'}, CaseSen);  %#ok<*NASGU>
      accepted = true;
   catch  % Expected: the call must fail
   end
   if accepted
      error(['*** ', mfilename, ': Empty matrix accepted as input?!']);
   end
   disp('  Ok: ([], {cell}) rejected.');
   
   accepted = false;
   try
      V = CStrAinBP(rand(2, 3), {'cell'}, CaseSen);  %#ok<*NASGU>
      accepted = true;
   catch  % Expected: the call must fail
   end
   if accepted
      error(['*** ', mfilename, ': Matrix accepted as input?!']);
   end
   disp('  Ok: (rand(2, 3), {cell}) rejected.');
   
   accepted = false;
   try
      V = CStrAinBP({'cell'}, 'string', CaseSen);  %#ok<*NASGU>
      accepted = true;
   catch  % Expected: the call must fail
   end
   if accepted
      error(['*** ', mfilename, ': String accepted as input?!']);
   end
   disp('  Ok: ({cell}, string) rejected.');
   
   accepted = false;
   try
      V = CStrAinBP({'cell'}, [], CaseSen);  %#ok<*NASGU>
      accepted = true;
   catch  % Expected: the call must fail
   end
   if accepted
      error(['*** ', mfilename, ': Empty matrix accepted as input?!']);
   end
   disp('  Ok: ({cell}, []) rejected.');
   
   accepted = false;
   try
      V = CStrAinBP({'cell'}, rand(2, 3), CaseSen);  %#ok<*NASGU>
      accepted = true;
   catch  % Expected: the call must fail
   end
   if accepted
      error(['*** ', mfilename, ': Matrix accepted as input?!']);
   end
   disp('  Ok: ({cell}, rand(2, 3)) rejected.');
   
   % The mex version checks if elements are strings:
   if strcmpi(fcnType, mexext)
      % Not initialized cell elements must be refused:
      Cell0     = cell(1, 1);
      Cell0x    = cell(1, 3);
      Cell0x{1} = 'asd';
      Cell0x{3} = 'bsd';
      InputList = {Cell0, Cell0x};
      
      for iType = 1:2
         aCell = InputList{iType};
         
         accepted = false;
         try
            [Ex, Seq] = CStrAinBP({'cell'}, aCell, CaseSen);
            accepted  = true;
         catch  % Expected: the call must fail
         end
         if accepted
            error(['*** ', mfilename, ': NULL accepted?!']);
         end
         
         accepted = false;
         try
            [Ex, Seq] = CStrAinBP(aCell, {'cell'}, CaseSen);
            accepted  = true;
         catch  % Expected: the call must fail
         end
         if accepted
            error(['*** ', mfilename, ': NULL accepted?!']);
         end
         
         accepted = false;
         try
            [Ex, Seq] = CStrAinBP(aCell, aCell, CaseSen);
            accepted  = true;
         catch  % Expected: the call must fail
         end
         if accepted
            error(['*** ', mfilename, ': NULL accepted?!']);
         end
      end
   end
end  % for CaseType

disp('  Ok: Bad input rejected.');
lasterr(errBak);

% Speed tests: -----------------------------------------------------------------
% Even if doSpeed is disabled, these tests are performed to ensure a complete
% test suite. But the runtimes are minimized in this case, which will result in
% stupid round off errors for the time comparisons.
disp([char(10), '== Test speed:']);
A = strread(sprintf('%.4d ', fix(rand(1, 100) * 10000)), ...
   '%s', 'delimiter', ' ');
B = strread(sprintf('%.4d ', fix(rand(1, 10)  * 10000)), ...
   '%s', 'delimiter', ' ');
B = cat(1, B, A(1:10:100));  % Append every 10th string of A to B
B = reshape(B, 1, []);

% This is well performed by Matlab's methods due to sorting:
disp('  100 and 10 strings with random 4 digits:');
SpeedTest(TestTime, A, B);
fprintf(1, '\n');

% A set of strings with different length (advantage for the CStr routines,
% because CHAR(A) of ISMEMBER wastes memory):
A = strread(path, '%s', 'delimiter', pathsep);
B = A(end:-3:1);
disp(['  Current Matlab path and every 3rd path', ...
      sprintf(' (%d and %d strings):', numel(A), numel(B))]);
SpeedTest(TestTime, A, B);

% No crash until here - fine.
disp([char(10), 'CStrAinBP tested successfully.']);

return;


% ******************************************************************************
function SequenceTest(A, B)
% Consistency checks of CStrAinBP for the cell strings A and B:
%   1. The strings selected by the replied indices must match: A(ai) equals
%      B(bi), or the transposed B(bi) if exactly one input is a column.
%   2. The index vector related to A must be sorted.
%   3. The case-insensitive mode, applied to inputs converted to opposite
%      cases, must reply the same indices as the standard call.
% The function stops with an error if a check fails.

disp('  :: CStrAinBP(A,B) (Index of A in B):');
[idxA, idxB] = CStrAinBP(A, B);

% Compare the selected strings:
foundA = A(idxA);
foundB = B(idxB);
if isequal(foundA, foundB)
   disp('     A(ai) == B(bi) ok.');
elseif isequal(foundA, transpose(foundB)) && xor(isNx1(A), isNx1(B))
   disp('     A(ai) == B(bi)'' ok.');
else
   error([mfilename, ': Bad CStrAinBP: A(ai) ~= B(bi)']);
end

% Indices of A must be in ascending order:
if ~isequal(idxA, sort(idxA))
   error([mfilename, ':  CStrAinBP: Not sorted AI']);
end

% Case-insensitive mode, the reference indices are [idxA] and [idxB]:
disp('  :: CStrAinBP(A,B,i) (Index of A in B, not case-sensitive):');
for iMode = 1:2
   if iMode == 1
      [ciA, ciB] = CStrAinBP(lower(A), upper(B), 'i');
      okMsg      = '    ai(lower(A)) == ai, bi(upper(B)) == bi';
      failMsg    = ': CStrAinBP(lower,UPPER,i) differs from CStrAinBP!';
   else
      [ciA, ciB] = CStrAinBP(upper(A), lower(B), 'i');
      okMsg      = '    ai(upper(A)) == ai, bi(lower(B)) == bi';
      failMsg    = ': CStrAinBP(UPPER,lower,i) differs from CStrAinBP!';
   end
   
   if ~(isequal(ciA, idxA) && isequal(ciB, idxB))
      error([mfilename, failMsg]);
   end
   disp(okMsg);
end

return;

% ******************************************************************************
function SpeedTest(TestTime, A, B)
% Compare the run time of CStrAinBP with ISMEMBER and INTERSECT.
% INPUT:
%   TestTime: Scaling factor for the number of loops; the caller passes the
%             test duration in seconds.
%   A, B:     Cell strings used as inputs for all timed functions.
% The measured times are displayed in the command window, no outputs.

% Better precision for accelerated version, although this increases the overhead
% of the loop:
loopMult = 4;

% Calibration: Count the INTERSECT calls in at least 0.25 seconds of CPU time
% and derive the number of loops from this rate:
iLoop     = 0;
startTime = cputime;
while cputime - startTime < 0.25
   [AB, ai, bi] = intersect(A, B);
   iLoop        = iLoop + 1;
end
nLoops = TestTime * 100 * ceil(iLoop / ((cputime - startTime) * 100));
disp([sprintf('  %d', nLoops), ' loops on this machine.']);

tic;
for i = 1:nLoops
   [Aex, bi] = ismember(A, B);
end
ismemberT = eps + toc;  % No DivBy0 even under extreme conditions

tic;
for i = 1:nLoops
   [AB, ai, bi] = intersect(A, B);
end
intersectT = eps + toc;  % No DivBy0 even under extreme conditions

% The CStrAinBP loops run [loopMult] times more often, the times are scaled
% back afterwards:
tic;
for i = 1:nLoops * loopMult
   [ai, bi] = CStrAinBP(A, B);
end
AinBPT = toc / loopMult;

% Get the indices and the overlapping strings, as INTERSECT does:
tic;
for i = 1:nLoops * loopMult
   clear('AB');
   [ai, bi] = CStrAinBP(A, B);
   AB       = A(ai);
end
AinBPwithABT = toc / loopMult;

tic;
for i = 1:nLoops * loopMult
   [ai, bi] = CStrAinBP(A, B, 'i');
end
AinBPiT = toc / loopMult;

tic;
for i = 1:nLoops
   [AB, ai, bi] = intersect(lower(A), lower(B));
end
intersectiT = eps + toc;  % No DivBy0 even under extreme conditions

% Show the results. The "with getting strings" line displays AinBPwithABT, the
% time which is used for the percentage below it:
disp(['    ISMEMBER:  ', sprintf('%.2f sec', ismemberT),  char(10), ...
      '    INTERSECT: ', sprintf('%.2f sec', intersectT), char(10), ...
      '    CStrAinBP: ', sprintf('%.2f sec', AinBPT), ...
      ' not getting strings', ...
      char(10), '                ==> ', ...
      sprintf('%.1f%% of INTERSECT with getting strings\n', ...
      100 * AinBPT / intersectT), ...
      '                ==> ', ...
      sprintf('%.1f%% of ISMEMBER\n', 100 * AinBPT / ismemberT), ...
      ...
      '    CStrAinBP: ', sprintf('%.2f sec', AinBPwithABT), ...
      ' with getting strings', ...
      char(10), '                ==> ', ...
      sprintf('%.1f%% of INTERSECT with getting strings\n', ...
      100 * AinBPwithABT / intersectT), ...
      ...
      '    CStrAinBP: ', sprintf('%.2f sec', AinBPiT), ...
      ' ignoring the case', ...
      char(10), '                ==> ', ...
      sprintf('%.1f%%', 100 * AinBPiT / intersectiT), ...
      ' of INTERSECT(lower, lower)', char(10)]);

return;

% ******************************************************************************
function R = isNx1(A)
% Reply TRUE if A has exactly one column, i.e. the product of all dimensions
% except the first one equals 1. This matches the second output of
% [s1, s2] = SIZE(A), which contains the product of the trailing dimensions.
dims = size(A);
R    = (prod(dims(2:end)) == 1);
return;

Contact us