Code covered by the BSD License

# Fast Binary Search

by

### Avi (view profile)

21 Feb 2011 (Updated )

Binary Search algorithm to search for values in a sorted array. Written in C (.mex) for speed.

testBinarySearch
```function testBinarySearch
%     idx = binarySearch(data, items, dirIfFound, dirIfNotFound)

% This function tests the binarySearch algorithm to verify that it
% gives the correct output using various different parameters. In the
% cases where the indexes are not known beforehand, the
% output is checked by comparing it with Matlab's built-in linear
% search ("find") method.

fprintf('Running checks ... ');
N = 1000;

% 0. Basic test - exactly 1 occurence of each item in the data array.
data = sort(randn(1,N));
idx_orig = randi(N, 1, 10);
items = data(idx_orig);
idx_bsearch = binarySearch(data, items);
assert(isequal(idx_orig, idx_bsearch)); % make sure the indices match the original indices

% 1. Test behavior if multiplies copies are found (using dirIfFound parameter)
n = 10;
data = sort(randi(n, 1, N)); % multiple copies of the numbers 1..10
items = 1:n;

firstPos_linSearch = arrayfun(@(x) find(data == x, 1, 'first'), items);
firstPos_bSearch   = binarySearch(data, items, 'first');
assert(isequal(firstPos_linSearch, firstPos_bSearch));

lastPos_linSearch  = arrayfun(@(x) find(data == x, 1, 'last'),  items);
lastPos_bSearch    = binarySearch(data, items, 'last');
assert(isequal(lastPos_linSearch, lastPos_bSearch));

% 2. test behavior if items are not found (using dirIfNotFound)

% 2a. test  dirIfNotFound = 0  (== return 0 if item not found)
s = [-3:.5:13];  % [ranges from 1 to 9.999]
pos = binarySearch(data, s, [], 'exact');
assert( all( s(pos == 0) == setdiff(s, data) ) );

% 2b. test  dirIfNotFound = down/up
data = [1:10];
items = 1 + rand(N,1) * 9;  % [ranges from 1 to 9.999]
pos = binarySearch(data, items, [], 'down');
actualPos = floor(items);
assert(isequal(pos, actualPos));

actualPos = ceil(items);
pos = binarySearch(data, items, [], 'up');
assert(isequal(pos, actualPos));

% 2c. test  dirIfNotFound = 0.5
pos = binarySearch(data, items, [], 'frac');
assert(isequal(pos, items));

% 3. test the sort checking option.
data_notSorted = 10:-1:1;
items = [5,6];
% 4a. verify that we don't get an error if the first argument is not sorted
try
binarySearch(data_notSorted, items);
catch MErr
error('Should not have generated an error');
end

% 4b. verify that we *do* get an error if we tell the algorithm
% to check the sorting.
checkFlag = 1;
try
binarySearch(data_notSorted, items, [], [], checkFlag);
error('Should have generated an error');
catch MErr
assert(strcmp(MErr.message, 'Input 1 (data) must be sorted.'));
end
fprintf(' All checks passed!\n');

fprintf('Running speed test...\n');

% Speed test:
% compare speed with Matlab's "find" function.
Ns = 10.^[2:6];
%     t_ratios = zeros(1,size(Ns));
for i = 1:length(Ns)
data = sort(randn(1,Ns(i)));
idx_orig = randi(Ns(i), 1, 10);
items = data(idx_orig);
% regular matlab "find"
tic;
idx_find = arrayfun(@(x) find(data == x, 1), items);
t1 = toc;
assert(isequal(idx_orig, idx_find)); % make sure the indices match the original indices
% binary search; (use "dontCheckFlag" to skip checking b/c we know
% the data is sorted.
tic;
idx_bsearch = binarySearch(data, items);
t2 = toc;
assert(isequal(idx_orig, idx_find)); % make sure the indices match the original indices
assert(isequal(idx_orig, idx_bsearch)); % make sure the indices match the original indices
t_ratio = t1/t2;
fprintf('For array size N =% 9d, binarySearch is ~%.1f times faster than find\n', Ns(i), t_ratio);
end

% check single / double handling
data_d = sort(randn(1, 1000));
items_d = rand(1, 100);
data_f = single(data_d);
items_f = single(items_d);

idx_dd = binarySearch(data_d, items_d);
idx_ff = binarySearch(data_f, items_f);
idx_df = binarySearch(data_d, items_f);
idx_fd = binarySearch(data_f, items_d);

assert(isequal(idx_dd, idx_ff));
assert(isequal(idx_dd, idx_fd));
assert(isequal(idx_dd, idx_df));
3;

end```