function [c,aind]=multisetdiff(a,b,rowflag)
% multisetdiff: finds difference of two multisets
% usage: c=multisetdiff(a,b)
% c=multisetdiff(a,b,'rows')
% c=multisetdiff(a,b,rowflag)
% [c,ind]=multisetdiff(...)
%
% MULTISETDIFF works like the built-in SETDIFF except that any repeated
% elements of A are only removed once for each time they occur in B.
%
% For reasons of personal preference ROWFLAG can be specified as a boolean
% or the empty matrix (with true equivalent to 'rows' and the default being
% false); also NaN values are treated differently (see below).
%
% Example:
% multisetdiff([1,1,2,2],[1,1,2]) % returns 2
% multisetdiff(unique(a),unique(b)) % same as setdiff(a,b) unless NaN
% multisetdiff(nan,nan) % returns []
%
% A and B can be cell arrays or 1- or 2-dimensional arrays, provided they
% are the same type. A and B can contain infinite or NaN elements (with
% NaN values treated as equal). The arrays can be any objects for which
% SORT, SIGN, MINUS, EQ, LT and GT are defined, e.g. fraction objects (see
% below).
%
% To work with cell arrays of strings or with rowflag = 'rows' or true
% requires LEXCMP (see below; for inf/NaN support use the 14/9/2009 update)
% This version does not work for non-string cell arrays, unless you have an
% implementation of sort for cell arrays. Future versions may allow
% general sort and comparision functions to be passed as inputs.
%
% See also:
% setdiff
% lexcmp (http://www.mathworks.com/matlabcentral/fileexchange/23035)
% Fractions Toolbox (http://www.mathworks.com/matlabcentral/fileexchange/24878)
% Author: Ben Petschel 9/9/2009
% Version history:
% 9/9/2009 - first release
% 14/9/2009 - added support for NaN values
if nargin<2
error('multisetdiff:nargin','multisetdiff requires 2 arguments');
elseif nargin<3 || isempty(rowflag)
rowflag = false;
elseif ischar(rowflag)
rowflag=strcmp(rowflag,'rows');
elseif ~islogical(rowflag) || numel(rowflag)~=1
error('multisetdiff:rowflag','rowflag must be a string or logical variable');
end;
if iscell(a) || iscell(b)
if xor(iscell(a),iscell(b))
error('multisetdiff:cellin','a and b must be same type');
end;
if rowflag
error('multisetdiff:cellrow','row style not implemented for cell arrays');
end;
end;
if rowflag
[as,aind]=sortrows(a);
bs=sortrows(b);
% implementation for vectors and cell arrays of strings
na=size(as,1);
nb=size(bs,1);
keepflag=true(na,1);
i=1;
j=1;
% use string comparison (lexcmp to determine ordering)
while i<=na && j<=nb
switch lexcmp(as(i,:),bs(j,:))
case -1
% as(i,:)<bs(j,:)
i=i+1;
case 1
% as(i,:)>bs(j,:)
j=j+1;
case 0
% as(i,:)==bs(j,:)
keepflag(i)=false;
i=i+1;
j=j+1;
otherwise
error('multisetdiff:badcmp','unexpected value returned by string comparison');
end;
end;
c=as(keepflag,:);
if nargout==2
aind=aind(keepflag);
end;
else
% implementation for vectors and cell arrays of strings
[as,aind]=sort(a(:));
bs=sort(b(:));
keepflag=true(size(as));
na=numel(as);
nb=numel(bs);
i=1;
j=1;
if iscell(a)
% use string comparison (lexcmp to determine ordering)
while i<=na && j<=nb
switch lexcmp(as{i},bs{j})
case -1
% as{i}<bs{j}
i=i+1;
case 1
% as{i}>bs{j}
j=j+1;
case 0
% as{i}==bs{j}
keepflag(i)=false;
i=i+1;
j=j+1;
otherwise
error('multisetdiff:badcmp','unexpected value returned by string comparison');
end;
end;
else
% non-cell array, rowflag=false
while i<=na && j<=nb
if as(i)<bs(j)
i=i+1;
elseif as(i)>bs(j)
j=j+1;
elseif as(i)==bs(j)
keepflag(i)=false;
i=i+1;
j=j+1;
else
% elements are NaN, or couldn't be compared
% use convention NaN > x except when x is nan
anan=isnan(as(i));
bnan=isnan(bs(j));
if anan && bnan
% equal NaNs
keepflag(i)=false;
i=i+1;
j=j+1;
elseif anan && ~bnan
% NaN > x
j=j+1;
elseif ~anan && bnan
% x < NaN
i=i+1;
else
error('multisetdiff:badcmp','unexpected result of comparison');
end;
end;
end;
end;
c=as(keepflag);
if nargout==2
aind=aind(keepflag);
end;
end;