Code covered by the BSD License  

Highlights from
Unique Rows for a cell array

from Unique Rows for a cell array by Jim Hokanson
Find unique rows of a cell array containing columns with strings or scalars, or N-D matrices

uniqueRowsCA(iCA,TREAT_NAN_EQUAL,FIRST_LAST)
function [uCA,ndx,pos] = uniqueRowsCA(iCA,TREAT_NAN_EQUAL,FIRST_LAST)
%uniqueRowsCA  Unique rows of a cell array
%   U_CA = uniqueRowsCA(I_CA) returns the unique rows of the input cell
%   array I_CA. The rows of U_CA are ordered as produced by the internal
%   column-by-column row sort (last column sorted first, first column last).
%
%   [U_CA,I,J] = uniqueRowsCA(...) also returns index vectors I and J such
%   that U_CA = I_CA(I,:) and I_CA = U_CA(J,:).
%
%   uniqueRowsCA(I_CA,TREAT_NAN_EQUAL) treats NaN comparisons as equal if
%   TREAT_NAN_EQUAL is true. If it is omitted or empty, it defaults to true.
%
%   uniqueRowsCA(I_CA,[],FIRST_LAST) selects which occurrence of each unique
%   row the index vector I refers to: 'first' or 'last'. If it is omitted
%   or empty, it defaults to 'last'. Only the first character is examined
%   (case-insensitively).
%
%   NOTE: This function only handles strings and scalars, and N-D matrices
%   that are the same size for every row within the same column. The type
%   of a column is decided from its first row only.
%
%   See also: isequal, isequalwithequalnans

    %INPUT HANDLING
    %===========================================================
    if nargin < 1
        error('Incorrect number of input arguments to %s',mfilename)
    end
    %Omitted and empty arguments both fall back to the documented defaults,
    %so uniqueRowsCA(x,[],'first') and uniqueRowsCA(x,false) behave as documented.
    if nargin < 2 || isempty(TREAT_NAN_EQUAL)
        TREAT_NAN_EQUAL = true;
    end
    if nargin < 3 || isempty(FIRST_LAST)
        FIRST_LAST = 'last';
    end

    %SOME BASIC INITIALIZATION
    %=====================================================
    [M,N] = size(iCA);
    COL_ORDER = 1:N; %Could make this an input argument,
    %affects order of unique output => see sortrows

    %HANDLING THE SIMPLE CASES (NO ROWS OR A SINGLE ROW)
    %======================================================
    %Nothing to sort or compare; also avoids indexing iCA{1,...} on an
    %input that has no rows.
    if M <= 1
        uCA = iCA;
        ndx = (1:M)';
        pos = (1:M)';
        return
    end

    %THE SORT_CELL_BACK_TO_FRONT FUNCTION & SORTROWS
    %=================================================================
    %Note, cell2mat is very slow, rewrote sortrows.
    %Sorting one column at a time from last to first yields an overall row
    %ordering keyed on column 1, then column 2, ... (this relies on sortrowsc
    %being a stable sort).
    %NOTE(review): sortrowsc is not defined in this file; it is assumed to be
    %reachable from where this function lives - confirm.

    ndx = (1:M)';
    for k = N:-1:1
        colUse  = abs(COL_ORDER(k));
        sortDir = sign(COL_ORDER(k));
        %NOTE: right here we make an assumption about the types being consistent in a given column
        if isnumeric(iCA{1,colUse})
            sz = cellfun('prodofsize',iCA(:,colUse));
            if ~any(sz > 1)
                %Column of scalars
                tmp = fastC2M(iCA(ndx,colUse));
                ind = sortrowsc(tmp, sortDir);
            else
                %Column of matrices

                %SOME ERROR CHECKING ON THE MATRICES
                %=========================================
                szS = cellfun(@size,iCA(:,colUse),'UniformOutput',false);

                %1) SAME NUMBER OF DIMENSIONS CHECK
                %----------------------------------------------
                %The length of each size vector is the number of dimensions
                %of the corresponding matrix (ndims of the size vector itself
                %would always be 2).
                nDims = cellfun('length',szS);
                if any(nDims ~= nDims(1))
                    error('%s not setup to handle varying size matrices yet',mfilename)
                end

                %2) EACH DIMENSION THE SAME CHECK
                %----------------------------------------------
                %We check dimensions instead of number of elements because
                %a = [1 3;2 4] and b = [1 2 3 4] have equal linearized contents
                %but different sizes, and the matrix code linearizes the inputs
                %for sorting purposes. fastC2M requires the size vectors to have
                %equal length, which check 1) has just established.
                szMat = fastC2M(szS);
                if any(any(diff(szMat,1,1) ~= 0))
                    error('%s not setup to handle varying size matrices yet',mfilename)
                end
                %=============================================

                tmp = fastC2M(iCA(ndx,colUse));
                ind = sortrowsc(tmp, sortDir*(1:size(tmp,2)));
            end

        else
            tmp = char(iCA(ndx,colUse)); %Nice and quick
            ind = sortrowsc(tmp, sortDir*(1:size(tmp,2)));
        end
        ndx = ndx(ind);
    end

    %NOW RESORTING AND OBTAINING UNIQUE
    %=========================================
    %NOTE: ndx is the 2nd output of sortrows
    iCA = iCA(ndx,:);

    %sameVector(r,c) is true when sorted row r equals sorted row r-1 in
    %column c; row 1 has no predecessor and stays false.
    sameVector = false(M,N);

    if TREAT_NAN_EQUAL
        for iCol = 1:N
            sameVector(2:end,iCol) = arrayfun(@isequalwithequalnans,iCA(2:end,iCol),iCA(1:end-1,iCol));
        end
    else
        for iCol = 1:N
            sameVector(2:end,iCol) = arrayfun(@isequal,iCA(2:end,iCol),iCA(1:end-1,iCol));
        end
    end

    %d marks the first row of every run of identical rows
    d = ~all(sameVector,2);
    uCA = iCA(d,:);

    %ADDITIONAL OUTPUT HANDLING
    %===========================================
    if nargout == 3
        pos = cumsum(d);
        pos(ndx) = pos;             % Re-reference POS to indexing of SORT.
    end

    % Create indices if needed.
    if nargout > 1
        if strncmpi(FIRST_LAST,'l',1)
            ndx = ndx([d(2:end); true]);  % Final element is always a member of unique list.
        else
            ndx = ndx(d); % First element is always a member of unique list.
        end
    else
        ndx = [];
        pos = [];
    end
end

function m = fastC2M(CA)
%fastC2M  Fast replacement for the slow cell2mat, for equally sized elements
%   M = fastC2M(CA) returns a double matrix with one row per row of CA,
%   where row i holds the linearized contents CA{i}(:) of the i-th cell.
%   Only the first column of CA is read.
%
%   ASSUMPTION: all elements of CA have the same number of elements as
%   CA{1}; otherwise the row assignment below errors.
%
%   An empty CA yields an empty matrix with size(CA,1) rows and 0 columns.

    nRows = size(CA,1);

    %CA{1} does not exist for an empty cell array, so handle it up front
    if isempty(CA)
        m = zeros(nRows,0);
        return
    end

    nElem = numel(CA{1});

    m = zeros(nRows,nElem);
    for iRow = 1:nRows
        m(iRow,:) = CA{iRow}(:);
    end
end

Contact us at files@mathworks.com