Code covered by the BSD License  

Highlights from
Bidirectional Branch and Bound Minimum Singular Value Solver (V2)

image thumbnail
from Bidirectional Branch and Bound Minimum Singular Value Solver (V2) by Yi Cao
A branch and bound solver to find the largest minimum singular values among all submatrices.

[B,sset,ops]=b3msv(Q,nc)
function [B,sset,ops]=b3msv(Q,nc)
%B3MSV Bidirectional Branch and Bound with Minimum Singular Value Criterion for Subset Selection
%
%       Consider the following subset selection problem:
%       Given a tall (m x n, m>n) matrix, A, to find n rows of A 
%       such that the resulted n x n square submatrix has the 
%       largest MSV among all possible n x n submatrices.
%       This problem has many applications, where one wishes to 
%       square down a non-square matrix to get a well-posted problem
%       (non-singular, hence maximizing the MSV).
%       This problem has been studied in Linear Algebra for many decades.
%       Many intuitive solutions have been proposed either analytically or 
%       numerically. But none of them, except two approaches, the 
%       exhaustive search and the branch and bound (BB) can guarantee the global
%       optimality.
%       Exhaustive search can only be used for very small m and n.
%       Traditional BB is unidirectional, either upwards, where the subset
%       is gragually expending until reaching the desired size, or
%       downwards, where subset is shrinking one by one until the target
%       size. The performance of both are very limited.
%       In B3, the search is carried out in both directions hence is much
%       more efficient than unidirectional BB. Moreover, a novel
%       determinant based pruning algorithm is implemented to replace
%       time-consuming singular value decomposition so that the overall
%       efficincy is about several orders of magnitude faster than
%       unidirectional approaches.
%Usage:
%       [B,subset,ops]=b3msv(A,r)
%Input: 
%       A, m x n matrix, m>n (row selection) or n>m (column selection)
%       r, number of best subsets required, default is 1
%Output:
%       B, a r-length vector contains the best MSVs
%       subset, a r x n index matrix for the r best n-element subsets
%       ops, various counters for total number of different evaluations
%       for algorithm developers.
%
% See also b3wc, pb3wc

% Reference
% Y. Cao and V. Kariwala, Bidirectional Branch and Bound for Controlled
% Variable Selection Part I: Principles and Minimum Singular Value
% Criterion, Computers and Chemical Engineering, 32(2008), 2306-2319
%
% By Yi Cao at Cranfield University, 07 November 2007, 
%     revised on 09/01/2009 for large size problems

%Examples:
%Ex1 check correctness
%{
      A = randn(10,5);            % Consider a 5 out of 10 problem:
      V = nchoosek(1:10,5);       % All 252 combinations 
      B = zeros(252,1);       
      for k=1:252                 % exhaustive search
          B(k) = min(svd(A(V(k,:),:)));
      end
      [B1,k]=sort(B,'descend');   % the solution
      [B2,subset]=b3msv(A,10);
      norm(B1(1:10)-B2)           % should be zero or very small
      isequal(subset,V(k(1:10),:))      % should be 1
%}
%Ex2 check efficiency
%{
      A = randn(40,20);           % 20 out of 40 problem
      tic, b3msv(A); toc          % 7.5 s with 2.0GHz Core Duo T2500 processor
      tic, 
      for k=1:100,                % test time required for MSV 
          B=min(svd(A(1:20,:)));
      end
      t1=toc                      % 0.2 seconds
      year=nchoosek(40,20)*t1/100/3600/24/365 % over 2 years!
%}
%Ex3 check special cases
%{
      A = randn(400,5);           % 5 out of 400
      tic, b3msv(A); toc          % 0.9 seconds
      A = randn(400,395);         % 395 out of 400
      tic, b3msv(A); toc          % 11.4 seconds
%}
%Ex4 a very large case
%{
     n = 100000;
     m = 20;
     A = 1./randn(n,m);
     tic, 
     [B,sset,op] = b3msv(A);     % 15 + 142 nodes evaluated
     toc                         % about 3 seconds                       % 
%}

%Error checking
    if nargin > 2
        error('Too many input arguments');
    end
% if call without input to display help
    if ~nargin                      
        eval(['help ',eval('mfilename')])
        return
    end
    if nargin<2,                    % number of best subsets
        nc=1;
    end
    [r,n]=size(Q);
    if r==n                         % trivial case
        B=min(svd(Q));
        sset=1:n;
        ops=1;
        return
    end
    if r<n                          % column selection
        Q=Q';
        [r,n]=size(Q);
    end
    bi=2;
    Q2=Q.*Q;                        % preparation
    q2=sum(Q2,2);
    h2=q2';
    g2=h2;
    B=zeros(nc,1);
    sset=zeros(nc,n);
    ib=1;
    bound=0;
    ops=[0 0 0 0 0];                    % ops count for research
    fx=false(1,r);                      % fixed set
    rem=true(1,r);                      % candidate set
    nf=0;                               % size of fixed set
    n2=n;                               % n2 = n - nf
    m2=r;                               % size of candidate set
    upf=false;                          % upwards pruning flag
    downf=false;                        % downward pruning flag
    f=false;                            % fail flag
    bf=false;                           % bound upodate flag
    bbmsvsub(fx,rem);                   % main recursive solver
    [B,idx]=sort(sqrt(B),'descend');    % post processing
    sset=sort(sset(idx,:),2);
    
    function bbmsvsub(fx0,rem0)
        ops(4)=ops(4)+1;
        fx=fx0;
        rem=rem0;
        nf=sum(fx);
        m2=sum(rem);
        n2=n-nf;
%         bf=true;
        while ~f && m2>=n2 && n2>=0     % Pruning loops
            while ~f && m2 >= n2 && n2>=0 && (~downf || ~upf || ~bf)
                if ~upf || ~bf
                    upprune;            % upwards pruning
                end
                if ~f && m2 >= n2 &&  (~downf || ~bf)
                    downprune;          % downward pruning
                end
                bf=true;
            end %pruning loop
            if f || m2<=n2 || n2<=0    
                break                   % invalid or trivial problems
            end
            if n2==1                    % select one problems
% not very efficient for some problems                
%                 if bound
%                     h2(~rem)=Inf;
%                     [b,idk]=min(h2);
%                 else
                    g2(~rem)=0;
                    [b,idk]=max(g2);
%                 end
                s=fx;
                s(idk)=true;
                update(s);
                downf=false;
                rem(idk)=false;
                m2=m2-1;
                continue
            end
            if m2-n2==1,                % discard one problems
                h2(~rem)=0;
                [b,idk]=max(h2);
                rem(idk)=false;
                s=fx|rem;
                update(s);
                fx(idk)=true;
                upf=false;
                nf=nf+1;
                n2=n2-1;
                m2=m2-1;
                continue
            end
            fx1=fx;
            rem1=rem;
            b0=bound;
            h0=h2;
            g0=g2;
            if (n2<0.5*m2 && bi) || bi==1   %upward branching
                if bound
                    h2(~rem)=Inf;
                    [b,id]=min(h2);
                else
                    g2(~rem)=0;
                    [b,id]=max(g2);
                end
                rem1(id)=false;
                fx(id)=true;
                upf=false;
                bbmsvsub(fx,rem1);
                upf=true;
                downf=false;
                rem=rem1;
                fx=fx1;
            else                            %downward branching
                if bound && n<100
                    g2(~rem)=Inf;
                    [b,id]=min(g2);
                else
                    h2(~rem)=0;
                    [b,id]=max(h2);
                end
                rem1(id)=false;
                fx1(id)=true;
                downf=false;
                bbmsvsub(fx,rem1);
                if g0(id)<=bound
                    return
                end
                downf=true;
                upf=false;
                fx=fx1;
                rem=rem1;
            end
            if b0<bound
                bf=false;
                L=min(g0,q2')<=bound;
                if any(L)
                    rem=rem&~L;
                    downf=false;
                end
            end
            f=false;
            h2=h0;
            g2=g0;
            nf=sum(fx);
            n2=n-nf;
            m2=sum(rem);
        end
        if ~f                 % update bound for trivial problems
            if m2==n2
                update(fx|rem);
            elseif ~n2
                update(fx);
            end
        end
    end
    
    function upprune
        f=false;
        if ~nf
            g2(rem)=q2(rem);
        elseif nf==1
            if q2(fx)<bound
                f=true;
                return;
            end
            x=Q(rem,:)*Q(fx,:)';
            g2(rem)=(x.*x)/(bound-q2(fx))+q2(rem);
        else
            Z=Q(fx,:)*Q(fx,:)';
            [R,f]=chol(Z-bound*eye(nf));
            if f,
                return;
            end
            if m2>=n
                X=(R'\Q(fx,:))*Q(rem,:)';
            else
                X=R'\(Q(fx,:)*Q(rem,:)');
            end
            g2(rem)=q2(rem)-sum(X.*X,1)';
        end
        ops(2)=ops(2)+1;
        ops(3)=ops(3)+m2;
        g2(~rem)=Inf;
        upf=true;
        L=g2<=bound;
        if any(L)
            downf=false;
            rem=rem&~L;
            m2=sum(rem);
        end
    end

    function downprune
        s=fx|rem;
        h2(:)=Inf;
        [R,f]=chol(Q(s,:)'*Q(s,:)-bound*eye(n));
        if f
            return
        end
        G=R'\Q(rem,:)';
        h2(rem)=1-sum(G.*G,1);        
        ops(2)=ops(2)+1;
        ops(3)=ops(3)+m2;
        downf=true;
        L=h2<0;
        if any(L)
            upf=false;
            fx=fx | L;
            rem=rem & ~L;
            nf=sum(fx);
            m2=sum(rem);
            n2=n-nf;
        end
    end
       
    function bf0=update(s)              % update bound
        lambda=eig(Q(s,:)*Q(s,:)');
        ops(1)=ops(1)+1;
        bf0=sum(lambda<bound);
        if ~bf0
            B(ib)=min(lambda);          %avoid sorting
            sset(ib,:)=find(s);
            [bound,ib]=min(B);
            bf=false;
        end
    end
end

Contact us at files@mathworks.com