Code covered by the BSD License  

Highlights from
Statistical Learning Toolbox

from Statistical Learning Toolbox by Dahua Lin
Functions for statistical learning, pattern recognition and computer vision, covering many topics.

slcovlarge(X, w, vmean, cachesize)
function C = slcovlarge(X, w, vmean, cachesize)
%SLCOVLARGE Computes large covariance matrix using memory-efficient way
%
% $ Syntax $
%   - C = slcovlarge(X, w, vmean, cachesize)
%
% $ Arguments $
%   - X:            the sample matrix
%   - w:            the weights of samples (default = [])
%                   if it is specifed, it should be a 1 x n vector
%   - vmean:        the pre-computed mean (default = [])
%   - cachesize:    the size of working cache (in the unit of Mbytes)
%
% $ Description $
%   - C = slcovlarge(X, w, vmean, cachesize) computes the large covariance 
%     matrix within a memory-limited context. Compared to slcov, which is
%     faster at the expense of using more memory, it is slower, however
%     the memory used is under control. Thus it can effectively prevent
%     from the situation of out of memory. 
%
% $ Remarks $
%   - The memory increased in the function will not exceed 
%     the size of the covariance matrix plus the cachesize.
%
%   - For vmean
%       - if it is empty, then a mean vector will be calculated
%       - if it is zero, then the data is assumed to be centralized
%       - if it is specified, then its size should be d x 1.
%
%   - The memory estimated to be used for each section is
%     if has no weight
%           2 x sd x sn + 2 x (sd + sn). 
%     else
%           3 x sd x sn + 2 x (sd + sn).
%     end
%     Here sd and sn are the sub-dimension
%     and sub-number of that section.
%
% $ History $
%   - Created by Dahua Lin, on Aug 17, 2006
%

%% parse and verify input arguments

if nargin < 4
    raise_lackinput('slcovlarge', 4);
end

if ndims(X) ~= 2 || ~isnumeric(X)
    error('sltoolbox:invalidarg', ...
        'X should be a 2D numeric matrix');
end

[d, n] = size(X);

if ~isempty(w)
    if ~isequal(size(w), [1, n])
        error('sltoolbox:sizmismatch', ...
            'The w should be an 1 x n vector');
    end
end

% convert cache size
cacheelems = floor(cachesize * 1e6 / 8);

if isempty(vmean)
    if cacheelems < d
        error('sltoolbox:notenoughmem', ...
            'The cache size is not enough to hold even a mean vector');
    end    
    cacheelems = cacheelems - d;
    vmean = slmean(X, w);
else
    if ~isequal(vmean, 0)
        if ~isequal(size(vmean), [d, 1])
            error('sltoolbox:sizmismatch', ...
                'The vmean should be a d x 1 vector');
        end
    end
end


%% decide partition struct

if cacheelems > 2*d*n + 2*(d+n)   % can compute without partitioning
    ps = [];            
else     
    if isempty(w)
        cblks = 2;
    else
        cblks = 3;
    end
            
    sd = floor( (cacheelems - 2 * n) / (cblks * n + 2) );
    divide_row = false;
    if sd <= 0
        sd = 1;
        if cacheelems < cblks + 4
            error('sltoolbox:notenoughmem', ...
                'The cache is not large enough');
        end
        sn = floor((cacheelems - 2) / (cblks + 2));
        divide_row = true;
    end
            
    ps = slpartition(d, 'maxblksize', sd);
    if divide_row
        rps = slpartition(d, 'maxblksize', sn);
    end        
end


%% compute

if isempty(ps)
    
    if ~isempty(w);
        X = weight_x(X, w);
    end
    X = shift_x(X, vmean);
    Xt = X';
    C = X * Xt;
    
else
    
    if ~divide_row      % each row is taken as integral
        
        C = zeros(d, d);        
        nsecs = length(ps.sinds);
        
        for i = 1 : nsecs
            for j = 1 : nsecs
                si = ps.sinds(i);
                ei = ps.einds(i);
                sj = ps.sinds(j);
                ej = ps.einds(j);
                                   
                if isempty(w)
                    curXj = X(sj:ej, :);  
                    curXj = shift_x(curXj, vmean, sj, ej);
                    curXjt = curXj';
                    clear curXj;
                    
                    curXi = X(si:ei, :);    
                    curXi = shift_x(curXi, vmean, si, ei);
                else
                    curXj = X(sj:ej, :);
                    curXj = shift_x(curXj, vmean, sj, ej);               
                    curXj = weight_x(curXj, w);
                    clear curwj;
                    curXjt = curXj';
                    clear curXj;
                    
                    curXi = X(si:ei, :);
                    curXi = shift_x(curXi, vmean, si, ei);
                    curXi = weight_x(curXi, w);
                    clear curwi;
                end
                
                C(si:ei, sj:ej) = curXi * curXjt;

            end
        end
                                
    else                % even each row need to be divided        
        
        slignorevars(rps);
        
        error('sltoolbox:rterror', ...
            'In current implementation, row-division is not implemeted yet');        
    end    
    
end
              

%% scale down

if isempty(w)
    C = C / n;
else
    tw = sum(w);
    C = C / tw;
end


%% Core computing function

function sx = weight_x(sx, w)

sn = size(sx, 2);
for i = 1 : sn
    sx(:, i) = sx(:, i) * sqrt(w(i));
end

function sy = shift_x(sx, vmean, sidx, eidx)

[sd, sn] = size(sx);
if ~isequal(vmean, 0)
    
    if nargin == 2
        curmean = vmean;
    else
        curmean = vmean(sidx:eidx);
    end    
    
    sy = zeros(sd, sn);
    for i = 1 : sn
        sy(:,i) = sx(:,i) - curmean;
    end
else
    sy = sx;
end




        

        

Contact us at files@mathworks.com