image thumbnail

Fast Linear binary SVM classifier

by

 

04 Nov 2011 (Updated )

Fast implementation of Linear binary SVM via BLAS/OpenMP API

sampling.m
function [Itrain , Itest , Ivalid] = sampling(X , y , options);

% Various data sampling methods to evaluate classifier performances.
%
% Usage
% -----
% [Itrain , Itest , Ivalid] = sampling(X , y , [options]);
%
% Inputs
% ------
% X                           : data (d x N)
% y                           : labels (1 x N)
% options.valid               : 1 = 3 sets in output : train, test, valid. 0 = 2 outputs : train, test (default 0)
% options.randomize           : 1 = apply a random permutation to the samples before splitting (default 1)
% options.fraction            : 0<x<1 : keep only the first round(x*N) samples, e.g. for huge datasets (default 1)
% options.maxperclass         : Max samples per class. A scalar or a (1 x m) vector, m = number of classes (default inf)
% options.seed                : if different from -1, passed to rand('state' , seed) before any random draw (default -1)
% options.method              : 0 = No split : Itrain = (1 : N), Itest = []
%                               1 = Hold out
%                               2 = Bootstrap
%                               3 = K Cross-validation (default)
%                               4 = Leave One Out
%                               5 = Stratified Cross Validation
%                               6 = Balanced Stratified Cross Validation
%                               7 = Stratified Hold out
%                               8 = Stratified Boot Strap
% options.cv.K                : number of folds for methods 3, 5 and 6 (default 10)
% options.holding.rho         : train fraction for methods 1 and 7 (no default, required by these methods).
%                               With options.valid = 1 a scalar rho is expanded to [rho , (1-rho)/2 , (1-rho)/2]
% options.holding.K           : number of random splits for methods 1 and 7 (no default)
% options.bootstraping.rho    : same as options.holding.rho, for methods 2 and 8 (no default)
% options.bootstraping.K      : number of random splits for methods 2 and 8 (no default)
%
% Outputs
% -------
% Itrain                      : train indices, one row per split/fold
% Itest                       : test indices, one row per split/fold
% Ivalid                      : validation indices, one row per split/fold ([] when options.valid = 0)
%
% Example
% -------
% load iris
% [Itrain , Itest , Ivalid] = sampling(X , y);
%
% Author : Sebastien PARIS : sebastien.paris@lsis.org
% -------

if (nargin < 3)

    options               = struct();

end

% Fill in every option the caller did not supply.

if (~isfield(options , 'valid'))

    options.valid         = 0;

end

if (~isfield(options , 'randomize'))

    options.randomize     = 1;

end

if (~isfield(options , 'fraction'))

    options.fraction      = 1;

end

if (~isfield(options , 'maxperclass'))

    options.maxperclass   = inf;

end

if (~isfield(options , 'seed'))

    options.seed          = -1;

end

if (~isfield(options , 'method'))

    options.method        = 3;

end

% cv.K is a nested field: it never appears in fieldnames(options), so it has
% to be looked up inside options.cv. Only install the default when the caller
% did not provide a value, otherwise a user-supplied K would be overwritten.
if (~isfield(options , 'cv') || ~isfield(options.cv , 'K'))

    options.cv.K          = 10;

end

% ---- Data preparation -------------------------------------------------
% NOTE(review): every index set built after this point refers to the columns
% of the local, possibly shuffled / truncated / class-capped copy of X and y.
% The permutation itself is not returned to the caller -- confirm callers
% do not apply the indices to their original, unshuffled data.

[d , N]               = size(X);

% Re-seed rand only when a seed was supplied (-1 means "leave as is").
if(options.seed ~= -1)

    rand('state' , options.seed);

end

% Optional shuffle: columns of X and entries of y are permuted together.
if (options.randomize)

    shuffle           = randperm(N);

    X                 = X(: , shuffle);

    y                 = y(shuffle);

end

% Optional truncation to the leading fraction of the samples.
if ((options.fraction > 0) && (options.fraction < 1))

    N                 = round(options.fraction*N);

    X                 = X(: , 1:N);

    y                 = y(1:N);

end

% Optional cap on the number of samples kept per class.
if(any(~isinf(options.maxperclass)))

    label             = unique(y);

    L                 = length(label);

    % A scalar cap applies to every class.
    if(numel(options.maxperclass) == 1)

        options.maxperclass = options.maxperclass*ones(1 , L);

    end

    indice            = [];

    for c = 1 : L

        members       = find(y == label(c));

        nkeep         = min(length(members) , options.maxperclass(c));

        indice        = [indice , members(1:nkeep)];

    end

    X                 = X(: , indice);

    y                 = y(: , indice);

    [d , N]           = size(X);

end

indN                  = (1 : N);

% Default: no validation set.
Ivalid                = [];

% Method 0: no split at all, every sample is a training sample.
if(options.method == 0)

    Itrain            = indN;

    Itest             = [];

end

if(options.method == 1) % Hold-out: K random splits drawn without replacement

    rho               = options.holding.rho;

    K                 = options.holding.K;

    % With a validation set, a scalar rho becomes [train , test , valid]
    % fractions, the remainder being shared equally by test and valid.
    if options.valid

        if(numel(rho) == 1)

            rho       = [rho , (1-rho)/2 , (1-rho)/2];

        end

    end

    NN                = round(rho*N);

    Ntrain            = NN(1);

    if(options.valid)

        Ntest         = NN(2);

        Nvalid        = N - (Ntrain + Ntest);

        Ivalid        = zeros(K , Nvalid);

    else

        Ntest         = N - Ntrain;

    end

    Itrain            = zeros(K , Ntrain);

    Itest             = zeros(K , Ntest);

    % Positions inside each permutation that feed the three sets.
    trainpos          = 1:Ntrain;

    testpos           = (Ntrain + 1):(Ntrain + Ntest);

    validpos          = (Ntrain + Ntest + 1):N;

    for k = 1 : K

        perm          = randperm(N);

        Itrain(k , :) = perm(trainpos);

        Itest(k , :)  = perm(testpos);

        if(options.valid)

            Ivalid(k , :) = perm(validpos);

        end

    end

end

if(options.method == 2) % Bootstrap: K splits drawn with replacement

    rho               = options.bootstraping.rho;

    K                 = options.bootstraping.K;

    % With a validation set, a scalar rho becomes [train , test , valid]
    % fractions, the remainder being shared equally by test and valid.
    if options.valid

        if(numel(rho) == 1)

            rho       = [rho , (1-rho)/2 , (1-rho)/2];

        end

    end

    NN                = round(rho*N);

    Ntrain            = NN(1);

    if(options.valid)

        Ntest         = NN(2);

        Nvalid        = N - (Ntrain + Ntest);

        Ivalid        = zeros(K , Nvalid);

    else

        Ntest         = N - Ntrain;

    end

    Itrain            = zeros(K , Ntrain);

    Itest             = zeros(K , Ntest);

    % Positions inside each draw that feed the three sets.
    trainpos          = 1:Ntrain;

    testpos           = (Ntrain + 1):(Ntrain + Ntest);

    validpos          = (Ntrain + Ntest + 1):N;

    for k = 1 : K

        % N indices in 1..N drawn with replacement; all sets slice this one
        % draw, so the same sample index may show up in more than one set.
        draw          = ceil(N*rand(1 , N));

        Itrain(k , :) = draw(trainpos);

        Itest(k , :)  = draw(testpos);

        if(options.valid)

            Ivalid(k , :) = draw(validpos);

        end

    end

end

if(options.method == 3) % K-fold cross-validation

    K      = options.cv.K;

    % Fold size; the N - K*S leftover samples never enter a test fold.
    S      = floor(N/K);

    Itrain = zeros(K , N - S - options.valid*S);

    Itest  = zeros(K , S);

    if (options.valid)

        Ivalid = zeros(K , S);

    end

    for fold = 1 : K

        first          = (fold - 1)*S + 1;

        last           = fold*S;

        % Test fold = the fold-th contiguous slice of S samples.
        Itest(fold , :) = first:last;

        % Everything else, starting right after the test slice and wrapping around.
        rest           = [last+1:N , 1:first-1];

        if(options.valid)

            % The S samples following the test slice become the validation fold.
            Ivalid(fold , :) = rest(1:S);

            Itrain(fold , :) = rest(S+1:end);

        else

            Itrain(fold , :) = rest;

        end

    end

end


if(options.method == 4) % Leave-one-out: N folds, each holding a single test sample

    K      = N;

    S      = 1;

    Itrain = zeros(K , N - S - options.valid*S);

    Itest  = zeros(K , S);

    if (options.valid)

        Ivalid = zeros(K , S);

    end

    for held = 1 : K

        Itest(held , :)  = held;

        % Remaining samples, starting right after the held-out one and wrapping around.
        others           = [held+1:N , 1:held-1];

        if(options.valid)

            % The sample following the held-out one is used for validation.
            Ivalid(held , :) = others(1:S);

            Itrain(held , :) = others(S+1:end);

        else

            Itrain(held , :) = others;

        end

    end

end

if(options.method == 5) % Stratified Cross Validation

    % Each of the K folds takes floor(select(c)/K) test samples from every
    % class c (a contiguous slice of that class' members); with
    % options.valid the next slice of the same size is the validation set
    % and the remaining members of the class go to the train set.

    K                               = options.cv.K;

    label                           = unique(y);

    % Number of samples of each class, in the order of label.
    select                          = histc(y , label);

    L                               = length(label);

    ind                             = cell(1 , L);

    for i = 1 : L

        ind{i}                      = find(y == label(i));

    end

    % Divide directly instead of multiplying by (1/K): (1/K)*select can fall
    % just below an exact integer through round-off (e.g. (1/49)*49 < 1),
    % which would make floor undercount the per-class test size by one.
    Ntestc                          = floor(select./K);

    if(options.valid)

        Nvalidc                     = Ntestc;

        Ntrainc                     = select  - 2*Ntestc;

        Ntrain                      = sum(Ntrainc);

        Ntest                       = sum(Ntestc);

        Nvalid                      = sum(Nvalidc);

        Ivalid                      = zeros(K , Nvalid);

    else

        Ntrainc                     = select  - Ntestc;

        Ntrain                      = sum(Ntrainc);

        Ntest                       = sum(Ntestc);

    end

    Itrain                          = zeros(K , Ntrain);

    Itest                           = zeros(K , Ntest);

    for j = 1 : K

        temptrain                   = [];

        temptest                    = [];

        tempvalid                   = [];

        for ii = 1 : L

            % j-th slice of class ii is the test part of fold j.
            temptest                = [temptest , ind{ii}(1+(j-1)*Ntestc(ii):j*Ntestc(ii))];

            % Positions (within the class) of everything outside the test
            % slice, starting right after it and wrapping around.
            temp                    = [1+j*Ntestc(ii): select(ii) , 1:(j-1)*Ntestc(ii) ];

            if(options.valid)

                tempvalid           = [tempvalid , ind{ii}(temp(1:Nvalidc(ii)))];

                temptrain           = [temptrain , ind{ii}(temp(Nvalidc(ii)+1:end))];

            else

                temptrain           = [temptrain , ind{ii}(temp)];

            end

        end

        Itest(j , :)                = temptest;

        Itrain(j , :)               = temptrain;

        if(options.valid)

            Ivalid(j , :)           = tempvalid;

        end

    end

end

if(options.method == 6) % Balanced Stratified Cross Validation

    % For every class the samples are first ordered along a greedy
    % nearest-neighbour chain (starting from the per-feature minimum of the
    % class), then dealt one by one to the K folds, so that consecutive
    % (close) samples land in different folds. Samples left over once a
    % class can no longer fill a complete round of K are always train samples.

    K                               = options.cv.K;

    label                           = unique(y);

    % Number of samples of each class, in the order of label.
    select                          = histc(y , label);

    [d , N]                         = size(X);

    L                               = size(label , 2);

    ind                             = cell(1 , L);

    list                            = cell(1 , L);

    ind_list                        = cell(1 , L);

    Ni                              = zeros(1 , L);

    % Divide directly instead of multiplying by (1/K): (1/K)*select can fall
    % just below an exact integer through round-off (e.g. (1/49)*49 < 1).
    % The folds built below receive exactly floor(Ni/K) samples per class, so
    % an undercount here would make the pre-sized Itest rows too short.
    Ntestc                          = floor(select./K);

    if(options.valid)

        Nvalidc                     = Ntestc;

        Ntrainc                     = select  - 2*Ntestc;

        Ntrain                      = sum(Ntrainc);

        Ntest                       = sum(Ntestc);

        Nvalid                      = sum(Nvalidc);

        Ivalid                      = zeros(K , Nvalid);

    else

        Ntrainc                     = select  - Ntestc;

        Ntrain                      = sum(Ntrainc);

        Ntest                       = sum(Ntestc);

    end

    Itrain                          = zeros(K , Ntrain);

    Itest                           = zeros(K , Ntest);

    for i = 1 : L

        ind{i}                      = find(y == label(i));

        Ni(i)                       = length(ind{i});

        % Column 1 holds the starting point, columns 2..Ni+1 the chained samples.
        list{i}                     = zeros(d , Ni(i) + 1);

        ind_list{i}                 = zeros(1 , Ni(i));

    end

    % Greedy nearest-neighbour ordering of the samples of each class.
    for i = 1 : L

        a                           = X(: , ind{i});

        list{i}(: , 1)              = min(a , [] , 2);

        for j = 1 : Ni(i)

            b                       = list{i}(: , j);

            tmp                     = b(: , ones(1 , size(a , 2))) - a;

            % Sum explicitly along the feature dimension: with d = 1, tmp is
            % a row vector and a plain sum(...) would collapse it to a scalar.
            dist                    = sqrt(sum(tmp.*tmp , 1));

            [minval , s]            = min(dist);

            list{i}(: , j + 1)      = a(: , s);

            ind_list{i}(j)          = ind{i}(s);

            % Remove the selected sample from the candidates.
            a(: , s)                = [];

            ind{i}(s)               = [];

        end

    end

    % Deal the ordered samples of each class round-robin to the K folds.
    T_ind                           = cell(1 , K);

    Listremain                      = [];

    for i = 1 : L

        b_ind                       = ind_list{i};

        if ~isempty(b_ind)

            while (size(b_ind , 2) >= K)

                for j = 1 : K

                    T_ind{j}        = [T_ind{j} ; b_ind(1)];

                    b_ind(1)        = [];

                end

            end

            % Fewer than K samples left: they stay out of every test fold.
            Listremain              = [Listremain , b_ind];

        end

    end

    % Row i flags the fold(s) held out at iteration i: fold i always, and
    % additionally the next fold (cyclically) when a validation set is requested.
    Iindice                         = options.valid.*[[zeros(K-1,1) ; 1] , eye(K,K-1) ] + eye(K);

    for i = 1 : K

        tmp                         = Iindice(i , :)==1;

        indtest                     = find(tmp);

        indtrain                    = find(~tmp);

        Itrain_temp                 = [];

        for j = 1 : length(indtrain)

            Itrain_temp             = [Itrain_temp ; T_ind{indtrain(j)}];

        end

        % Random permutation used to shuffle the train indices of this fold.
        [ignore , p]                = sort(rand(1 , Ntrain));

        temp                        = [Itrain_temp' , Listremain];

        Itrain(i , :)               = temp(p);

        Itest(i , :)                = T_ind{indtest(1)}';

        if(options.valid)

            Ivalid(i , :)           = T_ind{indtest(2)}';

        end

    end

end

if(options.method == 7) % Stratified hold-out: per-class splits drawn without replacement

    rho               = options.holding.rho;

    K                 = options.holding.K;

    % With a validation set, a scalar rho becomes [train , test , valid]
    % fractions, the remainder being shared equally by test and valid.
    if options.valid

        if(numel(rho) == 1)

            rho       = [rho , (1-rho)/2 , (1-rho)/2];

        end

    end

    label             = unique(y);

    % Number of samples of each class, in the order of label.
    select            = histc(y , label);

    L                 = size(label , 2);

    ind               = cell(1 , L);

    for c = 1 : L

        ind{c}        = find(y == label(c));

    end

    % Per-class set sizes.
    Ntrainc           = round(rho(1)*select);

    Ntrain            = sum(Ntrainc);

    if(options.valid)

        Ntestc        = round(rho(2)*select);

        Ntest         = sum(Ntestc);

        Nvalidc       = select - Ntrainc - Ntestc;

        Nvalid        = sum(Nvalidc);

        Ivalid        = zeros(K , Nvalid);

    else

        Ntestc        = select - Ntrainc;

        Ntest         = sum(Ntestc);

    end

    Itrain            = zeros(K , Ntrain);

    Itest             = zeros(K , Ntest);

    for k = 1 : K

        temptrain     = [];

        temptest      = [];

        tempvalid     = [];

        for c = 1 : L

            % Random ordering of the members of class c.
            [ignore , order] = sort(rand(1 , select(c)));

            ntr       = Ntrainc(c);

            nte       = Ntestc(c);

            temptrain = [temptrain , ind{c}(order(1:ntr))];

            % Without a validation set ntr + nte equals select(c), so this
            % slice runs to the end of the class.
            temptest  = [temptest , ind{c}(order(ntr+1:ntr+nte))];

            if(options.valid)

                tempvalid = [tempvalid , ind{c}(order(ntr+nte+1:select(c)))];

            end

        end

        Itrain(k , :) = temptrain;

        Itest(k , :)  = temptest;

        if(options.valid)

            Ivalid(k , :) = tempvalid;

        end

    end

end


if(options.method == 8) % Stratified bootstrap: per-class splits drawn with replacement

    rho               = options.bootstraping.rho;

    K                 = options.bootstraping.K;

    % With a validation set, a scalar rho becomes [train , test , valid]
    % fractions, the remainder being shared equally by test and valid.
    if options.valid

        if(numel(rho) == 1)

            rho       = [rho , (1-rho)/2 , (1-rho)/2];

        end

    end

    label             = unique(y);

    % Number of samples of each class, in the order of label.
    select            = histc(y , label);

    L                 = size(label , 2);

    ind               = cell(1 , L);

    for c = 1 : L

        ind{c}        = find(y == label(c));

    end

    % Per-class set sizes.
    Ntrainc           = round(rho(1)*select);

    Ntrain            = sum(Ntrainc);

    if(options.valid)

        Ntestc        = round(rho(2)*select);

        Ntest         = sum(Ntestc);

        Nvalidc       = select - Ntrainc - Ntestc;

        Nvalid        = sum(Nvalidc);

        Ivalid        = zeros(K , Nvalid);

    else

        Ntestc        = select - Ntrainc;

        Ntest         = sum(Ntestc);

    end

    Itrain            = zeros(K , Ntrain);

    Itest             = zeros(K , Ntest);

    for k = 1 : K

        temptrain     = [];

        temptest      = [];

        tempvalid     = [];

        for c = 1 : L

            % select(c) positions in 1..select(c) drawn with replacement;
            % all sets slice this one draw, so a member of the class may
            % show up in more than one set.
            draw      = ceil(select(c)*rand(1 , select(c)));

            ntr       = Ntrainc(c);

            nte       = Ntestc(c);

            temptrain = [temptrain , ind{c}(draw(1:ntr))];

            % Without a validation set ntr + nte equals select(c), so this
            % slice runs to the end of the draw.
            temptest  = [temptest , ind{c}(draw(ntr+1:ntr+nte))];

            if(options.valid)

                tempvalid = [tempvalid , ind{c}(draw(ntr+nte+1:select(c)))];

            end

        end

        Itrain(k , :) = temptrain;

        Itest(k , :)  = temptest;

        if(options.valid)

            Ivalid(k , :) = tempvalid;

        end

    end

end




Contact us