classdef LinearClassifier < handle
%A linear classifier that is defined by a 2-dimensional weight vector and
%a threshold value. The decision boundary is perpendicular to the weight
%vector, offset from the origin by an amount proportional to -theta/|w|^2.
%The weight vector and threshold value
%The decision boundary in terms of gradient/intercept (i.e. y=mx+c)
%Callback function for invoke on weight change
%Indices of support vectors
%Whether the classifier has been trained (i.e. an SVM), or has been
trained = false;
function a = LinearClassifier(weights, callback)
a.weights = weights';
%Initial threshold is arbitrarily defined by the squared L2 norm of
%the weight vector (meaning that the boundary is offset from the
%origin by 1.
a.theta = -sum(a.weights .^ 2);
%Compute the boundary gradient
a.callback = callback;
function set_weights(a, weights, theta, sv_indices)
%If the classifier has been trained, the the SVs are passed in as a
if (nargin == 4)
a.sv_indices = sv_indices;
a.trained = true;
a.sv_indices = ;
a.trained = false;
%Ensure that the threshold and at least one of the weights are
if (sum(abs(weights)) == 0)
weights = [0.01 0.01];
if (theta == 0)
theta = 0.00000001;
%Don't put an upper threshold on theta for now.
%theta = min(max(theta, -10), 10);
a.theta = theta;
a.weights = weights';
%Recomputer the boundary gradient and invoke callback
%Scale the weights by -theta / |w|^2, thereby giving a vector
%with a magnitude that defines a point on the decision boundary.
scaled_weights = -(a.theta .* a.weights) ./ (norm(a.weights)^2);
%Get a gradient (prevent it from being infinity) m = y/x
a.boundary_grad = svm_demo.threshold(-scaled_weights(1) ./ scaled_weights(2));
%Compute intercept c = y - mx
a.boundary_inte = scaled_weights(2) - (scaled_weights(1) .* a.boundary_grad);
function [outputs sv] = classify(a, inputs)
%Classify some data
%Compute its offset from the decision boundary
offset = (inputs * a.weights) + a.theta;
%Find the support vectors
sv = a.sv_indices;
lt_indices = offset < 0;
%Because we require the indices of SVs, we can't take the minimum
%of the offset vector indexed by the required sign. Therefore,
%we'll just do a cheeky hack to ensure that offsets with the wrong
%sign won't become support vectors.
[v1 sv1] = min(offset + lt_indices .* 100000);
[v2 sv2] = max(offset + ~lt_indices .* -100000);
%In case there are no data points on one side of the line, ensure
%we don't pick the most distant point on the other side of the line
if (v1 > 50)
sv = sv2;
elseif (v2 < -50)
sv = sv1;
sv = [sv1; sv2];
%Take a lazy sign of the offset to classify. This ensures that points
%very close to the boundary are labelled 0. This makes it super-easy
%to compute classification patches.
outputs = -svm_demo.lsign(offset);
function train(a, inputs, outputs, mode)
if (size(inputs, 1) > 0)
%Automatically choose the best SVM training mode.
svm_location = which('svmtrain');
if strfind(svm_location, 'libsvm')
if (mode == 's')
mode = 'l';
elseif (mode == 'b')
error('Bioinformatics mode is selected, but LibSVM is further up the path. Please select LibSVM or change your path.');
if (mode == 's')
mode = 'b';
elseif (mode == 'l')
error('LibSVM is selected, but the bioinformatics toolbox is further up the path. Please select BioInformatics or change your path.');
elseif exist('quadprog', 'file')
mode = 'o';
error('Unable to find svmtrain or quadprog; make sure you have libsvm, bioinformatics toolbox, or optimization toolbox');
%Bioinformatics SVM solution
%svmtrain is a bioinformatics toolbox function that magically
%traing an svm. Uses least-squares by default, but can be made
%to use SMO or QP if that floats your boat. It doesn't return
%weights, but these can be derived. Using a linear kernal,
%the prediction is made by (inputs' * SV * Alpha) + Bias.
%Hence the weights are simply Alpha * SV'.
%SVM also has a parameter for soft-margins. Setting this to
%infinity causes the same behaviour as the other solutions.
struct = svmtrain(inputs, outputs, 'AutoScale', false, 'BoxConstraint', 50, 'Method', 'SMO');
w = (struct.Alpha' * struct.SupportVectors);
a.set_weights(w, struct.Bias, struct.SupportVectorIndices);
%Same as the bioinformatics solution, slightly different
%syntax. Set the largest long possible in C as the
%soft-margin parameter, since we want a hard margin.
%The SV indices aren't obviously available, but the SV values
%(i.e. inputs) are. It's another obfuscated arrayfun to
%extract the indices from the input matrix.
model = svmtrain(outputs, inputs, '-t 0 -c 50');
w = (model.sv_coef' * full(model.SVs));
sv_count = size(model.SVs, 1);
d_count = size(inputs, 1);
a.set_weights(w, -model.rho, ...
mod(find(cell2mat(arrayfun(@(i, j)sum(inputs(i, :) == full(model.SVs(j, :))), ...
repmat((1:d_count)', sv_count, 1), reshape(repmat(1:sv_count, d_count, 1), sv_count.*d_count, 1), ...
'UniformOutput', false))) - 1, d_count) + 1);
%Quadratic programming SVM solution.
%quadprog solves: min 0.5x'Hx + f'x s.t. Ax <= b
%An SVM requires us to minimise 0.5||w||^2 (i.e. the L2
%magnitude of the weight vector) subject to perfect
%classification: y(x'w) >= 1
%So the first parameter, H, needs to allow us to square the
%x/y components of the weight vector (which looks like
%[wx wy bias]). Hence we use an identity matrix with the
%third column nullified, giving us [wx wy].^2.
%f is zero, since the optimization has no linear component.
%The constraint is a greater than/equal, so we negate both
%sides giving -y(x'w) <= -1. We augment the input data to
%include a column corresponding to the bias, containing only
%ones (so [x1 x2] becomes [x1 x2 1]. Since the combination
%rule is (x1w1 + x2w2 + bias), multiplying x1, x2 and 1 by
%the label will make the combination positive or negative
%depending on whether it was correct or wrong. Finally, this
%gives something that can be optimized in the form Ax <= b.
[w fval ex op l] = quadprog([1 0 0; 0 1 0; 0 0 0], zeros(3, 1), ...
-([inputs ones(size(inputs, 1), 1)] .* repmat(outputs, 1, 3)), ...
-ones(size(inputs, 1), 1), , , , , , ...
optimset('LargeScale', 'off', 'MaxIter', 10000, 'Display', 'off'));
%The interesting output values are w and l. w is the
%optimized vector of weights - [w1 w2 bias] - and l contains
%the lapacian multipliers required to solve the qp problem.
%Non-zero lapacian multipliers indicate support vectors, so
%we extract their indices with find.
a.set_weights(-w(1:2)', -w(3), find(l.ineqlin)); %#ok<FNDSB>
%The perceptron training algorithm iteratively adjusts the
%decision boundary, reacting to misclassified examples. The
%weights and bias are moved to approach correct
%classification of any misclassified data by a small amount
%(the learning rate, in this case 0.05), and this is computed
%iteratively over the training set, and typically for some
%number of epochs (in this case, once for every call to the
%'train' method). There aren't 'Support Vectors' in a
%perceptron, but circle the errors, since these detmine the
%path taken by the perceptron (correct predictions have no
%impact on the decision boundary).
w = a.weights';
bias = a.theta;
if (a.classify(inputs(i, :)) ~= outputs(i))
w = w - 0.01 .* (outputs(i) .* inputs(i, :));
bias = bias - 0.01 .* outputs(i);
%a.set_weights(a.weights', a.theta, find(a.classify(inputs) ~= outputs)); %#ok<FNDSB>
a.set_weights(a.weights', a.theta, a.sv_indices);