Code covered by the BSD License

# Linear time Outlier Scoring via Random Walks

### michael kim (view profile)

I use a markov chain rejection sampling method to score outliers in linear time.

scoreOutliers(inputMat)
```function outputMat = scoreOutliers(inputMat)
% Linear Time Outlier Scoring via Random Walks
% Compute the outlier score for each p-dimensional data point
% The highest scores are possible outliers, scores between [0,1]
% The outputMat last column contains the scores
% Original scoring algorithm by Michael S Kim (mikeskim [at] gmail.com)
% Version 1.00 (10/02/2013) for Matlab ported from R
% not fully tested on Matlab, tested on GNU Octave and R
% warning('off','all');

sampleSize = 2000; %increase this value for higher precision, lower for faster computation times
[rowSize, colSize] = size(inputMat);
scores = zeros(rowSize,1);
inputMat = [inputMat, scores(:)];
lastColIndex = colSize + 1;

tmpSample = zeros(sampleSize,1);
tmpIndex = unidrnd(rowSize, (sampleSize+1),1);
for j =1:sampleSize
tmpSample(j) = sum(abs(inputMat(tmpIndex(j),:)-inputMat(tmpIndex((j+1),:))));
end
sampleMax = median(tmpSample) + 4*1.4826*mad(tmpSample,1);

for j = 1:rowSize
mcount = 0;
origin = inputMat(j,1:colSize);
tmpSampleIndex = unidrnd(rowSize, sampleSize, 1);
while mcount < sampleSize
mcount = mcount + 1;
tmpDestIndex = tmpSampleIndex(mcount);
destination =  inputMat(tmpDestIndex,1:colSize);
tmpDist = sum(abs(origin - destination))/sampleMax;
if unifrnd(0,1,1) > tmpDist
origin = destination;
inputMat(tmpDestIndex, lastColIndex) = inputMat(tmpDestIndex, lastColIndex) + 1;
end
end
end

tmpMax =  max(inputMat(:,lastColIndex));
inputMat(:,lastColIndex) = 1 - inputMat(:,lastColIndex)/tmpMax;
outputMat = sortrows(inputMat, lastColIndex);
end

%test function via
%tmpOut = scoreOutliers([randn(300,1), randn(300,1)])
%scatter3(tmpOut(:,1),tmpOut(:,2),tmpOut(:,3),10, tmpOut(:,3), "s")
```