%% Rolling statistics in MATLAB
% Getting the mean of a sample data is easy in MATLAB: you collect all the
% data in an array and simply call |mean| on it. But what if the data is
% far too large to be stored all at once? Or if it is generated live, and
% you do not know in advance when data generation will end?
%
% The utility class described here fill these two use cases. At the cost of
% time (using a class in MATLAB is costy), it generates the most important
% statistics descriptors of a dataset in a rolling fashion. Each data item
% is appended one by one. They are not stored in the memory, and therefore
% the memory cost is independant of the sample size. Statistics can
% accessed anytime. And as shown in case two, the class can transparently
% operate on data item that are scalars, arrays, matrices, and images.
%% Live data generation
% We simulate here the generation of data by an external process, and build
% statistics without storing individual items.
tic
hasNext = @(x) toc < 5; % Will generate data for 5 seconds
next = @(x) 1 + 2 * randn; % gaussian distribution with mean 1 and std 2
rg1 = rollingstats;
while hasNext()
y = next();
rg1 = rg1.add(y);
end
disp(rg1)
%% A simple Image Processing application
% Description of second code block
images = load('mri');
I = squeeze(images.D);
n_slices = size(I, 3);
rg2 = rollingstats;
for i = 1 : n_slices
% to avoid overflow in calculation, we must convert the image to double
J = double(I(:,:,i));
% Add it to the rolling stats
rg2 = rg2.add(J);
end
figure('Position', [50 50 400 400])
% Min intensity projection
subplot(221)
imshow(rg2.min, [])
title('Min')
% Mean intensity projection
subplot(222)
imshow(rg2.mean, [])
title('Mean')
% Kurtosis projection
subplot(223)
imshow(rg2.kurtosis, [])
title('Kurtosis')
% Max-min
subplot(224)
imshow(rg2.skewness, [])
title('Skewness')
%%
% Right now this image dataset is not a great example to show the benefits
% of |rollingstats|: There is 27 slices of |uint8| images, and
% |rollingstats| must store 6 double arrays the same size that of a single
% slice. Because of the size of |double| (8 bytes) versus |uint8| (1 byte),
% it would have been cheaper to compute the statistics on the whole image
% dataset at once. This won't be the case anymore for large data, loaded
% from the disk one by one.
rgwhos = whos('rg2');
imagewhos = whos('I');
fprintf('Total image size is %.1f kB.\n', imagewhos.bytes/1000)
fprintf('Stat size is %.1f kB.\n', rgwhos.bytes/1000)