Code covered by the BSD License

# Table Breakpoint Optimization

### Tucker McClure (view profile)

04 Apr 2012 (Updated )

A set of tools for finding the best way to reduce the size of a table.

find_best_table_nd(...
```function [x_f, z_f, mse_f, max_error] = find_best_table_nd(...
x_0, z_0, ...
n, ...
method)

%% find_best_table_nd
%
% This function finds the best way to reduce the size of a table to the
% specified number of points. That is, suppose you had a table of
% 100-by-10,000 points, but wanted to find the closest 9-by-10 table. This
% function would find what 9-by-10 points best represent the original
% table. This is accomplished using traditional constrained optimization
% techniques to best fit a table with the specified dimensions. "Goodness"
% will be measured by the difference between the original table and the new
% table sampled at the original data points. See find_best_table_demo.m for
% examples.
%
% Inputs:
%   x_0    Cell array of values for the first independent variables (e.g.,
%          {x1, x2, ... xn}.
%   z_0    Matrix of values for the dependent variable for all x_0 and
%          y_0 using ndgrid-style syntax (e.g., z_0(2, 3) corresponds
%          to x{1}(2) and x{2}(3)). Note that this is different from what
%          is produced by meshgrid.
%   n_x    Number of divisions of the first independent variable in output
%   n_y    Number of divisions of the second independent variable in output
%   method Interpolation method to use, e.g., 'linear' (see help interp2)
%
% Outputs:
%   x_f    Cell array of best divisions of independent variables found
%   z_f    Resulting fit of z_0 on x_f and y_f
%   mse_f  Mean square error of final table
%
% This builds off of work by Richard Willey, Stuart Kozola, Eric Johnson,
% Sarah Zaranek, and Peter Maloney at The MathWorks, Inc.
%
% Requires the Optimization Toolbox (TM).
% Supports the Parallel Computing Toolbox (TM).
%
%  Tucker McClure @ The MathWorks
%  Copyright 2013 The MathWorks, Inc.

% Convert from the spacing between the points to the grid points.
function x_i = spacing_to_xy(spacing_i)
x_i = cell(1, n_d);
li = 0;
for ki = 1:n_d
x_i{ki} = [[0, cumsum(spacing_i(li+(1:n(ki)-2)))] ...
+ x_0_starts(ki), x_0_ends(ki)];
li = li + n(ki)-2;
end
end

% Determine how well the current spacing fits the original data.
function [mse, max_error, z_i] = evaluate_spacing(spacing_i)

% Convert to grid points.
x_i = spacing_to_xy(spacing_i);

% Make me a mesh.
x_i_meshs = cell(1, n_d);
[x_i_meshs{1:end}] = ndgrid(x_i{:});

% Evaluate it and find the difference between the new mesh and z_0,
% evaluated at x_0 and y_0. Get the mean square error from these.
if use_interpolator
z_i = interpolator(x_i_meshs{:});
g   = griddedInterpolant(x_i_meshs{:}, z_i, method);
se  = (z_0 - g(x_0_meshs{:})).^2;
else
z_i = interpn(x_0_meshs{:}, z_0, x_i_meshs{:}, method);
se  = (z_0 - interpn(x_i_meshs{:}, z_i, ...
x_0_meshs{:}, method)).^2;
end
mse       = mean(se(:));
max_error = sqrt(max(se(:)));

end

% Do some quick error checking and set defaults for arguments.
if nargin == 3
method = 'linear';
elseif nargin > 4 || nargin < 3
error('Incorrect number of arguments.');
end

% Make sure numbers make sense.
if any(n < 3)
error(['Each dimension of table must have at least 3 data ' ...
'points (otherwise, it represents ' ...
'only the corners, so there''s nothing to solve.\n']);
end

% Number of dimensions
n_d = length(n);

% Our state will be the spacing between the xs and ys.
x_0_starts = cellfun(@(x) x(1),   x_0);
x_0_ends   = cellfun(@(x) x(end), x_0);
dx_i = (x_0_ends - x_0_starts)./(n - 1);

spacing_0 = zeros(1, sum(n) - 2*n_d);
last_index = 0;
for k = 1:n_d
spacing_0(last_index + (1:n(k)-2)) = dx_i(k) * ones(1, n(k) - 2);
last_index = last_index + n(k)-2;
end

% Set up constraints. We want all the xs and ys to add up to the table
% spans, and we can't let any dx or dy be <= 0.

% There are no equality constraints.
Aeq = [];
beq = [];

% Set the lower bound to be very small, 1e-3 * the difference in
% uniformly spaced x and y values.
lb = 1e-3 * min(dx_i) * ones(sum(n) - 2*n_d, 1);
ub = [];

% Total of distances between points needs to equal the span.
A = zeros(n_d, sum(n) - 2*n_d);
b = zeros(n_d, 1);
last_index = 0;
for k = 1:n_d
A(k, last_index + (1:n(k)-2)) = 1;
last_index = last_index + n(k)-2;
b(k) = x_0{k}(end) - x_0{k}(1) - lb(1);
end

% Set up the initial meshes. We'll use these frequently.
x_0_meshs = cell(n_d, 1);
[x_0_meshs{1:end}]= ndgrid(x_0{:});

% Using a griddedInterpolant is about 30% faster than interp2 on the
% example data. If that's available in the user's version, use it.
use_interpolator = exist('griddedInterpolant', 'class');
if use_interpolator
interpolator = griddedInterpolant(x_0_meshs{:}, z_0, method);
end

% Call an optimization routine.
spacing_f = fmincon(@evaluate_spacing, spacing_0, ...
A, b, Aeq, beq, lb, ub, [], ...
optimset('Algorithm',   'active-set', ...
'MaxFunEvals', 10000, ...
'UseParallel', 'always', ...
'Display',     'notify'));

% Convert back to grid points, and we're done.
x_f = spacing_to_xy(spacing_f);

% Output the final table.
[mse_f, max_error, z_f] = evaluate_spacing(spacing_f);

end
```