Code covered by the BSD License  

Highlights from
Table Breakpoint Optimization

image thumbnail

Table Breakpoint Optimization

by

 

04 Apr 2012 (Updated )

A set of tools for finding the best way to reduce the size of a table.

find_best_table_nd(...
function [x_f, z_f, mse_f, max_error] = find_best_table_nd(...
                                            x_0, z_0, ...
                                            n, ...
                                            method)

%% find_best_table_nd
%
% This function finds the best way to reduce the size of a table to the
% specified number of points. That is, suppose you had a table of 
% 100-by-10,000 points, but wanted to find the closest 9-by-10 table. This 
% function would find what 9-by-10 points best represent the original 
% table. This is accomplished using traditional constrained optimization 
% techniques to best fit a table with the specified dimensions. "Goodness" 
% will be measured by the difference between the original table and the new
% table sampled at the original data points. See find_best_table_demo.m for
% examples.
% 
% Inputs:
%   x_0    Cell array of values for the first independent variables (e.g., 
%          {x1, x2, ... xn}.
%   z_0    Matrix of values for the dependent variable for all x_0 and
%          y_0 using ndgrid-style syntax (e.g., z_0(2, 3) corresponds
%          to x{1}(2) and x{2}(3)). Note that this is different from what
%          is produced by meshgrid.
%   n_x    Number of divisions of the first independent variable in output
%   n_y    Number of divisions of the second independent variable in output
%   method Interpolation method to use, e.g., 'linear' (see help interp2)
% 
% Outputs:
%   x_f    Cell array of best divisions of independent variables found
%   z_f    Resulting fit of z_0 on x_f and y_f
%   mse_f  Mean square error of final table
%
% This builds off of work by Richard Willey, Stuart Kozola, Eric Johnson, 
% Sarah Zaranek, and Peter Maloney at The MathWorks, Inc.
%
% Requires the Optimization Toolbox (TM).
% Supports the Parallel Computing Toolbox (TM).
%
%  Tucker McClure @ The MathWorks
%  Copyright 2013 The MathWorks, Inc.
                                                 
    % Convert from the spacing between the points to the grid points.
    function x_i = spacing_to_xy(spacing_i)
        x_i = cell(1, n_d);
        li = 0;
        for ki = 1:n_d
            x_i{ki} = [[0, cumsum(spacing_i(li+(1:n(ki)-2)))] ...
                       + x_0_starts(ki), x_0_ends(ki)];
            li = li + n(ki)-2;
        end
    end

    % Determine how well the current spacing fits the original data.
    function [mse, max_error, z_i] = evaluate_spacing(spacing_i)

        % Convert to grid points.
        x_i = spacing_to_xy(spacing_i);
        
        % Make me a mesh.
        x_i_meshs = cell(1, n_d);
        [x_i_meshs{1:end}] = ndgrid(x_i{:});
        
        % Evaluate it and find the difference between the new mesh and z_0,
        % evaluated at x_0 and y_0. Get the mean square error from these.
        if use_interpolator
            z_i = interpolator(x_i_meshs{:});
            g   = griddedInterpolant(x_i_meshs{:}, z_i, method);
            se  = (z_0 - g(x_0_meshs{:})).^2;
        else
            z_i = interpn(x_0_meshs{:}, z_0, x_i_meshs{:}, method);
            se  = (z_0 - interpn(x_i_meshs{:}, z_i, ...
                                 x_0_meshs{:}, method)).^2;
        end
        mse       = mean(se(:));
        max_error = sqrt(max(se(:)));

    end

    % Do some quick error checking and set defaults for arguments.
    if nargin == 3
        method = 'linear';
    elseif nargin > 4 || nargin < 3
        error('Incorrect number of arguments.');
    end
    
    % Make sure numbers make sense.
    if any(n < 3)
        error(['Each dimension of table must have at least 3 data ' ...
               'points (otherwise, it represents ' ...
               'only the corners, so there''s nothing to solve.\n']);
    end
    
    % Number of dimensions
    n_d = length(n);
    
    % Our state will be the spacing between the xs and ys.
    x_0_starts = cellfun(@(x) x(1),   x_0);
    x_0_ends   = cellfun(@(x) x(end), x_0);
    dx_i = (x_0_ends - x_0_starts)./(n - 1);
    
    spacing_0 = zeros(1, sum(n) - 2*n_d);
    last_index = 0;
    for k = 1:n_d
        spacing_0(last_index + (1:n(k)-2)) = dx_i(k) * ones(1, n(k) - 2);
        last_index = last_index + n(k)-2;
    end
           
    % Set up constraints. We want all the xs and ys to add up to the table
    % spans, and we can't let any dx or dy be <= 0.
    
    % There are no equality constraints.
    Aeq = [];
    beq = [];
    
    % Set the lower bound to be very small, 1e-3 * the difference in
    % uniformly spaced x and y values.
    lb = 1e-3 * min(dx_i) * ones(sum(n) - 2*n_d, 1);
    ub = [];

    % Total of distances between points needs to equal the span.
    A = zeros(n_d, sum(n) - 2*n_d);
    b = zeros(n_d, 1);
    last_index = 0;
    for k = 1:n_d
        A(k, last_index + (1:n(k)-2)) = 1;
        last_index = last_index + n(k)-2;
        b(k) = x_0{k}(end) - x_0{k}(1) - lb(1);
    end
       
    % Set up the initial meshes. We'll use these frequently.
    x_0_meshs = cell(n_d, 1);
    [x_0_meshs{1:end}]= ndgrid(x_0{:});

    % Using a griddedInterpolant is about 30% faster than interp2 on the
    % example data. If that's available in the user's version, use it.
    use_interpolator = exist('griddedInterpolant', 'class');
    if use_interpolator
        interpolator = griddedInterpolant(x_0_meshs{:}, z_0, method);
    end
    
    % Call an optimization routine.
    spacing_f = fmincon(@evaluate_spacing, spacing_0, ...
                        A, b, Aeq, beq, lb, ub, [], ...
                        optimset('Algorithm',   'active-set', ...
                                 'MaxFunEvals', 10000, ...
                                 'UseParallel', 'always', ...
                                 'Display',     'notify'));

    % Convert back to grid points, and we're done.
    x_f = spacing_to_xy(spacing_f);
    
    % Output the final table.
    [mse_f, max_error, z_f] = evaluate_spacing(spacing_f);
    
end

Contact us