function[z] = joindst(x,y,type,varargin)
% JOIN Single-key equality join of two dataset arrays
% Inputs : x,y - joined datasets
% type - join type, string 'inner', 'left outer', 'full outer'
% key - [optional] key column name, string; if no key is
% provided, (assumed single) common column is used
% Outputs: z - dataset array
% Notes : NaN/Inf values are not matched, and excluded from inner join.
% Output rows are sorted by join key, with NaN/Inf values last.
% Within any row of z, columns of x are listed first. Unmatched
% rows are padded with NaNs
% Example: x = [[unidrnd(5,4,1); nan; inf] [10 20 30 40 50 60]']
% y = [[unidrnd(5,4,1); nan; inf] [15 25 25 45 55 55]']
% x = dataset({x,'index','x_value'})
% y = dataset({y,'index','y_value'})
% inner = joindst(x,y,'inner')
% left_outer = joindst(x,y,'left outer')
% full_outer = joindst(x,y,'full outer')
% Dimitri Shvorob, dimitri.shvorob@gmail.com, 8/18/08
if ~isa(x,'dataset')
error('??? Invalid type: ''%s'' not a dataset array.',inputname(1))
end
if ~isa(y,'dataset')
error('??? Invalid type: ''%s'' not a dataset array.',inputname(2))
end
rows = @(x) size(x,1);
xvars = get(x,'VarNames');
yvars = get(y,'VarNames');
xyvars = intersect(xvars,yvars);
assert(~isempty(xyvars),'??? No common columns found in joined datasets.')
assert(isscalar(xyvars),'??? More than one common column found in joined datasets.')
p = inputParser;
p.addOptional('key',[],@ischar);
p.parse(varargin{:});
key = p.Results.key;
if isempty(key)
key = char(xyvars);
else
if ~isscalar(key)
error('??? Multiple join keys not allowed.')
end
if ~strmatch(key,xvars)
error('??? Column ''%s'' not found in dataset ''%s''.',key,inputname(1))
end
if ~strmatch(key,yvars)
error('??? Column ''%s'' not found in dataset ''%s''.',key,inputname(2))
end
end
[vx,ix] = keys(x.(key));
[vy,iy] = keys(y.(key));
xnan = x(~isfinite(x.(key)),:);
ynan = y(~isfinite(y.(key)),:);
y.(key) = [];
yvars = get(y,'VarNames');
p = size(x,2); xp = nan(1,p);
q = size(y,2); yp = nan(1,q);
z = [x(1,:) y(1,:)]; z(1,:) = [];
switch type
case 'inner'
for i = 1:length(vx)
j = find(vy == vx(i));
if ~isempty(j)
m = ix{i};
n = iy{j};
xi = tile(x(m,:),rows(n));
yi = tile(y(n,:),rows(m));
z = [z; [xi yi]]; %#ok
end
end
case 'left outer'
for i = 1:length(vx)
xi = x(ix{i},:);
j = find(vy == vx(i));
if ~isempty(j)
yi = y(iy{j},:);
else
yi = dataset({yp,yvars{:}});
end
rx = rows(xi);
ry = rows(yi);
xi = tile(xi,ry);
yi = tile(yi,rx);
z = [z; [xi yi]]; %#ok
end
z = [z; ...
[xnan dataset({tile(yp,rows(xnan)),yvars{:}})]]; %#ok
case 'full outer'
v = union(vx,vy);
for i = 1:length(v)
j = find(vx == v(i));
if ~isempty(j)
xi = x(ix{j},:);
else
xi = dataset({xp,xvars{:}});
end
j = find(vy == v(i));
if ~isempty(j)
yi = y(iy{j},:);
else
yi = dataset({yp,yvars{:}});
end
xt = tile(xi,rows(yi));
yt = tile(yi,rows(xi));
z = [z; [xt yt]]; %#ok
end
w = dataset({tile(xp,rows(ynan)),xvars{:}});
w.(key) = [];
z = [z; ...
[xnan dataset({tile(yp,rows(xnan)),yvars{:}})]; ...
[ynan w] ]; %#ok
otherwise
error('??? Join type not recognized.')
end
end
function[key,ind] = keys(x)
key = unique(x(isfinite(x)));
ind = cellfun(@(k) find(x == k),num2cell(key),'UniformOutput',false);
end
function[out] = tile(x,n)
out = x;
for i = 2:n
out = [out; x]; %#ok
end
end