| upsert(conn,tableName,fieldNames,keyFields,data, varargin)
|
function insertMask = upsert(conn,tableName,fieldNames,keyFields,data, varargin)
% UPSERT inserts new and updates old data to a database table
%
% UPSERT(CONNECT,TABLENAME,FIELDNAMES,KEYFIELDS,DATA).
% CONNECT is a database connection object.
% TABLENAME is the database table.
% FIELDNAMES is a string array of database column names.
% KEYFIELDS is the list of primary key fields that must be matched to
% perform an UPDATE rather than an INSERT. It may be given as a logical
% array the same length as FIELDNAMES, or a string or cell array of
% strings of key column names (in which case KEYFIELDS must be a subset
% of FIELDNAMES).
% DATA is a MATLAB cell array.
%
% INSERTEDMASK = UPSERT(...) returns a logical vector with one element for
% each row of DATA, indicating whether the "upsert" operation meant that
% corresponding row of DATA was inserted (TRUE) or merely updated (FALSE).
%
% UPSERT(...,'updateFcn',FUNCTION_HANDLE) optionally allows a user to
% provide their own function to replace the default MATLAB "update".
%
% UPSERT(...,'debug',true) prints out diagnostic information.
%
% Example:
%
% Imagine a database table "PHONE_NOS" with data like:
% PERSONID | TYPE | NUMBER
% 1 'HOME' 1234567
% 1 'MOB' 1222222
% 2 'HOME' 9888888
%
% Then the MATLAB commands:
% newNos = {1 'MOB' 4444444
% 2 'MOB' 5555555};
% INS = upsert(conn, 'PHONE_NOS', {'PERSONID','TYPE','NUMBER'}, [1 1 0], newNos)
%
% Would result in the table having contents:
% PERSONID | TYPE | NUMBER
% 1 'HOME' 1234567
% 1 'MOB' 4444444
% 2 'HOME' 9888888
% 2 'MOB' 5555555
%
% The returned variable (INS) would be [0; 1], meaning the second row was
% updated, the first row was inserted.
% Author: Sven Holcombe, 2012-09-25
% Firstly, handle large data sets. Later we will use an IN clause with a
% comma-separated list to check key fields. Oracle has a limit of 1000
% items in a list, so let's process 1000 at a time at most. Not an optimal
% solution but best solutions will differ by database flavour so this is
% adequate for the moment.
numRows = size(data,1);
if numRows>1000
insertMask = true(numRows,1);
chunks = unique([1:999:numRows numRows+1]);
for i = 1:length(chunks)-1
dataInds = chunks(i):chunks(i+1)-1;
insertMask(dataInds) = ...
upsert(conn,tableName,fieldNames,keyFields,data(dataInds,:), varargin{:});
end
return;
end
% keyFields may be input as:
% - A string: 'id'
% - A cellstr: {'id','groupNo'}
% - A logical mask the same size as fieldNames (true where field is a key)
% - Indices into fieldNames of the key fields
% keyFields will be transformed to the indices representation below
if ischar(keyFields) || iscellstr(keyFields)
keyFields = ismember(upper(fieldNames), upper(keyFields));
end
if islogical(keyFields)
keyFields = find(keyFields);
end
% Currently it's easier (if perhaps slightly slower) to treat data as a
% cell regardless of what format it was provided as.
if isnumeric(data)
data = num2cell(data);
end
% Handle user configuration input
IP = inputParser;
IP.addParamValue('updateFcn', @update, @(x)isa(x,'function_handle'));
IP.addParamValue('debug', false);
IP.parse(varargin{:});
doPrint = IP.Results.debug;
updateFcn = IP.Results.updateFcn;
% Which fields are keyFields? Build lists of them for an SQL fetch
keyFieldsCell = fieldNames(keyFields);
keyFieldsIsnumeric = cellfun(@(x)isnumeric(x)||islogical(x), data(1,keyFields));
keyFieldsListStr = sprintf('%s,',keyFieldsCell{:});
% We don't want to gather the whole table. Only the rows matching the
% primary key fields. This is most generalised by building IN () lists from
% a single database query, rather than sending/recieving one query for
% every row of "data" being upserted.
inClauses = cell(length(keyFields),1);
for i=1:length(keyFields)
if keyFieldsIsnumeric(i)
inSet = unique([data{:,keyFields(i)}]);
inStr = sprintf('%g,',inSet);
elseif ischar(data{1,keyFields(i)})
inSet = unique(data(:,keyFields(i)));
inStr = sprintf('''%s'',',inSet{:});
else
error('upsert:badKey', 'Primary key field cannot contain %s data',class(data{1,keyFields(i)}))
end
if length(inSet)>1
inClauses{i} = sprintf('%s IN (%s)', keyFieldsCell{i}, inStr(1:end-1));
else
inClauses{i} = sprintf('%s = %s', keyFieldsCell{i}, inStr(1:end-1));
end
end
% Fetch all table rows potentially matching the data we want to upsert
fetchWhereClause = sprintf(' %s AND', inClauses{:});
fetchSqlStr = sprintf('SELECT %s FROM %s WHERE %s', keyFieldsListStr(1:end-1), tableName, fetchWhereClause(1:end-3));
if doPrint, fprintf('Fetching %s data in %s matching given data...', keyFieldsListStr(1:end-1), tableName), end
fetchedData = fetch(conn, fetchSqlStr);
if doPrint, fprintf(' done. (%d potential matches found)\n', size(fetchedData,1)), end
% Build a map of which rows to be upserted already exist in the table.
insertMask = true(size(data,1),1); % One
if ~isempty(fetchedData)
eqMap = false(size(data,1), size(fetchedData,1), length(keyFields));
for i = 1:length(keyFields)
if keyFieldsIsnumeric(i)
thisUpsertData = cell2mat(data(:,keyFields(i)));
thisFetchedData = cast(cell2mat(fetchedData(:,i)), class(thisUpsertData));
eqMap(:,:,i) = bsxfun(@eq, thisUpsertData, thisFetchedData');
else
thisUpsertData = data(:,keyFields(i));
thisFetchedData = fetchedData(:,i)';
eqCell = cellfun(@(x)strcmp(x, thisFetchedData), thisUpsertData, 'Un',0);
eqMap(:,:,i) = cat(1, eqCell{:});
end
end
pkeysMatchMap = all(eqMap,3);
insertMask = ~any(pkeysMatchMap,2);
end
% First find any data rows that do NOT yet exist in table. Insert them.
if any(insertMask)
if doPrint, fprintf('Inserting %d data rows not currently in %s...', nnz(insertMask), tableName), end
fastinsert(conn,tableName,fieldNames,data(insertMask,:));
if doPrint, fprintf(' done.\n'), end
end
% Next, update ALL rows to the values given in data. First build WHERE.
whereEqClauses = cell(numRows, length(keyFields));
for i=1:length(keyFields)
if keyFieldsIsnumeric(i)
whereEqClauses(:,i) = cellfun(@(dat)sprintf('%s = %g', keyFieldsCell{i}, dat), data(:,keyFields(i)),'Un',0);
else
whereEqClauses(:,i) = cellfun(@(dat)sprintf('%s = ''%s''', keyFieldsCell{i}, dat), data(:,keyFields(i)),'Un',0);
end
end
dataWhereClauses = cellfun(@(strs)sprintf(' %s AND',strs{:}), num2cell(whereEqClauses,2),'Un',0);
dataWhereClauses = cellfun(@(str)['WHERE ' str(1:end-3)], dataWhereClauses, 'Un',0);
% Next, run the update on all the NON-keyField fields (since the key fields
% themselves are being matched, so won't change). Note that the "update"
% function can be replaced by a user's modified update function.
otherFields = setdiff(1:length(fieldNames), keyFields);
if doPrint, fprintf('Updating %d data rows (%d new, %d old) in %s...', length(insertMask), nnz(insertMask), nnz(~insertMask), tableName), end
updateFcn(conn,tableName,fieldNames(otherFields),data(~insertMask,otherFields), dataWhereClauses(~insertMask))
if doPrint, fprintf(' done.\n'); end
|
|