| qsub_check_finish(tob, tev, jobfilename, outfilename, flagfilename, qsubname, finishcb) |
function qsub_check_finish(tob, tev, jobfilename, outfilename, flagfilename, qsubname, finishcb)
% QSUB_CHECK_FINISH Timer callback to check job status periodically
% This function is run in the MATLAB session where a job has been
% submitted via QSUB_SUBMIT_CM. It is called periodically by a timer object
% to check whether the associated job has finished. The "finished" status
% is indicated by evaluating the output of SUN gridengine qstat.
% Alternatively, QSUB_RUN_CM on the compute node creates a flag file at the
% end of a successful computation. The existence of this flag file
% indicates a successful completion. This solution is currently not used,
% and it is lacking support to monitor failure of jobs (if a job crashed,
% no flag file will ever be created).
%
% Once the job is listed as finished, the timer is stopped and deleted and
% the output file is loaded as a .mat file. If it contains output from the
% job (empty or missing 'err' variable), finishcb is run with the output
% cell array 'out' as its only argument. Otherwise, finishcb is called with
% the MException stored in the output file. If the output file cannot be
% loaded at all, finishcb is called with the MException describing the load
% failure. It is up to the callback to handle these exceptions.
%
% If the job is listed by qstat in an error state, the timer is stopped and
% deleted and finishcb is called with the MException stored in the output
% file, or with a generic 'qsub:jobfailed' MException if none is available.
%
% If the job is still running or pending, nothing is done and the timer
% keeps polling.
%
% Input arguments:
% This function should never be called from outside a timer callback. All
% inputs are set in QSUB_SUBMIT_CM.
% tob          - timer object running this callback; stopped and deleted
%                once the job has finished or failed
% tev          - timer event data (unused)
% jobfilename  - job file name (unused here)
% outfilename  - name of the .mat file holding the variables 'out' and
%                'err' for this job
% flagfilename - name of the flag file (only referenced by the disabled
%                flag file alternative)
% qsubname     - job identifier as it appears at the start of a qstat line
% finishcb     - function handle, called with exactly one argument
%
% See also QSUB_SUBMIT_CM, QSUB_RUN_CM
%
% This code has been developed as part of a batch job configuration
% system for MATLAB. See
% http://sourceforge.net/projects/matlabbatch
% for details about the original project.
%_______________________________________________________________________
% Copyright (C) 2007 Freiburg Brain Imaging
% Volkmar Glauche
% $Id: qsub_check_finish.m 409 2009-06-22 09:50:28Z glauche $
rev = '$Rev: 409 $'; %#ok
% check for job in recently finished jobs - this is for SUN gridengine
% (two outputs are requested so that command output is captured instead of
% being echoed to the command window)
[sts, qout] = unix(sprintf('qstat -s z |grep -q "^[[:blank:]]*%s[[:blank:]]"',qsubname)); %#ok<ASGLU>
% Alternatively: check for flag file
% sts = exist(flagfilename,'file') > 0; % flag file exists, job should be finished
if sts == 0 % grep found job in list - job should be finished without error
    stop(tob);
    delete(tob);
    % load output file; any problem is turned into an MException that is
    % handed to the callback instead of being lost in the timer callback
    try
        out = load(outfilename, '-mat');
        if isfield(out, 'err') && ~isempty(out.err)
            % this should never happen - job threw an error
            result = out.err;
        else
            % job did not throw error - cell array of outputs
            result = out.out;
        end
    catch loadexc
        result = loadexc;
    end
    % called outside the try block so that errors raised by the callback
    % itself are not swallowed
    finishcb(result);
else
    % check in running/pending list - this is for SUN gridengine
    [sts, qout] = unix(sprintf('qstat |grep "^[[:blank:]]*%s[[:blank:]]"',qsubname));
    % No alternative for flag file here
    if sts == 0
        % job found in running/pending list
        stat = textscan(qout,'%s');
        tokens = stat{1};
        % the 5th whitespace separated token is expected to be the state
        % column of the qstat listing; guard against shorter output
        if numel(tokens) >= 5 && any(tokens{5} == 'E')
            % error state - this is for SUN gridengine
            stop(tob);
            delete(tob);
            joberr = [];
            try
                % try to load output file
                out = load(outfilename, '-mat');
                if isfield(out, 'err')
                    joberr = out.err;
                end
            catch %#ok
                % no usable output file - report generic failure below
            end
            if isempty(joberr)
                joberr = MException('qsub:jobfailed','Job %s failed.', qsubname);
            end
            finishcb(joberr);
        end
    end
end
|
|