function qsub_submit_cm(job, jobdir, jobname, finishcb) %#ok<INUSL>
% QSUB_SUBMIT_CM Submit MATLAB command to SGE/torque and collect output
% This function submits a job from a master MATLAB session to a remote
% MATLAB using qsub. Its counterpart QSUB_RUN_CM executes the job on the
% remote computer. If requested, a timer will be started on the master
% MATLAB to check periodically whether the remote computation has finished.
% Once it is finished, a user-defined callback is executed which can be
% used to continue computation on the returned data.
% This function is limited to an environment of Linux/Unix compute nodes.
% It has been tested with both SUN gridengine and torque cluster management
% software. All nodes need to have MATLAB installed. One MATLAB license is
% required per node. It is possible to create a compiled version for the
% remote nodes. In a compiled version, QSUB_RUN_CM must be the entry
% function. All .m files that are necessary for computation must be
% included in the compiled archive. The mlcmd below must be changed
% accordingly to call the compiled version instead of MATLAB on the compute
% nodes.
% The code assumes a shared filesystem, where all MATLAB code and data are
% available in exactly the same paths on all nodes.
%
% Input arguments:
% job      - struct with fields
%            .fun      - a function handle that does the computation
%            .job      - a cell array of input arguments to job.fun
%            .noutputs - the number of output arguments of job.fun
%            .ctx      - job context. This is a struct with (currently)
%                        one field:
%                        .path - the full matlabpath to be set in the
%                                MATLAB before execution of job.fun
% jobdir   - job directory. This must reside on a shared network
%            filesystem. Communication between master and remote MATLAB
%            is via files in this folder:
%            - The job will be saved in a job .mat file. Its filename
%              will be based on jobname and created by the UNIX command
%              mktemp to ensure uniqueness and availability of the file.
%              Note that this file will not have a .mat extension.
%            - The remote MATLAB will be started in this directory.
%              Stdout and stderr output from MATLAB will be saved there
%              (usually in jobname.oXXX, jobname.eXXX files - depending
%              on the cluster configuration). Also, an output .mat file
%              jobname.out and a flag file will be written there. The
%              flag file can be used on torque installations to check for
%              successful completion of the remote job.
% jobname  - descriptive name of the job. This name will be used to
%            construct the job files. Thus it should only contain
%            characters that are valid in filenames.
% finishcb - optional: callback function that will be invoked in the
%            master MATLAB once the job has finished. A timer is used to
%            check periodically whether the remote job has finished. The
%            callback will be invoked with all output arguments returned
%            from the remote computation, or with an MException, if the
%            remote computation failed. The callback must handle this
%            exception.
%            If this argument is missing, remote output will be written
%            to the job output .mat file, but no monitoring of the job
%            will be done in the master MATLAB.
%
% Failure handling:
% - If the temporary job file cannot be created or qsub returns a
%   non-zero status, the failure is reported through cfg_message (if that
%   function is available) or warning, and the function returns.
% - If the job shell script cannot be opened for writing, an error
%   'qsub_submit_cm:scriptfail' is raised.
%
% Lines/variables that need to be adapted to the local installation:
% mlcmd   - Command to run MATLAB/compiled qsub_run_cm
% qname   - Queue name
% qsubcmd - Command to run qsub for job submission
%
% See also QSUB_RUN_CM, QSUB_CHECK_FINISH
%
% This code has been developed as part of a batch job configuration
% system for MATLAB. See
%      http://sourceforge.net/projects/matlabbatch
% for details about the original project.
%_______________________________________________________________________
% Copyright (C) 2007 Freiburg Brain Imaging
% Volkmar Glauche
% $Id: qsub_submit_cm.m 409 2009-06-22 09:50:28Z glauche $
rev = '$Rev: 409 $'; %#ok
if nargin < 4
    finishcb = '';
end
%% MATLAB command
if isdeployed
    error('qsub:NotImplemented',['Running in compiled mode is not yet ' ...
        'implemented.']);
    % Once compiled mode is supported, mlcmd should take a form like
    % mlcmd = '/path/to/executable %s %s %s';
else
    % unix command to run MATLAB. If necessary, include full path and command
    % line options. The '-r' option is reserved for the code to run the job.
    % If a compiled version of qsub_run_cm is used, the syntax and
    % arguments of the run_*.sh script must be used here.
    % The doubled '%%s' placeholders survive this sprintf call as '%s' and
    % are filled in later, when the job script is written.
    runpath = fileparts(which('qsub_run_cm'));
    mlcmd = sprintf(['%s -nodisplay -r ' ...
        '"addpath(''%s'');qsub_run_cm(''%%s'',''%%s'',''%%s'');"'], fullfile(matlabroot,'bin','matlab'), runpath);
end
%% QSUB job submission command
% unix command to run qsub. This command needs to be adapted to the queue
% system used (command line switches, queue name). Examples are given for
% torque and SUN gridengine.
% Placeholders ('%s') are for
% 1. queue name (can be adjusted below)
% 2. job name (input argument)
% 3. working directory (input argument)
% 4. script name (derived from job name)
qname = 'main.q'; % Queue name
% Suitable command syntax for torque
% qsubcmd = 'qsub -q %s -N %s -d %s %s';
% Suitable command syntax for SGE
qsubcmd = 'qsub -q %s -N %s -wd %s -terse %s';
%% Create temporary file for job inputs
[sts, jobfilename] = unix(sprintf('mktemp %s', fullfile(jobdir,sprintf('%s.XXXXXX',jobname))));
jobfilename = strtrim(jobfilename); % remove trailing newline
if sts
    % Without a job file nothing can be submitted; tell the caller why.
    local_report_failure('qsub_submit_cm:mktempfail', ...
        'Failed to create job file for job %s in %s:\n%s.', ...
        jobname, jobdir, jobfilename);
    return;
end
% Save job data to jobfilename
outfilename  = sprintf('%s.out',jobfilename);
flagfilename = sprintf('%s.flag',jobfilename);
save(jobfilename, '-struct','job');
%% Create executable shell script to run the job
scriptname = [jobfilename '.sh'];
[fid, fmsg] = fopen(scriptname,'w');
if fid == -1
    error('qsub_submit_cm:scriptfail', ...
        'Failed to create job script %s: %s', scriptname, fmsg);
end
fprintf(fid, '#!/bin/sh\n');
fprintf(fid, mlcmd, jobfilename, outfilename, flagfilename);
fclose(fid);
fileattrib(scriptname, '+x');
%% Submit job
% jobname: full temporary filename without directory name. Only three
% outputs are requested from fileparts - the former fourth (version)
% output is no longer supported by MATLAB.
[p, n, e] = fileparts(jobfilename); %#ok<ASGLU>
jobname = [n e];
[sts, qsubname] = unix(sprintf(qsubcmd, ...
    qname, jobname, jobdir, scriptname));
qsubname = strtrim(qsubname); % remove trailing newline
if sts
    local_report_failure('qsub_submit_cm:submitfail', ...
        'Failed to submit job %s:\n%s.', jobname, qsubname);
    return;
end
if ~isempty(finishcb)
    % Start timer, if job has been submitted and a finishcb is
    % supplied. The timer polls every 10 seconds via qsub_check_finish.
    jobtimer = timer('TasksToExecute',Inf, 'Period',10, ...
        'ExecutionMode','fixedRate', ...
        'TimerFcn', @(ob,ev)qsub_check_finish(ob,ev, jobfilename, outfilename, flagfilename, qsubname, finishcb), ...
        'Name', qsubname);
    start(jobtimer);
end

function local_report_failure(msgid, msgfmt, varargin)
% Report a non-fatal failure through cfg_message, if a function of that
% name is available, otherwise through warning.
if any(exist('cfg_message') == 2:6) %#ok<EXIST>
    cfg_message(msgid, msgfmt, varargin{:});
else
    warning(msgid, msgfmt, varargin{:});
end