<?xml version="1.0" encoding="UTF-8"?>
<!-- ******************************************************************************* -->
<!-- * Copyright (c) 2011 University of Illinois All rights reserved. This program -->
<!-- * and the accompanying materials are made available under the terms of the -->
<!-- * Eclipse Public License v1.0 which accompanies this distribution, and is -->
<!-- * available at http://www.eclipse.org/legal/epl-v10.html -->
<!-- * -->
<!-- * Contributors: -->
<!-- * Albert L. Rossi - design and implementation -->
<!-- * Galen Arnold - adapt to openmpi -->
<!-- * Greg Watson - adapt to interactive/debug -->
<!-- ****************************************************************************** -->
<resource-manager-builder
xmlns="http://eclipse.org/ptp/rm"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://eclipse.org/ptp/rm http://eclipse.org/ptp/schemas/v1.1/rm.xsd"
name="edu.sdsc.trestles.torque.interactive.openmpi">
<control-data>
<!-- Directory holding the Torque client programs. The commands in this file append the
     program name directly to this value, so it has to end with a path separator. Change
     the default if Torque is installed in a non-standard location. -->
<attribute visible="false" name="bindir">
    <default>/opt/torque/bin/</default>
</attribute>
<!-- Needed for LML to function: linked to the batch-specific queue attribute. -->
<attribute visible="false" name="control.queue.name">
    <link-value-to>destination</link-value-to>
</attribute>
<!-- Target of the get-queues start-up command; the Queue combo takes its items from here. -->
<attribute visible="false" name="queues"></attribute>
<!-- Launcher prefix built from the MPI command and the process count attributes. -->
<attribute visible="false" name="mpiPrefix">
    <default>${ptp_rm:mpiCommand#value} -np ${ptp_rm:mpiNumberOfProcesses#value}</default>
</attribute>
<!-- Account charged for the job; the submit commands pass it to qsub with -A when it is set. -->
<attribute name="Account_Name" type="string">
    <description>Account to which to charge this job.</description>
    <tooltip>Format: string</tooltip>
</attribute>
<!-- MPI launcher selection. The choice list starts with an empty entry, followed by mpiexec and mpirun. -->
<attribute name="mpiCommand" type="choice">
    <description>Which MPI launch command to use.</description>
    <choice>,mpiexec,mpirun</choice>
</attribute>
<!-- Process count used for the -np option; values below 1 are excluded by min. -->
<attribute name="mpiNumberOfProcesses" min="1" type="integer">
    <description>The number of processes to launch (the '-np' value.)</description>
</attribute>
<!-- Queue the job is submitted to; edited through the Queue combo in the launch tab. -->
<attribute type="string" name="destination">
    <description>Designation of the queue to which to submit the job.</description>
    <tooltip>Format: queue[@server].</tooltip>
</attribute>
<!-- Module settings; the submit commands send this value as the first input line of the session. -->
<attribute type="string" name="modules">
    <description>Modules that will be loaded inside the job script.</description>
</attribute>
<!-- Memory limit; inserted as the mem= item of the qsub -l resource list. -->
<attribute name="Resource_List.mem" type="string">
    <description>Maximum amount of memory used by all concurrent processes in the job.</description>
    <tooltip><![CDATA[Format: integer[suffix]; the suffix is a multiplier
in bytes or words (word size of the host): b (bytes),
w (words), kb/kw (kilo), mb/mw (mega), gb/gw (giga).]]></tooltip>
    <default>20gb</default>
</attribute>
<!-- Node request; inserted as the nodes= item of the qsub -l resource list. -->
<attribute name="Resource_List.nodes" type="string">
    <description>Number and/or type of nodes to be reserved for exclusive use by the job.
[usage hint] number_nodes:ppn=N</description>
    <tooltip><![CDATA[The value is one or more node_specs joined with
the '+' character, "node_spec[+node_spec...]". Each node_spec is
a number of nodes required of the type declared in the node_spec
and a name or one or more properties desired for
the nodes. The number, the name, and each property in the
node_spec are separated by a colon ':'. If no number is specified,
one (1) is assumed. Units: string. The name of a node is its
hostname. The properties of nodes are: ppn=# requested; defaults
to 1; or an arbitrary string defined by system administrator.
Example: To ask for 2 processors on each of two blue nodes and
three processors on one red node: -l nodes=2:blue:ppn=2+red:ppn=3.]]></tooltip>
    <default>1:ppn=1</default>
</attribute>
<!-- Wall-clock limit; inserted as the walltime= item of the qsub -l resource list.
     The tooltip states the same hh:mm:ss form that the validator and its error message require. -->
<attribute name="Resource_List.walltime" type="string">
    <description>Maximum amount of real time during which the job can be in the running state.</description>
    <tooltip>Format: hh:mm:ss.</tooltip>
    <default>00:30:00</default>
    <validator>
        <!-- Three two-digit fields separated by colons. -->
        <regex expression="\d\d:\d\d:\d\d"/>
        <error-message>format must be hh:mm:ss</error-message>
    </validator>
</attribute>
<!-- Start-up command: runs qstat with -Q -f and adds every queue name found on stdout
     to the value of the "queues" attribute. -->
<start-up-command name="get-queues">
    <arg>${ptp_rm:bindir#value}qstat</arg>
    <arg>-Q</arg>
    <arg>-f</arg>
    <stdout-parser delim="\n">
        <target ref="queues">
            <match>
                <!-- Group 1 is the run of word characters that follows "Queue: ". -->
                <expression>Queue: ([\w\d]+)</expression>
                <add field="value">
                    <entry valueGroup="1"/>
                </add>
            </match>
        </target>
    </stdout-parser>
</start-up-command>
<!-- Interactive (non-debug) launch: starts "qsub -I" with a pseudo-terminal, waits for the job id and
     keeps the session open; the input elements carry the module settings, the MPI prefix and the program. -->
<submit-interactive name="submit-interactive" directory="${ptp_rm:directory#value}" waitForId="true" keepOpen="true"
    flags="ALLOCATE_PTY">
    <arg>${ptp_rm:bindir#value}qsub</arg>
    <arg>-I</arg>
    <!-- Omitted when no queue is selected, so that a bare -q is never handed to qsub;
         same guard as the -A argument below. -->
    <arg isUndefinedIfMatches="-q">-q ${ptp_rm:destination#value}</arg>
    <arg>-l nodes=${ptp_rm:Resource_List.nodes#value},mem=${ptp_rm:Resource_List.mem#value},walltime=${ptp_rm:Resource_List.walltime#value}</arg>
    <!-- Omitted when no account name is set. -->
    <arg isUndefinedIfMatches="-A">-A ${ptp_rm:Account_Name#value}</arg>
    <arg>-V</arg>
    <!-- Omitted when the modules value is empty (only the trailing ';' would remain). -->
    <input isUndefinedIfMatches=";">${ptp_rm:modules#value};</input>
    <!-- Omitted when the prefix ends right after "-np", i.e. no process count is set. -->
    <input isUndefinedIfMatches=".+ -np">${ptp_rm:mpiPrefix#value}</input>
    <input>${ptp_rm:executablePath#value} ${ptp_rm:progArgs#value}</input>
    <stdout-parser delim="\n">
        <target ref="@jobId">
            <!-- qsub reports it is waiting for the job to start: record the id (group 1), state SUBMITTED. -->
            <match>
                <expression>qsub:.+job[\s]+([\d]+[.].+)[\s]to start[\s]*</expression>
                <set field="name">
                    <entry valueGroup="1"/>
                </set>
                <set field="value">
                    <entry value="SUBMITTED"/>
                </set>
            </match>
            <!-- qsub reports the job is ready: record the id (group 1), state RUNNING. -->
            <match>
                <expression>qsub:[\s]+job[\s]+([\d]+[.].+)[\s]ready[\s]*</expression>
                <set field="name">
                    <entry valueGroup="1"/>
                </set>
                <set field="value">
                    <entry value="RUNNING"/>
                </set>
            </match>
            <!-- Submission was rejected: state FAILED. -->
            <match>
                <expression>.*Job not submitted[\s]*</expression>
                <set field="value">
                    <entry value="FAILED"/>
                </set>
            </match>
        </target>
        <!-- Same pattern as the "ready" match above. -->
        <exit-after>qsub:[\s]+job[\s]+([\d]+[.].+)[\s]ready[\s]*</exit-after>
    </stdout-parser>
    <stderr-parser delim="\n">
        <target ref="@jobId">
            <!-- Any non-empty stderr line marks the job FAILED while keeping the recorded job id name. -->
            <match>
                <expression>.+</expression>
                <set field="name">
                    <entry value="${ptp_rm:@jobId#name}"/>
                </set>
                <set field="value">
                    <entry value="FAILED"/>
                </set>
            </match>
        </target>
    </stderr-parser>
</submit-interactive>
<!-- Interactive debug launch: starts "qsub -I" with a pseudo-terminal, then sends the input lines that
     change to the executable directory and run start_job.pl followed by the mpirun arguments. -->
<submit-interactive-debug name="submit-interactive-debug" directory="${ptp_rm:executableDirectory#value}" waitForId="true" keepOpen="true"
    flags="ALLOCATE_PTY">
    <arg>${ptp_rm:bindir#value}qsub</arg>
    <arg>-I</arg>
    <!-- Omitted when no queue is selected, so that a bare -q is never handed to qsub;
         same guard as the -A argument below. -->
    <arg isUndefinedIfMatches="-q">-q ${ptp_rm:destination#value}</arg>
    <arg>-l nodes=${ptp_rm:Resource_List.nodes#value},mem=${ptp_rm:Resource_List.mem#value},walltime=${ptp_rm:Resource_List.walltime#value}</arg>
    <!-- Omitted when no account name is set. -->
    <arg isUndefinedIfMatches="-A">-A ${ptp_rm:Account_Name#value}</arg>
    <arg>-V</arg>
    <!-- Omitted when the modules value is empty (only the trailing ';' would remain). -->
    <input isUndefinedIfMatches=";">${ptp_rm:modules#value};</input>
    <input>cd ${ptp_rm:executableDirectory#value} &amp;&amp; perl ${ptp_rm:ptpDirectory#value}/rms/OPENMPI/start_job.pl</input>
    <input>mpirun</input>
    <!-- Omitted when no process count is set. -->
    <input isUndefinedIfMatches="-np">-np ${ptp_rm:mpiNumberOfProcesses#value}</input>
    <input>-hostfile $PBS_NODEFILE</input>
    <environment name="PTP_JOBID" value="@jobId"/>
    <environment name="PTP_LAUNCH_MODE" value="${ptp_rm:launchMode#value}" />
    <environment name="PTP_DEBUG_EXEC_PATH" value="${ptp_rm:debuggerExecutablePath#value}"/>
    <environment name="PTP_DEBUG_EXEC_ARGS" value="${ptp_rm:debuggerArgs#value}"/>
    <environment name="OMPI_MCA_btl_openib_flags" value="1"/>
    <!-- NOTE(review): Open MPI appears to name this parameter mpi_leave_pinned (environment variable
         OMPI_MCA_mpi_leave_pinned); confirm whether the btl_ prefix in the name below is intended. -->
    <environment name="OMPI_MCA_btl_mpi_leave_pinned" value="0"/>
    <environment name="OMPI_MCA_btl_openib_warn_default_gid_prefix" value="0"/>
    <stdout-parser delim="\n">
        <target ref="@jobId">
            <!-- qsub reports it is waiting for the job to start: record the id (group 1), state SUBMITTED. -->
            <match>
                <expression>qsub:.+job[\s]+([\d]+[.].+)[\s]to start[\s]*</expression>
                <set field="name">
                    <entry valueGroup="1"/>
                </set>
                <set field="value">
                    <entry value="SUBMITTED"/>
                </set>
            </match>
            <!-- qsub reports the job is ready: record the id (group 1), state RUNNING. -->
            <match>
                <expression>qsub:[\s]+job[\s]+([\d]+[.].+)[\s]ready[\s]*</expression>
                <set field="name">
                    <entry valueGroup="1"/>
                </set>
                <set field="value">
                    <entry value="RUNNING"/>
                </set>
            </match>
            <!-- Submission was rejected: state FAILED. -->
            <match>
                <expression>.*Job not submitted[\s]*</expression>
                <set field="value">
                    <entry value="FAILED"/>
                </set>
            </match>
        </target>
        <!-- Same pattern as the "ready" match above. -->
        <exit-after>qsub:[\s]+job[\s]+([\d]+[.].+)[\s]ready[\s]*</exit-after>
    </stdout-parser>
    <stderr-parser delim="\n">
        <target ref="@jobId">
            <!-- Any non-empty stderr line marks the job FAILED while keeping the recorded job id name. -->
            <match>
                <expression>.+</expression>
                <set field="name">
                    <entry value="${ptp_rm:@jobId#name}"/>
                </set>
                <set field="value">
                    <entry value="FAILED"/>
                </set>
            </match>
        </target>
    </stderr-parser>
    <!-- The SDM master must be started here rather than in the start_job.pl script. This is because the Torque interactive will run
         the script on one of the compute nodes, however the SDM master must be run on the login node so that it can connect to the
         remote end of the ssh tunnel. -->
    <pre-launch-cmd name="SDM Master" exec="${ptp_rm:debuggerExecutablePath#value} --master --routing_file=routes_@jobId ${ptp_rm:debuggerArgs#value}" wait="false"/>
</submit-interactive-debug>
<!-- Status query: runs qstat for the recorded job id (exit status ignored) and rewrites the
     one-letter code captured from stdout into a job state name. -->
<get-job-status ignoreExitStatus="true" name="get-job-status">
    <arg>${ptp_rm:bindir#value}qstat</arg>
    <arg>${ptp_rm:@jobId#name}</arg>
    <stdout-parser delim="\n">
        <target ref="@jobId">
            <!-- Group 1 is a single upper-case letter taken from the job line; it becomes the value
                 that the tests below compare against. -->
            <match>
                <expression>[\d]+[.].+[\s]+.+[\s]+.+[\s]+.+[\s]+([A-Z])[\s]+.+</expression>
                <set field="value">
                    <entry valueGroup="1"/>
                </set>
            </match>
            <!-- Q: QUEUED_ACTIVE -->
            <test op="EQ">
                <value>#value</value>
                <value>Q</value>
                <set field="value">
                    <entry value="QUEUED_ACTIVE"/>
                </set>
            </test>
            <!-- R: RUNNING -->
            <test op="EQ">
                <value>#value</value>
                <value>R</value>
                <set field="value">
                    <entry value="RUNNING"/>
                </set>
            </test>
            <!-- S: SUSPENDED -->
            <test op="EQ">
                <value>#value</value>
                <value>S</value>
                <set field="value">
                    <entry value="SUSPENDED"/>
                </set>
            </test>
            <!-- C: COMPLETED -->
            <test op="EQ">
                <value>#value</value>
                <value>C</value>
                <set field="value">
                    <entry value="COMPLETED"/>
                </set>
            </test>
            <!-- E: also reported as COMPLETED -->
            <test op="EQ">
                <value>#value</value>
                <value>E</value>
                <set field="value">
                    <entry value="COMPLETED"/>
                </set>
            </test>
            <!-- H: SYSTEM_ON_HOLD -->
            <test op="EQ">
                <value>#value</value>
                <value>H</value>
                <set field="value">
                    <entry value="SYSTEM_ON_HOLD"/>
                </set>
            </test>
        </target>
    </stdout-parser>
    <stderr-parser delim="\n">
        <target ref="@jobId">
            <!-- Any non-empty stderr line sets the job state to COMPLETED. -->
            <match>
                <expression>.+</expression>
                <set field="value">
                    <entry value="COMPLETED"/>
                </set>
            </match>
        </target>
    </stderr-parser>
</get-job-status>
<!-- Cancellation: runs qdel on the recorded job id. -->
<terminate-job name="cancel">
    <arg>${ptp_rm:bindir#value}qdel</arg>
    <arg>${ptp_rm:@jobId#name}</arg>
</terminate-job>
<launch-tab>
<dynamic>
<title>Basic Settings</title>
<layout>
<grid-layout/>
</layout>
<!-- QUEUES group: a label and a read-only combo bound to "destination", with items taken from "queues" -->
<composite group="true">
    <layout>
        <grid-layout makeColumnsEqualWidth="false" numColumns="3"/>
    </layout>
    <widget style="SWT.LEFT" type="label">
        <layout-data>
            <grid-data verticalAlign="SWT.CENTER" horizontalAlign="SWT.BEGINNING"/>
        </layout-data>
        <tooltip>${ptp_rm:destination#tooltip}</tooltip>
        <fixed-text>Queue: </fixed-text>
    </widget>
    <widget attribute="destination" readOnly="true" style="SWT.BORDER" type="combo">
        <layout-data>
            <grid-data horizontalSpan="2" verticalAlign="SWT.CENTER" horizontalAlign="SWT.FILL" widthHint="150"/>
        </layout-data>
        <items-from>queues</items-from>
    </widget>
</composite>
<!-- Empty label placed between the two groups (presumably a spacer; confirm). -->
<widget type="label"></widget>
<!-- ATTRIBUTES group: a four-column grid holding one header row and then one row per job attribute.
     Each row is a name label, an editor widget spanning two columns, and a description label.
     Widget order determines grid placement, so rows must stay in this sequence. -->
<composite group="true">
<layout>
<grid-layout numColumns="4" makeColumnsEqualWidth="false" horizontalSpacing="10" verticalSpacing="15"/>
</layout>
<!-- HEADER: column titles Name / Value (two columns wide) / Description -->
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_RED">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>Name</fixed-text>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_RED">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" horizontalSpan="2" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>Value</fixed-text>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_RED">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>Description</fixed-text>
</widget>
<!-- row 1: Account_Name (text field) -->
<widget type="label" style="SWT.LEFT">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<tooltip>${ptp_rm:Account_Name#tooltip}</tooltip>
<fixed-text>Account: </fixed-text>
</widget>
<widget type="text" style="SWT.BORDER" attribute="Account_Name">
<layout-data>
<grid-data horizontalAlign="SWT.FILL" verticalAlign="SWT.CENTER" horizontalSpan="2" grabExcessHorizontal="false" widthHint="150"/>
</layout-data>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_BLUE">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>${ptp_rm:Account_Name#description}</fixed-text>
</widget>
<!-- row 2: Resource_List.nodes (text field); unlike the other rows, no verticalAlign is set here -->
<widget type="label" style="SWT.LEFT">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" grabExcessHorizontal="false"/>
</layout-data>
<tooltip>${ptp_rm:Resource_List.nodes#tooltip}</tooltip>
<fixed-text>Number of nodes: </fixed-text>
</widget>
<widget type="text" style="SWT.BORDER" attribute="Resource_List.nodes">
<layout-data>
<grid-data horizontalAlign="SWT.FILL" horizontalSpan="2" grabExcessHorizontal="false" widthHint="150"/>
</layout-data>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_BLUE">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>${ptp_rm:Resource_List.nodes#description}</fixed-text>
</widget>
<!-- row 3: Resource_List.mem (text field) -->
<widget type="label" style="SWT.LEFT">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<tooltip>${ptp_rm:Resource_List.mem#tooltip}</tooltip>
<fixed-text>Total Memory Needed: </fixed-text>
</widget>
<widget type="text" style="SWT.BORDER" attribute="Resource_List.mem">
<layout-data>
<grid-data horizontalAlign="SWT.FILL" verticalAlign="SWT.CENTER" horizontalSpan="2" grabExcessHorizontal="false" widthHint="150"/>
</layout-data>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_BLUE">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>${ptp_rm:Resource_List.mem#description}</fixed-text>
</widget>
<!-- row 4: Resource_List.walltime (text field) -->
<widget type="label" style="SWT.LEFT">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<tooltip>${ptp_rm:Resource_List.walltime#tooltip}</tooltip>
<fixed-text>Wallclock Time: </fixed-text>
</widget>
<widget type="text" style="SWT.BORDER" attribute="Resource_List.walltime">
<layout-data>
<grid-data horizontalAlign="SWT.FILL" verticalAlign="SWT.CENTER" horizontalSpan="2" grabExcessHorizontal="false" widthHint="150"/>
</layout-data>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_BLUE">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>${ptp_rm:Resource_List.walltime#description}</fixed-text>
</widget>
<!-- row 5: mpiCommand (read-only combo); the name label carries no tooltip -->
<widget type="label" style="SWT.LEFT">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>MPI Command: </fixed-text>
</widget>
<widget type="combo" style="SWT.BORDER" readOnly="true" attribute="mpiCommand">
<layout-data>
<grid-data horizontalAlign="SWT.FILL" verticalAlign="SWT.CENTER" horizontalSpan="2" grabExcessHorizontal="false"/>
</layout-data>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_BLUE">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>${ptp_rm:mpiCommand#description}</fixed-text>
</widget>
<!-- row 6: mpiNumberOfProcesses (spinner); the name label carries no tooltip -->
<widget type="label" style="SWT.LEFT">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>MPI Number of Processes: </fixed-text>
</widget>
<widget type="spinner" style="SWT.BORDER" attribute="mpiNumberOfProcesses">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" horizontalSpan="2" grabExcessHorizontal="false" widthHint="100"/>
</layout-data>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_BLUE">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>${ptp_rm:mpiNumberOfProcesses#description}</fixed-text>
</widget>
<!-- row 7: modules (custom "envconfig" widget) -->
<widget type="label" style="SWT.LEFT">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<!-- NOTE(review): the "modules" attribute visible in this file defines no tooltip element,
     so this reference may resolve to nothing; confirm. -->
<tooltip>${ptp_rm:modules#tooltip}</tooltip>
<fixed-text>Modules to Load: </fixed-text>
</widget>
<widget type="custom" typeId="envconfig" style="SWT.NONE" attribute="modules">
<layout-data>
<grid-data horizontalAlign="SWT.FILL" verticalAlign="SWT.CENTER" horizontalSpan="2" grabExcessHorizontal="false"/>
</layout-data>
</widget>
<widget type="label" style="SWT.LEFT" foreground="SWT.COLOR_DARK_BLUE">
<layout-data>
<grid-data horizontalAlign="SWT.BEGINNING" verticalAlign="SWT.CENTER" grabExcessHorizontal="false"/>
</layout-data>
<fixed-text>${ptp_rm:modules#description}</fixed-text>
</widget>
</composite>
</dynamic>
</launch-tab>
</control-data>
<!-- Monitoring section: only the scheduler type (TORQUE) is specified. -->
<monitor-data schedulerType="TORQUE"/>
</resource-manager-builder>