blob: b83aac6e3ac53049bd93b66bbf6dd3e7e6646d46 [file] [log] [blame]
#!/usr/bin/perl -w
#*******************************************************************************
#* Copyright (c) 2012 ParaTools, Inc.
#* All rights reserved. This program and the accompanying materials
#* are made available under the terms of the Eclipse Public License v1.0
#* which accompanies this distribution, and is available at
#* http://www.eclipse.org/legal/epl-v10.html
#*
#* Contributors:
#* Kevin A. Huck (ParaTools, Inc.)
#*******************************************************************************/
use strict;
use Time::Local;
use File::Basename;
use Storable;
# Regular-expression fragments, kept as plain strings so they can be
# interpolated into larger patterns.
my $patint = '([\+\-\d]+)';    # signed integer (captured)
my $patfp  = '([\+\-\d.E]+)';  # floating-point number (captured)
my $patwrd = '([^\s]+)';       # word: a run of non-blank characters (captured)
my $patbl  = '\s+';            # blank space of variable length
my $cores_per_node = 4;        # BG/P value; overridden to 16 for BG/Q
#####################################################################
# get user info / check system
#####################################################################
my $UserID   = getpwuid($<);
my $Hostname = qx{hostname -d};
my $verbose  = 1;
# Scratch variables and job tables shared by the rest of the script.
my ($line, $key, $value, $count);
my (%jobs, %jobnr, %notmappedkeys, %notfoundkeys);
#unless( ($Hostname =~ /jugenes\d/) && ($UserID =~ /llstat/) ) {
# die "da_jobs_info_LML.pl can only be used as llstat on jugenesX!";
#}
#####################################################################
# get command line parameter
#####################################################################
# Exactly one argument is accepted: the name of the file to write.
die " Usage: $0 <filename> $#ARGV\n" unless (@ARGV == 1);
my $filename = $ARGV[0];
# 'hash.file' is looked up in the directory of the output file.
my $hashfile = dirname($filename) . '/hash.file';
my $system_sysprio = -1;
my $maxtopdogs     = -1;
# Three-letter English month abbreviation => two-digit month number
# ("Jan" => "01" ... "Dec" => "12").
my %months;
@months{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} =
    map { sprintf("%02d", $_) } (1 .. 12);
# Job attribute name => key used for it in the LML output.
# An empty LML key marks an attribute that is known but not written out.
my %mapping = (
    # attributes that are forwarded, with their LML key
    'JobName'                 => 'name',
    'User'                    => 'owner',
    'Rerunable'               => 'restart',
    'Dependencies'            => 'dependency',
    'Nodes'                   => 'totalcores',
    'WallTime'                => 'wall',
    'Location'                => 'nodelist',
    'S'                       => 'job_state',
    'job_state'               => 'state',
    'QueuedTime'              => 'queuedate',
    'Queue'                   => 'queue',
    'StartTime'               => 'dispatchdate',
    'step'                    => 'step',
    'spec'                    => 'spec',
    'status'                  => 'status',
    'detailedstatus'          => 'detailedstatus',
    'Procs'                   => 'totaltasks',
    'Resource_List.vnodelist' => 'vnodelist',
    # unknown attributes
    'group'                   => 'group',
    # known attributes that are not forwarded (empty LML key)
    (map { ($_ => '') } qw(
        Checkpoint
        Error_Path
        Hold_Types
        Join_Path
        Keep_Files
        Mail_Points
        Mail_Users
        Output_Path
        Priority
        Resource_List.cput
        Resource_List.mem
        Resource_List.nodect
        Resource_List.pmem
        Shell_Path_List
        Walltime.Remaining
        comment
        ctime
        depend
        etime
        exit_status
        fault_tolerant
        interactive
        mtime
        resources_used.cput
        resources_used.mem
        resources_used.vmem
        resources_used.walltime
        server
        session_id
        start_count
        submit_args
        Resource_List.backfill
        Resource_List.bandwidth
        Resource_List.enabled
        Resource_List.job_type
        Resource_List.ncpus
        Resource_List.nightrun
        Resource_List.node_type
        Resource_List.operational
        Resource_List.place
        Resource_List.select
        Submit_arguments
        Variable_List
        alt_id
        jobdir
        pset
        resources_used.cpupercent
        resources_used.ncpus
        stime
        substate
    )),
);
# Read the full job listing ("<cmd> -f -l") into %jobs.
# The command defaults to /usr/bin/qstat and can be replaced through the
# CMD_JOBINFO environment variable.
my $cmd="/usr/bin/qstat";
$cmd=$ENV{"CMD_JOBINFO"} if($ENV{"CMD_JOBINFO"});
# The command is still passed as one string, exactly as the original
# "cmd |" form did, so a CMD_JOBINFO value is interpreted the same way.
# A command that cannot be started is fatal instead of silently producing
# an empty job list.
open(my $in, "-|", "$cmd -f -l") or die "cannot run '$cmd -f -l': $!";
my $jobid="-";   # "-" means: no "JobID:" line seen yet
my $lastkey="-"; # attribute that continuation lines are appended to
while($line=<$in>) {
    chomp($line);
    if($line=~/^JobID\:\s+(\S*)$/) {
        # start of a new job record
        $jobid=$1;
        $jobs{$jobid}{step}=$jobid;
    } elsif($jobid eq "-") {
        # Anything before the first "JobID:" line belongs to no job; storing
        # it would create a bogus job called "-" in the output.
        next;
    } elsif($line=~/^\s+(\S*)\s+\:\s+(.*)$/) {
        # "    <attribute>   : <value>"
        ($key,$value)=($1,$2);
        $key=~s/\s/_/gs;
        $lastkey=$key;
        $jobs{$jobid}{$key}=$value;
    } else {
        # continuation of the previous attribute's value
        $line=~s/^\s*//gs;
        $jobs{$jobid}{$lastkey}.=$line;
    }
}
# For a pipe, close() also reports a non-zero exit status of the command.
close($in) or warn "'$cmd -f -l' did not finish cleanly: ", ($! ? $! : "wait status $?"), "\n";
# add unknown but mandatory attributes to jobs, and derive the state and
# date attributes from the raw values
foreach $jobid (sort(keys(%jobs))) {
    my $job = $jobs{$jobid};
    $job->{group} = "unknown" if(!exists($job->{group}));
    $job->{exec_host} = "-" if(!exists($job->{exec_host}));
    $job->{totaltasks} = $job->{"Procs"};
    $job->{spec} = $job->{"Resource_List.nodes"} if(!exists($job->{spec}));
    # Collect the hold codes: "u" for a user hold, "s" for an admin hold,
    # "us" when both are set.  (Previously the admin hold overwrote the user
    # hold, so get_state() could never report a combined hold.)
    my $hold_types = "";
    $hold_types .= "u" if(defined($job->{"User_Hold"}) && $job->{"User_Hold"} eq "True");
    $hold_types .= "s" if(defined($job->{"Admin_Hold"}) && $job->{"Admin_Hold"} eq "True");
    $job->{Hold_Types} = $hold_types;
    # check state
    $job->{job_state} = $job->{"S"};
    ($job->{status},$job->{detailedstatus}) = get_state(defined($job->{"S"}) ? $job->{"S"} : "",
                                                        $hold_types);
    ($job->{StartTime},$job->{QueuedTime}) = fixdates($job->{StartTime}, $job->{QueuedTime});
}
# Write the collected jobs to $filename as an LML document: an <objects>
# list with one entry per job, then an <information> section with one
# <info> element per job holding its mapped attributes.
open(my $out, ">", $filename) or die "cannot open file $filename: $!";
print $out "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
print $out "<lml:lgui xmlns:lml=\"http://www.llview.de\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n";
print $out " xsi:schemaLocation=\"http://www.llview.de lgui.xsd\"\n";
print $out " version=\"0.7\">\n";
print $out "<objects>\n";
# Number the jobs in sorted order; %jobnr remembers the number so that the
# <info> elements below refer to the same object ids.
$count=0;
foreach $jobid (sort(keys(%jobs))) {
    $count++;
    $jobnr{$jobid}=$count;
    printf($out "<object id=\"j%06d\" name=\"%s\" type=\"job\"/>\n",$count,$jobid);
}
print $out "</objects>\n";
print $out "<information>\n";
foreach $jobid (sort(keys(%jobs))) {
    printf($out "<info oid=\"j%06d\" type=\"short\">\n",$jobnr{$jobid});
    foreach $key (sort(keys(%{$jobs{$jobid}}))) {
        if(!exists($mapping{$key})) {
            # attribute unknown to %mapping; reported after the file is written
            $notfoundkeys{$key}++;
            next;
        }
        if($mapping{$key} eq "") {
            # attribute known, but not forwarded to LML
            $notmappedkeys{$key}++;
            next;
        }
        $value=modify($key,$mapping{$key},$jobs{$jobid}{$key});
        # empty (false) values are left out of the file
        if($value) {
            printf($out " <data %-20s value=\"%s\"/>\n","key=\"".$mapping{$key}."\"",$value);
        }
    }
    print $out "</info>\n";
}
print $out "</information>\n";
print $out "</lml:lgui>\n";
# Buffered write errors only show up when the handle is closed.
close($out) or die "cannot write file $filename: $!";
# List every job attribute that has no entry in %mapping, printed in the
# form of a %mapping line ("<name>" => "",).
foreach $key (sort(keys(%notfoundkeys))) {
    # The format has a single conversion; the occurrence count that used to
    # be passed as well was never printed and only triggered a
    # "Redundant argument in printf" warning.
    printf("%-40s => \"\",\n","\"".$key."\"");
}
# Translate a one-letter job state into the pair (status, detailed status).
#   $job_state   state letter: C, E, H, Q, W, T or R
#   $Hold_types  hold codes; only looked at for state "H"
# Any other state letter yields ("UNDETERMINED", "").
sub get_state {
    my($job_state,$Hold_types)=@_;

    # Held jobs: the detailed status depends on who holds the job.
    if($job_state eq "H") {
        my $hold_detail="";
        if($Hold_types eq "u") {
            $hold_detail="USER_ON_HOLD";
        } elsif(($Hold_types eq "s") || ($Hold_types eq "o")) {
            $hold_detail="SYSTEM_ON_HOLD";
        } elsif($Hold_types=~/(us|su)/) {
            $hold_detail="USER_SYSTEM_ON_HOLD";
        }
        return("SUBMITTED",$hold_detail);
    }

    # All remaining states map to a fixed pair.
    my %pair_for=(
        "C" => ["COMPLETED","JOB_OUTERR_READY"],
        "E" => ["COMPLETED","JOB_OUTERR_READY"],
        "Q" => ["SUBMITTED",""],
        "W" => ["SUBMITTED",""],
        "T" => ["SUBMITTED",""],
        "R" => ["RUNNING",""],
    );
    my($status,$detail)=("UNDETERMINED","");
    if(exists($pair_for{$job_state})) {
        ($status,$detail)=@{$pair_for{$job_state}};
    }
    return($status,$detail);
}
# Normalise the two date attributes of a job.
#   $dispatchdate  start time such as "Wed Jan 11 01:26:31 2012 +0000 (UTC)"
#   $queuedate     "H:M:S" value, treated as the time the job spent queued
# Returns ($dispatchdate, $queuedate).  A value in the expected format is
# rewritten as "YYYY-MM-DD hh:mm:ss"; anything else (including undef) is
# returned unchanged.  The queue date is the dispatch time -- or the current
# time when there is no usable dispatch date -- minus the queued duration.
# Month names are looked up in the file-level %months table.
sub fixdates {
    my($dispatchdate,$queuedate)=@_;
    my $modified_dispatchdate=$dispatchdate;
    my $modified_queuedate=$queuedate;
    my @dispatched; # timelocal() arguments of the dispatch time, once parsed

    # parse this: Wed Jan 11 01:26:31 2012 +0000 (UTC)
    # (runs of blanks are accepted so that a space-padded day also matches)
    if(defined($dispatchdate) &&
       $dispatchdate=~/^(\S+) +(\S+) +(\S+) (\S+)\:(\S+)\:(\S+) (\S+) +(\S+) \(UTC\)$/) {
        my($monthname,$day,$hour,$minute,$second,$year)=($2,$3,$4,$5,$6,$7);
        # An unknown month name leaves the date untouched.
        if(exists($months{$monthname})) {
            my $month=$months{$monthname}; # "01" .. "12"
            $modified_dispatchdate=sprintf("%04d-%02d-%02d %02d:%02d:%02d", $year, $month, $day, $hour, $minute, $second);
            # timelocal() numbers months from 0.  Passing the 1-based value
            # made it croak for December and for days that do not exist in
            # the following month.
            # NOTE(review): the timestamp is labelled UTC but is converted
            # with timelocal(), as before -- confirm that this is intended.
            @dispatched=($second, $minute, $hour, $day, $month-1, $year);
        }
    }
    if(defined($queuedate) && $queuedate=~/^(\S+)\:(\S+)\:(\S+)$/) {
        my $seconds = (3600*int($1)) + (60*int($2)) + $3;
        # "now" is the dispatch time if there is one, else the current time
        my $now = @dispatched ? timelocal(@dispatched) : time;
        my($second,$minute,$hour,$day,$month,$year)=localtime($now - $seconds);
        # localtime() returns a 0-based month and the year minus 1900
        $modified_queuedate=sprintf("%04d-%02d-%02d %02d:%02d:%02d", $year+1900, $month+1, $day, $hour, $minute, $second);
    }
    return($modified_dispatchdate, $modified_queuedate);
}
# Convert one raw job attribute into the value written to the LML file.
#   $key    attribute name as read from the job listing (not used here)
#   $mkey   LML key the attribute is mapped to; selects the conversion
#   $value  raw attribute value
# Returns the converted value with XML special characters escaped, or undef
# when $value is empty or otherwise false.
# Side effect: a torus-style node list switches the file-level
# $cores_per_node to 16 for the rest of the run.
sub modify {
    my($key,$mkey,$value)=@_;
    my $ret=$value;
    if(!$ret) {
        return(undef);
    }
    if($mkey eq "owner") {
        # keep only the part in front of "@"
        $ret=~s/\@.*//gs;
    }
    if($mkey eq "state") {
        $ret="Completed" if ($value eq "C");
        $ret="Removed" if ($value eq "E");
        $ret="System Hold" if ($value eq "H");
        $ret="Idle" if ($value eq "Q");
        $ret="Idle" if ($value eq "W");
        $ret="Idle" if ($value eq "T");
        $ret="Running" if ($value eq "R");
        $ret="System Hold" if ($value eq "S");
    }
    if(($mkey eq "wall") || ($mkey eq "wallsoft")) {
        # convert to seconds; when several forms match, the last one wins
        if($value=~/\($patint seconds\)/) {
            $ret=$1;
        }
        if($value=~/$patint minutes/) {
            $ret=$1*60;
        }
        if($value=~/^$patint[:]$patint[:]$patint$/) {
            $ret=$1*60*60+$2*60+$3;
        }
    }
    if($mkey eq "nodelist") {
        if($ret=~/^[\w]+\-(\d)(\d)(\d)(\d)(\d)\-(\d)(\d)(\d)(\d)(\d)\-[\d]+$/) {
            # <label>-xyzwt-xyzwt-<size>: torus-style block name
            $ret = map_torus_to_nodes($ret);
            $cores_per_node = 16;
        } else {
            $ret = map_block_to_nodes($ret);
        }
        if($ret ne "-") {
            $ret=~s/\//,/gs;
            $ret=join(')(',split(/\+/,$ret));
        }
    }
    if($mkey eq "vnodelist") {
        if($ret ne "-") {
            my @nodes = split(/\+/,$ret);
            # Rewrite every entry as "<node>,<count>".  The loop has to
            # include the last index; stopping one short left the final
            # entry (or the only one) unconverted.
            foreach my $node (@nodes) {
                if($node=~/([^\(\)\:]+)\:.*ncpus=(\d+)/s) {
                    $node="$1,$2";
                } elsif($node=~/([^\(\)\:]+)\:.*mpiprocs=(\d+)/s) {
                    $node="$1,$2";
                } elsif($node=~/([^\(\)\:]+)/s) {
                    $node="$1,1";
                } else {
                    print STDERR "Error in job node list: $node\n";
                }
            }
            $ret="(".join(')(',@nodes).")";
        }
    }
    if($mkey eq "totalcores") {
        my $numcores = $ret * $cores_per_node;
        $ret=$numcores if($numcores>0);
    }
    if($mkey eq "totaltasks") {
        my $numcores=0;
        foreach my $spec (split(/\s*\+\s*/,$ret)) {
            # std job: "<count>:ppn=<n>" or "<name>:ppn=<n>".  Each part is
            # matched on its own; matching the whole value counted the first
            # part once per part.
            if($spec=~/^$patint[:]ppn=$patint/) {
                $numcores+=$1*$2;
            } elsif($spec=~/^$patwrd[:]ppn=$patint/) {
                $numcores+=1*$2;
            }
        }
        $ret=$numcores if($numcores>0);
    }
    if(($mkey eq "comment")) {
        $ret=~s/\"//gs;
    }
    # The value ends up inside a double-quoted XML attribute: mask & first,
    # then the other characters that would break the attribute.
    $ret=~s/\&/\&amp\;/gs;
    $ret=~s/</\&lt\;/gs;
    $ret=~s/>/\&gt\;/gs;
    $ret=~s/\"/\&quot\;/gs;
    return($ret);
}
# Expand a block name into the comma-separated list of its nodes, each
# written as "R<row><column>-M<midplane>-N<nodecard>-C<computecard>".
#   $blockname  one of the forms listed below; a "-T<n>" or "-J<n>" field in
#               front of the size is accepted and ignored
# Nodes are enumerated row by row, column by column, over midplanes 0-1,
# node cards 0-15 and compute cards C04-C35, and the list is cut off after
# <blocksize> nodes.
# NOTE(review): a name that matches none of the forms still produces the
# single node "R00-M0-N00-C04" (unchanged from the previous behaviour) --
# confirm whether unallocated jobs are meant to show up on that node.
sub map_block_to_nodes {
    my($blockname)=@_;
    my $start_row = 0;
    my $start_column = 0;
    my $start_midplane = 0;
    my $start_nodecard = 0;
    my $start_computecard = 0;
    my $max_row = 0;
    my $max_column = 0;
    my $max_midplane = 2;
    my $max_nodecard = 16;
    my $max_computecard = 32;
    my $blocksize = 0;
    # For blocks 512 nodes in size:
    # <machine label>-R<row><column>-M<midplane>-512
    if(($blockname=~/^[\w]+\-R(\d)(\d)\-M(\d+)\-512$/) ||
       ($blockname=~/^[\w]+\-R(\d)(\d)\-M(\d+)\-[TJ]\d+\-512$/)) {
        $start_row=$1;
        $max_row=$1;
        $start_column=$2;
        $max_column=$2;
        $start_midplane=$3;
        $max_midplane=$3+1;
        $blocksize = 512;
    }
    # For blocks 1,024 nodes in size:
    # <machine label>-R<row><column>-1024
    elsif(($blockname=~/^[\w]+\-R(\d)(\d)\-1024$/) ||
          ($blockname=~/^[\w]+\-R(\d)(\d)\-[TJ]\d+\-1024$/)) {
        $start_row=$1;
        $max_row=$1;
        $start_column=$2;
        $max_column=$2;
        $blocksize = 1024;
    }
    # For blocks under 512 nodes:
    # <machine label>-R<row><column>-M<midplane>-N<first node card in block>-<block size in compute node cards>
    elsif(($blockname=~/^[\w]+\-R(\d)(\d)\-M(\d+)\-N(\d+)\-(\d+)$/) ||
          ($blockname=~/^[\w]+\-R(\d)(\d)\-M(\d+)\-N(\d+)\-[TJ]\d+\-(\d+)$/)) {
        # The block lies within one rack, so the row/column limits are the
        # start values.  Leaving them at 0 produced an empty list for every
        # rack other than R00.
        $start_row=$1;
        $max_row=$1;
        $start_column=$2;
        $max_column=$2;
        $start_midplane=$3;
        $start_nodecard=$4;
        $blocksize = $5;
    }
    # For blocks greater than 1,024 nodes:
    # <machine label>-R<starting row><starting column>-R<ending row><ending column>-<blocksize>
    elsif(($blockname=~/^[\w]+\-R(\d)(\d)\-R(\d)(\d)\-(\d+)$/) ||
          ($blockname=~/^[\w]+\-R(\d)(\d)\-R(\d)(\d)\-[TJ]\d+\-(\d+)$/)) {
        $start_row=$1;
        $start_column=$2;
        $max_row=$3;
        $max_column = $4;
        $blocksize = $5;
    }
    # Collect the names in a list and join once at the end; re-formatting the
    # growing string for every node was quadratic in the block size.
    my @nodes;
    ROW:
    for (my $row = $start_row; $row <= $max_row; $row++) {
        for (my $column = $start_column; $column <= $max_column; $column++) {
            for (my $midplane = $start_midplane; $midplane < $max_midplane; $midplane++) {
                for (my $nodecard = $start_nodecard; $nodecard < $max_nodecard; $nodecard++) {
                    for (my $computecard = $start_computecard; $computecard < $max_computecard; $computecard++) {
                        # compute cards are numbered from C04
                        push(@nodes, sprintf("R%01d%01d-M%01d-N%02d-C%02d", $row, $column, $midplane, $nodecard, $computecard+4));
                        # stop everything once the block is complete
                        last ROW if (scalar(@nodes) >= $blocksize);
                    }
                }
            }
        }
    }
    # printf ("%d node block %s maps to %s\n", $blocksize, $blockname, join(",", @nodes));
    return (join(",", @nodes));
}
# Expand a torus-style block name "<label>-xyzwt-xyzwt-<size>" into the
# comma-separated list of entries stored for its coordinates.
#   $torus_name  block name; the two five-digit groups are the first and the
#                last coordinate in each of the five dimensions
# Every coordinate inside that box is formatted as five digits and looked up
# in the table stored (with Storable) in the file-level $hashfile.
# Coordinates without a table entry are skipped.  Dies if the table cannot
# be loaded.  A name that does not have this form is treated as the single
# coordinate 00000.
sub map_torus_to_nodes {
    my($torus_name)=@_;
    printf("\nMapping %s\n", $torus_name);
    my @begin = (0, 0, 0, 0, 0);
    my @end   = (0, 0, 0, 0, 0);
    # Check for a BG/Q-style blockname
    # EAS-xyzwt-xyzwt-size
    if($torus_name=~/^[\w]+\-(\d)(\d)(\d)(\d)(\d)\-(\d)(\d)(\d)(\d)(\d)\-[\d]+$/) {
        @begin = ($1, $2, $3, $4, $5);
        @end   = ($6, $7, $8, $9, $10);
    }
    # Use the retrieved table through its reference instead of copying the
    # whole hash, and fail with a readable message if it could not be read.
    my $table = retrieve($hashfile);
    die "cannot load torus map from $hashfile\n" unless (defined($table));
    my @blocks;
    for (my $x = $begin[0]; $x <= $end[0]; $x++) {
        for (my $y = $begin[1]; $y <= $end[1]; $y++) {
            for (my $z = $begin[2]; $z <= $end[2]; $z++) {
                for (my $w = $begin[3]; $w <= $end[3]; $w++) {
                    for (my $t = $begin[4]; $t <= $end[4]; $t++) {
                        my $coordinate = sprintf ("%d%d%d%d%d", $x, $y, $z, $w, $t);
                        my $block = $table->{$coordinate};
                        # A coordinate missing from the table used to add an
                        # empty list entry (and an "uninitialized" warning).
                        next unless (defined($block) && length($block) > 0);
                        push(@blocks, $block);
                    }
                }
            }
        }
    }
    return (join(",", @blocks));
}