tools/SysStatLogger/sysstatLogger.sh - titan/titan.Libraries.CLL - Git at Google

 #!/bin/bash
 #///////////////////////////////////////////////////////////////////////////////
 #// Copyright (c) 2000-2019 Ericsson Telecom AB                               //
 #//                                                                           //
 #// All rights reserved. This program and the accompanying materials          //
 #// are made available under the terms of the Eclipse Public License v2.0     //
 #// which accompanies this distribution, and is available at                  //
 #// https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.html                                 //
 #///////////////////////////////////////////////////////////////////////////////

 ###############################################################################
 #
 # This script logs various performance related gauges and counters like:
 # - CPU load
 # - overall free and used RAM
 # - network packets and bytes in and out
 # - TitanSim per process CPU & RAM utilization
 #
 # It uses the sysstat utility http://sebastien.godard.pagesperso-orange.fr/download.html
 # Requires minimum sysstat version 10.2.0
 #
 # Please see -h command line option and TitanSim online help for documentation.
 ###############################################################################


 ###############################################################################
 #
 # DETAILED DESCRIPTION
 #
 # This script can measure any selection of
 #   1) system statistics collected by the 'sar' utility with flags '-q -u -r -S'
 #   2) network interface statistics collected by the 'sar' utility with flags '-n DEV'
 #   3) CPU and MEM usage of processes or threads collected by the 'pidstat' utility
 #
 # The available statistics depend on the version of the sysstat utility in use.
 # The actual list can be printed. See the help for more details.
 #
 # The statistics is collected periodically. The length of the period is the granularity interval.
 # The collected statistics show the average values of the given data over the granularity interval.
 # The granularity interval is configurable.
 #
 # The processes to measure can be specified in the following ways:
 #  1) PIDs collected for all processes with a given executable name
 #  2) PIDs collected from titan logfile names. Logfile names should follow a given pattern.
 #  3) PIDs collected from titan HC log file. Certain LOG MASKS should be enabled.
 #
 # It is also possible to monitor all the threads for the monitored PID list.
 # If threads are enabled, CPU/MEM statistics are collected for every thread of the
 # processes monitored.
 #
 # If the PID list is collected from titan logfile names or the HC log, titan process names
 # will be printed in the column names for processes (if available).
 #
 # For the process statistics the following sums are calculated automatically also:
 #  countPs      - current number of processes to monitor (based on the executable name)
 #  countThreads - number of threads for all the processes
 #  sum%MEM      - sum of memory usage percentages of all processes
 #  sum%CPU      - sum of CPU utilizations of all processes
 #  max%CPU      - maximum of single process CPU utilization
 #  sumMLSim%CPU - sum of CPU utilization of all MLSim processes
 #  maxMLSim%CPU - maximum of a single MLSim thread CPU utilization
 #
 # MLSim and countThreads statistics are only available when thread monitoring is
 # enabled.
 # CPU utilization for a process gives the average CPU usage of one CPU of the process
 # during the granularity period (its value should be in the 0.0-1.0 interval).
 #
 # For Titansim monitoring there are additional features like:
 #  1) Wait until titansim boots up (ready-to-run LED is green).
 #     Process statistics are collected only after ready-to-run state is reached.
 #     Before that only system statistics is collected.
 #     When ready-to-run state is reached, PID list is collected and the capture of
 #     process statistics starts.
 #     For this feature to work, the CLI telnet port has to be specified.
 #
 #     If the telnet port is set to 0, collection of process statistics starts
 #     immediately. In this case the PID list is generated at the time when
 #     the measurement script is started.
 #     For non-titansim based processes, telnet port has to be set to 0!
 #
 #  2) Capturing CPU/MEM utilization of MLSim threads.
 #     MLSim threads are identified by names prefixed with "ML_".
 #     CPU and MEM usage is captured for all MLSim threads.
 #
 # During the measurement live data is printed to stdout:
 # During the wait-to-ready period only system/network statistics are collected.
 # After ready-to-run state is detected, capture of process statistics is started.
 # A new header line is printed and the live process statistics is printed to stdout.
 # It is possible to redirect the live system statistics to a separate file. In this case
 # only the process statistics is printed to stdout.
 #
 # When the measurement is finished (by stopping the script or all the monitored
 # processes have terminated) the complete measured data can be printed to a CSV file.
 # The file contains the captured system stats before the ready-to-run state, and
 # the system stats together with the process stats after the ready-to-run state.
 # All the data is printed to a single line for each capture time.
 # The name of the output file can be configured.
 #
 # While the script is working it creates temporary files. They are created by the 'mktemp'
 # command and placed in the '/tmp' directory. The files are removed automatically
 # when the script terminates. All the temporary file names have the prefix 'sysstatlogger_'.
 #
 ###############################################################################


 ###############################################################################
 #
 # EXAMPLES
 #
 # To monitor all titansim processes including MLSim threads.
 # The stdout is redirected to live.txt, the final output is written into stats.csv.
 # The pidlist is detected from the process names, ready to run is waited on default port:
 # <sysstatLoggerPath>/sysstatlogger.sh -t -S -o stats.csv > live.txt
 #
 # Same but statistics is not collected during startup:
 # <sysstatLoggerPath>/sysstatlogger.sh -S -o stats.csv > live.txt
 #
 # Statistics collection is started immediately (do not wait for ready-to-run):
 # <sysstatLoggerPath>/sysstatlogger.sh -T 0 -S -o stats.csv > live.txt
 #
 # Statistics collection is started after startup, telnet port is set to 8100:
 # <sysstatLoggerPath>/sysstatlogger.sh -T 8100 -S -o stats.csv > live.txt
 #
 # Get PID list from logfile names, logfiles are searched in build/titansim/build:
 # <sysstatLoggerPath>/sysstatlogger.sh -f -b build/titansim -o stats.csv > live.txt
 #
 # Get PID list from HC log, HC log is searched in build/titansim/build:
 # <sysstatLoggerPath>/sysstatlogger.sh -H -b build/titansim -o stats.csv > live.txt
 #
 # Get PID list for the executable name titansim:
 # <sysstatLoggerPath>/sysstatlogger.sh -E titansim -o stats.csv > live.txt
 #
 # Get PID list for the executable name 'top' and its threads, do not wait for ready-to-run:
 # <sysstatLoggerPath>/sysstatlogger.sh -S -T 0 -E top -o stats.csv > live.txt
 #
 # Get PID list for the executable name 'top', do not wait for ready-to-run,
 # only "%user" is captured from system stats, nothing from network stats:
 # <sysstatLoggerPath>/sysstatlogger.sh -T 0 -E top -o stats.csv -c "%user" -d "" > live.txt
 #
 ###############################################################################

 ###############################################
 # CONSTANTS
 ###############################################

 # Split unquoted expansions on newlines only, so that loops over command
 # output iterate line by line.
 IFS=$'\n'
 # mktemp template shared by all temporary files of this run; ctrl_c derives
 # its clean-up glob from the same value.
 tmpTemplate="sysstatlogger_$(date '+%F_%T')_XXXXX"


 ###############################################
 # FUNCTIONS
 ###############################################

 # Abort unless the installed sysstat (as reported by 'pidstat -V') is at
 # least version 10.2.0.
 # Globals: SYSSTAT_VERSION_INFO (written: 1 when supported, 0 when too old,
 #          empty when no "sysstat ..." line could be read)
 # Outputs: an error message on stdout when the version is not supported
 # Exits:   1 when the version is not supported
 function chk_sysstat_version() {
   local versionLine
   versionLine=$(pidstat -V 2>&1 | awk '/^sysstat/{print}')
   # Splitting on blanks and dots gives: "sysstat" "version" MAJOR MINOR PATCH.
   # Both MAJOR and MINOR are compared; looking at MAJOR alone would accept
   # 10.0.x and 10.1.x, which are below the documented minimum.
   SYSSTAT_VERSION_INFO=$(printf '%s\n' "$versionLine" |
     awk 'BEGIN{FS="[ .]"}/^sysstat/{print (($3>10) || ($3==10 && $4>=2))}')
   if [[ $SYSSTAT_VERSION_INFO != 1 ]]
   then
     printf "The %s is not supported. Minimum version: 10.2.0\n" "$versionLine"
     exit 1
   fi
 }

 ###############################################
 # Graceful exit at SIGINT (remove temp files)
 #
 # Stops the background collectors, merges the collected data into
 # $outputfile (unless it is /dev/null), removes the temporary files matching
 # the mktemp template and terminates the script.
 # Globals:   nonLogOutput, SAR_PID, SAR_SYSLOGGER_PID, sysstatLogFile,
 #            outputfile, tmpTemplate (read); tmpMask (written);
 #            TMPDIR (set to /tmp when empty)
 # Arguments: $1 - exit status (optional)
 function ctrl_c() {
   # $0 and $outputfile are passed as printf arguments, never as part of the
   # format, so a '%' or '\' in a file name is printed literally.
   printf "\n\n%s stopped.\n" "$0" >> "$nonLogOutput"
   #kill sys stat collector:
   if [[ "$SAR_PID" != "" ]]
   then
     kill -INT "$SAR_PID"
     wait "$SAR_PID" 2>/dev/null
   fi
   if [[ "$sysstatLogFile" != "/dev/stdout" && "$SAR_SYSLOGGER_PID" != "" ]]
   then
     kill -9 "$SAR_SYSLOGGER_PID"
     wait "$SAR_SYSLOGGER_PID" 2>/dev/null
   fi

   if [[ "$outputfile" != "/dev/null" ]]
   then
     printf "Collecting sar data...\n" >> "$nonLogOutput"
     process_sarData
     merge_outputs >> "$outputfile"
   fi
   printf "Cleaning-up temporary files..." >> "$nonLogOutput"
   if [ -z "$TMPDIR" ]; then
     TMPDIR="/tmp"
   fi
   # Turn the mktemp template into a glob: every X becomes '?'.
   tmpMask=${tmpTemplate//X/?}
   # $tmpMask stays unquoted on purpose so that the shell expands the glob.
   rm -- "$TMPDIR"/$tmpMask 2> /dev/null
   printf "Done.\n" >> "$nonLogOutput"
   if [[ "$outputfile" != "/dev/null" ]]
   then
     printf "Output written into the file \"%s\"\n\n" "$outputfile" >> "$nonLogOutput"
   fi
   # Unquoted on purpose: when no status was passed this is a plain 'exit'.
   exit $1
 }


 ###############################################
 # Print help and exit
 #
 # Writes the option summary to stdout, silences further non-log messages and
 # leaves the script through 'ctrl_c 1'.
 # Globals: me (written), nonLogOutput (set to /dev/null)
 function usage {
   me=$(basename -- "$0")
   printf "\nUsage:\n\n"
   printf "  %s  [-c <column names>] [-d <column names for network stats>] \n" "$me"
   printf "  [-g <granularity>] [-H [-l <hc logfile>] | -f [-b <base directory>] | -n | [-E <executable>]]\n"
   printf "  [-i <interface>] [-e <non-Log output file>] [-s <system stats live output>]\n"
   # -p is accepted by the option parser, so it belongs in the synopsis too.
   printf "  [-p <process stats live output>]\n"
   printf "  [-t] [-T <telnet port>] [-S] [-o <output file>] [-D] [-h]\n\n"
   printf "    -c, --columnsSar:    Columns to measure for system statistics. Default:\n"
   printf "                         \"%%user %%nice %%system %%iowait %%steal %%idle[...] kbmemfree kbmemused %%memused kbswpfree kbswpused %%swpused ldavg-1 ldavg-5 ldavg-15\"\n"
   printf "    -d, --columnsSarNet: Columns to measure for network interface statistics. Default:\n"
   printf "                         \"rxpck/s txpck/s rxkB/s txkB/s\"\n"
   printf "    -g, --granularity:   Granularity period in seconds e.g. 5\n"
   printf "    -b, --basedir:       TitanSim installation directory e.g. /home/ttcn3/titansim\n"
   printf "                         <basedir>/build/ is used to search for log files\n"
   printf "                         Can be used together with -H and -f\n"
   # The built-in default is "no process names", so the marker sits on -n.
   printf "    -n, --noprocnames:   Process names will not be logged just PIDs (default)\n"
   printf "    -H, --hclog:         Get PIDs and process names from HC log file\n"
   printf "                         FileMask := ... TTCN3_PARALLEL | TTCN3_EXECUTOR ... must be enabled.\n"
   printf "    -l, --hclogfile:     Name and path of TitanSim HostController log file\n"
   printf "                         Use together with -H\n"
   printf "    -f, --logfilename:   Get PIDs and process names from the log file names\n"
   printf "                         LogFile := \"titansim.%%h-%%n-%%p.log\" must be set\n"
   printf "    -E, --executable:    Name of the executable, default: titansim\n"
   printf "    -i, --interface:     Interface to monitor, e.g. eth0. Default: lo\n"
   printf "    -e, --nonlogoutput   File to print non-log messages e.g. /dev/stderr\n"
   printf "                         Not to mess up the log with system messages\n"
   printf "    -s, --sysstatlog     Redirect live system statistics to this file.\n"
   printf "    -p, --pidstatlog     Redirect live per-process statistics to this file. Default: /dev/stdout\n"
   printf "    -t, --bootlog        Log minimal data during TitanSim startup\n"
   printf "    -T, --telnetPort     TitanSim telnet port number (CLI), default: 7100\n"
   printf "                         For zero and negative values: wait for start is disabled\n"
   printf "    -S, --showThreads    Show threads, default: false\n"
   printf "    -o, --outputfile     Write merged system statistics and pidstat output to this file\n"
   printf "    -D, --showData       Print available data to measure\n"
   printf "    -h, --help:          Print this help page\n\n"

   # Keep the clean-up quiet: only the help text should reach the user.
   nonLogOutput=/dev/null
   ctrl_c 1
   exit 1
 }


 ###############################################
 # Set default values for parameters
 #
 # Initialises the script-wide configuration globals. Takes no arguments and
 # prints nothing.
 function set_defaults {

   # Name of the executable
   executableName="titansim"

   # Ethernet interface for traffic measurement
   nic="lo"

   # Capture performance data in every $granularity seconds
   granularity=5

   # Use no process names by default
   pidNames="nonames"

   # TitanSim installation directory: the directory of this script with the
   # library-relative tool path removed (first occurrence only).
   local scriptDir
   local toolSubPath="/src/Libraries/EPTF_Core_Library_CNL113512/tools/SysStatLogger"
   scriptDir=$(dirname -- "$(readlink -f -- "$0")")
   titansimBaseDir=${scriptDir/"$toolSubPath"/}

   # TitanSim HostController log file directory and name (used with -H, --hclog)
   titansimLogFile="$titansimBaseDir/build/titansim.$(hostname)-HC-*.log"

   # TitanSim log file directory and name (used with -f, --logfilename)
   titansimLogFiles="$titansimBaseDir/build/*-*-*.log"

   # File or device to print messages other than performance log
   nonLogOutput="/dev/stderr"

   # Shall some basic data be logged during TitanSim bootup?
   bootlog=false

   # Titansim telnet port
   telnetPort=7100

   # collected data from sar data
   # '%idle[...]' must be quoted: unquoted it is a glob pattern and would be
   # replaced by a file called "%idle." if one exists in the current directory.
   sarHeaders=(timestamp %user %nice %system %iowait %steal '%idle[...]' kbmemfree kbmemused %memused kbswpfree kbswpused %swpused ldavg-1 ldavg-5 ldavg-15)
   sarHeaders_net=(timestamp rxpck/s txpck/s rxkB/s txkB/s)

   # show threads
   showThreads=false

   # Clock ticks / sec
   HERTZ=$(getconf CLK_TCK)

   # output file name
   outputfile="/dev/null" #"sysstatlogger_$(date +%F_%H:%M:%S).csv"

   # system stat log file
   sysstatLogFile="/dev/stdout"

   #pidstat log file
   pidstatLogFile="/dev/stdout"
 }


 ###############################################
 # Report an argument error and terminate (private helper of parse_arguments).
 # Arguments: $1  - printf format, always a literal chosen by parse_arguments
 #            $2… - values for the format
 # The message goes to $nonLogOutput; afterwards further non-log output is
 # silenced and the script is left through 'ctrl_c 1'.
 function _parse_arguments_fail() {
   printf "$@" >> "$nonLogOutput"
   nonLogOutput=/dev/null
   ctrl_c 1
 }

 ###############################################
 # Parse command line arguments
 #
 # Consumes "$@" option by option and stores the values in the script-wide
 # globals (sarHeaders, sarHeaders_net, granularity, titansimBaseDir,
 # titansimLogFile, titansimLogFiles, pidNames, executableName, nic,
 # nonLogOutput, sysstatLogFile, pidstatLogFile, bootlog, telnetPort,
 # showThreads, outputfile).
 # A missing or invalid value is reported on $nonLogOutput and the script is
 # terminated with status 1. -h prints the help, -D prints the available
 # columns and exits with status 0.
 function parse_arguments() {

   # Arithmetic comparison; [[ $# > 0 ]] would compare the values as strings.
   while (( $# > 0 ))
   do
   key="$1"

   case "$key" in

     -c|--columnsSar)
       if [ -z "$2" ]; then
         sarHeaders=(timestamp)
       else
         # read -a splits on blanks without pathname expansion, so a column
         # name such as "%idle[...]" is stored literally.
         IFS=$' \t\n' read -r -d '' -a sarHeaders <<< "timestamp $2"
       fi
       shift
     ;;

     -d|--columnsSarNet)
       if [ -z "$2" ]; then
         sarHeaders_net=(timestamp)
       else
         IFS=$' \t\n' read -r -d '' -a sarHeaders_net <<< "timestamp $2"
       fi
       shift
     ;;

     -g|--granularity)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing granularity value\n"
       fi
       if [[ ! $2 =~ ^[0-9]{1,3}$ ]]; then
         _parse_arguments_fail "\nInvalid granularity value: %s\n" "$2"
       fi
       granularity="$2"
       shift
     ;;

     -b|--basedir)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing TitanSim base directory value\n"
       fi
       if [ ! -d "$2" ]; then
         _parse_arguments_fail "\nTitanSim base directory does not exist: %s\n" "$2"
       fi
       titansimBaseDir="$2"

       # TitanSim HostController log file directory and name (used with -H, --hclog)
       titansimLogFile="$titansimBaseDir/build/titansim.$(hostname)-HC-*.log"

       # TitanSim log file directory and name (used with -f, --logfilename)
       titansimLogFiles="$titansimBaseDir/build/*-*-*.log"

       shift
     ;;

     -H|--hclog)
       pidNames="hclog"
     ;;

     -n|--noprocnames)
       pidNames="nonames"
     ;;

     -l|--hclogfile)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing hc logfile value\n"
       fi
       titansimLogFile="$2"
       shift
     ;;

     -f|--logfilename)
       pidNames="logfilename"
     ;;

     -E|--executable)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing executable value\n"
       fi
       executableName="$2"
       shift
     ;;

     -i|--interface)
       if [ -z "$2" ]; then
         usage >> "$nonLogOutput"
       fi
       if [ ! -d "/sys/class/net/$2" ]; then
         _parse_arguments_fail "\nNetwork interface %s does not exist.\n" "$2"
       fi
       nic="$2"
       shift
     ;;

     -e|--nonlogoutput)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing nonlogoutput value\n"
       fi
       nonLogOutput="$2"
       shift
     ;;

     -s|--sysstatlog)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing sysstatlog value\n"
       fi
       sysstatLogFile="$2"
       # Create/truncate the live log so that later appends start empty.
       printf "" > "$sysstatLogFile"
       shift
     ;;

     -p|--pidstatlog)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing pidstatlog value\n"
       fi
       pidstatLogFile="$2"
       printf "" > "$pidstatLogFile"
       shift
     ;;

     -t|--bootlog)
       bootlog=true
     ;;

     -T|--telnetPort)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing telnet port value\n"
       fi
       # Only (optionally negative) integers are meaningful here; anything
       # else would make the numeric tests on the port fail.
       if [[ ! $2 =~ ^-?[0-9]+$ ]]; then
         _parse_arguments_fail "\nInvalid telnet port value: %s\n" "$2"
       fi
       telnetPort="$2"
       if [ "$telnetPort" -gt 0 ]
       then
         printf "\nUsing telnet port for CLI: %s\n" "$2" >> "$nonLogOutput"
       else
         printf "\nNot waiting for startup\n" >> "$nonLogOutput"
       fi
       shift
     ;;

     -S|--showThreads)
       showThreads=true
     ;;

     -o|--outputfile)
       if [ -z "$2" ]; then
         _parse_arguments_fail "\nMissing output file value\n"
       fi
       if [ -f "$2" ]; then
         printf "\nOutput file exists and it will be overwritten: %s\n" "$2" >> "$nonLogOutput"
       fi
       outputfile="$2"
       shift
     ;;

     -h|--help)
       usage >> "$nonLogOutput"
     ;;

     -D|--showData)
       print_available_sar_data
       print_available_pidstat_data
       exit 0
     ;;

     *)
       _parse_arguments_fail "\nInvalid option found.\n"
     ;;

   esac
   shift
   done
 }


 ###############################################
 # Print log file header
 #
 # Records the start time in the global 'logstart' and writes the start-up
 # banner to $nonLogOutput.
 function print_all_header() {
   logstart=$(date '+%F %T')
   # Quoted so that date and time stay one argument whatever IFS is set to;
   # unquoted, the banner line would be repeated once per word.
   printf "Logging started at: %s\n" "$logstart" >> "$nonLogOutput"
   printf "Press ^C to stop logging performance data\n" >> "$nonLogOutput"
   #print_table_header
 }

 ###############################################
 # Print the header line
 #
 # Emits one tab-separated line on stdout: the system columns, the network
 # columns prefixed with "<nic>_", the fixed per-process summary columns and,
 # after a "|" separator each, one %CPU and one %MEM column per process name.
 function print_table_header() {
   local netFormat="${nic}_%s\t"

   # printf repeats its format for every array element.
   printf '%s\t' "${sarHeaders[@]}"
   printf "$netFormat" "${sarHeaders_net[@]}"
   printf 'timestamp\tcountPs\tcountThreads\tsum%%MEM\tsum%%CPU\tmax%%CPU\tsumMLSim%%CPU\tmaxMLSim%%CPU\t|'
   printf '\t%s%%CPU' "${procNames[@]}"
   printf '\t|'
   printf '\t%s%%MEM' "${procNames[@]}"
   printf '\n'
 }


 ###############################################
 # Gets TitanSim's status via the Playlist script
 #
 # Sends a "ds get" request for the ReadyToRun element to the CLI on
 # localhost:$telnetPort and prints "ready to run", "almost ready" or
 # "starting up" depending on the reply; prints nothing when no rule matches.
 # Globals: telnetPort (read), TS_STATUS (written: reply lines containing "TTCN")
 function get_titansim_status() {
   local request="ds get <datadescription xmlns='http://ttcn.ericsson.se/protocolModules/xtdp/xtdl' element='ReadyToRun' source='ExecCtrl'></datadescription>"

   # The short sleep keeps the pipe open after the request has been written.
   TS_STATUS=$(
     { echo "$request"; sleep 0.1; } |
       telnet localhost $telnetPort 2>&1 |
       grep TTCN
   )

   # $TS_STATUS is deliberately unquoted: echo then receives the reply lines as
   # separate words and joins them into one line for awk.
   echo $TS_STATUS | awk '
     /.+\[led:green\]ReadyToRun/ {print "ready to run"}
     /.+\[led:blue\]ReadyToRun/ {print "almost ready"}
     /.+Invalid/ {print "starting up"}
   '
 }


 ###############################################
 # Wait until TitanSim boots up
 #
 # Polls get_titansim_status until it reports "ready to run" and logs every
 # status change to $nonLogOutput. With bootlog enabled, the table header and
 # the output of print_sysstat_filtered are appended to $outputfile and
 # $sysstatLogFile while waiting; otherwise the loop sleeps one second per
 # round. Returns immediately when $telnetPort is zero or negative.
 # Globals: telnetPort, bootlog, nonLogOutput, outputfile, sysstatLogFile,
 #          tmpTemplate (read); plOutput, lastStatus, tmpSarStartupFile (written)
 function wait_for_titansim_bootup() {
   if [[ $telnetPort -le 0 ]]
   then
     return
   fi
   # Start from a clean slate: a value left over in plOutput would end the loop
   # before the first poll, and the status tracker must be the same variable
   # that the loop compares against.
   plOutput=""
   lastStatus="unknown"

   if $bootlog ; then
     printf "Logging basic data while is TitanSim booting up...\n" >> "$nonLogOutput"
     print_table_header | tee -a "$outputfile" >> "$sysstatLogFile"
     # Only the unique name is kept; the file itself is removed right away.
     tmpSarStartupFile=$(mktemp -t "$tmpTemplate")
     rm -f -- "$tmpSarStartupFile"
   else
     printf "Waiting until TitanSim boots up... " >> "$nonLogOutput"
   fi

   while [ "$plOutput" != "ready to run" ]
   do
     plOutput=$(get_titansim_status)

     if [ -z "$plOutput" ]
     then
       plOutput="not started"
     fi

     if [ "$lastStatus" != "$plOutput" ]
     then
       printf "Status: %s\n" "$plOutput" >> "$nonLogOutput"
       lastStatus="$plOutput"
     fi

     if $bootlog && [ "$plOutput" != "ready to run" ] ; then
       print_sysstat_filtered | tee -a "$outputfile" >> "$sysstatLogFile"

       rm -f -- "$tmpSarStartupFile"
     else
       sleep 1;
     fi

   done
   if $bootlog ; then
     printf "\nStart full logging for individual processes...\n" >> "$nonLogOutput"
   else
     printf "\n" >> "$nonLogOutput"
   fi;
 }

 ###############################################
 # Search HC log file in $titansimBaseDir/build/
 #
 # Points titansimLogFile at the pattern "<basedir>/build/*.log" and announces
 # the search on $nonLogOutput. The pattern is stored unexpanded.
 function search_HC_log() {
   local logPattern="$titansimBaseDir/build/*.log"

   printf "Searching HC logfile in %s ...\n" "$logPattern" >> $nonLogOutput
   titansimLogFile=$logPattern
 }

 ###############################################
 # Initialize process names and PIDs
 #
 # Fills the arrays procNames ("[<pid>]<name>" entries) and procIds, and sets
 # processPidlist to a comma-separated list of the PIDs that 'ps -C' reports
 # for $executableName. Which source is used depends on $pidNames:
 #   logfilename - PID and name are cut out of the log file names
 #   hclog       - PID and name are read from the HC log content
 #   nonames     - PIDs (and the name column of ps) are taken from ps output
 # The ps-based method is also used when the first two found nothing.
 # On unrecoverable errors a message is written to $nonLogOutput and the
 # script is left through 'ctrl_c 1'.
 # NOTE(review): the loops below iterate line by line only because the script
 # sets IFS to a newline globally.
 function gather_process_names() {

   # Method#1: Get PIDs and process names from the name of the log files
   if [ "$pidNames" == "logfilename" ]; then
     printf "Getting PID names from log file names\n" >> $nonLogOutput
     # $titansimLogFiles is a glob pattern; it is expanded here by the shell.
     logFiles=$(ls -b -1 $titansimLogFiles 2>/dev/null)

     if [[ "$logFiles" == "" ]]
     then
       printf "\nLog files not found with pattern: %s\n" $titansimLogFiles >> $nonLogOutput
       nonLogOutput=/dev/null
       ctrl_c 1
     fi

     for line in $logFiles;
     do
       # Expected name shape: ...-<name>-<pid>.log ; \2 is the numeric part.
       pid=$(echo $line | sed 's/.*-\(.*\)-\([0-9]*\).log/\2/')
       # When sed did not match, the whole file name comes back and fails here.
       if [[ ! $pid =~ [0-9]{1,}$ ]]
       then
         printf "Pid not found in filename: %s. File ignored\n" $line >> $nonLogOutput
         continue;
       fi
       # \1 is the part between the last two dashes, used as process name.
       ptc=$(echo $line | sed 's/.*-\(.*\)-\([0-9]*\).log/\1/')
       procNames=("${procNames[@]}" "[$pid]$ptc")
       procIds=("${procIds[@]}" "$pid")
     done
   fi

   # Method#2: Get PIDs and process names from the content of HC log file
   if [ "$pidNames" == "hclog" ]; then

     printf "Getting PID names from HC log content: %s\n" "$titansimLogFile" >> $nonLogOutput
     titansimMtcInfo=$(cat $titansimLogFile  2> /dev/null | grep "MTC was created. Process id: ")
     titansimPtcInfo=$(cat $titansimLogFile  2> /dev/null | grep "PTC was created" | grep " process id: ")

     # Nothing found: retry with every *.log file in the build directory.
     if [[ "$titansimMtcInfo" == "" && "$titansimPtcInfo" == "" ]]; then
       printf "\nMTC/PTC info was not found in HC log file: %s\n" "$titansimLogFile" >> $nonLogOutput
       search_HC_log
       titansimMtcInfo=$(cat $titansimLogFile 2> /dev/null | grep "MTC was created. Process id: ")
       titansimPtcInfo=$(cat $titansimLogFile 2> /dev/null | grep "PTC was created" | grep " process id: ")
     fi

     if [[ "$titansimMtcInfo" != "" ]]
     then
       procName="MTC"
       procId=$(echo $titansimMtcInfo |sed 's/.*Process id: \([0-9]*\).*/\1/')
       procNames=("${procNames[@]}" "[$procId]$procName")
       procIds=("${procIds[@]}" "$procId")
     fi

     for line in $titansimPtcInfo
     do
       compName=$(echo $line | grep "component name:")
       if [ "$compName"  ]
       then
         # Name is the text between "component name: " and the next comma.
         procName=$(echo $line |sed 's/.*component\ name:\ \([a-zA-Z0-9_\/\.]*\),.*/\1/')
       else
         # No component name in the line: fall back to a few known keywords.
         procName="unknown"
         if [ $(echo $line | grep "HostAdmin") ]
         then
           procName="EPTF_HostAdmin"
         fi;
         if [ $(echo $line | grep "MSRP_Logger_CT") ]
         then
           procName="MSRP_Logger_CT"
         fi;
         if [ $(echo $line | grep "IMS_Setup_CT") ]
         then
           procName="IMS_Setup"
         fi;
       fi

       procId=$(echo $line |sed 's/.*process id: \([0-9]*\).*/\1/')
       procNames=("${procNames[@]}" "[$procId]$procName")
       procIds=("${procIds[@]}" "$procId")

     done
   fi

   # If a log-based method produced a PID, verify that it is alive and take the
   # executable name from it (4th column of the ps output line).
   if [[ "${procIds[0]}" != "" ]]
   then
     #check process name:
     processNameFromPid=$(ps -p ${procIds[0]} | tail -n +2  | awk '{print $4}')
     if [[ "$processNameFromPid" == "" ]]
     then
       printf "The process %s found in the log file is not running. The log file might be too old.\n" ${procNames[0]} >> $nonLogOutput
       nonLogOutput=/dev/null
       ctrl_c 1
     fi
     printf "Detected executable name from process %s: \"%s\"\n" ${procNames[0]} $processNameFromPid >> $nonLogOutput
     executableName=$processNameFromPid
   else
       printf "No processes found in the log files. Searching processes with name \"%s\"\n" $executableName >> $nonLogOutput
   fi

   # Comma-separated first column (PID) of all processes with that name;
   # 'tail -n +2' drops the ps header line.
   processPidlist=$(ps  -C $executableName | tail -n +2 | awk '{if (n==0){n=1; printf "%s", $1}else {printf ",%s",$1}}')
   #threadPidlist=$(ps -T -C $executableName | tail -n +2 | awk '{if (n==0){n=1; printf "%s", $2}else {printf ",%s",$2}}')

   if [[ $processPidlist == "" ]]
   then
       printf "\nNo processes found to monitor.\n" >> $nonLogOutput
       nonLogOutput=/dev/null
       ctrl_c 1
   fi

   # Method#3: Get PIDs from the name of the executable. No process names just PIDs (default)
   if [[ "$pidNames" == "nonames" || "${procIds[0]}" == "" ]]; then
     printf "Getting PID names from executable name: %s\n" $executableName >> $nonLogOutput
     printf "No process names will be logged just PIDs\n" >> $nonLogOutput

     if [[ $showThreads == true ]]
     then
       titansimProcesses=$(ps -T -C $executableName | tail -n +2)
     else
       titansimProcesses=$(ps -C $executableName | tail -n +2)
     fi

     if [[ "$titansimProcesses" == "" ]]
     then
       printf "\nNo process found with name: %s\n" $executableName >> $nonLogOutput
       nonLogOutput=/dev/null
       ctrl_c 1
     fi

     for line in $titansimProcesses
     do
       # 'ps -T' output has one extra leading column, so the id and the name
       # are taken from columns 2 and 5 instead of 1 and 4.
       if [[ $showThreads == true ]]
       then
         procId=$(echo $line | awk '{print $2}')
         procName=$(echo $line | awk '{print $5}')
       else
         procId=$(echo $line | awk '{print $1}')
         procName=$(echo $line | awk '{print $4}')
       fi

       procNames=("${procNames[@]}" "[$procId]$procName")
       procIds=("${procIds[@]}" "$procId")
     done
   fi
 }

 ###############################################
 # Starts the sar command
 #
 # Launches sar in the background, sampling every $granularity seconds into a
 # fresh temporary file.
 # Globals: tmpTemplate, granularity (read);
 #          tmpSarLogFile (written: the data file), SAR_PID (written: sar's PID)
 function start_limited_capture() {
   tmpSarLogFile=$(mktemp -t $tmpTemplate)

   # sar's own output is not needed; the readings are kept in the -o file.
   sar -o $tmpSarLogFile -u -q $granularity > /dev/null &
   SAR_PID=$!
 }

 ###############################################
 # Starts the pidstat command
 #
 # Joins the process names with commas, then runs pidstat for the PIDs in
 # $processPidlist every $granularity seconds (with -t and the extra SELF
 # entry when threads are shown). The output is piped through process_pidstat,
 # saved to a temporary file and appended to $pidstatLogFile. When the
 # pipeline ends, ctrl_c is called to finish the script.
 # Globals: procNames, processPidlist, granularity, showThreads, tmpTemplate,
 #          pidstatLogFile (read); proclist, tmpPidstatLogFile (written)
 function start_capture() {

   local proc
   proclist=""
   for proc in "${procNames[@]}"
   do
     # A comma is inserted only once the list is non-empty.
     proclist="${proclist:+$proclist,}$proc"
   done

   tmpPidstatLogFile=$(mktemp -t "$tmpTemplate")

   #start the pidstat collector
   # "$proclist" must be quoted: entries look like "[123]name", which the shell
   # would otherwise treat as a glob pattern and may replace by a file name.
   if [[ $showThreads == true ]]
   then
     pidstat -h -t -r -u -v -p "$processPidlist,SELF" "$granularity" | process_pidstat "$proclist" | tee "$tmpPidstatLogFile" >> "$pidstatLogFile"
   else
     pidstat -h -r -u -v -p "$processPidlist" "$granularity" | process_pidstat "$proclist" | tee "$tmpPidstatLogFile" >> "$pidstatLogFile"
   fi
   ctrl_c
   exit
 }

 ###############################################
 # Select the columns from the sar data and print it in one line
 #
 # Converts the binary sar file $tmpSarLogFile into two tab-separated text
 # files: $tmpsadfLogFile with the selected system columns and
 # $tmpsadfLogFile_nic with the selected network columns of interface $nic.
 # The awk format strings and column lists are spliced in from the shell
 # variables sarHeaders[_net]_columns_formatstr and sarHeaders[_net]_columns_str
 # (built by detect_columns_in_sar_data), which is why the awk programs are
 # interrupted by '...' quote breaks.
 # Globals: tmpTemplate, tmpSarLogFile, nic and the four variables above (read);
 #          tmpsadfLogFile, tmpsadfLogFile_nic (written)
 function process_sarData() {
   tmpsadfLogFile=$(mktemp -t $tmpTemplate)
   #sadf -T -h -d $tmpSarLogFile -- -q -u -r -S | sed 's/;/\t/g'
   # System statistics: sadf prints ';'-separated fields, awk re-prints the
   # chosen fields for every input line.
   sadf -T -h -d $tmpSarLogFile -- -q -u -r -S | awk '
     BEGIN {
       FS=";"
     }
     // {
       #print $3,$5,$6,$7,$8,$9,$10,$28,$29,$30
       #printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", $3,$5,$6,$7,$8,$9,$10,$11,$12,$13,$21,$22,$23,$28,$29,$30
       printf "'$sarHeaders_columns_formatstr'\n", '$sarHeaders_columns_str'
       fflush()
     }
   ' > $tmpsadfLogFile
   tmpsadfLogFile_nic=$(mktemp -t $tmpTemplate)
   # Network statistics: grep keeps the lines containing '#' and the lines of
   # the monitored interface (";<nic>;").
   sadf -T -d $tmpSarLogFile -- -n DEV | grep -e "#\|;$nic;" | awk '
     BEGIN {
       FS=";"
     }
     // {
       #printf "%s\t%s\t%s\t%s\t%s\n", $3,$5,$6,$7,$8
       printf "'$sarHeaders_net_columns_formatstr'\n", '$sarHeaders_net_columns_str'
       fflush()
     }
   ' > $tmpsadfLogFile_nic
 }

 ###############################################
 # Print available stats for processes from pidstat command
 #
 # Runs pidstat once on itself, takes the output line(s) containing '#',
 # stores the blank-separated entries in the global array pidstatData and
 # echoes them on one line.
 function print_available_pidstat_data() {
   echo "Available stats for processes: "
   # One awk call both selects the '#' line and puts every entry on its own line.
   pidstatData=($(pidstat -h -r -u -v -p SELF | awk '/#/ {gsub(/[ ]+/,"\n"); print}'))
   echo ${pidstatData[@]}
 }

 ###############################################
 # Print available system stats from sar command
 #
 # Takes a single one-second sar sample into a temporary file, reads the first
 # line that sadf prints for it (once for the system counters, once for
 # '-n DEV') and echoes the ';'-separated entries of those lines.
 # Globals: tmpTemplate, nic (read); tmpsarDataFile, availableSarHeaders,
 #          availableSarHeaders_net (written)
 function print_available_sar_data() {
   echo "Available system stats: "
   tmpsarDataFile=$(mktemp -t $tmpTemplate)
   sar -o $tmpsarDataFile -u -q 1 1 > /dev/null
   # tr puts every ';'-separated entry on its own line for the array split.
   availableSarHeaders=($(sadf -T -h -d $tmpsarDataFile -- -q -u -r -S | head -n +1 | tr ';' '\n'))
   availableSarHeaders_net=($(sadf -T -h -d $tmpsarDataFile -- -n DEV | head -n +1 | tr ';' '\n'))
   rm $tmpsarDataFile
   echo ${availableSarHeaders[@]}
   echo "Available system stats for network interface $nic: "
   echo ${availableSarHeaders_net[@]}
 }

 ###############################################
 # Select the columns of the system stats to collect from sar data
 #
 # Takes a one-second sar sample, reads the column names sadf reports for it
 # and looks up every requested name of sarHeaders / sarHeaders_net in them.
 # A found name becomes the awk field reference "$<position>", a missing one
 # the awk string literal "-". From these lists the function derives
 #   sarHeaders[_net]_columns_str       - comma-separated awk arguments
 #   sarHeaders[_net]_columns_formatstr - matching "%s\t%s..." printf format
 # Globals: tmpTemplate, sarHeaders, sarHeaders_net (read); tmpsarDataFile,
 #          availableSarHeaders[_net], sarHeaders[_net]_columns and the four
 #          strings above (written)
 function detect_columns_in_sar_data() {
   tmpsarDataFile=$(mktemp -t $tmpTemplate)
   sar -o $tmpsarDataFile -u -q 1 1 > /dev/null
   availableSarHeaders=($(sadf -T -h -d $tmpsarDataFile -- -q -u -r -S | head -n +1 | tr ';' '\n'))
   availableSarHeaders_net=($(sadf -T -h -d $tmpsarDataFile -- -n DEV | head -n +1 | tr ';' '\n'))
   rm $tmpsarDataFile

   local want have placeholders

   # select columns to capture from the system data
   for (( want = 0; want < ${#sarHeaders[@]}; want++ ))
   do
     # unknown until a matching column name is found
     sarHeaders_columns[want]="\"-\""
     for (( have = 0; have < ${#availableSarHeaders[@]}; have++ ))
     do
       if [[ "${sarHeaders[want]}" == "${availableSarHeaders[have]}" ]]
       then
         # awk fields are 1-based
         sarHeaders_columns[want]="\$$((have + 1))"
         break
       fi
     done
   done

   # select columns to capture from the network data
   for (( want = 0; want < ${#sarHeaders_net[@]}; want++ ))
   do
     sarHeaders_net_columns[want]="\"-\""
     for (( have = 0; have < ${#availableSarHeaders_net[@]}; have++ ))
     do
       if [[ "${sarHeaders_net[want]}" == "${availableSarHeaders_net[have]}" ]]
       then
         sarHeaders_net_columns[want]="\$$((have + 1))"
         break
       fi
     done
   done

   # Join with commas inside a subshell so that the global IFS stays untouched;
   # the format string gets one %s per column, separated by a literal "\t".
   sarHeaders_columns_str=$(IFS=,; printf '%s' "${sarHeaders_columns[*]}")
   placeholders=$(IFS=,; printf '%s' "${sarHeaders_columns[*]/*/%s}")
   sarHeaders_columns_formatstr="${placeholders//,/\\t}"

   sarHeaders_net_columns_str=$(IFS=,; printf '%s' "${sarHeaders_net_columns[*]}")
   placeholders=$(IFS=,; printf '%s' "${sarHeaders_net_columns[*]/*/%s}")
   sarHeaders_net_columns_formatstr="${placeholders//,/\\t}"
 }

 ###############################################
 # Select the columns of the process stats to collect from pidstat data
 #
 # Runs pidstat once on itself, splits the output line containing '#' into
 # the global array pidstatData and records the array positions of the
 # "PID", "%CPU", "%MEM" and "Command" entries.
 # Globals: pidstatData, pidID_pidstat, cpuID_pidstat, memID_pidstat,
 #          cmdID_pidstat (written; an index keeps its previous value when the
 #          entry is not present)
 function detect_columns_in_pidstat_data() {
   pidstatData=($(pidstat -h -r -u -v -p SELF | awk '/#/ {gsub(/[ ]+/,"\n"); print}'))

   local idx
   for idx in "${!pidstatData[@]}"
   do
     case "${pidstatData[idx]}" in
       "PID")     pidID_pidstat=$idx ;;
       "%CPU")    cpuID_pidstat=$idx ;;
       "%MEM")    memID_pidstat=$idx ;;
       "Command") cmdID_pidstat=$idx ;;
     esac
   done
 }

 ###############################################
 # Same as detect_columns_in_pidstat_data but with threads monitoring.
 #
 # Runs 'pidstat -h -t -r -u -v -p SELF' once and looks up the interesting
 # words in its header row (the row that contains "#").
 # Globals written:
 #   pidstatData       - the header row split into words
 #   tidID_pidstat_t   - position of the word "TID"
 #   tgidID_pidstat_t  - position of the word "TGID"
 #   cpuID_pidstat_t   - position of the word "%CPU"
 #   memID_pidstat_t   - position of the word "%MEM"
 #   cmdID_pidstat_t   - position of the word "Command"
 #   i                 - loop index, left at the last visited position
 # A position variable keeps its previous value when its word is not found.
 function detect_columns_in_pidstat_t_data() {
   # A single awk call both selects the header row and puts every word on
   # a line of its own.
   pidstatData=($(pidstat -h -t -r -u -v -p SELF | awk '/#/ {gsub(/[ ]+/,"\n"); print;}'))
   for i in "${!pidstatData[@]}"
   do
     case "${pidstatData[$i]}" in
       "TID")     tidID_pidstat_t=$i ;;
       "TGID")    tgidID_pidstat_t=$i ;;
       "%CPU")    cpuID_pidstat_t=$i ;;
       "%MEM")    memID_pidstat_t=$i ;;
       "Command") cmdID_pidstat_t=$i ;;
     esac
   done
   #echo "TID index: $tidID_pidstat_t"
   #echo "TGID index: $tgidID_pidstat_t"
   #echo "%CPU index: $cpuID_pidstat_t"
   #echo "%MEM index: $memID_pidstat_t"
   #echo "Command index: $cmdID_pidstat_t"
 }

 ###############################################
 # Select and print the measured values for process statistics.
 #
 # Reads pidstat output from stdin (presumably produced with the same -h and
 # optional -t options that the detect_columns_in_pidstat*_data functions use;
 # the invocation is outside this function) and writes one tab separated row
 # per measurement period to stdout. A header row is printed once, in front
 # of the first data row. A period ends at an empty input line.
 #
 # Arguments:
 #   $1 - comma separated "[pid]name" entries selecting the per-process
 #        columns; when empty, a column is created for every pid in the input
 # Globals read:
 #   executableName   - process name counted with ps (countPs, countThreads)
 #   pidID_pidstat, cpuID_pidstat, memID_pidstat, cmdID_pidstat
 #                    - field numbers used in process mode
 #   tidID_pidstat_t, tgidID_pidstat_t, cpuID_pidstat_t, memID_pidstat_t,
 #   cmdID_pidstat_t  - field numbers used in thread mode
 # Side effects:
 #   sends SIGINT to the pid found on an input line mentioning "pidstat"
 #   once ps reports no process named $executableName.
 # Requires an awk that provides strftime().
 # NOTE(review): thread mode skips rows whose TID field equals 0; confirm that
 # the installed pidstat prints 0 (and not "-") there.
 function process_pidstat() {
   # Every assignment is quoted: the process list contains "[...]" which the
   # shell would otherwise treat as a glob pattern.
   awk -v proclist="$1" -v executableName="$executableName" \
       -v pidID="$pidID_pidstat" -v cpuID="$cpuID_pidstat" \
       -v memID="$memID_pidstat" -v cmdID="$cmdID_pidstat" \
       -v tidID="$tidID_pidstat_t" -v tgidID="$tgidID_pidstat_t" \
       -v cpuID_t="$cpuID_pidstat_t" -v memID_t="$memID_pidstat_t" \
       -v cmdID_t="$cmdID_pidstat_t" \
   '
 # Format epoch seconds as "YYYY-MM-DD HH:MM:SS".
 function hms(s)
 {
   return strftime("%F %H:%M:%S", s)
 }

   BEGIN{
     start=0;
     #pids are indexed from 1!
     split("",pids)
     nofPids=split(proclist,procNames,",")
     for (i=1; i<=nofPids; i++) {
       # "[1234]name" -> "1234"
       split(procNames[i],tmp,"]")
       pids[i]=substr(tmp[1],2)
     }
     useThreads=0
   }
   /TID/ {
     # when TID is in the header, pidstat is running with -t option: thread mode
     # (the header starts with "#", so its words are shifted by one field)
     if ($(tidID+1)=="TID") {
       useThreads=1;
     }
   }
   /pidstat/ {
     #have to kill pidstat if countPS==0 (it sometimes does not stop when the processes have terminated)
     if (countPs==0 && start==1) {
       pidstatpid=(useThreads==0) ? $pidID : $tidID
       if (pidstatpid>0) {
         command="kill -INT " pidstatpid
         command | getline result
         close(command)
       }
     }
     next
   }
   /^$/{
     #print the header:
     if (start==0 && timestamp!=0) {
       printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", "timestamp", "countPs", "countThreads", "sum%MEM", "sum%CPU", "max%CPU", "sumMLSim%CPU", "maxMLSim%CPU"
       printf "|\t"
       for (i=1; i<=nofPids;i++) {
         # a literal percent sign has to be written as %% in a format string
         printf("%s%%CPU\t", procNames[i])
       }
       printf "|\t"
       for (i=1; i<=nofPids;i++) {
         printf("%s%%MEM\t", procNames[i])
       }
       printf "\n"
       fflush()
       start=1
     }
     #print the data:
     if (start==1) {
       command="ps -C "executableName" | tail -n +2 | wc -l"
       command | getline countPs
       close(command)
       command="ps -T -C "executableName" | tail -n +2 | wc -l"
       command | getline countThreads
       close(command)
       printf "%s\t%d\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t", hms(timestamp), countPs, countThreads, summem, sumcpu, maxcpu, sumMLSimcpu, maxMLSimcpu
       printf "|\t"
       for (i=1; i<=nofPids;i++) {
         printf "%s\t", cpus[pids[i]]
       }
       printf "|\t"
       for (i=1; i<=nofPids;i++) {
         printf "%s\t", mems[pids[i]]
       }
       printf "\n"
       fflush()
       #reset the accumulators for the next period
       n=0
       sumcpu=0
       maxcpu=0
       summem=0
       sumMLSimcpu=0
       maxMLSimcpu=0
       split("",cpus)
       split("",mems)
     }
   }
   /^ /{
    #data row
    #print "pid:" $3 " cpu:" $7 " mem:" $13 " CMD:" $16
    #print "with threads pid:" $3 " TID:" $4 " cpu:" $8 " mem:" $14 " CMD:" $17

    if (useThreads==1) {
      TID=$tidID

      if (TID==0) {
       next; #ignore the overall counters for the process
      }

      timestamp=$1
      cpu=$cpuID_t
      mem=$memID_t
      cmd=$cmdID_t
      #TID cannot be 0 here, so the thread id is the key
      pid=TID
    } else {
      timestamp=$1
      pid=$pidID
      cpu=$cpuID
      mem=$memID
      cmd=$cmdID
    }

    #without an explicit process list every pid seen gets a column
    if (proclist=="") {
      pids[++n]=pid
      nofPids=n
    }
    cpus[pid]=cpu
    if(maxcpu==0) {
      maxcpu=cpu
    } else {
      maxcpu = (maxcpu > cpu) ? maxcpu : cpu
    }
    sumcpu=sumcpu+cpu
    mems[pid]=mem
    summem=summem+mem

    if (index(cmd, "ML_")!=0) {
      #mlsim process
      sumMLSimcpu=sumMLSimcpu+cpu
      if(maxMLSimcpu==0) {
        maxMLSimcpu=cpu
      } else {
        maxMLSimcpu = (maxMLSimcpu > cpu) ? maxMLSimcpu : cpu
      }
    }
   }'
 }

 ###############################################
 # Append lines together in separate files so that lineN = lineNInFile1 + lineNInFile2 + ...
 #
 # Prints the table header with print_table_header, then joins line N of
 # $tmpsadfLogFile, $tmpsadfLogFile_nic and $tmpPidstatLogFile with tabs and
 # prints the joined lines to stdout.
 # In every file the line whose first word is "timestamp" is its header; it
 # restarts the line counter. Lines are only collected once the first header
 # was seen, and the header line itself is not printed again.
 # A line of a later file is dropped, together with all the following lines
 # of that file, when the first file has no line at that position.
 # Globals read: tmpsadfLogFile, tmpsadfLogFile_nic, tmpPidstatLogFile
 function merge_outputs() {
   print_table_header
   # The file names are quoted so that paths containing blanks stay intact.
   awk '
   {
     if ($1 == "timestamp") {
       # header of the next file: start over at slot 0
       n=0
       recordcounter++
     }
     if (recordcounter==1) {
       # first file: create the slot
       timedata[n]=$0
     } else {
       if (timedata[n]=="") {
         # no such line in the first file (n is deliberately not advanced)
         next
       }
       timedata[n]=timedata[n] "\t" $0
     }
     n++;
   }
   END {
     #i=0: print the header, i=1: no header (header is printed by print_table_header as well)
     for (i=1; timedata[i]!=""; i++) {
       printf "%s\n", timedata[i]
     }
     fflush()
   }' "$tmpsadfLogFile" "$tmpsadfLogFile_nic" "$tmpPidstatLogFile"
 }

 ###############################################
 # Prints system statistics during process monitoring, full log.
 #
 # Starts 'sar -q -u -r -S -n DEV $granularity' in the background, filtered
 # through awk, and appends the result to $sysstatLogFile. The awk filter
 # turns the blocks that sar prints for one period into a single tab
 # separated row:
 #   - lines beginning with a non-digit character are ignored;
 #   - an empty line marks the start of a new block;
 #   - the first word of the first remaining line is remembered as the header
 #     time; lines starting with that word are header rows. Until the header
 #     was printed, their blank runs are turned into tabs and the header time
 #     is replaced by the word "timestamp";
 #   - every other line is a data row; its blank runs are turned into tabs,
 #     the date is put in front of it and it is appended to the current row;
 #   - when the first line after an empty line starts with the time of the
 #     collected data rows, the period is complete: the joined header is
 #     printed (once only), then the joined data row, and that time becomes
 #     the new header time.
 # The date is taken from 'date +%F' once, when the pipeline is started, so
 # it is not refreshed while sar keeps running.
 # Globals read:    granularity, sysstatLogFile
 # Globals written: SAR_SYSLOGGER_PID
 # In the main part of this script the call of this function is commented out.
 function print_sysstat_during_execution_full() {
   sar -q -u -r -S -n DEV $granularity | awk -v date=$(date +%F) '
   BEGIN {
     headerIsPrinted=0
     headerTime=""
     dataTime=""
     headerRow=""
     dataRow=""
     newdata=0
   }
   /^[^0-9]+.+/ {
     next
   }
   /^$/ {
     newdata=1
     next
   }
   /.+/ {
     #printf "current line: %s\n", $0
     #printf "header time : %s\n", headerTime
     #printf "data   time : %s\n", dataTime
     if (newdata==1 && dataTime == $1) {
       #printf "End of data series for %s\n", dataTime
       #end of data series for this period
       if (!headerIsPrinted) {
         printf "%s\n", headerRow
         headerIsPrinted=1
       }
       printf "%s\n", dataRow
       dataRow=""
       dataTime=""
       headerTime=$1
       fflush();
     }
     if (newdata==1) {
       newdata=0
     }
     if (headerTime=="") {
       headerTime=$1
     }
     if ($1 == headerTime) {
       #print "Header is detected"
       if (!headerIsPrinted) {
         gsub(/[ ]+/,"\t")
         gsub(headerTime,"timestamp")
         currentHeader=$0
       }
       next # exit for header line
     }
     # data row
     #print "Data is detected"
     #update header:
     if (!headerIsPrinted) {
       if (headerRow=="") {
         headerRow=currentHeader
       } else {
         headerRow=sprintf("%s\t%s", headerRow, currentHeader)
       }
     }
     if (dataTime == "") {
       dataTime=$1
     }
     gsub(/[ ]+/,"\t")
     if (dataRow=="") {
       dataRow=sprintf("%s %s", date, $0)
     } else {
       dataRow=sprintf("%s\t%s %s", dataRow, date, $0)
     }
   }
   ' >> $sysstatLogFile &
   # NOTE(review): 'jobs -p' reports the background jobs of this shell, while
   # print_sysstat_during_execution_filtered stores $! instead; confirm which
   # value the code that stops the logger expects.
   SAR_SYSLOGGER_PID=$(jobs -p)
 }


 ###############################################
 # Prints system statistics, filtered log.
 #
 # Takes one sar sample with interval $granularity into $tmpSarStartupFile,
 # converts it with sadf and writes one tab separated row to stdout:
 #   the selected system columns, the selected columns of interface $nic,
 #   field 3 of the sadf record of $nic, and the number of processes and
 #   threads that ps reports for $executableName.
 # Globals read:
 #   tmpSarStartupFile, granularity, nic, executableName
 #   sarHeaders_columns_formatstr, sarHeaders_columns_str
 #   sarHeaders_net_columns_formatstr, sarHeaders_net_columns_str
 #     - printf format and awk field list of the columns to print; both are
 #       spliced into the awk programs as source text
 # Globals written: TMP2
 function print_sysstat_filtered() {
   # The data file name is quoted so that a temp directory containing blanks
   # does not split it into several arguments.
   sar -o "$tmpSarStartupFile" -u -q "$granularity" 1 >> /dev/null

   # Ask sadf again until it delivers data (the first output line is dropped).
   # NOTE: this does not end if sadf never produces any data.
   TMP2=""
   while [ "$TMP2" == "" ]
   do
     TMP2=$(sadf -T -h -d "$tmpSarStartupFile" -- -q -u -r -S | tail -n +2)
   done
   printf '%s\n' "$TMP2" | awk '
     BEGIN {
       FS=";"
     }
     {
       # format string and field list are inserted by the shell
       printf "'"$sarHeaders_columns_formatstr"'\t", '"$sarHeaders_columns_str"'
       fflush()
     }
   '

   # append stats for the $nic interface:
   sadf -T -d "$tmpSarStartupFile" -- -n DEV \
     | grep -e "#\|;$nic;" \
     | tail -n +2 \
     | awk -v executableName="$executableName" '
       BEGIN {
         FS=";"
       }
       {
         command="ps -C "executableName" | tail -n +2 | wc -l"
         command | getline countPs
         close(command)
         command="ps -T -C "executableName" | tail -n +2 | wc -l"
         command | getline countThreads
         close(command)
         # format string and field list are inserted by the shell
         printf "'"$sarHeaders_net_columns_formatstr"'\t%s\t%s\t%s\n", '"$sarHeaders_net_columns_str"',$3,countPs,countThreads
         fflush()
       }
     '
 }

 ###############################################
 # Prints system statistics during process monitoring, filtered log.
 #
 # Creates the temporary sar data file, optionally writes the table header
 # to $sysstatLogFile and then starts a background loop which appends one
 # row per print_sysstat_filtered call to $sysstatLogFile. The data file is
 # removed after every row; the same file name is used for the next one.
 # The loop has no end condition of its own.
 # Globals read:    tmpTemplate, bootlog, telnetPort, sysstatLogFile
 # Globals written: tmpSarStartupFile - name of the temporary sar data file
 #                  SAR_SYSLOGGER_PID - pid of the background loop
 function print_sysstat_during_execution_filtered() {
   tmpSarStartupFile=$(mktemp -t "$tmpTemplate")

   # $bootlog is run as a command here; presumably it holds "true" or
   # "false" (it is set outside this function).
   if ! $bootlog || [ "$telnetPort" -le 0 ]
   then
     print_table_header >> "$sysstatLogFile"
   fi
   while true
   do
     print_sysstat_filtered >> "$sysstatLogFile"
     # quoted, so that the file is also removed when its path contains blanks
     rm -- "$tmpSarStartupFile"
   done &
   SAR_SYSLOGGER_PID=$!
 }

 ###############################################
 # MAIN PART
 ###############################################

 # Startup sequence. The functions called here without a definition in this
 # part of the file are defined further up.
 chk_sysstat_version

 set_defaults

 # Hand the command line of the script over unchanged.
 parse_arguments "$@"

 # Find out which columns of the sar and pidstat output are available.
 detect_columns_in_sar_data
 detect_columns_in_pidstat_data
 detect_columns_in_pidstat_t_data

 # Give up when the selected interface has no entry under /sys/class/net.
 if [ ! -d /sys/class/net/$nic ]; then
   printf "\nNetwork interface %s does not exist.\n" $nic >> $nonLogOutput
   # NOTE(review): presumably keeps ctrl_c from printing further messages - confirm.
   nonLogOutput=/dev/null
   ctrl_c 1
 fi

 # From here on INT and TERM are handled by ctrl_c.
 trap ctrl_c INT TERM

 # Create $outputfile, or empty it when it already exists.
 printf "" > $outputfile

 #start_limited_capture

 wait_for_titansim_bootup

 # The background system statistics logger is only started when its log does
 # not go to /dev/stdout.
 if [[ "$sysstatLogFile" != "/dev/stdout" ]]
 then
 print_sysstat_during_execution_filtered
 #print_sysstat_during_execution_full
 fi

 gather_process_names

 print_all_header >> $pidstatLogFile


 start_limited_capture
 start_capture