blob: e57d29bb4c80fb4d6796bc1134d6059317a19254 [file] [log] [blame]
#!/bin/bash
#///////////////////////////////////////////////////////////////////////////////
#// Copyright (c) 2000-2019 Ericsson Telecom AB //
#// //
#// All rights reserved. This program and the accompanying materials //
#// are made available under the terms of the Eclipse Public License v2.0 //
#// which accompanies this distribution, and is available at //
#// https://www.eclipse.org/org/documents/epl-2.0/EPL-2.0.html //
#///////////////////////////////////////////////////////////////////////////////
###############################################################################
#
# This script logs various performance related gauges and counters like:
# - CPU load
# - overall free and used RAM
# - network packets and bytes in and out
# - TitanSim per process CPU & RAM utilization
#
# It uses the sysstat utility http://sebastien.godard.pagesperso-orange.fr/download.html
# Requires minimum sysstat version 10.2.0
#
# Please see -h command line option and TitanSim online help for documentation.
###############################################################################
###############################################################################
#
# DETAILED DESCRIPTION
#
# This script can measure any selection of
# 1) system statistics collected by the 'sar' utility with flags '-q -u -r -S'
# 2) network interface statistics collected by the 'sar' utility with flags '-n DEV'
# 3) CPU and MEM usage of processes or threads collected by the 'pidstat' utility
#
# The available statistics depend on the version of the sysstat utility in use.
# The actual list can be printed. See the help for more details.
#
# The statistics is collected periodically. The length of the period is the granularity interval.
# The collected statistics show the average values of the given data over the granularity interval.
# The granularity interval is configurable.
#
# The processes to measure can be specified on the following ways:
# 1) PIDs collected for all processes with a given executable name
# 2) PIDs collected from titan logfile names. Logfile names should follow a given pattern.
# 3) PIDs collected from titan HC log file. Certain LOG MASKS should be enabled.
#
# It is also possible to monitor all the threads for the monitored PID list.
# If threads are enabled, CPU/MEM statistics are collected for every thread of the
# processes monitored.
#
# If the PID list is collected from titan logfile names or the HC log, titan process names
# will be printed in the column names for processes (if available).
#
# For the process statistics the following sums are calculated automatically also:
# countPs - current number of processes to monitor (based on the executable name)
# countThreads - number of threads for all the processes
# sum%MEM - sum of memory usage percentages of all processes
# sum%CPU - sum of CPU utilizations of all processes
# max%CPU - maximum of single process CPU utilization
# sumMLSim%CPU - sum of CPU utilization of all MLSim processes
# maxMLSim%CPU - maximum of a single MLSim thread CPU utilization
#
# MLSim and countThreads statistics are only available when thread monitoring is
# enabled.
# CPU utilization for a process gives the average CPU usage of one CPU of the process
# during the granularity period (its value should be in the 0.0-1.0 interval).
#
# For Titansim monitoring there are additional features like:
# 1) Wait until titansim boots up (ready-to-run LED is green).
# Process statistics are collected only after ready-to-run state is reached.
# Before that only system statistics is collected.
# When ready-to-run state is reached, PID list is collected and the capture of
# process statistics starts.
# For this feature to work, the CLI telnet port has to be specified.
#
# If the telnet port is set to 0, collection of process statistics starts
# immediately. In this case the PID list is generated at the time when
# the measurement script is started.
# For non-titansim based processes, telnet port has to be set to 0!
#
# 2) Capturing CPU/MEM utilization of MLSim threads.
# MLSim threads are identified by names prefixed with "ML_".
# CPU and MEM usage is captured for all MLSim threads.
#
# During the measurement live data is printed to stdout:
# During the wait-to-ready period only system/network statistics are collected.
# After ready-to-run state is detected, capture of process statistics is started.
# A new header line is printed and the live process statistics is printed to stdout.
# It is possible to redirect the live system statistics to a separate file. In this case
# only the process statistics is printed to stdout.
#
# When the measurement is finished (by stopping the script or all the monitored
# processes have terminated) the complete measured data can be printed to a CSV file.
# The file contains the captured system stats before the ready-to-run state, and
# the system stats together with the process stats after the ready-to-run state.
# All the data is printed to a single line for each capture time.
# The name of the output file can be configured.
#
# While the script is working it creates temporary files. They are created by the 'mktemp'
# command and placed in the '/tmp' directory (or in $TMPDIR if it is set). The files are removed
# automatically when the script terminates. All the temporary file names have the prefix 'sysstatlogger_'.
#
###############################################################################
###############################################################################
#
# EXAMPLES
#
# To monitor all titansim processes including MLSim threads.
# The stdout is redirected to live.txt, the final output is written into stats.csv.
# The pidlist is detected from the process names, ready to run is waited on default port:
# <sysstatLoggerPath>/sysstatlogger.sh -t -S -o stats.csv > live.txt
#
# Same but statistics is not collected during startup:
# <sysstatLoggerPath>/sysstatlogger.sh -S -o stats.csv > live.txt
#
# Statistics collection is started immediately (do not wait for ready-to-run):
# <sysstatLoggerPath>/sysstatlogger.sh -T 0 -S -o stats.csv > live.txt
#
# Statistics collection is started after startup, telnet port is set to 8100:
# <sysstatLoggerPath>/sysstatlogger.sh -T 8100 -S -o stats.csv > live.txt
#
# Get PID list from logfile names, logfiles are searched in build/titansim/build:
# <sysstatLoggerPath>/sysstatlogger.sh -f -b build/titansim -o stats.csv > live.txt
#
# Get PID list from HC log, HC log is searched in build/titansim/build:
# <sysstatLoggerPath>/sysstatlogger.sh -H -b build/titansim -o stats.csv > live.txt
#
# Get PID list for the executable name titansim:
# <sysstatLoggerPath>/sysstatlogger.sh -E titansim -o stats.csv > live.txt
#
# Get PID list for the executable name 'top' and its threads, do not wait for ready-to-run:
# <sysstatLoggerPath>/sysstatlogger.sh -S -T 0 -E top -o stats.csv > live.txt
#
# Get PID list for the executable name 'top', do not wait for ready-to-run,
# only "%user" is captured from system stats, nothing from network stats:
# <sysstatLoggerPath>/sysstatlogger.sh -T 0 -E top -o stats.csv -c "%user" -d "" > live.txt
#
###############################################################################
###############################################
# CONSTANTS
###############################################
# Unquoted expansions are split on newlines only from here on.
IFS=$'\n'
# Template handed to mktemp for every temporary file of this run
# (%T is the date(1) shorthand for %H:%M:%S).
printf -v tmpTemplate 'sysstatlogger_%s_XXXXX' "$(date '+%F_%T')"
###############################################
# FUNCTIONS
###############################################
function chk_sysstat_version() {
  # Verifies that the installed sysstat package is at least version 10.2.0.
  # Globals:  SYSSTAT_VERSION_INFO (written: 1 = supported, 0 = too old,
  #           empty = no version line was found)
  # Outputs:  an error message on stdout when the version is not supported
  # Exits:    with status 1 when the version is not supported
  local versionLine
  versionLine=$(pidstat -V 2>&1 | awk '/^sysstat/{print}')
  # "sysstat version 10.2.0" split on blanks and dots: $3 = major, $4 = minor.
  # Major 10 is only accepted from minor 2 on.
  SYSSTAT_VERSION_INFO=$(printf '%s\n' "$versionLine" \
    | awk 'BEGIN{FS="[ .]"} /^sysstat/{ok = ($3 > 10 || ($3 == 10 && $4 >= 2)); print ok}')
  if [[ "$SYSSTAT_VERSION_INFO" != 1 ]]; then
    printf "The %s is not supported. Minimum version: 10.2.0\n" "$versionLine"
    exit 1
  fi
}
###############################################
# Graceful exit (e.g. at SIGINT): stops the collectors, writes the merged
# output file if one was requested and removes the temporary files.
# Globals:   SAR_PID, SAR_SYSLOGGER_PID, sysstatLogFile, outputfile,
#            nonLogOutput, tmpTemplate (read); TMPDIR (set to /tmp if empty)
# Arguments: $1 - exit status of the script (optional; without it the plain
#                 'exit' default status is used)
function ctrl_c() {
  local tmpMask
  printf "\n\n%s stopped.\n" "$0" >> "$nonLogOutput"
  # Stop the system statistics collector and reap it.
  if [[ -n "$SAR_PID" ]]; then
    kill -INT "$SAR_PID"
    wait "$SAR_PID" 2>/dev/null
  fi
  # The live system statistics logger is killed only when its output was
  # redirected away from stdout.
  if [[ "$sysstatLogFile" != "/dev/stdout" && -n "$SAR_SYSLOGGER_PID" ]]; then
    kill -9 "$SAR_SYSLOGGER_PID"
    wait "$SAR_SYSLOGGER_PID" 2>/dev/null
  fi
  if [[ "$outputfile" != "/dev/null" ]]; then
    printf "Collecting sar data...\n" >> "$nonLogOutput"
    process_sarData
    merge_outputs >> "$outputfile"
  fi
  printf "Cleaning-up temporary files..." >> "$nonLogOutput"
  if [[ -z "$TMPDIR" ]]; then
    TMPDIR="/tmp"
  fi
  # Turn the mktemp template into a glob: every X becomes a '?'.
  tmpMask=${tmpTemplate//X/\?}
  # tmpMask is left unquoted on purpose so that the glob is expanded.
  rm -- "$TMPDIR"/$tmpMask 2> /dev/null
  printf "Done.\n" >> "$nonLogOutput"
  if [[ "$outputfile" != "/dev/null" ]]; then
    # The file name is passed as an argument, never as part of the format.
    printf "Output written into the file \"%s\"\n\n" "$outputfile" >> "$nonLogOutput"
  fi
  if [[ -n "$1" ]]; then
    exit "$1"
  fi
  exit
}
###############################################
# Print help and exit
# Outputs: the help page on stdout
# Exits:   through ctrl_c with status 1 (messages of ctrl_c are discarded)
function usage {
  me=$(basename -- "$0")
  printf "\nUsage:\n\n"
  printf " %s [-c <column names>] [-d <column names for network stats>] \n" "$me"
  printf " [-g <granularity>] [-H [-l <hc logfile>] | -f [-b <base directory>] | -n | [-E <executable>]]\n"
  printf " [-i <interface>] [-e <non-Log output file>] [-s <system stats live output>]\n"
  printf " [-p <process stats live output>] [-t] [-T <telnet port>] [-S] [-o <output file>] [-D] [-h]\n\n"
  printf " -c, --columnsSar: Columns to measure for system statistics. Default:\n"
  printf " \"%%user %%nice %%system %%iowait %%steal %%idle[...] kbmemfree kbmemused %%memused kbswpfree kbswpused %%swpused ldavg-1 ldavg-5 ldavg-15\"\n"
  printf " -d, --columnsSarNet: Columns to measure for network interface statistics. Default:\n"
  printf " \"rxpck/s txpck/s rxkB/s txkB/s\"\n"
  printf " -g, --granularity: Granularity period in seconds e.g. 5\n"
  printf " -b, --basedir: TitanSim installation directory e.g. /home/ttcn3/titansim\n"
  printf " <basedir>/build/ is used to search for log files\n"
  printf " Can be used together with -H and -f\n"
  # The default PID source is the executable name without process names.
  printf " -n, --noprocnames: Process names will not be logged just PIDs (default)\n"
  printf " -H, --hclog: Get PIDs and process names from HC log file\n"
  printf " FileMask := ... TTCN3_PARALLEL | TTCN3_EXECUTOR ... must be enabled.\n"
  printf " -l, --hclogfile: Name and path of TitanSim HostController log file\n"
  printf " Use together with -H\n"
  printf " -f, --logfilename: Get PIDs and process names from the log file names\n"
  printf " LogFile := \"titansim.%%h-%%n-%%p.log\" must be set\n"
  printf " -E, --executable: Name of the executable, default: titansim\n"
  printf " -i, --interface: Interface to monitor, e.g. eth0. Default: lo\n"
  printf " -e, --nonlogoutput File to print non-log messages e.g. /dev/stderr\n"
  printf " Not to mess up the log with system messages\n"
  printf " -s, --sysstatlog Redirect live system statistics to this file.\n"
  printf " -p, --pidstatlog Redirect live per-process statistics to this file. Default: /dev/stdout\n"
  printf " -t, --bootlog Log minimal data during TitanSim startup\n"
  printf " -T, --telnetPort TitanSim telnet port number (CLI), default: 7100\n"
  printf " For zero and negative values: wait for start is disabled\n"
  printf " -S, --showThreads Show threads, default: false\n"
  printf " -o, --outputfile Write merged system statistics and pidstat output to this file\n"
  printf " -D, --showData Print available data to measure\n"
  printf " -h, --help: Print this help page\n\n"
  # Silence the messages of ctrl_c; only its clean-up and exit are wanted here.
  nonLogOutput=/dev/null
  ctrl_c 1
  exit 1
}
###############################################
# Set default values for parameters
# Globals: writes every configuration variable listed below.
function set_defaults {
  local scriptDir
  # Name of the executable
  executableName="titansim"
  # Ethernet interface for traffic measurement
  nic="lo"
  # Capture performance data in every $granularity seconds
  granularity=5
  # Use no process names by default
  pidNames="nonames"
  # TitanSim installation directory: the directory of this script with the
  # tool's path inside the installation stripped off
  scriptDir=$(dirname -- "$(readlink -f -- "$0")")
  titansimBaseDir=$(printf '%s\n' "$scriptDir" \
    | sed 's/\/src\/Libraries\/EPTF_Core_Library_CNL113512\/tools\/SysStatLogger//')
  # TitanSim HostController log file directory and name (used with -H, --hclog)
  # The glob is kept literal here; it is expanded where the file is read.
  titansimLogFile="$titansimBaseDir/build/titansim.$(hostname)-HC-*.log"
  # TitanSim log file directory and name (used with -f, --logfilename)
  titansimLogFiles="$titansimBaseDir/build/*-*-*.log"
  # File or device to print messages other than performance log
  nonLogOutput="/dev/stderr"
  # Shall some basic data be logged during TitanSim bootup?
  bootlog=false
  # Titansim telnet port
  telnetPort=7100
  # collected data from sar data
  # Every element is quoted: '%idle[...]' is a glob pattern and would be
  # replaced by a matching file name (e.g. '%idle.') of the working directory.
  sarHeaders=('timestamp' '%user' '%nice' '%system' '%iowait' '%steal' '%idle[...]'
    'kbmemfree' 'kbmemused' '%memused' 'kbswpfree' 'kbswpused' '%swpused'
    'ldavg-1' 'ldavg-5' 'ldavg-15')
  sarHeaders_net=('timestamp' 'rxpck/s' 'txpck/s' 'rxkB/s' 'txkB/s')
  # show threads
  showThreads=false
  # Clock ticks / sec
  HERTZ=$(getconf CLK_TCK)
  # output file name
  outputfile="/dev/null" #"sysstatlogger_$(date +%F_%H:%M:%S).csv"
  # system stat log file
  sysstatLogFile="/dev/stdout"
  #pidstat log file
  pidstatLogFile="/dev/stdout"
}
###############################################
# Parse command line arguments

# Helper of parse_arguments: reports the fatal command line error $1 on
# $nonLogOutput and terminates the script through ctrl_c with status 1.
function _parse_arguments_fail() {
  printf "\n%s\n" "$1" >> "$nonLogOutput"
  nonLogOutput=/dev/null
  ctrl_c 1
}

# Helper of parse_arguments: stores "timestamp" followed by the blank
# separated column names of $2 in the global array named by $1.
# 'read' is used so that names like '%idle[...]' are not glob-expanded.
function _parse_arguments_set_columns() {
  IFS=$' \t\n' read -r -d '' -a "$1" <<< "timestamp $2"
}

# Arguments: "$@" - the command line of the script
# Globals:   overwrites the configuration variables of set_defaults
# Exits:     via ctrl_c/usage on invalid input, with 0 after -D
function parse_arguments() {
  local key
  while (( $# > 0 )); do
    key="$1"
    case "$key" in
      -c|--columnsSar)
        # An empty or missing value leaves only the timestamp column.
        _parse_arguments_set_columns sarHeaders "$2"
        shift
        ;;
      -d|--columnsSarNet)
        _parse_arguments_set_columns sarHeaders_net "$2"
        shift
        ;;
      -g|--granularity)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing granularity value"
        [[ "$2" =~ ^[0-9]{1,3}$ ]] || _parse_arguments_fail "Invalid granularity value: $2"
        granularity="$2"
        shift
        ;;
      -b|--basedir)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing TitanSim base directory value"
        [[ -d "$2" ]] || _parse_arguments_fail "TitanSim base directory does not exist: $2"
        titansimBaseDir="$2"
        # TitanSim HostController log file directory and name (used with -H, --hclog)
        titansimLogFile="$titansimBaseDir/build/titansim.$(hostname)-HC-*.log"
        # TitanSim log file directory and name (used with -f, --logfilename)
        titansimLogFiles="$titansimBaseDir/build/*-*-*.log"
        shift
        ;;
      -H|--hclog)
        pidNames="hclog"
        ;;
      -n|--noprocnames)
        pidNames="nonames"
        ;;
      -l|--hclogfile)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing hc logfile value"
        titansimLogFile="$2"
        shift
        ;;
      -f|--logfilename)
        pidNames="logfilename"
        ;;
      -E|--executable)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing executable value"
        executableName="$2"
        shift
        ;;
      -i|--interface)
        if [[ -z "$2" ]]; then
          usage >> "$nonLogOutput"
        fi
        [[ -d "/sys/class/net/$2" ]] || _parse_arguments_fail "Network interface $2 does not exist."
        nic="$2"
        shift
        ;;
      -e|--nonlogoutput)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing nonlogoutput value"
        nonLogOutput="$2"
        shift
        ;;
      -s|--sysstatlog)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing sysstatlog value"
        sysstatLogFile="$2"
        # Create or truncate the file.
        : > "$sysstatLogFile"
        shift
        ;;
      -p|--pidstatlog)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing pidstatlog value"
        pidstatLogFile="$2"
        # Create or truncate the file.
        : > "$pidstatLogFile"
        shift
        ;;
      -t|--bootlog)
        bootlog=true
        ;;
      -T|--telnetPort)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing telnet port value"
        # Only integers are meaningful; anything else used to slip through
        # with an "integer expression expected" error.
        [[ "$2" =~ ^-?[0-9]+$ ]] || _parse_arguments_fail "Invalid telnet port value: $2"
        telnetPort="$2"
        if [ "$telnetPort" -gt 0 ]; then
          printf "\nUsing telnet port for CLI: %s\n" "$2" >> "$nonLogOutput"
        else
          printf "\nNot waiting for startup\n" >> "$nonLogOutput"
        fi
        shift
        ;;
      -S|--showThreads)
        showThreads=true
        ;;
      -o|--outputfile)
        [[ -n "$2" ]] || _parse_arguments_fail "Missing output file value"
        if [[ -f "$2" ]]; then
          printf "\nOutput file exists and it will be overwritten: %s\n" "$2" >> "$nonLogOutput"
        fi
        outputfile="$2"
        shift
        ;;
      -h|--help)
        usage >> "$nonLogOutput"
        ;;
      -D|--showData)
        print_available_sar_data
        print_available_pidstat_data
        exit 0
        ;;
      *)
        _parse_arguments_fail "Invalid option found."
        ;;
    esac
    shift
  done
}
###############################################
# Print log file header
# Globals: logstart (written: start time as "YYYY-MM-DD HH:MM:SS"),
#          nonLogOutput (read: the file the two messages are appended to)
function print_all_header() {
  logstart=$(date '+%F %T')
  # Quoted, so the date and the time stay one argument whatever IFS is.
  printf "Logging started at: %s\n" "$logstart" >> "$nonLogOutput"
  printf "Press ^C to stop logging performance data\n" >> "$nonLogOutput"
  #print_table_header
}
###############################################
# Print the header line
# Globals: sarHeaders, sarHeaders_net, nic, procNames (read)
# Outputs: one tab separated line: system columns, network columns prefixed
#          with "<nic>_", the process summary columns, then one %CPU and one
#          %MEM column per process name
function print_table_header() {
  local sysPart netPart cpuPart memPart
  local summaryPart=$'timestamp\tcountPs\tcountThreads\tsum%MEM\tsum%CPU\tmax%CPU\tsumMLSim%CPU\tmaxMLSim%CPU'
  # Each format is re-applied by printf once per array element.
  printf -v sysPart '%s\t' "${sarHeaders[@]}"
  printf -v netPart "${nic}_%s\t" "${sarHeaders_net[@]}"
  printf -v cpuPart '\t%s%%CPU' "${procNames[@]}"
  printf -v memPart '\t%s%%MEM' "${procNames[@]}"
  printf '%s%s%s\t|%s\t|%s\n' "$sysPart" "$netPart" "$summaryPart" "$cpuPart" "$memPart"
}
###############################################
# Gets TitanSim's status via the Playlist script
# Globals: telnetPort (read), TS_STATUS (written: the reply lines containing "TTCN")
# Outputs: "ready to run", "almost ready" or "starting up" on stdout,
#          nothing when the reply matches none of the known patterns
function get_titansim_status() {
  local query="ds get <datadescription xmlns='http://ttcn.ericsson.se/protocolModules/xtdp/xtdl' element='ReadyToRun' source='ExecCtrl'></datadescription>"
  # The short sleep keeps the input of telnet open after the query was sent.
  TS_STATUS=$( { echo "$query"; sleep 0.1; } | telnet localhost $telnetPort 2>&1 | grep TTCN )
  # The unquoted echo folds the whole reply into a single line for awk.
  echo $TS_STATUS | awk '
    /.+\[led:green\]ReadyToRun/ { print "ready to run" }
    /.+\[led:blue\]ReadyToRun/  { print "almost ready" }
    /.+Invalid/                 { print "starting up" }
  '
}
###############################################
# Wait until TitanSim boots up
# Polls get_titansim_status until it reports "ready to run". Every status
# change is reported on $nonLogOutput. With bootlog enabled the output of
# print_sysstat_filtered is logged while waiting, otherwise the loop sleeps
# one second per round.
# Globals: telnetPort, bootlog, outputfile, sysstatLogFile, nonLogOutput (read);
#          plOutput, lastStatus, tmpSarStartupFile (written)
# Returns immediately when the telnet port is zero or negative.
function wait_for_titansim_bootup() {
  if [[ "$telnetPort" -le 0 ]]; then
    return
  fi
  # Last status that was reported; the loop below compares against this name.
  lastStatus="unknown"
  if [[ "$bootlog" == true ]]; then
    printf "Logging basic data while is TitanSim booting up...\n" >> "$nonLogOutput"
    print_table_header | tee -a "$outputfile" >> "$sysstatLogFile"
    # Only the generated name is kept, the file itself is removed again.
    tmpSarStartupFile=$(mktemp -t "$tmpTemplate")
    rm "$tmpSarStartupFile"
  else
    printf "Waiting until TitanSim boots up... " >> "$nonLogOutput"
  fi
  while [[ "$plOutput" != "ready to run" ]]; do
    plOutput=$(get_titansim_status)
    if [[ -z "$plOutput" ]]; then
      plOutput="not started"
    fi
    if [[ "$lastStatus" != "$plOutput" ]]; then
      # Quoted, so a status of several words is printed on one line.
      printf "Status: %s\n" "$plOutput" >> "$nonLogOutput"
      lastStatus="$plOutput"
    fi
    if [[ "$bootlog" == true && "$plOutput" != "ready to run" ]]; then
      print_sysstat_filtered | tee -a "$outputfile" >> "$sysstatLogFile"
      rm "$tmpSarStartupFile"
    else
      sleep 1
    fi
  done
  if [[ "$bootlog" == true ]]; then
    printf "\nStart full logging for individual processes...\n" >> "$nonLogOutput"
  else
    printf "\n" >> "$nonLogOutput"
  fi
}
###############################################
# Search HC log file in $titansimBaseDir/build/
# Globals: titansimBaseDir, nonLogOutput (read);
#          titansimLogFile (written: the literal pattern <basedir>/build/*.log,
#          it is not expanded here)
function search_HC_log() {
  local logPattern="$titansimBaseDir/build/*.log"
  printf "Searching HC logfile in %s ...\n" "$logPattern" >> "$nonLogOutput"
  titansimLogFile=$logPattern
}
###############################################
# Initialize process names and PIDs
# Depending on $pidNames the PIDs (and names) are taken from the log file
# names, from the content of the HC log, or from the ps listing of
# $executableName.
# Globals: pidNames, titansimLogFiles, titansimLogFile, showThreads,
#          nonLogOutput (read); executableName (read, replaced by the name
#          detected from the first PID); procNames, procIds (appended);
#          processPidlist (written: comma separated PIDs of the executable)
# Exits:   via ctrl_c 1 when no log file or no process is found
function gather_process_names() {
  # Split on newlines only, so the intentionally unquoted glob patterns below
  # are expanded but never split on blanks, whatever the caller's IFS is.
  local IFS=$'\n'
  local line

  # Method#1: Get PIDs and process names from the name of the log files
  if [[ "$pidNames" == "logfilename" ]]; then
    printf "Getting PID names from log file names\n" >> "$nonLogOutput"
    # The pattern is left unquoted so that it is glob-expanded.
    logFiles=$(ls -b -1 $titansimLogFiles 2>/dev/null)
    if [[ -z "$logFiles" ]]; then
      printf "\nLog files not found with pattern: %s\n" "$titansimLogFiles" >> "$nonLogOutput"
      nonLogOutput=/dev/null
      ctrl_c 1
    fi
    while IFS= read -r line; do
      [[ -n "$line" ]] || continue
      # File names look like <anything>-<component>-<pid>.log
      pid=$(printf '%s\n' "$line" | sed 's/.*-\(.*\)-\([0-9]*\).log/\2/')
      if [[ ! "$pid" =~ [0-9]{1,}$ ]]; then
        printf "Pid not found in filename: %s. File ignored\n" "$line" >> "$nonLogOutput"
        continue
      fi
      ptc=$(printf '%s\n' "$line" | sed 's/.*-\(.*\)-\([0-9]*\).log/\1/')
      procNames=("${procNames[@]}" "[$pid]$ptc")
      procIds=("${procIds[@]}" "$pid")
    done <<< "$logFiles"
  fi

  # Method#2: Get PIDs and process names from the content of HC log file
  if [[ "$pidNames" == "hclog" ]]; then
    printf "Getting PID names from HC log content: %s\n" "$titansimLogFile" >> "$nonLogOutput"
    # titansimLogFile may be a glob pattern, hence it stays unquoted.
    titansimMtcInfo=$(cat $titansimLogFile 2> /dev/null | grep "MTC was created. Process id: ")
    titansimPtcInfo=$(cat $titansimLogFile 2> /dev/null | grep "PTC was created" | grep " process id: ")
    if [[ -z "$titansimMtcInfo" && -z "$titansimPtcInfo" ]]; then
      printf "\nMTC/PTC info was not found in HC log file: %s\n" "$titansimLogFile" >> "$nonLogOutput"
      # Retry with every log file of the build directory.
      search_HC_log
      titansimMtcInfo=$(cat $titansimLogFile 2> /dev/null | grep "MTC was created. Process id: ")
      titansimPtcInfo=$(cat $titansimLogFile 2> /dev/null | grep "PTC was created" | grep " process id: ")
    fi
    if [[ -n "$titansimMtcInfo" ]]; then
      procName="MTC"
      # Several matching lines are folded into one; the greedy pattern then
      # selects the last process id.
      procId=$(sed 's/.*Process id: \([0-9]*\).*/\1/' <<< "${titansimMtcInfo//$'\n'/ }")
      procNames=("${procNames[@]}" "[$procId]$procName")
      procIds=("${procIds[@]}" "$procId")
    fi
    while IFS= read -r line; do
      [[ -n "$line" ]] || continue
      if [[ "$line" == *"component name:"* ]]; then
        procName=$(printf '%s\n' "$line" | sed 's/.*component\ name:\ \([a-zA-Z0-9_\/\.]*\),.*/\1/')
      else
        # Unnamed components: recognize a few well known ones in the line.
        procName="unknown"
        if [[ "$line" == *HostAdmin* ]]; then
          procName="EPTF_HostAdmin"
        fi
        if [[ "$line" == *MSRP_Logger_CT* ]]; then
          procName="MSRP_Logger_CT"
        fi
        if [[ "$line" == *IMS_Setup_CT* ]]; then
          procName="IMS_Setup"
        fi
      fi
      procId=$(printf '%s\n' "$line" | sed 's/.*process id: \([0-9]*\).*/\1/')
      procNames=("${procNames[@]}" "[$procId]$procName")
      procIds=("${procIds[@]}" "$procId")
    done <<< "$titansimPtcInfo"
  fi

  if [[ -n "${procIds[0]}" ]]; then
    #check process name:
    processNameFromPid=$(ps -p "${procIds[0]}" | tail -n +2 | awk '{print $4}')
    if [[ -z "$processNameFromPid" ]]; then
      printf "The process %s found in the log file is not running. The log file might be too old.\n" "${procNames[0]}" >> "$nonLogOutput"
      nonLogOutput=/dev/null
      ctrl_c 1
    fi
    printf "Detected executable name from process %s: \"%s\"\n" "${procNames[0]}" "$processNameFromPid" >> "$nonLogOutput"
    executableName=$processNameFromPid
  else
    printf "No processes found in the log files. Searching processes with name \"%s\"\n" "$executableName" >> "$nonLogOutput"
  fi

  # Comma separated list of the PIDs of all processes of the executable
  processPidlist=$(ps -C "$executableName" | tail -n +2 | awk '{if (n==0){n=1; printf "%s", $1}else {printf ",%s",$1}}')
  if [[ -z "$processPidlist" ]]; then
    printf "\nNo processes found to monitor.\n" >> "$nonLogOutput"
    nonLogOutput=/dev/null
    ctrl_c 1
  fi

  # Method#3: Get PIDs from the name of the executable. No process names just PIDs (default)
  if [[ "$pidNames" == "nonames" || -z "${procIds[0]}" ]]; then
    printf "Getting PID names from executable name: %s\n" "$executableName" >> "$nonLogOutput"
    printf "No process names will be logged just PIDs\n" >> "$nonLogOutput"
    if [[ "$showThreads" == true ]]; then
      titansimProcesses=$(ps -T -C "$executableName" | tail -n +2)
    else
      titansimProcesses=$(ps -C "$executableName" | tail -n +2)
    fi
    if [[ -z "$titansimProcesses" ]]; then
      printf "\nNo process found with name: %s\n" "$executableName" >> "$nonLogOutput"
      nonLogOutput=/dev/null
      ctrl_c 1
    fi
    while IFS= read -r line; do
      [[ -n "$line" ]] || continue
      if [[ "$showThreads" == true ]]; then
        # ps -T listing: the second column is used as ID, the fifth as name
        procId=$(printf '%s\n' "$line" | awk '{print $2}')
        procName=$(printf '%s\n' "$line" | awk '{print $5}')
      else
        procId=$(printf '%s\n' "$line" | awk '{print $1}')
        procName=$(printf '%s\n' "$line" | awk '{print $4}')
      fi
      procNames=("${procNames[@]}" "[$procId]$procName")
      procIds=("${procIds[@]}" "$procId")
    done <<< "$titansimProcesses"
  fi
}
###############################################
# Starts the sar command
# Launches sar in the background; it saves its data into a fresh temporary file.
# Globals: tmpTemplate, granularity (read);
#          tmpSarLogFile (written: the file given to 'sar -o'),
#          SAR_PID (written: PID of the background sar)
function start_limited_capture() {
  tmpSarLogFile=$(mktemp -t "$tmpTemplate")
  #start sys stat collector:
  sar -o "$tmpSarLogFile" -u -q "$granularity" >> /dev/null &
  SAR_PID=$!
}
###############################################
# Starts the pidstat command
# Runs pidstat in the foreground, filters its output through process_pidstat
# and writes the result both to a temporary file and to $pidstatLogFile.
# When the pipeline ends the script is terminated through ctrl_c.
# Globals: procNames, processPidlist, granularity, showThreads,
#          pidstatLogFile, tmpTemplate (read);
#          proclist (written: the process names joined by commas),
#          tmpPidstatLogFile (written: copy of the filtered pidstat output)
function start_capture() {
  proclist=$(IFS=,; printf '%s' "${procNames[*]}")
  tmpPidstatLogFile=$(mktemp -t "$tmpTemplate")
  #start the pidstat collector
  # proclist is quoted: entries look like "[pid]name" and are glob patterns.
  if [[ "$showThreads" == true ]]; then
    # In thread mode pidstat itself (SELF) is part of the monitored list.
    pidstat -h -t -r -u -v -p "$processPidlist,SELF" "$granularity" \
      | process_pidstat "$proclist" \
      | tee "$tmpPidstatLogFile" >> "$pidstatLogFile"
  else
    pidstat -h -r -u -v -p "$processPidlist" "$granularity" \
      | process_pidstat "$proclist" \
      | tee "$tmpPidstatLogFile" >> "$pidstatLogFile"
  fi
  ctrl_c
  exit
}
###############################################
# Select the columns from the sar data and print it in one line
# Converts the binary sar log with sadf and keeps only the selected columns.
# Globals: tmpSarLogFile, nic, tmpTemplate (read);
#          sarHeaders_columns_formatstr / sarHeaders_columns_str and
#          sarHeaders_net_columns_formatstr / sarHeaders_net_columns_str
#          (read: printf format and awk field list of the selected columns);
#          tmpsadfLogFile (written: file with the system statistics),
#          tmpsadfLogFile_nic (written: file with the statistics of $nic)
function process_sarData() {
  tmpsadfLogFile=$(mktemp -t "$tmpTemplate")
  # The format and the field list are awk source text (e.g. "%s\t%s" and
  # $3,$5), so they are spliced into the program instead of passed with -v.
  sadf -T -h -d ${tmpSarLogFile:+"$tmpSarLogFile"} -- -q -u -r -S \
    | awk -F';' "{ printf \"${sarHeaders_columns_formatstr}\\n\", ${sarHeaders_columns_str}; fflush() }" \
    > "$tmpsadfLogFile"
  tmpsadfLogFile_nic=$(mktemp -t "$tmpTemplate")
  # Keep the header lines and the records of the monitored interface only.
  sadf -T -d ${tmpSarLogFile:+"$tmpSarLogFile"} -- -n DEV \
    | grep -e "#\|;$nic;" \
    | awk -F';' "{ printf \"${sarHeaders_net_columns_formatstr}\\n\", ${sarHeaders_net_columns_str}; fflush() }" \
    > "$tmpsadfLogFile_nic"
}
###############################################
# Print available stats for processes from pidstat command
# Globals: pidstatData (written: the words of the pidstat header line)
# Outputs: a title line followed by the header words on one line
function print_available_pidstat_data() {
  printf '%s\n' "Available stats for processes: "
  # Only lines containing "#" are kept; every run of blanks becomes a line break.
  pidstatData=($(pidstat -h -r -u -v -p SELF | awk '/#/ { gsub(/[ ]+/, "\n"); print }'))
  echo ${pidstatData[@]}
}
###############################################
# Print available system stats from sar command
# Takes a one second sar sample and prints the column names sadf reports for it.
# Globals: tmpTemplate, nic (read); tmpsarDataFile (written, file is removed);
#          availableSarHeaders, availableSarHeaders_net (written)
# Outputs: the system and the network column names, each list on one line
function print_available_sar_data() {
  local headerLine
  echo "Available system stats: "
  tmpsarDataFile=$(mktemp -t "$tmpTemplate")
  sar -o "$tmpsarDataFile" -u -q 1 1 >> /dev/null
  # The first line of the sadf output is the ';' separated header. It is split
  # with read, so names like '%idle[...]' are neither glob-expanded nor
  # affected by the current IFS.
  headerLine=$(sadf -T -h -d "$tmpsarDataFile" -- -q -u -r -S | head -n 1)
  IFS=';' read -r -a availableSarHeaders <<< "$headerLine"
  headerLine=$(sadf -T -h -d "$tmpsarDataFile" -- -n DEV | head -n 1)
  IFS=';' read -r -a availableSarHeaders_net <<< "$headerLine"
  rm "$tmpsarDataFile"
  echo "${availableSarHeaders[@]}"
  echo "Available system stats for network interface $nic: "
  echo "${availableSarHeaders_net[@]}"
}
###############################################
# Select the columns of the system stats to collect from sar data
# Takes a one second sar sample, reads the column names sadf reports for it
# and maps every requested column to its awk field ("$<n>"); a requested
# column that is not available is mapped to the constant "-".
# Globals: sarHeaders, sarHeaders_net, tmpTemplate (read);
#          tmpsarDataFile (written, file is removed);
#          availableSarHeaders, availableSarHeaders_net (written);
#          sarHeaders_columns, sarHeaders_net_columns (written: awk fields);
#          sarHeaders_columns_str, sarHeaders_net_columns_str
#            (written: the fields joined by commas);
#          sarHeaders_columns_formatstr, sarHeaders_net_columns_formatstr
#            (written: one "%s" per column joined by "\t")
function detect_columns_in_sar_data() {
  local headerLine i j a
  tmpsarDataFile=$(mktemp -t "$tmpTemplate")
  sar -o "$tmpsarDataFile" -u -q 1 1 >> /dev/null
  # Split the ';' separated header with read: array index n then always
  # belongs to awk field n+1, names like '%idle[...]' are not glob-expanded
  # and the result does not depend on the current IFS.
  headerLine=$(sadf -T -h -d "$tmpsarDataFile" -- -q -u -r -S | head -n 1)
  IFS=';' read -r -a availableSarHeaders <<< "$headerLine"
  headerLine=$(sadf -T -h -d "$tmpsarDataFile" -- -n DEV | head -n 1)
  IFS=';' read -r -a availableSarHeaders_net <<< "$headerLine"
  rm "$tmpsarDataFile"

  #select columns to capture:
  for i in "${!sarHeaders[@]}"; do
    # unknown by default:
    sarHeaders_columns[i]='"-"'
    for j in "${!availableSarHeaders[@]}"; do
      if [[ "${sarHeaders[i]}" == "${availableSarHeaders[j]}" ]]; then
        sarHeaders_columns[i]="\$$((j + 1))"
        break
      fi
    done
  done

  #select columns to capture from net data:
  for i in "${!sarHeaders_net[@]}"; do
    # unknown by default:
    sarHeaders_net_columns[i]='"-"'
    for j in "${!availableSarHeaders_net[@]}"; do
      if [[ "${sarHeaders_net[i]}" == "${availableSarHeaders_net[j]}" ]]; then
        sarHeaders_net_columns[i]="\$$((j + 1))"
        break
      fi
    done
  done

  # Field lists and printf formats that are spliced into awk programs later.
  sarHeaders_columns_str=$(IFS=,; printf "%s" "${sarHeaders_columns[*]}")
  sarHeaders_columns_formatstr=$(a=$(IFS=","; echo "${sarHeaders_columns[*]/*/%s}"); echo "${a//,/\\t}")
  sarHeaders_net_columns_str=$(IFS=,; printf "%s" "${sarHeaders_net_columns[*]}")
  sarHeaders_net_columns_formatstr=$(a=$(IFS=","; echo "${sarHeaders_net_columns[*]/*/%s}"); echo "${a//,/\\t}")
}
###############################################
# Select the columns of the process stats to collect from pidstat data
# Globals: pidstatData (written: the words of the pidstat header line);
#          pidID_pidstat, cpuID_pidstat, memID_pidstat, cmdID_pidstat
#          (written: position of the PID, %CPU, %MEM and Command words in
#          pidstatData; the leading "#" occupies position 0)
function detect_columns_in_pidstat_data() {
  pidstatData=($(pidstat -h -r -u -v -p SELF | grep "#" | awk '{gsub(/[ ]+/,"\n"); print;}'))
  for i in "${!pidstatData[@]}"; do
    case "${pidstatData[i]}" in
      "PID")     pidID_pidstat=$i ;;
      "%CPU")    cpuID_pidstat=$i ;;
      "%MEM")    memID_pidstat=$i ;;
      "Command") cmdID_pidstat=$i ;;
    esac
  done
}
###############################################
# Same as detect_columns_in_pidstat_data but with threads monitoring
# Globals: pidstatData (written: the words of the 'pidstat -t' header line);
#          tidID_pidstat_t, tgidID_pidstat_t, cpuID_pidstat_t,
#          memID_pidstat_t, cmdID_pidstat_t (written: position of the TID,
#          TGID, %CPU, %MEM and Command words in pidstatData; the leading "#"
#          occupies position 0)
function detect_columns_in_pidstat_t_data() {
  pidstatData=($(pidstat -h -t -r -u -v -p SELF | grep "#" | awk '{gsub(/[ ]+/,"\n"); print;}'))
  for i in "${!pidstatData[@]}"; do
    case "${pidstatData[i]}" in
      "TID")     tidID_pidstat_t=$i ;;
      "TGID")    tgidID_pidstat_t=$i ;;
      "%CPU")    cpuID_pidstat_t=$i ;;
      "%MEM")    memID_pidstat_t=$i ;;
      "Command") cmdID_pidstat_t=$i ;;
    esac
  done
}
###############################################
# Select and print the measured values for process statistics
# Filters the output of "pidstat -h" read from stdin: one header line is
# printed first, then one line per measurement period with the sums, the
# maxima and the per process %CPU and %MEM values.
# Arguments: $1 - comma separated "[pid]name" list of the monitored processes
# Globals:   executableName and the column positions found by
#            detect_columns_in_pidstat_data / detect_columns_in_pidstat_t_data (read)
function process_pidstat() {
  awk -v proclist="$1" -v executableName="$executableName" \
    -v pidID="$pidID_pidstat" -v cpuID="$cpuID_pidstat" -v memID="$memID_pidstat" -v cmdID="$cmdID_pidstat" \
    -v tidID="$tidID_pidstat_t" -v tgidID="$tgidID_pidstat_t" -v cpuID_t="$cpuID_pidstat_t" -v memID_t="$memID_pidstat_t" -v cmdID_t="$cmdID_pidstat_t" \
    '
    # Returns the epoch seconds s formatted as "YYYY-MM-DD HH:MM:SS".
    function hms(s)
    {
      return strftime("%F %H:%M:%S", s)
    }
    BEGIN {
      start=0;
      #pids are indexed from 1!
      split("",pids)
      nofPids=split(proclist,procNames,",")
      for (i=1; i<=nofPids; i++) {
        # "[pid]name" -> pid
        split(procNames[i],tmp,"]")
        pids[i]=substr(tmp[1],2)
      }
      useThreads=0
    }
    /TID/ {
      # when TID is in the header, pidstat is running with -t option: thread mode
      if ($(tidID+1)=="TID") {
        useThreads=1;
      }
    }
    /pidstat/ {
      #have to kill pidstat if countPS==0 (it sometimes does not stop when the processes have terminated)
      if (countPs==0 && start==1) {
        pidstatpid=(useThreads==0) ? $pidID : $tidID
        if (pidstatpid>0) {
          command="kill -INT " pidstatpid
          command | getline result
          close(command)
        }
      }
      next
    }
    /^$/ {
      # An empty line closes a measurement period.
      #print the header:
      if (start==0 && timestamp!=0) {
        printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", "timestamp", "countPs", "countThreads", "sum%MEM", "sum%CPU", "max%CPU", "sumMLSim%CPU", "maxMLSim%CPU"
        printf "|\t"
        for (i=1; i<=nofPids;i++) {
          # A literal percent sign has to be written as %% in a printf format.
          printf("%s%%CPU\t", procNames[i])
        }
        printf "|\t"
        for (i=1; i<=nofPids;i++) {
          printf("%s%%MEM\t", procNames[i])
        }
        printf "\n"
        fflush()
        start=1
      }
      #print the data:
      if (start==1) {
        command="ps -C "executableName" | tail -n +2 | wc -l"
        command | getline countPs
        close(command)
        command="ps -T -C "executableName" | tail -n +2 | wc -l"
        command | getline countThreads
        close(command)
        printf "%s\t%d\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t", hms(timestamp), countPs, countThreads, summem, sumcpu, maxcpu, sumMLSimcpu, maxMLSimcpu
        printf "|\t"
        for (i=1; i<=nofPids;i++) {
          printf "%s\t", cpus[pids[i]]
        }
        printf "|\t"
        for (i=1; i<=nofPids;i++) {
          printf "%s\t", mems[pids[i]]
        }
        printf "\n"
        fflush()
        # reset the accumulators for the next period
        n=0
        sumcpu=0
        maxcpu=0
        summem=0
        sumMLSimcpu=0
        maxMLSimcpu=0
        split("",cpus)
        split("",mems)
      }
    }
    /^ / {
      # data line of a process or thread
      if (useThreads==1) {
        TID=$tidID
        if (TID==0) {
          next; #ignore the overall counters for the process
        }
        timestamp=$1
        TGID=$tgidID
        cpu=$cpuID_t
        mem=$memID_t
        cmd=$cmdID_t
        pid=(TID==0) ? TGID : TID
      } else {
        timestamp=$1
        pid=$pidID
        cpu=$cpuID
        mem=$memID
        cmd=$cmdID
      }
      if (proclist=="") {
        # no list was given: collect the PIDs in the order they are seen
        pids[++n]=pid
        nofPids=n
      }
      cpus[pid]=cpu
      if(maxcpu==0) {
        maxcpu=cpu
      } else {
        maxcpu = (maxcpu > cpu) ? maxcpu : cpu
      }
      sumcpu=sumcpu+cpu
      mems[pid]=mem
      summem=summem+mem
      if (index(cmd, "ML_")!=0) {
        #mlsim process
        sumMLSimcpu=sumMLSimcpu+cpu
        if(maxMLSimcpu==0) {
          maxMLSimcpu=cpu
        } else {
          maxMLSimcpu = (maxMLSimcpu > cpu) ? maxMLSimcpu : cpu
        }
      }
    }'
}
###############################################
# Merge the temporary logs column-wise, so that
#   output line N = line N of file 1 + TAB + line N of file 2 + TAB + ...
#
# Globals (read): tmpsadfLogFile, tmpsadfLogFile_nic, tmpPidstatLogFile
# Outputs:        the table header (printed by print_table_header) followed by
#                 the merged data rows, on stdout.
#
# A line whose first field is "timestamp" is taken as the header line of an
# input file: it marks the start of the next file and restarts the row index.
# The first file defines the rows; once a later file reaches a row index for
# which the first file has no (non-empty) line, the rest of that file is
# dropped.
function merge_outputs() {
  print_table_header
  awk '
    {
      if ($1 == "timestamp") {
        # header line: a new input file starts here
        row = 0
        fileCount++
      }
      if (fileCount == 1) {
        merged[row] = $0
      } else {
        if (merged[row] == "") {
          # nothing to append to; row is intentionally not advanced
          next
        }
        merged[row] = sprintf("%s\t%s", merged[row], $0)
      }
      row++
    }
    END {
      # index 0 holds the joined header lines of the input files; it is
      # skipped because print_table_header has already printed the header
      for (i = 1; merged[i] != ""; i++) {
        printf "%s\n", merged[i]
      }
      fflush()
    }' "$tmpsadfLogFile" "$tmpsadfLogFile_nic" "$tmpPidstatLogFile"
}
###############################################
# Prints system statistics during process monitoring, full log.
#
# Starts 'sar -q -u -r -S -n DEV <granularity>' in the background and pipes
# its output through an awk filter that joins all lines collected for one
# time stamp into a single TAB separated row, appended to $sysstatLogFile.
# The joined header row is written once, before the first data row.
#
# Globals (read):    granularity, sysstatLogFile
# Globals (written): SAR_SYSLOGGER_PID
#
# NOTE(review): the date prefix of every row is evaluated once, when the
# logger is started, so it does not follow a change of day.
# NOTE(review): 'jobs -p' reports every background job of the shell; this
# presumably relies on the logger being the only one when called - confirm.
function print_sysstat_during_execution_full() {
  sar -q -u -r -S -n DEV "$granularity" | awk -v date="$(date +%F)" '
    BEGIN {
      headerIsPrinted=0
      headerTime=""
      dataTime=""
      headerRow=""
      dataRow=""
      newdata=0
    }
    # lines of two or more characters starting with a non-digit are ignored
    /^[^0-9]+.+/ {
      next
    }
    # a blank line announces the next block of lines
    /^$/ {
      newdata=1
      next
    }
    /.+/ {
      if (newdata==1 && dataTime == $1) {
        # the first line after a blank line carries the time stamp of the
        # data collected so far: the series is complete, so emit it and
        # use this time stamp to recognise header lines from now on
        if (!headerIsPrinted) {
          printf "%s\n", headerRow
          headerIsPrinted=1
        }
        printf "%s\n", dataRow
        dataRow=""
        dataTime=""
        headerTime=$1
        fflush();
      }
      if (newdata==1) {
        newdata=0
      }
      if (headerTime=="") {
        headerTime=$1
      }
      if ($1 == headerTime) {
        # header line: remember it (TAB separated, time replaced by the
        # word timestamp) until the header row has been printed
        if (!headerIsPrinted) {
          gsub(/[ ]+/,"\t")
          gsub(headerTime,"timestamp")
          currentHeader=$0
        }
        next
      }
      # data line: extend the header row by the header of this block ...
      if (!headerIsPrinted) {
        if (headerRow=="") {
          headerRow=currentHeader
        } else {
          headerRow=sprintf("%s\t%s", headerRow, currentHeader)
        }
      }
      if (dataTime == "") {
        dataTime=$1
      }
      # ... and the data row by the date followed by the TAB separated line
      gsub(/[ ]+/,"\t")
      if (dataRow=="") {
        dataRow=sprintf("%s %s", date, $0)
      } else {
        dataRow=sprintf("%s\t%s %s", dataRow, date, $0)
      }
    }
  ' >> "$sysstatLogFile" &
  SAR_SYSLOGGER_PID=$(jobs -p)
}
###############################################
# Prints system statistics, filtered log: records one sar sample and prints
# the selected columns of it as one TAB separated row on stdout.
#
# Globals (read):    tmpSarStartupFile, granularity, nic, executableName,
#                    sarHeaders_columns_formatstr, sarHeaders_columns_str,
#                    sarHeaders_net_columns_formatstr,
#                    sarHeaders_net_columns_str
# Globals (written): TMP2
#
# The sarHeaders_* variables are pasted into the awk programs as source text
# (a printf format and its argument list). They are expanded inside double
# quotes so that whitespace or glob characters in them cannot split the awk
# program into several shell words.
function print_sysstat_filtered() {
  # record one sample of $granularity seconds into the sar data file
  sar -o "$tmpSarStartupFile" -u -q "$granularity" 1 >> /dev/null

  # read the sample back; retried until sadf delivers at least one data line
  TMP2=""
  until [ -n "$TMP2" ]; do
    TMP2=$(sadf -T -h -d "$tmpSarStartupFile" -- -q -u -r -S | tail -n +2)
  done

  # first part of the row: the selected system columns (no line end yet)
  printf '%s\n' "$TMP2" | awk '
    BEGIN {
      FS=";"
    }
    {
      printf "'"$sarHeaders_columns_formatstr"'\t", '"$sarHeaders_columns_str"'
      fflush()
    }
  '

  # append stats for the $nic interface, the time stamp and the current
  # number of processes and threads of $executableName, then end the row
  sadf -T -d "$tmpSarStartupFile" -- -n DEV \
    | grep -e "#\|;$nic;" \
    | tail -n +2 \
    | awk -v executableName="$executableName" '
    BEGIN {
      FS=";"
    }
    {
      command="ps -C "executableName" | tail -n +2 | wc -l"
      command | getline countPs
      close(command)
      command="ps -T -C "executableName" | tail -n +2 | wc -l"
      command | getline countThreads
      close(command)
      printf "'"$sarHeaders_net_columns_formatstr"'\t%s\t%s\t%s\n", '"$sarHeaders_net_columns_str"',$3,countPs,countThreads
      fflush()
    }
  '
}
###############################################
# Prints system statistics during process monitoring, filtered log.
#
# Writes the table header to $sysstatLogFile when $bootlog evaluates to false
# or $telnetPort is not positive, then starts a background loop that keeps
# appending one row per print_sysstat_filtered call to $sysstatLogFile.
#
# Globals (read):    tmpTemplate, bootlog, telnetPort, sysstatLogFile
# Globals (written): tmpSarStartupFile  - sar data file used by the loop
#                    SAR_SYSLOGGER_PID  - PID of the background loop
function print_sysstat_during_execution_filtered() {
  tmpSarStartupFile=$(mktemp -t "$tmpTemplate")

  # $bootlog is run as a command (e.g. true/false), so it is intentionally
  # left unquoted: an empty value must stay an empty command.
  if ! $bootlog || [ "$telnetPort" -le 0 ]; then
    print_table_header >> "$sysstatLogFile"
  fi

  # endless sampling loop; it is stopped from outside via SAR_SYSLOGGER_PID
  while true; do
    print_sysstat_filtered >> "$sysstatLogFile"
    # the data file is removed after every sample
    rm -- "$tmpSarStartupFile"
  done &
  SAR_SYSLOGGER_PID=$!
}
###############################################
# MAIN PART
###############################################
# Set-up: check the tooling, load the defaults, read the command line and
# detect the column layout of the sar / pidstat outputs.
chk_sysstat_version
set_defaults
parse_arguments "$@"
detect_columns_in_sar_data
detect_columns_in_pidstat_data
detect_columns_in_pidstat_t_data

# The selected network interface must exist, otherwise report it and hand
# over to ctrl_c (defined elsewhere; presumably it cleans up and exits).
if [ ! -d "/sys/class/net/$nic" ]; then
  printf "\nNetwork interface %s does not exist.\n" "$nic" >> "$nonLogOutput"
  nonLogOutput=/dev/null
  ctrl_c 1
fi

trap ctrl_c INT TERM

# create or truncate the output file
printf "" > "$outputfile"

#start_limited_capture
wait_for_titansim_bootup

# the system statistics logger is only started when it logs into a file
if [[ "$sysstatLogFile" != "/dev/stdout" ]]; then
  print_sysstat_during_execution_filtered
  #print_sysstat_during_execution_full
fi

gather_process_names
print_all_header >> "$pidstatLogFile"
start_limited_capture
start_capture