nagios-scripts/check-webcron.sh
Jeroen De Meerleer 253e922f99
Refactor check-webcron.sh to improve error messages
The commit refactors the check-webcron.sh script to improve error messages. Specifically, it changes "URL UNKNOWN" to "WEBCRON UNKNOWN" and adds that prefix to all related error messages.
2023-05-31 17:14:43 +02:00

138 lines
4.0 KiB
Bash

#!/bin/bash
#
#/
#/ Usage:
#/ check-webcron.sh [--proxy=<proxy-url>] [--daemon] [--warning=<ratio>] [--critical=<ratio>] <URL>
#/
#/ Checks webcron jobs
#/
#/ Options:
#/ -p, --proxy=<proxy-url> The proxy server to use
#/ -d, --daemon When no daemon is running trigger a critical warning
#/ -w, --warning=<ratio> The ratio of failed jobs to trigger a warning (1 = all jobs; 0.5 = half of all jobs)
#/ -c, --critical=<ratio> The ratio of failed jobs to trigger a critical warning (1 = all jobs; 0.5 = half of all jobs)
#/
#/ Exit Codes:
#/ 0 Everything OK
#/ 1 Warning level exceeded
#/ 2 Critical level exceeded
#/ 3 Unknown status
#/
#######################################
# Print the help text embedded in this script.
# Every line of the script file that starts with "#/" is written to stdout
# with that marker removed.
# Globals:
#   script_dir  (read) - directory containing the script
#   script_name (read) - file name of the script
# Outputs:
#   the help text on stdout
#######################################
Usage() {
  local self="${script_dir}/${script_name}"

  # Stage 1 selects the "#/" help lines, stage 2 strips the marker itself.
  grep -- '^#/' "${self}" \
    | sed -e 's/^#\/\w*//'
}
#######################################
# Parse the command line and store the results in the option globals.
# Globals:
#   proxy (written) - argument of -p/--proxy
#   warn  (written) - argument of -w/--warning
#   crit  (written) - argument of -c/--critical
#   url   (written) - "<URL>/health", built from the first positional argument
# Arguments:
#   the script's command line ("$@")
# Outputs:
#   a "WEBCRON UNKNOWN - ..." status line on stdout when the input is invalid
#   (followed by the usage text when getopt rejects the options)
# Exits:
#   3 (UNKNOWN) on invalid options or a missing/empty URL
#######################################
GetOptions() {
  # Option parsing follows https://stackoverflow.com/a/29754866
  local short_opts='p:dw:c:'
  local long_opts='proxy:,daemon,warning:,critical:'
  local parsed

  # Declaration and assignment are split so that getopt's exit status is not
  # masked by "local". getopt reports the offending option itself on stderr.
  if ! parsed=$(getopt --options="${short_opts}" --longoptions="${long_opts}" \
      --name "$0" -- "$@"); then
    # Bad arguments are a plugin usage problem, not a failing service:
    # report UNKNOWN (3) with a status line first, like every other
    # argument error in this function.
    echo "WEBCRON UNKNOWN - invalid arguments"
    Usage
    exit 3
  fi

  # getopt emits a shell-quoted argument list; eval is the documented way to
  # load it back into the positional parameters with the quoting intact.
  eval set -- "${parsed}"

  # Consume the normalised options in order until the "--" separator.
  while true; do
    case "$1" in
      -p|--proxy)
        proxy="$2"
        shift 2
        ;;
      -d|--daemon)
        # Accepted, but no variable is set here: the flag has no effect on
        # the values this function produces.
        shift 1
        ;;
      -w|--warning)
        warn="$2"
        shift 2
        ;;
      -c|--critical)
        crit="$2"
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        echo "WEBCRON UNKNOWN - ${1} is not a valid parameter"
        exit 3
        ;;
    esac
  done

  # The first remaining argument is the base URL; an empty string is treated
  # like a missing one, otherwise the check would silently query "/health".
  if [[ $# -eq 0 || -z "$1" ]]; then
    echo "WEBCRON UNKNOWN - url not given"
    exit 3
  fi
  url="${1}/health"
}
# ---------------------------------------------------------------------------
# Main flow: fetch <URL>/health once, derive the Nagios state from the job
# counters it reports, then print one status line with performance data and
# exit with the matching plugin exit code.
# ---------------------------------------------------------------------------

# Exported so that the child processes doing the arithmetic (bc, awk) inherit
# the setting as well.
export LC_NUMERIC="C"

# Default thresholds (ratio of failing jobs); GetOptions may overwrite them.
warn=0.05
crit=0.1
script_name=$(basename "${0}")
script_dir=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
url=''

GetOptions "$@"

# The thresholds are interpolated into bc expressions further down, so
# anything that is not a plain non-negative decimal number is rejected here.
threshold_re='^([0-9]+(\.[0-9]+)?|\.[0-9]+)$'
for threshold in "$warn" "$crit"; do
  if [[ ! "$threshold" =~ $threshold_re ]]; then
    echo "WEBCRON UNKNOWN - ${threshold} is not a valid ratio"
    exit 3
  fi
done

# One request delivers both the body and, appended on a line of its own by
# --write-out, the HTTP status code. This keeps code and body consistent
# (both follow redirects) and removes the need to eval curl's output.
curl_args=(-s -L -w '\n%{response_code}')
if [[ -n "${proxy:-}" ]]; then
  curl_args+=(--proxy "$proxy")
fi
response=$(curl "${curl_args[@]}" "$url")
RESPONSE_CODE=${response##*$'\n'}   # text after the last newline
health=${response%$'\n'*}           # everything before it
# curl reports 000 when no HTTP response was received; use the same marker
# when no usable status code came back at all.
if [[ ! "$RESPONSE_CODE" =~ ^[0-9]{3}$ ]]; then
  RESPONSE_CODE=000
fi

# jq's stderr is discarded on purpose: a body that is not valid JSON simply
# leaves the fields empty, which is handled explicitly below.
daemonrunning=$(jq '.DaemonRunning' <<<"$health" 2>/dev/null)
jobstotal=$(jq '.JobsTotal' <<<"$health" 2>/dev/null)
jobsdue=$(jq '.JobsDue' <<<"$health" 2>/dev/null)
jobsrunning=$(jq '.JobsRunning' <<<"$health" 2>/dev/null)
jobsfailing=$(jq '.JobsFailing' <<<"$health" 2>/dev/null)

# Counters that are missing or not non-negative integers are reset to 0 so the
# arithmetic and the performance data stay well-formed; the problem itself is
# remembered and reported as UNKNOWN below.
counters_valid=true
for counter in jobstotal jobsdue jobsrunning jobsfailing; do
  if [[ ! "${!counter}" =~ ^[0-9]+$ ]]; then
    printf -v "$counter" '%s' 0
    counters_valid=false
  fi
done

# Ratio of failing jobs. Without any jobs nothing can be failing, and the
# division by zero must be avoided.
if (( jobstotal > 0 )); then
  val=$(echo "$jobsfailing/$jobstotal" | bc -l)
else
  val=0
fi

# Decide the state; the first matching condition wins.
rdetail=""
if [[ "$RESPONSE_CODE" != "200" ]]; then
  rval=2
  rmsg="CRITICAL"
  rdetail="$url was not available"
elif [[ "$daemonrunning" != "true" ]]; then
  rval=2
  rmsg="CRITICAL"
  rdetail="Daemon is not running"
elif [[ "$counters_valid" != "true" ]]; then
  rval=3
  rmsg="UNKNOWN"
  rdetail="Health response contains invalid job counters"
elif (( jobsdue > 0 )); then
  rval=1
  rmsg="WARNING"
  rdetail="You have jobs to be run"
elif (( $(echo "$val < $warn" | bc -l) )); then
  rval=0
  rmsg="OK"
elif (( $(echo "$val < $crit" | bc -l) )); then
  rval=1
  rmsg="WARNING"
else
  rval=2
  rmsg="CRITICAL"
fi

# Performance data for the failure ratio ("val").
unitval=''
warnval=$warn
critval=$crit
minval=0
maxval=1

# Performance data for the absolute number of failing jobs ("failing"); the
# ratio thresholds are converted to job counts, rounded to the nearest integer.
unitfail='jobs'
warnfail=$(echo "${jobstotal}*${warn}" | bc -l | awk '{ print int($1+0.5) }')
critfail=$(echo "${jobstotal}*${crit}" | bc -l | awk '{ print int($1+0.5) }')
minfail=0
maxfail=$jobstotal

echo "WEBCRON ${rmsg} - ${rdetail:+"$rdetail - "}Response code: ${RESPONSE_CODE} Jobs Total: ${jobstotal} Jobs Due: ${jobsdue} Jobs Running: ${jobsrunning} Jobs Failing: ${jobsfailing}|val=${val}${unitval};$warnval;$critval;$minval;$maxval failing=${jobsfailing}${unitfail};$warnfail;$critfail;$minfail;$maxfail "
exit "$rval"