====== Nagios plugin to get total size of Deleted-Open files ======
This Nagios plugin uses the ''lsof'' command to add up the sizes of all open files that have been deleted. It alerts if the total reaches the specified warning or critical limit.
E.g. to report a warning if the total size of deleted files reaches or exceeds 5M, and a critical if it reaches or exceeds 10M:
/usr/bin/sudo /usr/lib/nagios/plugins/check_deleted_lsof -w 5000000 -c 10000000
Source
#!/bin/bash
#
# Check total size of Deleted-Open files for Nagios
# Written by Senthil Nathan
# Last Modified: Oct 23rd 2015
#
# Usage: ./check_deleted_lsof -w Size Warn -c Size Critical
#
# Description: Check total size of open deleted files
#
# This plugin is to check the total size of all open and deleted files
#
# Output:
#
# Deleted-Open files Size is OK/Warning/Critical|'Deleted-Open files size'=xxxxxxB;nnnnnn;mmmmmm;0
#
# Examples:
#
# Warn if total deleted open files are >= 10M
# Critical if total deleted open files are >= 20M
# check_deleted_lsof -w 10000000 -c 20000000
#
#
# Script name, used in the usage and version output.
PROGNAME=${0##*/}
# Directory holding this script. dirname yields "." when $0 contains no
# slash, so the utils.sh lookup below always gets a directory rather than
# the script's own file name.
PROGPATH=$(dirname -- "$0")
REVISION="1.0"
# Standard Nagios plugin return codes. They are set here as a fallback so
# that the exit statements in this script never expand to an empty value;
# utils.sh overrides them when it can be loaded.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4
if [ -r "$PROGPATH/utils.sh" ]; then
  # shellcheck source=/dev/null
  . "$PROGPATH/utils.sh"
fi
# Abort with the Nagios UNKNOWN status unless the script runs as root.
# Globals:   STATE_UNKNOWN (read; 3 is used when it is unset)
# Outputs:   an UNKNOWN message on stdout when the check fails
# Returns:   0 when the effective UID is 0; otherwise exits the script
check_root() {
  local uid
  # A failed lookup leaves uid empty, which is treated as "not root"
  # instead of silently letting the check pass.
  uid=$(id -u 2>/dev/null) || uid=""
  if [ "$uid" != "0" ]; then
    echo "UNKNOWN: please make sure script is running as root"
    exit "${STATE_UNKNOWN:-3}"
  fi
}
# Print the supported invocations to stdout, one per line.
# Globals:   PROGNAME (read) - command name shown in each line
# The first line names the values that -w and -c require: the warning and
# critical thresholds, both given in bytes.
print_usage() {
  echo "Usage: $PROGNAME -w <warn_bytes> -c <crit_bytes>"
  echo "Usage: $PROGNAME --help"
  echo "Usage: $PROGNAME --version"
}
# Write the program name and version to stdout as two "Label: value" lines.
# Globals:   PROGNAME (read), REVISION (read)
print_revision() {
  # printf reapplies the format to each label/value pair, giving one line
  # per pair.
  printf '%s: %s\n' "Program" "$PROGNAME" "Version" "$REVISION"
}
# Print the full help text to stdout: revision details, a blank line, the
# usage lines, a blank line, then a one-line description followed by a
# trailing blank line.
print_help() {
  print_revision
  printf '\n'
  print_usage
  printf '\n%s\n\n' "Check total size of Deleted-open files for Nagios"
}
# Check user is root
check_root

# At least one command line argument is required.
if [ "$#" -lt 1 ]; then
  print_usage
  exit "${STATE_UNKNOWN:-3}"
fi

# Grab the command line arguments.
# Start from empty thresholds so values cannot leak in from the
# environment of the calling process.
thewarn=""
thecrit=""
# Parsing stops at the first empty argument. An option that takes a value
# consumes two positions: one shift in its arm, one at the end of the loop.
while [ -n "$1" ]; do
  case "$1" in
    --help | -h)
      print_help
      exit "${STATE_OK:-0}"
      ;;
    --version | -V)
      print_revision
      exit "${STATE_OK:-0}"
      ;;
    --warning | -w)
      thewarn=$2
      shift
      ;;
    --critical | -c)
      thecrit=$2
      shift
      ;;
    *)
      echo "Unknown argument: $1"
      print_usage
      exit "${STATE_UNKNOWN:-3}"
      ;;
  esac
  shift
done

# Validate arguments: both thresholds must have been supplied ...
if [ -z "$thecrit" ] || [ -z "$thewarn" ]; then
  print_usage
  exit "${STATE_UNKNOWN:-3}"
fi
# ... and both must be plain non-negative integers, because they are later
# used in numeric comparisons against a byte count.
for threshold in "$thewarn" "$thecrit"; do
  case "$threshold" in
    *[!0-9]*)
      echo "UNKNOWN: warning and critical thresholds must be non-negative integers"
      print_usage
      exit "${STATE_UNKNOWN:-3}"
      ;;
  esac
done
# Check begins here
#
# Add up field 8 of every lsof output row that contains "(deleted)".
# NOTE(review): field 8 is taken to be the SIZE/OFF column, exactly as the
# original check did; which whitespace-separated field holds the size can
# depend on the lsof version and output layout - confirm on target hosts.
totaldeleted=$(
  lsof | awk '
    index($0, "(deleted)") { sum += $8 }
    END {
      # No input at all means lsof is missing or failed outright.
      if (NR == 0) exit 1
      # Some awk implementations print large sums in exponent notation
      # with a plain print; %.0f always yields plain digits.
      printf "%.0f\n", sum
    }'
)
lsof_status=$?
# Anything other than a plain digit string cannot be compared below.
case "$totaldeleted" in
  '' | *[!0-9]*) lsof_status=1 ;;
esac
if [ "$lsof_status" -ne 0 ]; then
  echo "Deleted-Open files Check Error"
  exit "${STATE_UNKNOWN:-3}"
fi
#
# Performance data shared by all three result lines:
# value in bytes; warning threshold; critical threshold; minimum.
perfdata="'Deleted-Open files size'=${totaldeleted}B;${thewarn};${thecrit};0"
if [ "$totaldeleted" -ge "$thecrit" ]; then
  echo "Deleted-Open files Size is Critical|${perfdata}"
  exit "${STATE_CRITICAL:-2}"
fi
if [ "$totaldeleted" -ge "$thewarn" ]; then
  echo "Deleted-Open files Size is Warning|${perfdata}"
  exit "${STATE_WARNING:-1}"
fi
if [ "$totaldeleted" -lt "$thewarn" ]; then
  echo "Deleted-Open files Size is OK|${perfdata}"
  exit "${STATE_OK:-0}"
fi
#
# Only reached when none of the comparisons above could be evaluated,
# e.g. because a threshold is not a valid integer.
echo "Deleted-Open files Check Unknown"
exit "${STATE_UNKNOWN:-3}"
When setting up services, set the normal and retry check intervals to large values, such as those below:
normal_check_interval 60
retry_check_interval 60
max_check_attempts 5
With the settings above, an alert is only raised if the size of Deleted-Open files stays over the limits for 4 or more hours. The intervals are set high because processes routinely create Deleted-Open files but usually release them within a few hours.