Table of Contents

Nagios plugin to get total size of Deleted-Open files

This Nagios plugin uses the lsof command to add up the sizes of all files that have been deleted but are still held open. It alerts if the total size exceeds the specified warning and critical limits.

E.g. to report a warning if the total size of deleted files reaches 5M and a critical if it reaches 10M:

/usr/bin/sudo /usr/lib/nagios/plugins/check_deleted_lsof -w 5000000 -c 10000000

Source

check_deleted_lsof
#!/bin/bash
#
# Check total size of Deleted-Open files for Nagios
# Written by Senthil Nathan
# Last Modified: Oct 23rd 2015
#
# Usage: ./check_deleted_lsof -w <warning bytes> -c <critical bytes>
#
# Description: Check total size of open deleted files
#
# This plugin is to check the total size of all open and deleted files
#
# Output:
#
#  Deleted-Open files Size is OK/Warning/Critical|'Deleted-Open files size'=xxxxxxB;nnnnnn;mmmmmm;0
#
# Examples:
#
#   Warn if total deleted open files are >= 10M
#   Critical if total deleted open files are >= 20M
#   check_deleted_lsof -w 10000000 -c 20000000
#
#
 
# Script name (used in usage output) and the directory the script lives in.
# Parameter expansion replaces basename/sed so that paths containing spaces
# work, and a slash-less $0 (e.g. "bash check_deleted_lsof") resolves to "."
# instead of being mistaken for a directory.
PROGNAME=${0##*/}
case "$0" in
    */*) PROGPATH=${0%/*} ;;
    *)   PROGPATH=. ;;
esac
# A script sitting directly in "/" leaves an empty prefix after stripping.
[ -n "$PROGPATH" ] || PROGPATH=/
REVISION="1.0"

# Pull in the STATE_* exit codes shipped next to the plugin, when present.
if [ -r "$PROGPATH/utils.sh" ]; then
    . "$PROGPATH/utils.sh"
fi

# Fall back to the standard Nagios plugin return codes for anything utils.sh
# did not provide. Without this, an unset STATE_* turns "exit $STATE_X" into a
# bare "exit", which returns the status of the preceding echo (0 = OK).
: "${STATE_OK:=0}" "${STATE_WARNING:=1}" "${STATE_CRITICAL:=2}"
: "${STATE_UNKNOWN:=3}" "${STATE_DEPENDENT:=4}"
 
# check_root
# Exits with the Nagios UNKNOWN status unless the script runs as root.
# Globals:  STATE_UNKNOWN (read; defaults to 3 when unset)
# Outputs:  an UNKNOWN message on stdout when not running as root
# Returns:  0 when running as root; otherwise exits the shell
check_root()
{
    local current_user
    # Quote the result: if whoami prints nothing, an unquoted comparison
    # becomes a test syntax error and the check would silently pass.
    current_user=$(whoami 2>/dev/null)
    if [ "$current_user" != "root" ]; then
        echo "UNKNOWN: please make sure script is running as root"
        # Default the code so a missing utils.sh cannot turn this into exit 0.
        exit "${STATE_UNKNOWN:-3}"
    fi
}
 
# print_usage
# Writes one "Usage:" line to stdout for each supported invocation form,
# prefixed with the script name held in PROGNAME.
print_usage() {
    local form
    for form in "-w <warning bytes> -c <critical bytes>" "--help" "--version"; do
        echo "Usage: $PROGNAME $form"
    done
}
 
# print_revision
# Writes the program name (PROGNAME) and version (REVISION) to stdout,
# one per line.
print_revision() {
    printf 'Program: %s\nVersion: %s\n' "$PROGNAME" "$REVISION"
}
 
# print_help
# Writes the full help text to stdout: the revision banner, the usage lines
# and a one-line description, each followed by a blank line.
print_help() {
    print_revision
    printf '\n'
    print_usage
    printf '\n%s\n\n' "Check total size of Deleted-open files for Nagios"
}
 
# Refuse to go any further unless invoked as root
check_root

# At least one command line argument is required; with none given,
# show the usage text and stop with the UNKNOWN status

case $# in
    0)
        print_usage
        exit $STATE_UNKNOWN
        ;;
esac
 
# Grab the command line arguments

# parse_args ARG...
# Walks the given arguments and stores the thresholds in the globals
# `thewarn` (-w / --warning) and `thecrit` (-c / --critical).
# -h / --help and -V / --version print their text and exit with STATE_OK;
# an unrecognised argument prints the usage text and exits with STATE_UNKNOWN.
# Exit codes default to the standard Nagios values when STATE_* is unset.
parse_args() {
    # Loop on the argument count rather than on "$1" being non-empty, so an
    # empty argument no longer ends parsing and hides everything after it.
    while [ $# -gt 0 ]; do
        case "$1" in
            "")
                # Empty arguments carry no information; skip them.
                ;;
            -h|--help)
                print_help
                exit "${STATE_OK:-0}"
                ;;
            -V|--version)
                print_revision
                exit "${STATE_OK:-0}"
                ;;
            -w|--warning)
                thewarn=${2-}
                # Only consume the value if one was actually supplied.
                if [ $# -gt 1 ]; then
                    shift
                fi
                ;;
            -c|--critical)
                thecrit=${2-}
                if [ $# -gt 1 ]; then
                    shift
                fi
                ;;
            *)
                echo "Unknown argument: $1"
                print_usage
                exit "${STATE_UNKNOWN:-3}"
                ;;
        esac
        shift
    done
}

parse_args "$@"
 
# Validate arguments
# Both thresholds are mandatory and are later compared numerically against a
# byte count, so each must be a plain non-negative integer. Anything else
# (missing, empty, or containing a non-digit) gets the usage text and the
# UNKNOWN status before any work is done.
for threshold in "$thecrit" "$thewarn"; do
  case "$threshold" in
    ''|*[!0-9]*)
      print_usage
      exit "${STATE_UNKNOWN:-3}"
      ;;
  esac
done
unset threshold
 
# Check begins here

# sum_deleted_sizes
# Reads `lsof -F sDin` field output on stdin and prints, as a plain integer,
# the total size in bytes of the open files whose name carries the
# "(deleted)" marker. Each output line is one field, identified by its first
# character: p = new process, f = new file descriptor, s = size,
# D = device, i = inode, n = name. A file seen through several descriptors,
# processes or threads is counted once, keyed on device + inode.
sum_deleted_sizes() {
  awk '
    # Account for the file record collected so far, then reset for the next.
    function flush_record(    key) {
      if (is_deleted && size != "") {
        key = device SUBSEP inode
        if (device == "" || inode == "") {
          # Cannot identify the file, so count every occurrence.
          total += size
        } else if (!(key in seen)) {
          seen[key] = 1
          total += size
        }
      }
      size = ""; device = ""; inode = ""; is_deleted = 0
    }
    /^[pf]/ { flush_record(); next }
    /^s/    { size = substr($0, 2); next }
    /^D/    { device = substr($0, 2); next }
    /^i/    { inode = substr($0, 2); next }
    /^n/    { if (index($0, "(deleted)")) is_deleted = 1; next }
    # %.0f keeps large totals out of exponent notation.
    END     { flush_record(); printf "%.0f\n", total }
  '
}

# A missing lsof must not be reported as "0 bytes, OK".
if ! command -v lsof >/dev/null 2>&1; then
  echo "Deleted-Open files Check Error"
  exit "${STATE_UNKNOWN:-3}"
fi

# Use the machine-readable field output: the position of the size column in
# the default listing is not stable, because optional or blank columns shift
# the fields. -n and -P only skip host and port name lookups.
lsof_fields=$(lsof -n -P -F sDin)
lsof_status=$?
if [ "$lsof_status" -ne 0 ] && [ -z "$lsof_fields" ]; then
  echo "Deleted-Open files Check Error"
  exit "${STATE_UNKNOWN:-3}"
fi

deleted_bytes=$(printf '%s\n' "$lsof_fields" | sum_deleted_sizes)
case "$deleted_bytes" in
  ''|*[!0-9]*)
    # awk failed or produced something that is not a byte count
    echo "Deleted-Open files Check Error"
    exit "${STATE_UNKNOWN:-3}"
    ;;
esac

declare -i totaldeleted
totaldeleted=$deleted_bytes

# Performance data shared by every result line
perfdata="'Deleted-Open files size'=${totaldeleted}B;${thewarn};${thecrit};0"

if [ "$totaldeleted" -ge "$thecrit" ]; then
  echo "Deleted-Open files Size is Critical|${perfdata}"
  exit "${STATE_CRITICAL:-2}"
elif [ "$totaldeleted" -ge "$thewarn" ]; then
  echo "Deleted-Open files Size is Warning|${perfdata}"
  exit "${STATE_WARNING:-1}"
elif [ "$totaldeleted" -lt "$thewarn" ]; then
  echo "Deleted-Open files Size is OK|${perfdata}"
  exit "${STATE_OK:-0}"
fi

# Only reached when a comparison itself failed, e.g. a non-numeric threshold
echo "Deleted-Open files Check Unknown"
exit "${STATE_UNKNOWN:-3}"

When setting up services, set the normal and retry check intervals to large values, such as those below:

        normal_check_interval           60
        retry_check_interval            60
        max_check_attempts              5

With these settings an alert is raised only if the size of Deleted-Open files stays above the limits for 4 or more hours (five consecutive checks at 60-minute intervals). The intervals are deliberately long because processes routinely create Deleted-Open files but release them within a few hours.