You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

134 lines
5.2 KiB

  1. #!/usr/bin/env bash
  2. #########################################################################
  3. # Script: check_zpools.sh
  4. # Purpose: Nagios plugin to monitor status of zfs pool
  5. # Authors: Aldo Fabi First version (2006-09-01)
  6. # Vitaliy Gladkevitch Forked (2013-02-04)
  7. # Claudio Kuenzler Complete redo, perfdata, etc (2013-2014)
  8. # Doc: http://www.claudiokuenzler.com/nagios-plugins/check_zpools.php
  9. # History:
  10. # 2006-09-01 Original first version
  11. # 2006-10-04 Updated (no change history known)
  12. # 2013-02-04 Forked and released
  13. # 2013-05-08 Make plugin work on different OS, pep up plugin
  14. # 2013-05-09 Bugfix in exit code handling
  15. # 2013-05-10 Removed old exit vars (not used anymore)
  16. # 2013-05-21 Added performance data (percentage used)
  17. # 2013-07-11 Bugfix in zpool health check
  18. # 2014-02-10 Bugfix in threshold comparison
  19. # 2014-03-11 Allow plugin to run without enforced thresholds
  20. #########################################################################
  21. ### Begin vars
  22. STATE_OK=0 # define the exit code if status is OK
  23. STATE_WARNING=1 # define the exit code if status is Warning
  24. STATE_CRITICAL=2 # define the exit code if status is Critical
  25. STATE_UNKNOWN=3 # define the exit code if status is Unknown
  26. # Set path
  27. PATH=$PATH:/usr/sbin:/sbin
  28. export PATH
  29. ### End vars
  30. #########################################################################
  31. help="check_zpools.sh (c) 2006-2014 several authors\n
  32. Usage: $0 -p (poolname|ALL) [-w warnpercent] [-c critpercent]\n
  33. Example: $0 -p ALL -w 80 -c 90"
  34. #########################################################################
  35. # Check necessary commands are available
  36. for cmd in zpool awk [
  37. do
  38. if ! `which ${cmd} 1>/dev/null`
  39. then
  40. echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
  41. exit ${STATE_UNKNOWN}
  42. fi
  43. done
  44. #########################################################################
  45. # Check for people who need help - aren't we all nice ;-)
  46. if [ "${1}" = "--help" -o "${#}" = "0" ];
  47. then
  48. echo -e "${help}";
  49. exit ${STATE_UNKNOWN};
  50. fi
  51. #########################################################################
  52. # Get user-given variables
  53. while getopts "p:w:c:" Input;
  54. do
  55. case ${Input} in
  56. p) pool=${OPTARG};;
  57. w) warn=${OPTARG};;
  58. c) crit=${OPTARG};;
  59. *) echo -e $help
  60. exit $STATE_UNKNOWN
  61. ;;
  62. esac
  63. done
  64. #########################################################################
  65. # Did user obey to usage?
  66. if [ -z $pool ]; then echo -e $help; exit ${STATE_UNKNOWN}; fi
  67. #########################################################################
  68. # Verify threshold sense
  69. if [[ -n $warn ]] && [[ -z $crit ]]; then echo "Both warning and critical thresholds must be set"; exit $STATE_UNKNOWN; fi
  70. if [[ -z $warn ]] && [[ -n $crit ]]; then echo "Both warning and critical thresholds must be set"; exit $STATE_UNKNOWN; fi
  71. if [[ $warn -gt $crit ]]; then echo "Warning threshold cannot be greater than critical"; exit $STATE_UNKNOWN; fi
  72. #########################################################################
  73. # What needs to be checked?
  74. ## Check all pools
  75. if [ $pool = "ALL" ]
  76. then
  77. POOLS=($(zpool list -Ho name))
  78. p=0
  79. for POOL in ${POOLS[*]}
  80. do
  81. CAPACITY=$(zpool list -Ho capacity $POOL | awk -F"%" '{print $1}')
  82. HEALTH=$(zpool list -Ho health $POOL)
  83. # Check with thresholds
  84. if [[ -n $warn ]] && [[ -n $crit ]]
  85. then
  86. if [[ $CAPACITY -ge $crit ]]
  87. then error[${p}]="POOL $POOL usage is CRITICAL (${CAPACITY}%)"; fcrit=1
  88. elif [[ $CAPACITY -ge $warn && $CAPACITY -lt $crit ]]
  89. then error[$p]="POOL $POOL usage is WARNING (${CAPACITY}%)"
  90. elif [ $HEALTH != "ONLINE" ]
  91. then error[${p}]="$POOL health is $HEALTH"; fcrit=1
  92. fi
  93. # Check without thresholds
  94. else
  95. if [ $HEALTH != "ONLINE" ]
  96. then error[${p}]="$POOL health is $HEALTH"; fcrit=1
  97. fi
  98. fi
  99. perfdata[$p]="$POOL=${CAPACITY}% "
  100. let p++
  101. done
  102. if [[ ${#error[*]} -gt 0 ]]
  103. then
  104. if [[ $fcrit -eq 1 ]]; then exit_code=2; else exit_code=1; fi
  105. echo "ZFS POOL ALARM: ${error[*]}|${perfdata[*]}"; exit ${exit_code}
  106. else echo "ALL ZFS POOLS OK (${POOLS[*]})|${perfdata[*]}"; exit 0
  107. fi
  108. ## Check single pool
  109. else
  110. CAPACITY=$(zpool list -Ho capacity $pool | awk -F"%" '{print $1}')
  111. HEALTH=$(zpool list -Ho health $pool)
  112. if [[ -n $warn ]] && [[ -n $crit ]]
  113. then
  114. # Check with thresholds
  115. if [ $HEALTH != "ONLINE" ]; then echo "ZFS POOL $pool health is $HEALTH|$pool=${CAPACITY}%"; exit ${STATE_CRITICAL}
  116. elif [[ $CAPACITY -gt $crit ]]; then echo "ZFS POOL $pool usage is CRITICAL (${CAPACITY}%|$pool=${CAPACITY}%)"; exit ${STATE_CRITICAL}
  117. elif [[ $CAPACITY -gt $warn && $CAPACITY -lt $crit ]]; then echo "ZFS POOL $pool usage is WARNING (${CAPACITY}%)|$pool=${CAPACITY}%"; exit ${STATE_WARNING}
  118. else echo "ALL ZFS POOLS OK ($pool)|$pool=${CAPACITY}%"; exit ${STATE_OK}
  119. fi
  120. else
  121. # Check without thresholds
  122. if [ $HEALTH != "ONLINE" ]
  123. then echo "ZFS POOL $pool health is $HEALTH|$pool=${CAPACITY}%"; exit ${STATE_CRITICAL}
  124. else echo "ALL ZFS POOLS OK ($pool)|$pool=${CAPACITY}%"; exit ${STATE_OK}
  125. fi
  126. fi
  127. fi
  128. echo "UKNOWN - Should never reach this part"
  129. exit ${STATE_UNKNOWN}