check_apc_ups.pl 8.5 KB


  1. #! /usr/bin/perl -wT
  2. #
  3. # Check_apc_ups - Check APC UPS status via SNMP
  4. # Shamelessly copied from check_breeze.pl
  5. #
  6. # To do:
  7. # - Send SNMP queries directly, instead of forking `snmpget`.
  8. # - Make the status less verbose. Maybe we can send an "onLine, time
  9. # remaining: hh:mm:ss" if all is well, and a list of specific problems
  10. # if something is broken.
  11. use strict;
  12. use Getopt::Long;
  13. use vars qw($opt_V $opt_h $opt_H $opt_T $opt_t $opt_R $opt_r
  14. $opt_L $opt_l $PROGNAME);
  15. use lib "/usr/local/nagios/libexec";
  16. use utils qw(%ERRORS &print_revision &support &usage);
  # Forward declarations; the prototypes must match the definitions at the
  # bottom of the file.
  sub print_help ();
  sub print_usage ();
  sub get_snmp_int_val ($);
  sub escalate_exitval ($);

  # Name shown by print_revision(), print_usage() and print_help().  It is
  # declared via "use vars" but was never assigned, so those messages printed
  # an empty program name (plus an "uninitialized" warning under -w).
  $PROGNAME = "check_apc_ups";

  # The script runs under taint mode (-T): external commands may only be run
  # with a sanitized environment, so these variables are cleared.
  $ENV{'PATH'}     = '';
  $ENV{'BASH_ENV'} = '';
  $ENV{'ENV'}      = '';

  # Command-line options.  Every threshold option takes a value, so both the
  # short and the long spelling need "=s"; without it the long form is parsed
  # as a plain flag and the threshold silently becomes 1.
  Getopt::Long::Configure('bundling');
  GetOptions(
      "V"   => \$opt_V, "version"            => \$opt_V,
      "h"   => \$opt_h, "help"               => \$opt_h,
      "T=s" => \$opt_T, "temp-critical=s"    => \$opt_T,
      "t=s" => \$opt_t, "temp-warning=s"     => \$opt_t,
      "R=s" => \$opt_R, "runtime-critical=s" => \$opt_R,
      "r=s" => \$opt_r, "runtime-warning=s"  => \$opt_r,
      "L=s" => \$opt_L, "load-critical=s"    => \$opt_L,
      "l=s" => \$opt_l, "load-warning=s"     => \$opt_l,
      "H=s" => \$opt_H, "hostname=s"         => \$opt_H,
  );

  if ($opt_V) {
      print_revision($PROGNAME, '$Revision: 1771 $');
      exit $ERRORS{'OK'};
  }

  if ($opt_h) {
      print_help();
      exit $ERRORS{'OK'};
  }

  # The host may be given with -H/--hostname or as the first bare argument.
  ($opt_H) || ($opt_H = shift) || usage("Host name/address not specified\n");

  # Accept the host only if the WHOLE string consists of host-name characters.
  # The captured copy is untainted and is later interpolated into a shell
  # command line, so an unanchored match (which would quietly keep just the
  # first valid-looking fragment) is not acceptable here.
  my $host;
  if ($opt_H =~ /\A([-.A-Za-z0-9]+)\z/) {
      $host = $1;
  }
  defined($host) || usage("Invalid host: $opt_H\n");
  # Thresholds: take the command-line value when one was given (an explicit 0
  # is honoured), otherwise fall back to the default.
  my $tempcrit    = defined($opt_T) ? $opt_T : 60;   # degrees C
  my $tempwarn    = defined($opt_t) ? $opt_t : 40;
  my $runtimecrit = defined($opt_R) ? $opt_R : 30;   # minutes (converted below)
  my $runtimewarn = defined($opt_r) ? $opt_r : 60;
  my $loadcrit    = defined($opt_L) ? $opt_L : 85;   # percent
  my $loadwarn    = defined($opt_l) ? $opt_l : 50;

  # A threshold must be a plain non-negative number.  The pattern is anchored
  # so that values such as "abc1" are rejected, and the checks run before any
  # arithmetic is done on the values.
  my $number = '\A\d+(?:\.\d+)?\z';

  if ($tempcrit !~ /$number/) {
      usage ("Invalid critical temperature threshold.\n");
  }
  if ($tempwarn !~ /$number/) {
      usage ("Invalid warning temperature threshold.\n");
  }
  if ($runtimecrit !~ /$number/) {
      usage ("Invalid critical run time threshold.\n");
  }
  if ($runtimewarn !~ /$number/) {
      usage ("Invalid warning run time threshold.\n");
  }
  if ($loadcrit !~ /$number/ || $loadcrit < 0 || $loadcrit > 100) {
      usage ("Invalid critical load threshold.\n");
  }
  if ($loadwarn !~ /$number/ || $loadwarn < 0 || $loadwarn > 100) {
      usage ("Invalid warning load threshold.\n");
  }

  # Run time thresholds are given in minutes; the remaining run time is later
  # compared in hundredths of a second, so convert both thresholds.  Each one
  # is converted independently of whether the other was supplied.
  $runtimecrit *= 60 * 100;
  $runtimewarn *= 60 * 100;
  # APC UPS OIDs
  # APC MIBs are available at ftp://ftp.apcftp.com/software/pnetmib/mib
  my $upsBasicOutputStatus          = ".1.3.6.1.4.1.318.1.1.1.4.1.1.0";
  my $upsBasicBatteryStatus         = ".1.3.6.1.4.1.318.1.1.1.2.1.1.0";
  my $upsAdvInputLineFailCause      = ".1.3.6.1.4.1.318.1.1.1.3.2.5.0";
  my $upsAdvBatteryTemperature      = ".1.3.6.1.4.1.318.1.1.1.2.2.2.0";
  my $upsAdvBatteryRunTimeRemaining = ".1.3.6.1.4.1.318.1.1.1.2.2.3.0";
  my $upsAdvBatteryReplaceIndicator = ".1.3.6.1.4.1.318.1.1.1.2.2.4.0";
  my $upsAdvOutputLoad              = ".1.3.6.1.4.1.318.1.1.1.4.2.3.0";
  my $upsAdvTestDiagnosticsResults  = ".1.3.6.1.4.1.318.1.1.1.7.2.3.0";

  # Lookup tables, indexed by the integer value returned for the OID.
  # Each row is [ label to print, plugin status to report ]; rows holding
  # undef are padding (or values the plugin reports as "unknown").

  # upsBasicOutputStatus
  my @outputStatVals = (
      [ undef, undef ],                                      # pad 0
      [ undef, undef ],                                      # pad 1
      [ "onLine",                   $ERRORS{'OK'} ],         # 2
      [ "onBattery",                $ERRORS{'WARNING'} ],    # 3
      [ "onSmartBoost",             $ERRORS{'WARNING'} ],    # 4
      [ "timedSleeping",            $ERRORS{'WARNING'} ],    # 5
      [ "softwareBypass",           $ERRORS{'WARNING'} ],    # 6
      [ "off",                      $ERRORS{'CRITICAL'} ],   # 7
      [ "rebooting",                $ERRORS{'WARNING'} ],    # 8
      [ "switchedBypass",           $ERRORS{'WARNING'} ],    # 9
      [ "hardwareFailureBypass",    $ERRORS{'CRITICAL'} ],   # 10
      [ "sleepingUntilPowerReturn", $ERRORS{'CRITICAL'} ],   # 11
      [ "onSmartTrim",              $ERRORS{'WARNING'} ],    # 12
  );

  # upsAdvInputLineFailCause (labels only, no status attached)
  my @failCauseVals = (
      undef,                    # pad 0
      "noTransfer",             # 1
      "highLineVoltage",        # 2
      "brownout",               # 3
      "blackout",               # 4
      "smallMomentarySag",      # 5
      "deepMomentarySag",       # 6
      "smallMomentarySpike",    # 7
      "largeMomentarySpike",    # 8
      "selfTest",               # 9
      "rateOfVoltageChange",    # 10
  );

  # upsBasicBatteryStatus
  my @battStatVals = (
      [ undef, undef ],                           # pad 0
      [ undef, undef ],                           # pad 1
      [ "batteryNormal", $ERRORS{'OK'} ],         # 2
      [ "batteryLow",    $ERRORS{'CRITICAL'} ],   # 3
  );

  # upsAdvBatteryReplaceIndicator
  my @battReplVals = (
      [ undef, undef ],                                     # pad 0
      [ "noBatteryNeedsReplacing", $ERRORS{'OK'} ],         # 1
      [ "batteryNeedsReplacing",   $ERRORS{'CRITICAL'} ],   # 2
  );

  # upsAdvTestDiagnosticsResults
  my @diagnosticsResultsVals = (
      [ undef, undef ],                            # pad 0
      [ "OK",             $ERRORS{'OK'} ],         # 1
      [ "failed",         $ERRORS{'CRITICAL'} ],   # 2
      [ "invalidTest",    $ERRORS{'CRITICAL'} ],   # 3
      [ "testInProgress", $ERRORS{'OK'} ],         # 4
  );
  # Main check sequence.  Each section queries one OID, prints one field of
  # the status line ("label: value | ") and escalates the overall exit status.
  # A value that cannot be read or decoded is printed as "unknown" and leaves
  # the exit status untouched.
  my $exitval = $ERRORS{'UNKNOWN'};
  my $data;
  my $onbattery = 3;    # index of "onBattery" in @outputStatVals

  # Output status.  When the UPS is running on battery, the cause of the last
  # transfer to battery is appended in parentheses.  The cause is looked up
  # here, against the output status; comparing $onbattery with the *battery*
  # status value mixes two unrelated enumerations.
  $data = get_snmp_int_val( $upsBasicOutputStatus );
  print "Output status: ";
  if (defined ($data) && defined ($outputStatVals[$data][0])) {
      if ($data == $onbattery) {
          my $failcause = "unknown";
          my $fc = get_snmp_int_val( $upsAdvInputLineFailCause );
          # Only index the table with a value that really is an integer.
          if (defined ($fc) && $fc =~ /^\d+$/ && defined ($failCauseVals[$fc])) {
              $failcause = $failCauseVals[$fc];
          }
          print "$outputStatVals[$data][0] ($failcause) | ";
      } else {
          print "$outputStatVals[$data][0] | ";
      }
      escalate_exitval($outputStatVals[$data][1]);
  } else {
      print "unknown | ";
  }

  # Remaining run time, printed as h:mm:ss.ss.
  $data = get_snmp_int_val( $upsAdvBatteryRunTimeRemaining );
  print "Rem time: ";
  if (defined ($data)) {
      # Data is hundredths of a second.
      my $hrs  = int($data / (60 * 60 * 100));
      my $mins = int($data / (60 * 100)) % 60;
      # Seconds within the current minute, including the fractional part.
      my $secs = ($data % (60 * 100)) / 100;
      printf "%d:%02d:%05.2f | ", $hrs, $mins, $secs;
      if ($data <= $runtimecrit) {
          escalate_exitval($ERRORS{'CRITICAL'});
      } elsif ($data <= $runtimewarn) {
          escalate_exitval($ERRORS{'WARNING'});
      } else {
          escalate_exitval($ERRORS{'OK'});
      }
  } else {
      print "unknown | ";
  }

  # Battery status.
  $data = get_snmp_int_val( $upsBasicBatteryStatus );
  print "Battery status: ";
  if (defined ($data) && defined ($battStatVals[$data][0])) {
      print "$battStatVals[$data][0] | ";
      escalate_exitval($battStatVals[$data][1]);
  } else {
      print "unknown | ";
  }

  # Battery temperature.
  $data = get_snmp_int_val( $upsAdvBatteryTemperature );
  print "Battery temp(C): ";
  if (defined ($data)) {
      print "$data | ";
      if ($data >= $tempcrit) {
          escalate_exitval($ERRORS{'CRITICAL'});
      } elsif ($data >= $tempwarn) {
          escalate_exitval($ERRORS{'WARNING'});
      } else {
          escalate_exitval($ERRORS{'OK'});
      }
  } else {
      print "unknown | ";
  }

  # Battery replacement indicator.
  $data = get_snmp_int_val( $upsAdvBatteryReplaceIndicator );
  print "Battery repl: ";
  if (defined ($data) && defined ($battReplVals[$data][0])) {
      print "$battReplVals[$data][0] | ";
      escalate_exitval($battReplVals[$data][1]);
  } else {
      print "unknown | ";
  }

  # Output load.
  $data = get_snmp_int_val( $upsAdvOutputLoad );
  print "Output load (%): ";
  if (defined ($data)) {
      print "$data | ";
      if ($data >= $loadcrit) {
          escalate_exitval($ERRORS{'CRITICAL'});
      } elsif ($data >= $loadwarn) {
          escalate_exitval($ERRORS{'WARNING'});
      } else {
          escalate_exitval($ERRORS{'OK'});
      }
  } else {
      print "unknown | ";
  }

  # Result of the last self test; this is the last field, so it ends the line.
  $data = get_snmp_int_val( $upsAdvTestDiagnosticsResults );
  print "Diag result: ";
  if (defined ($data) && defined ($diagnosticsResultsVals[$data][0])) {
      print "$diagnosticsResultsVals[$data][0]\n";
      escalate_exitval($diagnosticsResultsVals[$data][1]);
  } else {
      print "unknown\n";
  }

  exit $exitval;
  # Print the two-line command synopsis to standard output.
  sub print_usage () {
      my $synopsis = "Usage: $PROGNAME -H <host> -T temp -t temp -R minutes -r minutes\n";
      my $continued = " -L percent -l percent\n";
      print $synopsis . $continued;
  }
  # Print the full help text to standard output: revision banner, copyright
  # and description, the usage synopsis, one entry per option, and finally
  # the support information supplied by utils.
  sub print_help () {
      print_revision($PROGNAME, '$Revision: 1771 $');
      print join("\n",
          "Copyright (c) 2001 Gerald Combs/Jeffrey Blank/Karl DeBisschop",
          "This plugin reports the status of an APC UPS equipped with an SNMP management",
          "module.",
      ), "\n";
      print_usage();
      # The two load entries used to lack their closing parenthesis.
      print "\n", join("\n",
          "-H, --hostname=HOST",
          "Name or IP address of host to check",
          "-T --temp-critical",
          "Battery degrees C above which a CRITICAL status will result (default: 60)",
          "-t --temp-warning",
          "Battery degrees C above which a WARNING status will result (default: 40)",
          "-R --runtime-critical",
          "Minutes remaining below which a CRITICAL status will result (default: 30)",
          "-r --runtime-warning",
          "Minutes remaining below which a WARNING status will result (default: 60)",
          "-L --load-critical",
          "Output load pct above which a CRITICAL status will result (default: 85)",
          "-l --load-warning",
          "Output load pct above which a WARNING status will result (default: 50)",
      ), "\n";
      support();
  }
  # Fetch one OID from $host with snmpget (community "public") and return its
  # integer value.
  #
  # Argument: the OID to query.
  # Returns:  the integer as a string of digits, or undef when the command
  #           produced no usable reply or the reply contains no integer.
  #           Call it in scalar context.
  #
  # The reply is expected to look like "<oid> = <value>", so the value is the
  # third space-separated field.  Three value layouts are understood:
  #   "...(123)..."   integer in parentheses (later versions of UCD SNMP)
  #   "Type: 123"     integer after a type prefix
  #   "123"           bare integer
  # Anything else (for example an error text) yields undef so that callers
  # never compare against, or index a table with, a non-numeric string.
  #
  # NOTE(review): the "host community oid" argument order is the one this
  # script has always used; newer snmpget versions may need "-v"/"-c"
  # options instead -- confirm against the installed snmpget.
  sub get_snmp_int_val ($) {
      my $oid = shift(@_);

      # $host was validated and untainted by the caller; stderr is discarded.
      my $reply = `/usr/bin/snmpget $host public $oid 2> /dev/null`;
      return unless (defined ($reply));

      my @field = split(/ /, $reply, 3);
      return unless (defined ($field[2]));
      my $value = $field[2];

      if ($value =~ /\((\d+)\)/) {          # Later versions of UCD SNMP
          return $1;
      }
      if ($value =~ /: (\d+)/) {
          return $1;
      }
      if ($value =~ /\A\s*(\d+)\s*\z/) {    # bare integer, newline stripped
          return $1;
      }
      return;
  }
  # Raise the plugin's overall exit status ($exitval) to $newval when $newval
  # is more severe.
  #
  # Argument: one of the $ERRORS{...} status codes.
  #
  # UNKNOWN is treated as "nothing learned yet": any real status replaces it,
  # and it never replaces a real status.  This makes the result independent
  # of where utils.pm places UNKNOWN numerically.  A plain "$newval > $exitval"
  # only works when UNKNOWN sorts below OK; when it sorts above CRITICAL (the
  # Nagios plugin convention is 3) the initial UNKNOWN could never be replaced
  # and the plugin would always exit UNKNOWN.
  sub escalate_exitval ($) {
      my $newval = shift(@_);
      return unless (defined ($newval));

      my $unknown = $ERRORS{'UNKNOWN'};
      return if ($newval == $unknown);

      if ($exitval == $unknown || $newval > $exitval) {
          $exitval = $newval;
      }
      return;
  }