check_apc_ups.pl 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. #! /usr/bin/perl -wT
  2. #
  3. # Check_apc_ups - Check APC UPS status via SNMP
  4. # Shamelessly copied from check_breeze.pl
  5. #
  6. # To do:
  7. # - Send SNMP queries directly, instead of forking `snmpget`.
  8. # - Make the status less verbose. Maybe we can send an "onLine, time
  9. # remaining: hh:mm:ss" if all is well, and a list of specific problems
  10. # if something is broken.
  # Split the invocation path ($0) into the directory the plugin was started
  # from and its bare file name.  This runs at compile time because the
  # directory is consumed by the `use lib` line below and the name is used in
  # the usage/version output.
  BEGIN {
      if ($0 =~ m/^(.*?)[\/\\]([^\/\\]+)$/) {
          $main::runtimedir = $1;
          $main::PROGNAME   = $2;
      } else {
          # $0 has no directory component (e.g. "perl check_apc_ups.pl"):
          # fall back to the current directory instead of leaving both
          # values undefined.
          $main::runtimedir = '.';
          $main::PROGNAME   = $0;
      }
  }
  17. use strict;
  18. use Getopt::Long;
  19. use vars qw($opt_V $opt_h $opt_H $opt_T $opt_t $opt_R $opt_r
  20. $opt_L $opt_l $PROGNAME);
  21. use lib $main::runtimedir;
  22. use utils qw(%ERRORS &print_revision &support &usage);
  # Forward declarations (with prototypes) for the subs defined at the end of
  # this file.
  sub print_help ();
  sub print_usage ();
  sub get_snmp_int_val ($);
  sub escalate_exitval ($);

  # The script runs in taint mode (-T) and shells out to snmpget, so clear the
  # environment variables that influence how the shell starts up.
  $ENV{'PATH'}     = '';
  $ENV{'BASH_ENV'} = '';
  $ENV{'ENV'}      = '';

  # Command line parsing.  Every threshold option takes a value; the long
  # spellings need the same "=s" specifier as the short ones, otherwise
  # "--temp-critical 50" would store 1 and leave "50" behind as an argument.
  Getopt::Long::Configure('bundling');
  GetOptions
      ("V"   => \$opt_V, "version"            => \$opt_V,
       "h"   => \$opt_h, "help"               => \$opt_h,
       "T=s" => \$opt_T, "temp-critical=s"    => \$opt_T,
       "t=s" => \$opt_t, "temp-warning=s"     => \$opt_t,
       "R=s" => \$opt_R, "runtime-critical=s" => \$opt_R,
       "r=s" => \$opt_r, "runtime-warning=s"  => \$opt_r,
       "L=s" => \$opt_L, "load-critical=s"    => \$opt_L,
       "l=s" => \$opt_l, "load-warning=s"     => \$opt_l,
       "H=s" => \$opt_H, "hostname=s"         => \$opt_H);
  # -V / -h: print the requested information and leave with an OK status.
  if ($opt_V) {
      print_revision($PROGNAME,'$Revision$');
      exit $ERRORS{'OK'};
  }
  if ($opt_h) {print_help(); exit $ERRORS{'OK'};}

  # The host comes from -H or, failing that, from the first remaining argument.
  ($opt_H) || ($opt_H = shift) || usage("Host name/address not specified\n");

  # The host is interpolated into a shell command later on, so accept it only
  # if the WHOLE string consists of host name characters and does not start
  # with '-' or '.'.  Going through the capture also untaints the value.
  # (An unanchored match would silently truncate "bad host" to "bad".)
  my $host;
  if ($opt_H =~ /\A([A-Za-z0-9][-.A-Za-z0-9]*)\z/) {
      $host = $1;
  }
  ($host) || usage("Invalid host: $opt_H\n");
  # Defaults
  #
  # Validate the raw option values first: each threshold must be a plain
  # non-negative number.  Checking before any arithmetic means a bad value is
  # reported instead of being coerced to 0 and replaced by the default.
  foreach my $check (
      [ $opt_T, "critical temperature" ],
      [ $opt_t, "warning temperature" ],
      [ $opt_R, "critical run time" ],
      [ $opt_r, "warning run time" ],
      [ $opt_L, "critical load" ],
      [ $opt_l, "warning load" ],
  ) {
      my ($value, $label) = @{$check};
      next unless (defined $value);
      if ($value !~ /\A\d+(?:\.\d+)?\z/) {
          usage ("Invalid $label threshold.\n");
      }
  }

  # Run time thresholds are given in minutes; convert them to secs/100.
  $opt_R *= 60 * 100 if (defined $opt_R);
  $opt_r *= 60 * 100 if (defined $opt_r);

  # Use defined() rather than || so that an explicit 0 is honoured.
  my $tempcrit    = defined ($opt_T) ? $opt_T : 60;
  my $tempwarn    = defined ($opt_t) ? $opt_t : 40;
  my $runtimecrit = defined ($opt_R) ? $opt_R : 30 * 60 * 100; # Secs / 100
  my $runtimewarn = defined ($opt_r) ? $opt_r : 60 * 60 * 100;
  my $loadcrit    = defined ($opt_L) ? $opt_L : 85;
  my $loadwarn    = defined ($opt_l) ? $opt_l : 50;

  # Load thresholds are percentages.
  if ($loadcrit < 0 || $loadcrit > 100) {
      usage ("Invalid critical load threshold.\n");
  }
  if ($loadwarn < 0 || $loadwarn > 100) {
      usage ("Invalid warning load threshold.\n");
  }
  # APC UPS OIDs
  # APC MIBs are available at ftp://ftp.apcftp.com/software/pnetmib/mib
  my $upsBasicOutputStatus          = ".1.3.6.1.4.1.318.1.1.1.4.1.1.0";
  my $upsBasicBatteryStatus         = ".1.3.6.1.4.1.318.1.1.1.2.1.1.0";
  my $upsAdvInputLineFailCause      = ".1.3.6.1.4.1.318.1.1.1.3.2.5.0";
  my $upsAdvBatteryTemperature      = ".1.3.6.1.4.1.318.1.1.1.2.2.2.0";
  my $upsAdvBatteryRunTimeRemaining = ".1.3.6.1.4.1.318.1.1.1.2.2.3.0";
  my $upsAdvBatteryReplaceIndicator = ".1.3.6.1.4.1.318.1.1.1.2.2.4.0";
  my $upsAdvOutputLoad              = ".1.3.6.1.4.1.318.1.1.1.4.2.3.0";
  my $upsAdvTestDiagnosticsResults  = ".1.3.6.1.4.1.318.1.1.1.7.2.3.0";

  # Lookup tables, indexed by the integer returned for the matching OID.
  # Each entry is [ label printed in the status line, plugin status ];
  # entries whose label is undef are reported as "unknown".
  my @outputStatVals = (
      [ undef, undef ],                                    # pad 0
      [ undef, undef ],                                    # pad 1
      [ "onLine", $ERRORS{'OK'} ],                         # 2
      [ "onBattery", $ERRORS{'WARNING'} ],                 # 3
      [ "onSmartBoost", $ERRORS{'WARNING'} ],              # 4
      [ "timedSleeping", $ERRORS{'WARNING'} ],             # 5
      [ "softwareBypass", $ERRORS{'WARNING'} ],            # 6
      [ "off", $ERRORS{'CRITICAL'} ],                      # 7
      [ "rebooting", $ERRORS{'WARNING'} ],                 # 8
      [ "switchedBypass", $ERRORS{'WARNING'} ],            # 9
      [ "hardwareFailureBypass", $ERRORS{'CRITICAL'} ],    # 10
      [ "sleepingUntilPowerReturn", $ERRORS{'CRITICAL'} ], # 11
      [ "onSmartTrim", $ERRORS{'WARNING'} ],               # 12
  );

  # Labels for the input line fail cause, indexed by the returned integer.
  my @failCauseVals = (
      undef,
      "noTransfer",
      "highLineVoltage",
      "brownout",
      "blackout",
      "smallMomentarySag",
      "deepMomentarySag",
      "smallMomentarySpike",
      "largeMomentarySpike",
      "selfTest",
      "rateOfVoltageChange",
  );

  my @battStatVals = (
      [ undef, undef ],                        # pad 0
      [ undef, undef ],                        # pad 1
      [ "batteryNormal", $ERRORS{'OK'} ],      # 2
      [ "batteryLow", $ERRORS{'CRITICAL'} ],   # 3
  );

  my @battReplVals = (
      [ undef, undef ],                                 # pad 0
      [ "noBatteryNeedsReplacing", $ERRORS{'OK'} ],     # 1
      [ "batteryNeedsReplacing", $ERRORS{'CRITICAL'} ], # 2
  );

  my @diagnosticsResultsVals = (
      [ undef, undef ],                        # pad 0
      [ "OK", $ERRORS{'OK'} ],                 # 1
      [ "failed", $ERRORS{'CRITICAL'} ],       # 2
      [ "invalidTest", $ERRORS{'CRITICAL'} ],  # 3
      [ "testInProgress", $ERRORS{'OK'} ],     # 4
  );
  # Overall plugin status; raised by escalate_exitval() as the individual
  # checks below report their results.
  my $exitval = $ERRORS{'UNKNOWN'};
  # Scratch variable holding the most recent SNMP reply.
  my $data;
  # NOTE(review): despite its name this is compared with the *battery* status
  # further down, where 3 is "batteryLow" in @battStatVals -- confirm intent.
  my $onbattery = 3;

  # Output status: print the label for the returned state and fold its
  # severity into the exit value; anything not in the table is "unknown".
  $data = get_snmp_int_val($upsBasicOutputStatus);
  print "Output status: ";
  if (!defined ($data) || !defined ($outputStatVals[$data][0])) {
      print "unknown | ";
  } else {
      my ($label, $severity) = @{ $outputStatVals[$data] };
      print "$label | ";
      escalate_exitval($severity);
  }
  # Remaining battery run time.  The value is in hundredths of a second; it is
  # printed as h:mm:ss.ss and compared against the run time thresholds (which
  # use the same unit).
  $data = get_snmp_int_val( $upsAdvBatteryRunTimeRemaining );
  print "Rem time: ";
  if (defined ($data)) {
      my $hrs  = int($data / (60 * 60 * 100)); # Data is hundredths of a second
      my $mins = int($data / (60 * 100)) % 60;
      # Seconds are whatever is left after removing whole minutes; taking
      # "% 100" here would keep only the fractional hundredths.
      my $secs = ($data % (60 * 100)) / 100;
      printf "%d:%02d:%05.2f | ", $hrs, $mins, $secs;
      if ($data <= $runtimecrit) {
          escalate_exitval($ERRORS{'CRITICAL'});
      } elsif ($data <= $runtimewarn) {
          escalate_exitval($ERRORS{'WARNING'});
      } else {
          escalate_exitval($ERRORS{'OK'});
      }
  } else {
      print "unknown | ";
  }
  # Battery status.  When the status equals $onbattery the reason for the last
  # input line failure is appended in parentheses.
  # NOTE(review): $onbattery is 3, which is "batteryLow" in @battStatVals --
  # confirm that this is the state the fail cause was meant to accompany.
  $data = get_snmp_int_val( $upsBasicBatteryStatus );
  print "Battery status: ";
  if (defined ($data) && defined ($battStatVals[$data][0])) {
      if ($data == $onbattery) {
          # Only query the fail cause when it is going to be printed, and
          # only use the reply as an index when it really is an integer.
          my $failcause = "unknown";
          my $fc = get_snmp_int_val( $upsAdvInputLineFailCause );
          if (defined ($fc) && $fc =~ /\A\d+\z/ && defined ($failCauseVals[$fc])) {
              $failcause = $failCauseVals[$fc];
          }
          print "$battStatVals[$data][0] ($failcause) | ";
      } else {
          print "$battStatVals[$data][0] | ";
      }
      escalate_exitval($battStatVals[$data][1]);
  } else {
      print "unknown | ";
  }
  # Battery temperature: print the reading and grade it against the
  # temperature thresholds (at or above a threshold triggers it).
  $data = get_snmp_int_val($upsAdvBatteryTemperature);
  print "Battery temp(C): ";
  if (!defined ($data)) {
      print "unknown | ";
  } else {
      print "$data | ";
      my $severity = ($data >= $tempcrit) ? 'CRITICAL'
                   : ($data >= $tempwarn) ? 'WARNING'
                   :                        'OK';
      escalate_exitval($ERRORS{$severity});
  }
  # Battery replacement indicator: print the label for the returned state and
  # fold its severity into the exit value; unmapped values are "unknown".
  $data = get_snmp_int_val($upsAdvBatteryReplaceIndicator);
  print "Battery repl: ";
  if (!defined ($data) || !defined ($battReplVals[$data][0])) {
      print "unknown | ";
  } else {
      my ($label, $severity) = @{ $battReplVals[$data] };
      print "$label | ";
      escalate_exitval($severity);
  }
  # Output load: print the reading and grade it against the load thresholds
  # (at or above a threshold triggers it).
  $data = get_snmp_int_val($upsAdvOutputLoad);
  print "Output load (%): ";
  if (!defined ($data)) {
      print "unknown | ";
  } else {
      print "$data | ";
      my $severity = ($data >= $loadcrit) ? 'CRITICAL'
                   : ($data >= $loadwarn) ? 'WARNING'
                   :                        'OK';
      escalate_exitval($ERRORS{$severity});
  }
  # Self-test result.  This is the last field, so it ends the status line with
  # a newline instead of the " | " separator.
  $data = get_snmp_int_val($upsAdvTestDiagnosticsResults);
  print "Diag result: ";
  if (!defined ($data) || !defined ($diagnosticsResultsVals[$data][0])) {
      print "unknown\n";
  } else {
      my ($label, $severity) = @{ $diagnosticsResultsVals[$data] };
      print "$label\n";
      escalate_exitval($severity);
  }

  # Leave with the most severe status collected by the checks above.
  exit $exitval;
  # Print the two-line command synopsis to standard output.
  sub print_usage () {
      my @synopsis = (
          "Usage: $PROGNAME -H <host> -T temp -t temp -R minutes -r minutes\n",
          " -L percent -l percent\n",
      );
      print join('', @synopsis);
  }
  # Print the full help text: revision, copyright and purpose, the command
  # synopsis, a description of every option, and the support information.
  sub print_help () {
      print_revision($PROGNAME,'$Revision$');
      print "Copyright (c) 2001 Gerald Combs/Jeffrey Blank/Karl DeBisschop\n",
            "This plugin reports the status of an APC UPS equipped with an SNMP management\n",
            "module.\n";
      print_usage();
      # The two load entries used to lack the closing ")" after the default.
      print "\n",
            "-H, --hostname=HOST\n",
            "Name or IP address of host to check\n",
            "-T --temp-critical\n",
            "Battery degrees C above which a CRITICAL status will result (default: 60)\n",
            "-t --temp-warning\n",
            "Battery degrees C above which a WARNING status will result (default: 40)\n",
            "-R --runtime-critical\n",
            "Minutes remaining below which a CRITICAL status will result (default: 30)\n",
            "-r --runtime-warning\n",
            "Minutes remaining below which a WARNING status will result (default: 60)\n",
            "-L --load-critical\n",
            "Output load pct above which a CRITICAL status will result (default: 85)\n",
            "-l --load-warning\n",
            "Output load pct above which a WARNING status will result (default: 50)\n";
      support();
  }
  # Fetch one OID from $host with snmpget and return its integer value.
  #
  # Parameter: the OID to query.
  # Returns:   the integer found in the reply, or undef when snmpget produced
  #            no usable output or the value is not an integer (for example
  #            an error message from the agent).  Callers use the result as
  #            an array index and in numeric comparisons, so raw text is
  #            never passed back.
  sub get_snmp_int_val ($) {
      my $oid = shift(@_);

      my $reply = `/usr/bin/snmpget $host public $oid 2> /dev/null`;
      return unless (defined $reply);

      # The reply is split on single spaces into at most three parts; the
      # value is everything from the third part on.
      my @test = split(/ /, $reply, 3);
      return unless (defined ($test[2]));

      if ($test[2] =~ /\((\d+)\)/) {          # Later versions of UCD SNMP
          return $1;
      }
      if ($test[2] =~ /: (\d+)/) {
          return $1;
      }
      if ($test[2] =~ /\A\s*(\d+)\s*\z/) {    # Bare number, newline stripped
          return $1;
      }
      return;
  }
  # Raise the overall exit value to $newval if that is more severe than what
  # has been recorded so far.
  #
  # Parameter: a status code taken from %ERRORS.
  #
  # UNKNOWN is treated as "nothing recorded yet" whatever its numeric value.
  # A plain numeric comparison only works when UNKNOWN is the smallest code;
  # if utils.pm defines it as the largest one the exit value could never
  # leave UNKNOWN.  (%ERRORS is not defined in this file -- TODO confirm which
  # numbering the installed utils.pm uses.)
  sub escalate_exitval ($) {
      my $newval = shift(@_);

      # An UNKNOWN result never overrides a real status.
      return if ($newval == $ERRORS{'UNKNOWN'});

      if ($exitval == $ERRORS{'UNKNOWN'} || $newval > $exitval) {
          $exitval = $newval;
      }
  }