check_snmp_process_monitor.pl 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #!/usr/local/bin/perl
  2. # author: Al Tobey <albert.tobey@priority-health.com>
  3. # what: monitor a process using the host-resources mib
  4. # license: GPL - http://www.fsf.org/licenses/gpl.txt
  5. #
  6. # Todo:
  7. # * implement memory and cpu utilization checks
  8. # * maybe cache pids in DBM files if snmp agents get overworked
  9. ###############################################################################
  10. # to get a list of processes over snmp try this command:
  11. # snmptable -v2c -c public hostname hrSWRunTable
  12. # for just a list of valid arguments for the '-e' option:
  13. # snmpwalk -v2c -c public hostname hrSWRunName |perl -pe 's:.*/::'
  14. ###############################################################################
  15. use strict;
  16. require 5.6.0;
  17. use lib qw( /opt/nagios/libexec /usr/local/libexec );
  18. use utils qw(%ERRORS $TIMEOUT &print_revision &support &usage);
  19. use SNMP 5.0;
  20. use Getopt::Long;
  21. use vars qw( $exit $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community $opt_verbose $opt_warning $opt_critical $opt_memory $opt_cpu $opt_port $opt_regex $opt_stats %processes $snmp_session $PROGNAME $TIMEOUT );
  # Name shown in the usage line and passed to print_revision().
  $PROGNAME = "snmp_process_monitor.pl";

  # Options without a default stay undefined until set on the command line.
  ( $opt_verbose, $opt_host, $opt_command, $opt_memory, $opt_cpu ) = ();

  # SNMP connection defaults.
  ( $opt_community, $opt_port ) = ( 'public', 161 );

  # Thresholds are [ minimum, maximum ] process counts; the main program
  # skips any bound that is not greater than zero.
  ( $opt_warning, $opt_critical ) = ( [ 1, -1 ], [ 1, -1 ] );

  # Per-process data gathered over SNMP, keyed by the table instance id.
  %processes = ();

  # Plugin state reported on exit; downgraded by the threshold checks.
  $exit = 'OK';
  # Turn a "-w"/"-c" argument into a [ minimum, maximum ] array reference.
  #   $spec - "MIN" or "MIN,MAX" as given on the command line
  # A missing or empty bound becomes -1, which the threshold checks in the
  # main program treat as "no limit" (they only act on bounds > 0).
  sub _parse_threshold {
      my ($spec) = @_;
      my ( $min, $max ) = split( /,/, $spec );
      $min = -1 if !defined($min) || $min eq '';
      $max = -1 if !defined($max) || $max eq '';
      return [ $min, $max ];
  }

  # Parse @ARGV into the $opt_* globals and $TIMEOUT.
  #
  # Prints the usage line and exits UNKNOWN when Getopt::Long rejects the
  # command line. Prints the full help and exits UNKNOWN when help is
  # requested, when no host is given, or when neither a command (-e) nor a
  # regular expression (-r) is given. Takes no arguments; returns nothing
  # useful.
  sub process_options {
      my ( $opt_crit, $opt_warn );

      Getopt::Long::Configure( 'bundling' );
      my $parsed_ok = GetOptions(
          'V'   => \$opt_version,   'version'     => \$opt_version,
          'v'   => \$opt_verbose,   'verbose'     => \$opt_verbose,
          'h'   => \$opt_help,      'help'        => \$opt_help,
          's'   => \$opt_stats,     'statistics'  => \$opt_stats,
          'H:s' => \$opt_host,      'hostname:s'  => \$opt_host,
          'p:i' => \$opt_port,      'port:i'      => \$opt_port,
          'C:s' => \$opt_community, 'community:s' => \$opt_community,
          'c:s' => \$opt_crit,      'critical:s'  => \$opt_crit,
          'w:s' => \$opt_warn,      'warning:s'   => \$opt_warn,
          't:i' => \$TIMEOUT,       'timeout:i'   => \$TIMEOUT,
          'e:s' => \$opt_command,   'command:s'   => \$opt_command,
          'r:s' => \$opt_regex,     'regex:s'     => \$opt_regex,
          'cpu:i' => \$opt_cpu,     'memory:i'    => \$opt_memory,
      );

      # GetOptions has already complained on STDERR; do not run a check with
      # a half-understood command line.
      if ( !$parsed_ok ) {
          print_usage();
          exit $ERRORS{UNKNOWN};
      }

      if ( defined($opt_version) ) { local_print_revision(); }
      if ( defined($opt_verbose) ) { $SNMP::debugging = 1; }

      if ( !defined($opt_host) || defined($opt_help) || (!defined($opt_command) && !defined($opt_regex)) ) {
          print_help();
          exit $ERRORS{UNKNOWN};
      }

      # Each threshold is parsed from its own option value; the warning
      # threshold was previously built from the critical argument.
      $opt_critical = _parse_threshold($opt_crit) if defined($opt_crit);
      $opt_warning  = _parse_threshold($opt_warn) if defined($opt_warn);
  }
  # Print this plugin's name and revision by delegating to print_revision(),
  # which is imported from the utils module.
  sub local_print_revision {
      my $revision_tag = '$Revision$ ';
      return print_revision( $PROGNAME, $revision_tag );
  }
  # Print the one-line command synopsis to STDOUT.
  sub print_usage {
      my @synopsis = (
          "Usage: $PROGNAME",
          '-H <host>',
          '-C <snmp_community>',
          '-e <command>',
          '[-w <low>,<high>]',
          '[-c <low>,<high>]',
          '[-t <timeout>]',
      );
      print join( ' ', @synopsis ) . "\n";
  }
  # Print the full help text to STDOUT: revision, copyright banner, the usage
  # line, and one entry per documented option.
  sub print_help {
      local_print_revision();

      my $copyright = "Copyright (c) 2002 Al Tobey <albert.tobey\@priority-health.com>\n\n";
      my $title     = "SNMP Process Monitor plugin for Nagios\n\n";
      print $copyright, $title;

      print_usage();

      # One element per output line, in display order.
      my @option_help = (
          q{-v, --verbose},
          q{print extra debugging information},
          q{-h, --help},
          q{print this help message},
          q{-H, --hostname=HOST},
          q{name or IP address of host to check},
          q{-C, --community=COMMUNITY NAME},
          q{community name for the host's SNMP agent},
          q{-e, --command=COMMAND NAME (ps -e style)},
          q{what command should be monitored?},
          q{-r, --regex=Perl RE},
          q{use a perl regular expression to find your process},
          q{-w, --warning=INTEGER[,INTEGER]},
          q{minimum and maximum number of processes before a warning is issued (Default 1,-1)},
          q{-c, --critical=INTEGER[,INTEGER]},
          q{minimum and maximum number of processes before a critical is issued (Default 1,-1)},
      );
      print join( "\n", @option_help ) . "\n";
  }
  # Print the given list to STDOUT, but only when verbose mode was requested
  # ($opt_verbose is defined); otherwise do nothing.
  sub verbose (@) {
      my @messages = @_;
      unless ( defined($opt_verbose) ) {
          return;
      }
      print @messages;
  }
  # Abort the plugin if the last request on $snmp_session recorded an error:
  # print the session's error string as an UNKNOWN status line and exit with
  # the UNKNOWN code. Does nothing when the error number is false.
  sub check_for_errors {
      my $error_number = $snmp_session->{ErrorNum};
      if ( $error_number ) {
          my $error_text = $snmp_session->{ErrorStr};
          print "UNKNOWN - error retrieving SNMP data: $error_text\n";
          exit $ERRORS{UNKNOWN};
      }
  }
  115. # =========================================================================== #
  116. # =====> MAIN
  117. # =========================================================================== #
  # Parse the command line; exits UNKNOWN itself when required options are
  # missing or help was requested.
  process_options();

  # Watchdog so a silent agent cannot hang Nagios.
  # NOTE(review): no $SIG{ALRM} handler is installed in this part of the file,
  # so presumably the default signal action ends the plugin - confirm.
  alarm( $TIMEOUT );

  $snmp_session = SNMP::Session->new(
      DestHost   => $opt_host,
      Community  => $opt_community,
      RemotePort => $opt_port,
      Version    => '2c',
  );

  # Without a session object every later method call would die with a Perl
  # error instead of a plugin status line.
  if ( !defined($snmp_session) ) {
      print "UNKNOWN - could not create an SNMP session to $opt_host\n";
      exit $ERRORS{UNKNOWN};
  }

  # The number of running processes is used as max-repetitions for the walk.
  my $process_count = SNMP::Varbind->new( ['hrSystemProcesses', 0] );
  $snmp_session->get( $process_count );
  check_for_errors();

  # retrieve the data from the remote host
  my( $names, $index ) = $snmp_session->bulkwalk( 0, $process_count->val, [['hrSWRunName'], ['hrSWRunIndex']] );
  check_for_errors();

  # Processes whose run name ends in one of these are treated as interpreters;
  # the script they run is looked up in their parameters instead.
  my $interpreter_re         = qr/(perl|\/usr\/bin\/sh|\/bin\/bash|\/bin\/sh)$/;
  my $command_is_interpreter = defined($opt_command) && $opt_command =~ $interpreter_re;

  foreach my $row ( @{ $names || [] } ) {
      my $iid      = $row->iid;
      my $run_name = $row->val;
      my $entry    = $processes{$iid} ||= {};

      ( $entry->{name} = $run_name ) =~ s#.*/##;    # strip path

      # Captured explicitly so the verbose message below never reports a
      # stale match variable when this branch is entered because of -r.
      my ($interpreter) = $run_name =~ $interpreter_re;

      next unless defined($opt_regex)
          || ( defined($interpreter) && !$command_is_interpreter );

      # fetch the runtime parameters of the process
      my $parm_var = SNMP::Varbind->new( ['hrSWRunParameters', $iid] );
      $snmp_session->get( $parm_var );
      check_for_errors();

      # only strip if we're looking for a specific command
      # NOTE(review): when both -e and -r are given, this also replaces the
      # name of non-interpreter processes with their first parameter, as the
      # original code did - confirm that is intended.
      if ( defined($opt_command) ) {
          if ( defined($interpreter) ) {
              verbose "process ",$iid," uses $interpreter as an interpreter - getting parameters\n";
          }
          else {
              verbose "process ",$iid," - getting parameters\n";
          }
          $entry->{name} = $parm_var->val;
          # Drop the arguments first: the path strip below is greedy and would
          # otherwise run up to the last slash of any argument.
          $entry->{name} =~ s/\s+.*$//;
          # strip path name off the front
          $entry->{name} =~ s#.*/##;
      }
      else {
          # get the longer full-path style listing
          my $path_var = SNMP::Varbind->new( ['hrSWRunPath', $iid] );
          $snmp_session->get( $path_var );
          check_for_errors();
          # use the full 'ps -efl' style listing for regular expression matching
          $entry->{name} = $path_var->val.' '.$parm_var->val;
      }
  }

  # Only now is the network traffic finished; the loop above still issues
  # per-process requests, so the watchdog has to cover it as well.
  alarm( 0 );

  foreach my $row ( @{ $index || [] } ) {
      $processes{$row->iid}->{pid} = $row->val;
  }

  my @pids    = ();
  my @matches = ();
  my $pattern = defined($opt_regex) ? qr/$opt_regex/ : undef;

  # Sorted so the reported pid list does not depend on hash ordering.
  foreach my $key ( sort { $a <=> $b } keys(%processes) ) {
      my $entry = $processes{$key};
      my $name  = $entry->{name};
      next unless defined($name);    # index row without a matching name row

      my $is_match = ( defined($opt_command) && $name eq $opt_command )
                  || ( defined($pattern)     && $name =~ $pattern );
      next unless $is_match;

      push( @matches, $entry );
      push( @pids, $entry->{pid} );
      verbose "process '$name' has pid ",
          "$entry->{pid} and index $key\n";
  }
  my $count = @matches;

  # warning, critical: a bound is only checked when it is greater than zero;
  # critical is evaluated last so it overrides warning.
  if ( ($opt_warning->[0] > 0 && $opt_warning->[0] > $count)
    || ($opt_warning->[1] > 0 && $opt_warning->[1] <= $count) ) {
      $exit = 'WARNING';
  }
  if ( ($opt_critical->[0] > 0 && $opt_critical->[0] > $count)
    || ($opt_critical->[1] > 0 && $opt_critical->[1] <= $count) ) {
      $exit = 'CRITICAL';
  }

  print "$exit - $count processes with pid(s) ",join(',',@pids);

  # print the number of processes if statistics are requested
  if ( defined($opt_stats) ) {
      print "|count:$count\n";
  }
  else {
      print "\n";
  }

  exit $ERRORS{$exit};