|
|
@@ -0,0 +1,666 @@
|
|
|
+#!/usr/bin/perl -w
|
|
|
+
|
|
|
+# check_status.pl Nagios Plugin - Version 1.3
|
|
|
+# Last Updated: 1/9/2003
|
|
|
+#
|
|
|
+# Report any bugs/questions to Russell Scibetti at russell@quadrix.com
|
|
|
+#
|
|
|
+# check_status Change Log:
|
|
|
+#
|
|
|
+# To do for 1.4
|
|
|
+# - Better help and documentation (separate doc?)
|
|
|
+# - Take argument (patterns to match) from a separate spec file
|
|
|
+#
|
|
|
+# New Addition to 1.3
|
|
|
+# - Added ChangeLog information and updated --help output
|
|
|
+# - hostdown (hd) argument for how a service check should respond
|
|
|
+# when its host is Down/Unreachable
|
|
|
+# (--hostdown="ok|warning|critical|unknown")
|
|
|
+# - Changed name from check_state to check_status
|
|
|
+# - Set hostdown to default to OK when the argument isn't specified
|
|
|
+# - Number of Hosts checked is now output in OK result
|
|
|
+#
|
|
|
+# Version 1.2 additions:
|
|
|
+#
|
|
|
+# - Added ability to handle ack'd and downtimed services differently
|
|
|
+# depending on argument provided
|
|
|
+# (--ack="ok|warning|critical|unknown|down|unreachable"
|
|
|
+# --dt="ok|warning|critical|unknown|down|unreachable")
|
|
|
+#
|
|
|
+# Version 1.1 additions:
|
|
|
+#
|
|
|
+# - Added --host=<regex>, --servhost=<regex> to allow for specific field
|
|
|
+# matching (host for matching hostname in host checks, servhost for
|
|
|
+# matching the hostname in service checks, service for matching the
|
|
|
+# service name in service checks)
|
|
|
+# - Output the number of OK services for an OK output
|
|
|
+#
|
|
|
+# Version 1.0 features:
|
|
|
+#
|
|
|
+# - Freshness check of status.log (timestamp)
|
|
|
+# - Match service or host checks
|
|
|
+# - Can ignore acknowledged or downtimed services/hosts (--ack, --dt)
|
|
|
+# - Can output different levels of detail dependent on # of problems
|
|
|
+# - Can check for number of critical, warning, or unknowns
|
|
|
+#
|
|
|
+#############################################################
|
|
|
+
|
|
|
+use Getopt::Long;
|
|
|
+use File::stat;
|
|
|
+
|
|
|
# Command line parsing.  Bundling is enabled, so single-letter options may
# be combined behind one dash and multi-letter names need a double dash.
Getopt::Long::Configure('bundling');

# Each row is: destination variable, then every option spec that fills it.
my @option_bindings = (
    [ \$version,   "V",    "version"     ],
    [ \$help,      "h",    "help"        ],
    [ \$verbose,   "v",    "verbose"     ],
    [ \$warning,   "w=s",  "warning=s"   ],
    [ \$critical,  "c=s",  "critical=s"  ],
    [ \$unknown,   "u=s",  "unknown=s"   ],
    [ \$pattern,   "p=s",  "pattern=s"   ],
    [ \$service,   "S:s",  "service:s"   ],
    [ \$status,    "s=s",  "status=s"    ],
    [ \$dir,       "d=s",  "dir=s"       ],
    [ \$details,   "D=s",  "details=s"   ],
    [ \$host,      "H:s",  "host:s"      ],
    [ \$freshness, "f=s",  "freshness=s" ],
    [ \$servhost,  "servhost=s"          ],
    [ \$ack,       "a:s",  "ack:s"       ],
    [ \$dt,        "dt:s", "downtime:s"  ],
    [ \$hdown,     "hd:s", "hostdown:s"  ],
    [ \$ok,        "ok"                  ],
);

# Flatten the table into the (spec => destination) pairs GetOptions expects.
GetOptions(
    map {
        my ($destination, @specs) = @$_;
        map { ( $_, $destination ) } @specs;
    } @option_bindings
);
|
|
|
+
|
|
|
# Constants: exit codes handed to exitcheck().
my ($OK, $WARNING, $CRITICAL, $UNKNOWN) = (0, 1, 2, 3);

# State names, used both as hash keys and in the printed output.
my ($crit, $warn, $unk) = ("CRITICAL", "WARNING", "UNKNOWN");
my ($down, $unreach)    = ("DOWN", "UNREACHABLE");
|
|
|
+
|
|
|
# --help prints the version banner followed by the help text; --version
# prints the banner only.  In both cases the run ends through exitcheck()
# with the UNKNOWN code.
if ($help || $version) {
    printVersion();
    printHelp() if $help;
    exitcheck($UNKNOWN);
}
|
|
|
+
|
|
|
# Decide which status log to read.  An explicit -s value is used as given;
# otherwise "status.log" inside the -d directory is used.  With neither
# option, print the short usage and stop.
if (!$status) {
    if (!$dir) {
        print "Usage: $0 -s <status file> | -d <Nagios log dir>\n";
        print "Use the --help option for full list of arguments\n";
        exitcheck($UNKNOWN);
    }

    # Add a separator only when the directory does not already end in one.
    my $separator = ($dir =~ m#[^/]/$#) ? "" : "/";
    $status = $dir . $separator . "status.log";
}
|
|
|
+
|
|
|
# Regex used when a selector is requested without a value.
my $any_name = "[^\\s]*";

# --host given without a (true) value: check every host.
$host = $any_name if defined $host && !$host;

# Not a host check: any service selector left unset matches everything.
if (!$host) {
    $servhost = $any_name if !$servhost;
    $service  = $any_name if !$service;
}
|
|
|
+
|
|
|
# Validate --ack, --dt and --hostdown.
#
# Each option may be given without a value, in which case it means "ok".
# A value must be exactly one of the documented state names; the match is
# anchored (so "notok" is rejected) and case-insensitive (so "OK" is
# accepted), and the stored value is normalised to lower case.
for my $state_option ( [ \$ack, "ack" ], [ \$dt, "dt" ], [ \$hdown, "hostdown" ] ) {
    my ($value_ref, $label) = @$state_option;

    # Option not present on the command line.
    next if !defined $$value_ref;

    # Present but empty: default to "ok".
    if (!$$value_ref) {
        $$value_ref = "ok";
        next;
    }

    if ($$value_ref !~ /^(?:ok|critical|warning|unknown|down|unreachable)$/i) {
        print "Invalid value for $label\n";
        exitcheck($UNKNOWN);
    }
    $$value_ref = lc $$value_ref;
}

# The change log and the --help text both state that services on a
# Down/Unreachable host count as OK when --hostdown is not given at all.
$hdown = "ok" if !defined $hdown;
|
|
|
+
|
|
|
# First number of --details: at or below this many problems, full detail
# is printed.
my $much_details = 0;

# Alternations matched against the state field of a log line.
my $ServiceNotOK = "CRITICAL|WARNING|UNKNOWN";
my $HostNotOK    = "DOWN|UNREACHABLE";

# Number of problems counted per state.
my %numprob = (
    "WARNING"     => 0,
    "CRITICAL"    => 0,
    "UNKNOWN"     => 0,
    "DOWN"        => 0,
    "UNREACHABLE" => 0,
);

# Single-number thresholds (the "-c N" / "-w N" / "-u N" form).
my ($CritOnly, $WarnOnly, $UnkOnly) = (0, 0, 0);

# Comma-separated thresholds: the raw pieces and the per-state limits.
my (@wlev, @clev, @ulev);
my %warnlevel = ("WARNING" => 0, "CRITICAL" => 0, "UNKNOWN" => 0);
my %critlevel = ("WARNING" => 0, "CRITICAL" => 0, "UNKNOWN" => 0);
my %unklevel  = ("WARNING" => 0, "CRITICAL" => 0, "UNKNOWN" => 0);
my %hostlevel = ("DOWN" => 0, "UNREACHABLE" => 0);

# Store Hosts in downtime
my @hostdowntime;
my $numdowntime = 0;

# Store Hosts in a Down/Unreachable state
my @hostdown;
my $numdown = 0;

# Hash for storing state-change to OK times for hosts:
my %hostoktimes;

# Number of matches in parsing
my $nummatch = 0;
|
|
|
+
|
|
|
# Interpret -w, -c and -u.  All three follow the same rules:
#   * option absent (or false)  -> single threshold of 1
#   * plain value               -> single threshold with that value
#   * comma separated value     -> per-state limits in the order
#                                  WARNING,CRITICAL[,UNKNOWN]; the single
#                                  threshold stays 0
for my $threshold (
    [ $warning,  \@wlev, \%warnlevel, \$WarnOnly ],
    [ $critical, \@clev, \%critlevel, \$CritOnly ],
    [ $unknown,  \@ulev, \%unklevel,  \$UnkOnly  ],
) {
    my ($argument, $pieces, $limits, $single) = @$threshold;

    if (!$argument) {
        $$single = 1;
        next;
    }

    if ($argument !~ /,/) {
        $$single = $argument;
        next;
    }

    @$pieces = split /,/, $argument;
    $limits->{"WARNING"}  = $pieces->[0];
    $limits->{"CRITICAL"} = $pieces->[1];

    # The third value is optional and only stored when it is true.
    $limits->{"UNKNOWN"} = $pieces->[2] if $pieces->[2];
}
|
|
|
+
|
|
|
+
|
|
|
# --freshness is given in minutes (30 when absent); keep it in seconds.
$freshness = ($freshness || 30) * 60;
|
|
|
+
|
|
|
# Per-state bookkeeping for the detail output: how many entries have been
# appended so far, and the accumulated text, for both detail levels.
my @tracked_states = ("CRITICAL", "WARNING", "UNKNOWN", "DOWN", "UNREACHABLE");

my %ct      = map { ( $_, 0 ) } @tracked_states;
my %much_ct = map { ( $_, 0 ) } @tracked_states;

my %output      = map { ( $_, "" ) } @tracked_states;
my %much_output = map { ( $_, "" ) } @tracked_states;

# "-D a,b": a is the full-detail limit, b the some-detail limit.
# A plain "-D b" leaves the full-detail limit untouched.
if ($details && $details =~ /,/) {
    my @limits = split /,/, $details;
    $much_details = $limits[0];
    $details      = $limits[1];
}
|
|
|
+
|
|
|
# Open the status log read-only.  The three-argument form keeps characters
# in the file name (such as a leading ">") from being read as an open mode.
# A failure is reported as a plugin result instead of dying, so the exit
# code stays one of the four Nagios codes.
if (!open(*sta, "<", $status)) {
    print "State UNKNOWN - Cannot open status file $status: $!\n";
    exitcheck($UNKNOWN);
}

$curr_time = time;

# File::stat's stat() returns an object, or a false value on failure.
my $status_info = stat($status);
if (!$status_info) {
    print "State UNKNOWN - Cannot stat status file $status: $!\n";
    exitcheck($UNKNOWN);
}
$file_time = $status_info->mtime;

# A log that has not been written within the freshness window is CRITICAL.
if ($curr_time - $file_time > $freshness) {
    print "State CRITICAL - Status file is stale!!!\n";
    exitcheck($CRITICAL);
}
|
|
|
+
|
|
|
# Value the service selectors hold when they were left at "match anything".
my $match_anything = "[^\\s]*";

# Walk the status log one line at a time.  Fields are separated by ";".
while (my $line = <sta>) {
    chomp $line;

    if ($line =~ /^\S+\s+HOST;/) {
        my @hostfields = split /;/, $line;

        # If you care about matching hosts (not services):
        if ($host && $hostfields[1] =~ /$host/) {
            $nummatch++;
            if ($hostfields[2] =~ /$HostNotOK/) {
                addproblem($line, $hostfields[2]);
            }
        }

        # If you are matching services, gather host information:
        else {
            if ($hostfields[2] =~ /$HostNotOK/) {
                push @hostdown, $hostfields[1];
                $numdown++;
            }
            else {
                $hostoktimes{$hostfields[1]} = $hostfields[4];
            }

            # Field 17 is compared with "0" to decide whether the host is
            # in downtime; a line too short to carry it is not counted.
            if (defined $hostfields[17] && $hostfields[17] ne "0") {
                push @hostdowntime, $hostfields[1];
                $numdowntime++;
            }
        }
    }
    elsif (!$host && $line =~ /^\S+\s+SERVICE;/) {
        my @servfields = split /;/, $line;

        my $fields_match = ($servfields[1] =~ /$servhost/)
                        && ($servfields[2] =~ /$service/);

        # With --pattern and no real --servhost/--service restriction, the
        # pattern alone decides.  Previously the match-anything defaults
        # were ORed in, so --pattern on its own selected every service.
        # When field selectors were also given, the two are still ORed.
        my $selected;
        if ($pattern) {
            my $fields_restricted = ($servhost ne $match_anything)
                                 || ($service ne $match_anything);
            $selected = ($line =~ /$pattern/)
                     || ($fields_restricted && $fields_match);
        }
        else {
            $selected = $fields_match;
        }

        if ($selected) {
            $nummatch++;

            # Only HARD states that are not OK count as problems.
            if (($servfields[5] eq "HARD") && ($servfields[3] =~ /$ServiceNotOK/)) {
                addproblem($line, $servfields[3]);
            }
        }
    }
}

close(*sta);
|
|
|
+
|
|
|
# No host or service in the log met the selection criteria.
if ($nummatch == 0) {
    print "Nothing Matches your criteria!\n";
    exitcheck($UNKNOWN);
}

# Count the number of problems (for reference):
$total = $host
    ? $numprob{"DOWN"} + $numprob{"UNREACHABLE"}
    : $numprob{"WARNING"} + $numprob{"CRITICAL"} + $numprob{"UNKNOWN"};

my $numok = $nummatch - $total;

# True when, for a comma-form threshold set, at least one state's problem
# count has reached its limit.  A limit that was omitted or is not a
# positive number is ignored: comparing against it as "count >= 0" would
# always succeed and force the result regardless of the log contents.
my $limit_reached = sub {
    my ($limits) = @_;
    for my $state ("WARNING", "CRITICAL", "UNKNOWN") {
        my $limit = $limits->{$state};
        next unless $limit && $limit > 0;
        return 1 if $numprob{$state} >= $limit;
    }
    return 0;
};

# If this is a host state check:
if ($host) {
    if ($numprob{"DOWN"} > 0 || $numprob{"UNREACHABLE"} > 0) {
        # List the hosts when there are few enough problems, else counts only.
        if ($details && ($total <= $details)) {
            print "State CRITICAL - $total Host Problems: $output{$down} $output{$unreach}\n";
        }
        else {
            print "State CRITICAL - $numprob{$down} Hosts Down, $numprob{$unreach} Hosts Unreachable\n";
        }
        exitcheck($CRITICAL);
    }
    else {
        print "State OK - $numok Hosts Up, $total Problems\n";
        exitcheck($OK);
    }
}

# Critical defined only in terms of the number of criticals...
elsif ($CritOnly && ($numprob{"CRITICAL"} >= $CritOnly)) {
    countAndPrint($crit, $numprob{$crit}, 0);
    exitcheck($CRITICAL);
}

# Critical in terms of the numbers of warnings, criticals and unknowns...
elsif (!$CritOnly && $limit_reached->(\%critlevel)) {
    countAndPrint($crit, $total, 1);
    exitcheck($CRITICAL);
}

# Warning in terms of the number of warnings only...
elsif ($WarnOnly && ($numprob{"WARNING"} >= $WarnOnly)) {
    countAndPrint($warn, $numprob{$warn}, 0);
    exitcheck($WARNING);
}

# Warning in terms of the numbers of warnings, criticals and unknowns...
elsif (!$WarnOnly && $limit_reached->(\%warnlevel)) {
    countAndPrint($warn, $total, 1);
    exitcheck($WARNING);
}

# Unknown in terms of the number of unknowns only...
elsif ($UnkOnly && ($numprob{"UNKNOWN"} >= $UnkOnly)) {
    countAndPrint($unk, $numprob{$unk}, 0);
    exitcheck($UNKNOWN);
}

# Unknown in terms of the numbers of warnings, criticals and unknowns...
elsif (!$UnkOnly && $limit_reached->(\%unklevel)) {
    countAndPrint($unk, $total, 1);
    exitcheck($UNKNOWN);
}

# Everything is OK!
else {
    print "State OK - $numok OK, $total problems\n";
    exitcheck($OK);
}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+############################
|
|
|
+# Subroutines
|
|
|
+############################
|
|
|
+
|
|
|
# Terminate the plugin with the given exit code.
#
#   $_[0] - exit code to use (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN)
#
# When the --ok flag is set the exit code is always 0, whatever was asked.
# This sub never returns.
sub exitcheck {
    my ($requested) = @_;
    exit($ok ? 0 : $requested);
}
|
|
|
+
|
|
|
# Print the one-line result for a service check.
#
#   $_[0] - state name being reported (also the key into the output hashes)
#   $_[1] - number of problems to report
#   $_[2] - true when the line should cover critical, warning and unknown
#           together, false when it covers only the given state
#
# Without --details the line is just the count.  With --details the count
# selects the amount of text: the full-detail listing when it is within
# $much_details, the shorter listing when it is within $details, and plain
# numbers otherwise.
sub countAndPrint {
    my ($state, $count, $alltypes) = @_;
    my $lower_state = lc $state;
    my $summary;

    if (!$details) {
        $summary = "$count problems";
    }
    else {
        # Pick the listing to show, if the count is small enough for one.
        my $listing = $count <= $much_details ? \%much_output
                    : $count <= $details      ? \%output
                    :                           undef;

        if ($listing) {
            $summary = $alltypes
                ? "$count problems: $listing->{$crit} $listing->{$warn} $listing->{$unk}"
                : "$count ${lower_state}: $listing->{$state}";
        }
        elsif ($alltypes) {
            $summary = "$numprob{$crit} critical, $numprob{$warn} warning, $numprob{$unk} unknown";
        }
        else {
            $summary = "$count ${lower_state}";
        }
    }

    print "State $state - $summary\n";
}
|
|
|
+
|
|
|
+
|
|
|
# Add-in the problem found in the status log
#
#   $_[0] - the raw status log line (fields separated by ";")
#   $_[1] - the state reported on that line
#
# Decides whether the line counts as a problem and under which state.  The
# --ack, --hostdown and --dt settings can either drop the problem (value
# "ok") or re-file it under the state they name.  A service whose host
# returned to OK after the service was last checked is dropped as well.
# For problems that remain, the per-state counter in %numprob is increased
# and, when --details is active, text is appended to %output/%much_output.
sub addproblem {
    my ($line, $reported_state) = @_;

    my $test    = 1;                  # stays 1 while the line counts as a problem
    my $type    = $reported_state;    # state the problem is counted under
    my $diffout = "";                 # replacement detail text, if any

    my @values = split /;/, $line;

    # Apply an --ack/--dt/--hostdown setting: "ok" drops the problem,
    # anything else becomes the (upper-cased) state it is counted under.
    my $apply_setting = sub {
        my ($setting) = @_;
        if ($setting =~ /ok/) {
            $test = 0;
        }
        else {
            $type = "\U$setting";
        }
    };

    if (!$host) {
        # Service line.  The field positions below are those the original
        # code relied on; their meaning is inferred from how they are used.
        my $namehold = $values[1];

        if ($ack && ($values[13] eq "1")) {
            $apply_setting->($ack);
        }
        # Host names are compared exactly: a regex match would also hit
        # hosts whose name merely contains this one ("web1" vs "web10").
        elsif ($hdown && grep { $_ eq $namehold } @hostdown) {
            $apply_setting->($hdown);
            $diffout = "$values[1] is down" if $test;
        }
        elsif ($dt && (($values[27] ne "0") || (grep { $_ eq $namehold } @hostdowntime))) {
            $apply_setting->($dt);
        }
        elsif (exists $hostoktimes{$namehold}) {
            # If the state change time of the host is more recent than the last
            # service check, must wait until the next service check runs!
            if ($hostoktimes{$namehold} > $values[6]) {
                $test = 0;
            }
        }
    }
    else {
        # Host line.
        if ($ack && $values[5]) {
            $apply_setting->($ack);
        }
        elsif ($dt && ($values[17] ne "0")) {
            $apply_setting->($dt);
        }
    }

    # Dropped problems are neither listed nor counted.
    return if !$test;

    if ($details) {
        if (!$host) {
            if ($diffout) {
                $much_output{$type} .= " $diffout;";
                $output{$type}      .= "$diffout;";
                $much_ct{$type}++;
                $ct{$type}++;
            }
            else {
                # Each listing stops growing once it holds its limit.
                if ($much_details && $much_ct{$type} < $much_details) {
                    $much_output{$type} .= " $values[2] on $values[1] $values[31];";
                    $much_ct{$type}++;
                }
                if ($ct{$type} < $details) {
                    $output{$type} .= " $values[2] on $values[1];";
                    $ct{$type}++;
                }
            }
        }
        else {
            $much_output{$type} .= " $values[1] $reported_state $values[20],";
            # Was $much_ct{type}: the missing sigil counted every host
            # under the literal key "type" instead of its state.
            $much_ct{$type}++;
            $output{$type} .= " $values[1] HOST $reported_state,";
            $ct{$type}++;
        }
    }

    $numprob{$type}++;
}
|
|
|
+
|
|
|
+################################
|
|
|
+#
|
|
|
+# Version and Help Information
|
|
|
+#
|
|
|
+################################
|
|
|
+
|
|
|
# Print the version banner and licence notice to the selected output handle.
# Takes no arguments.  print is used rather than printf so that a "%" in
# the program name ($0) is shown as-is instead of being read as a format.
sub printVersion {
    print <<EndVersion;
$0 (nagios-plugins) 1.3
The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute
copies of the plugins under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.
EndVersion
}
|
|
|
+
|
|
|
# Print the full help text to the selected output handle.  Takes no
# arguments.  print is used rather than printf so that a "%" in the
# program name ($0) is shown as-is.  The short options listed for
# --service (-S) and --host (-H) are the ones the option parser accepts.
sub printHelp {
    print <<EOF;

This plugin parses through the Nagios status log and will return a
Critical, Warning, or Unknown state depending on the number of
Critical, Warning, and/or Unknown services found in the log
(or Down/Unreachable hosts when matching against hosts)

Usage: $0 -s <Status File> | -d <Nagios Log Directory>
 [-w #[,#][,#]] [-c #[,#][,#]] [-u #[,#][,#]]
 [--service=<RegEx> | --servhost=<RegEx> | --pattern=<RegEx> |
 --host | --host=<RegEx>]
 [--ack[=string]] [--dt[=string]] [--hostdown[=string]]
 [-D #[,#]] [--ok] [-f <Log freshness in # minutes>]
 $0 --help
 $0 --version
NOTE: One of -s and -d must be specified

Options:
 -s, --status=FILE_NAME
 Location and name of status log (e.g. /usr/local/nagios/var/status.log)
 -d, --dir=DIRECTORY_NAME
 Directory that contains the nagios logs (e.g. /usr/local/nagios/var/)
 -w, --warning=INTEGER[,INTEGER][,INTEGER]
 #: Number of warnings to result in a WARNING state
 OR
 #,#: Warning,Criticals to result in a WARNING state
 OR
 #,#,#: Warning,Critical,Unknown to result in a WARNING state
 Default: -w=1
 -c, --critical=INTEGER[,INTEGER][,INTEGER]
 #: Number of criticals to result in a CRITICAL state
 OR
 #,#: Warning,Criticals to result in a CRITICAL state
 OR
 #,#,#: Warning,Critical,Unknown to result in a CRITICAL state
 Default: -c=1
 -u, --unknown=INTEGER[,INTEGER][,INTEGER]
 #: Number of unknowns to result in a UNKNOWN state
 OR
 #,#: Warning,Criticals to result in a UNKNOWN state
 OR
 #,#,#: Warning,Critical,Unknown to result in a UNKNOWN state
 Default: -u=1
 -S, --service[=REGEX]
 Only match services [that match the RegEx]
 (--service is default setting if no other matching arguments provided)
 --servhost=REGEX
 Only match services whose host match the RegEx
 -p, --pattern=REGEX
 Only parse for this regular expression (services only, not hosts)
 -H, --host[=REGEX]
 Report on the state of hosts (whose name matches the RegEx if provided)
 -a, --ack[=ok|warning|critical|unknown|down|unreachable]
 Handle Acknowledged problems [--ack defaults to ok]
 --dt, --downtime[=ok|warning|critical|unknown|down|unreachable]
 Handle problems in scheduled downtime [--dt defaults to ok]
 --hd, --hostdown[=ok|warning|critical|unknown|down|unreachable]
 Handle services whose Host is down [--hd defaults to ok]
 -D, --details=INTEGER[,INTEGER]
 Amount of verbosity to output
 If # problems:
 <= 1st integer, return full details (each plugin's output)
 <= 2nd integer, return some details (list each service host pair)
 > 2nd integer, return the # of problems
 -f, --freshness=INTEGER
 Number of minutes old the log can be to make sure Nagios is running
 (Default = 30 minutes)
 --ok
 Return an OK exit code, regardless of number of problems found
 -h, --help
 Print detailed help screen
 -V, --version
 Print version information

For service checking (use --service and/or --servhost):
1. The values of warning, critical, and unknown default to 1, i.e.
$0 will return CRITICAL if there is at least 1 critical service,
WARNING if there is at least 1 warning service, and UNKNOWN if there is
at least one unknown service.

2. If a service's host is DOWN or UNREACHABLE, $0 will use the
value of --hostdown to determine how to treat the service. Without that
argument, $0 will count the service as OK.

3. If a service's host is OK, but the last host-state change occurred more
recently than the last service check, $0 will ignore that service
(want to wait until the service has been checked after a host has recovered
or you may get service alert for services that still need to be checked)

4. If the --dt, --ack, or --hd tags are used, $0 will use the value
of the arguments to determine how to handle services in downtime, acknowledged,
or with down hosts (default=OK). For service checks, --dt will also check
if the service's host is in a downtime.

For host checking (use --host):
1. Using the --host argument, $0 will look for DOWN and UNREACHABLE
hosts. If any are found, $0 will return a CRITICAL. You can provide
an REGEX for --host to only check hosts with matching host names.

2. If the --dt or --ack tags are used, $0 will use the value of the
--dt/--ack arguments to determine the state of the host (default is OK)

EOF
}
|