check_ntp.pl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. #!/usr/bin/perl -w
  2. # (c)1999 Ian Cass, Knowledge Matters Ltd.
  3. # Read the GNU copyright stuff for all the legalese
  4. #
  5. # Check NTP time servers plugin. This plugin requires the ntpdate utility to
  6. # be installed on the system, however since it's part of the ntp suite, you
  7. # should already have it installed.
  8. #
  9. # $Id$
  10. #
  11. # Nothing clever done in this program - its a very simple bare basics hack to
  12. # get the job done.
  13. #
  14. # Things to do...
  15. # check @words[9] for time differences greater than +/- x secs & return a
  16. # warning.
  17. #
  18. # (c) 1999 Mark Jewiss, Knowledge Matters Limited
  19. # 22-9-1999, 12:45
  20. #
  21. # Modified script to accept 2 parameters or set defaults.
  22. # Now issues warning or critical alert if the time difference is greater than the
  23. # time passed.
  24. #
  25. # These changes have not been tested completely due to the unavailability of a
  26. # server with the incorrect time.
  27. #
  28. # (c) 1999 Bo Kersey, VirCIO - Managed Server Solutions <bo@vircio.com>
  29. # 22-10-99, 12:17
  30. #
  31. # Modified the script to give usage if no parameters are input.
  32. #
  33. # Modified the script to check for negative as well as positive
  34. # time differences.
  35. #
  36. # Modified the script to work with ntpdate 3-5.93e Wed Apr 14 20:23:03 EDT 1999
  37. #
  38. # Modified the script to work with ntpdate's that return adjust or offset...
  39. #
  40. #
  41. # Script modified 2000 June 01 by William Pietri <william@bianca.com>
  42. #
  43. # Modified script to handle weird cases:
  44. # o NTP server doesn't respond (e.g., has died)
  45. # o Server has correct time but isn't suitable synchronization
  46. # source. This happens while starting up and if contact
  47. # with master has been lost.
  48. #
  49. # Modified to run under Embedded Perl (sghosh@users.sf.net)
  50. # - combined logic some blocks together..
  51. #
  52. # Added ntpdate check for stratum 16 desynch peer (James Fidell) Feb 03, 2003
  53. #
  54. require 5.004;
  55. use POSIX;
  56. use strict;
  57. use Getopt::Long;
  58. use vars qw($opt_V $opt_h $opt_H $opt_w $opt_c $opt_j $opt_k $verbose $PROGNAME);
  59. use lib utils.pm;
  60. use utils qw($TIMEOUT %ERRORS &print_revision &support);
  $PROGNAME = "check_ntp";

  # Forward declarations: the help routines are defined at the bottom of the
  # file but are called (without arguments) from the option handling below.
  sub print_help ();
  sub print_usage ();

  # Empty the search path and shell start-up variables before any external
  # command is launched; the helper programs are run by absolute path.
  $ENV{'PATH'}     = '';
  $ENV{'BASH_ENV'} = '';
  $ENV{'ENV'}      = '';

  # Default thresholds, in milliseconds.
  my $DEFAULT_OFFSET_WARN = 60000;
  my $DEFAULT_OFFSET_CRIT = 120000;
  my $DEFAULT_JITTER_WARN = 5000;
  my $DEFAULT_JITTER_CRIT = 10000;

  Getopt::Long::Configure('bundling');
  GetOptions
      ("V"   => \$opt_V,   "version"    => \$opt_V,
       "h"   => \$opt_h,   "help"       => \$opt_h,
       "v"   => \$verbose, "verbose"    => \$verbose,
       "w=f" => \$opt_w,   "warning=f"  => \$opt_w,   # offset|adjust warning if above this number
       "c=f" => \$opt_c,   "critical=f" => \$opt_c,   # offset|adjust critical if above this number
       "j=s" => \$opt_j,   "jwarn=s"    => \$opt_j,   # jitter warning if above this number
       "k=s" => \$opt_k,   "jcrit=s"    => \$opt_k,   # jitter critical if above this number
       "H=s" => \$opt_H,   "hostname=s" => \$opt_H);

  if ($opt_V) {
      print_revision($PROGNAME,'$Revision$ ');
      exit $ERRORS{'OK'};
  }

  if ($opt_h) {
      print_help();
      exit $ERRORS{'OK'};
  }

  # The host may also be given as the first non-option argument.
  $opt_H = shift unless ($opt_H);

  # Accept only a dotted-quad address or a plain host name.  The value is
  # later interpolated into a shell command line, so $host is always taken
  # from the capture of this anchored pattern, never from the raw option.
  # Declared separately from the assignment: "my ... if ..." has undefined
  # behaviour in Perl.
  my $host;
  if ($opt_H && $opt_H =~ m/^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+|[a-zA-Z][-a-zA-Z0-9]+(\.[a-zA-Z][-a-zA-Z0-9]+)*)$/) {
      $host = $1;
  }
  unless ($host) {
      print "No target host specified\n";
      print_usage();
      exit $ERRORS{'UNKNOWN'};
  }

  # Fall back to the defaults only when an option was not supplied at all, so
  # that an explicit threshold of 0 is honoured.
  $opt_w = $DEFAULT_OFFSET_WARN unless (defined($opt_w));
  $opt_c = $DEFAULT_OFFSET_CRIT unless (defined($opt_c));
  $opt_j = $DEFAULT_JITTER_WARN unless (defined($opt_j) && length($opt_j));
  $opt_k = $DEFAULT_JITTER_CRIT unless (defined($opt_k) && length($opt_k));

  # Extract the numeric part of each threshold.
  my ($owarn, $ocrit, $jwarn, $jcrit);
  $owarn = $1 if ($opt_w =~ /([0-9.]+)/);
  $ocrit = $1 if ($opt_c =~ /([0-9.]+)/);
  $jwarn = $1 if ($opt_j =~ /([0-9]+)/);
  $jcrit = $1 if ($opt_k =~ /([0-9]+)/);

  # The jitter options are plain strings as far as Getopt::Long is concerned,
  # so a value without any digits ends up undefined here.
  unless (defined($owarn) && defined($ocrit) && defined($jwarn) && defined($jcrit)) {
      print "Warning and critical thresholds must be numeric\n";
      print_usage();
      exit $ERRORS{'UNKNOWN'};
  }

  if ($ocrit < $owarn) {
      print "Critical offset should be larger than warning offset\n";
      print_usage();
      exit $ERRORS{"UNKNOWN"};
  }

  # Compare the parsed numbers, not the raw option strings.
  if ($jcrit < $jwarn) {
      print "Critical jitter should be larger than warning jitter\n";
      print_usage();
      exit $ERRORS{'UNKNOWN'};
  }
  # State shared by the ntpdate and ntpq checks and by the final verdict.
  # Everything that has not been measured yet starts out undefined.
  my ($answer, $offset, $jitter, $syspeer);
  my $msg; # first line of output to print if format is invalid
  my $stratum    = -1;
  my $ignoreret  = 0;
  my $candidates = 0;

  # All three status codes start as UNKNOWN until a check says otherwise.
  my ($state, $ntpdate_error, $jitter_error) = ($ERRORS{'UNKNOWN'}) x 3;

  # some systems don't have a proper ntpq (migrated from ntpdc)
  my $have_ntpq = ($utils::PATH_TO_NTPQ && -x $utils::PATH_TO_NTPQ) ? 1 : 0;

  # Just in case of problems, let's not hang Nagios: give up with UNKNOWN
  # once $TIMEOUT seconds have elapsed.
  $SIG{'ALRM'} = sub {
      print "ERROR: No response from ntp server (alarm)\n";
      exit $ERRORS{"UNKNOWN"};
  };
  alarm($TIMEOUT);
  ###
  ###
  ### First, check ntpdate
  ###
  ###
  # Run "ntpdate -q" against $host and scan its output (stderr is merged into
  # stdout) for the stratum, the clock offset and the "no server suitable"
  # diagnostic.  $host comes from the anchored validation pattern applied
  # during option parsing, so it carries no shell metacharacters.
  if (!open (NTPDATE, "$utils::PATH_TO_NTPDATE -q $host 2>&1 |")) {
      print "Could not open ntpdate\n";
      exit $ERRORS{"UNKNOWN"};
  }

  while (<NTPDATE>) {
      print if ($verbose);

      # Remember the first line of output for use in error messages.
      $msg = $_ unless ($msg);

      if (/stratum\s(\d+)/) {
          $stratum = $1;
      }

      if (/(offset|adjust)\s+([-.\d]+)/i) {
          my $offset_sec = $2;

          # An offset of 0.000000 with an error is probably bogus. Actually,
          # it's probably always bogus, but let's be paranoid here.
          if ($offset_sec == 0) {
              undef $offset;
          }
          else {
              # ntpdate reports the offset in seconds, whereas the thresholds
              # and all messages of this plugin are in milliseconds.
              $offset = $offset_sec * 1000;
          }

          $ntpdate_error = defined ($offset) ? $ERRORS{"OK"} : $ERRORS{"CRITICAL"};
          print "ntperr = $ntpdate_error \n" if $verbose;
      }

      if (/no server suitable for synchronization found/) {
          if ($stratum == 16) {
              # Stratum 16 means the peer itself is not synchronized; report
              # that as a warning and ignore ntpdate's exit status below.
              $ntpdate_error = $ERRORS{"WARNING"};
              $msg = "Desynchronized peer server found";
              $ignoreret=1;
          }
          else {
              $ntpdate_error = $ERRORS{"CRITICAL"};
              $msg = "No suitable peer server found - ";
          }
      }
  }

  close (NTPDATE);

  # declare an error if we also get a non-zero return code from ntpdate
  # unless already set to critical
  if ( $? && !$ignoreret ) {
      print "stderr = $? : $! \n" if $verbose;
      $ntpdate_error = $ntpdate_error == $ERRORS{"CRITICAL"} ? $ERRORS{"CRITICAL"} : $ERRORS{"UNKNOWN"} ;
      print "ntperr = $ntpdate_error : $!\n" if $verbose;
  }
  ###
  ###
  ### Then scan xntpq/ntpq if it exists
  ### and look in the 11th column for jitter
  ###
  # Field 1: Tally Code ( Space, 'x','.','-','+','#','*','o')
  # Only match for '*' which implies sys.peer
  # or 'o' which implies pps.peer
  # If both exist, the last one is picked.
  # Field 2: address of the remote peer
  # Field 3: Refid of the clock (0.0.0.0 if unknown)
  # Field 4: stratum (0-15)
  # Field 5: Type of the peer: local (l), unicast (u), multicast (m)
  # broadcast (b); not sure about multicast/broadcast
  # Field 6: last packet receive (in seconds)
  # Field 7: polling interval
  # Field 8: reachability register (octal)
  # Field 9: delay
  # Field 10: offset
  # Field 11: dispersion/jitter
  #
  if ($have_ntpq) {
      # A failure to start ntpq is not reported here; $jitter_error then
      # simply keeps its initial value.
      if ( open(NTPQ,"$utils::PATH_TO_NTPQ -np $host 2>&1 |") ) {
          while (<NTPQ>) {
              print $_ if ($verbose);

              # number of candidates on <host> for sys.peer
              if (/^[*+#o]/) {
                  ++$candidates;
                  print "Candiate count= $candidates\n" if ($verbose);
              }

              # match sys.peer or pps.peer
              next unless (/^[*o](.*)$/);

              # Split the rest of the line into whitespace-separated columns
              # instead of matching each one with a digits-only pattern: the
              # refid may be a name such as .GPS. and the "when" column may
              # be "-" or carry a unit suffix.
              my @fields = split(' ', $1);
              next unless (@fields >= 10);

              my ($peer_addr, $peer_stratum, $peer_jitter) = @fields[0, 2, 9];

              # Guard against unrelated lines that merely start with '*' or 'o'.
              next unless ($peer_stratum =~ /^\d+$/);
              next unless ($peer_jitter  =~ /^-?\d+(?:\.\d*)?$/);

              $syspeer = $peer_addr;
              $jitter  = $peer_jitter;
              print "match $_ \n" if $verbose;

              if ($jitter > $jcrit) {
                  print "Jitter_crit = $jitter :$jcrit\n" if ($verbose);
                  $jitter_error = $ERRORS{'CRITICAL'};
              } elsif ($jitter > $jwarn ) {
                  print "Jitter_warn = $jitter :$jwarn \n" if ($verbose);
                  $jitter_error = $ERRORS{'WARNING'};
              } else {
                  $jitter_error = $ERRORS{'OK'};
              }
          }
          close NTPQ;
      }
  }
  # Combine the ntpdate and ntpq findings into one plugin state and one line
  # of output, then exit with that state.
  if ($ntpdate_error != $ERRORS{'OK'}) {
      $state = $ntpdate_error;

      # $msg is either a fixed text set by the ntpdate check or the first raw
      # line of ntpdate output (which still carries its newline), or it is
      # undefined when ntpdate printed nothing at all.
      my $detail = defined($msg) ? $msg : '';
      chomp $detail;

      if ($ntpdate_error == $ERRORS{'WARNING'} ) {
          $answer = $detail . "\n";
      }
      else {
          # Keep the output on a single line: separate a raw ntpdate line from
          # the trailing explanation unless the text already ends in a dash.
          $detail .= ' - ' if ($detail ne '' && $detail !~ /-\s*$/);
          $answer = $detail . "Server for ntp probably down\n";
      }

      if (defined($offset) && abs($offset) > $ocrit) {
          $state = $ERRORS{'CRITICAL'};
          $answer = "Server Error and offset $offset msec > +/- $ocrit msec\n";
      } elsif (defined($offset) && abs($offset) > $owarn) {
          $answer = "Server error and offset $offset msec > +/- $owarn msec\n";
      } elsif (defined($jitter) && abs($jitter) > $jcrit) {
          $answer = "Server error and jitter $jitter msec > +/- $jcrit msec\n";
      } elsif (defined($jitter) && abs($jitter) > $jwarn) {
          $answer = "Server error and jitter $jitter msec > +/- $jwarn msec\n";
      }

  } elsif ($have_ntpq && $jitter_error != $ERRORS{'OK'}) {
      $state = $jitter_error;

      # $jitter stays undefined when ntpq listed no sys.peer/pps.peer line.
      $answer = defined($jitter)
          ? "Jitter $jitter too high\n"
          : "Jitter could not be determined from ntpq\n";

      if (defined($offset) && abs($offset) > $ocrit) {
          $state = $ERRORS{'CRITICAL'};
          $answer = "Jitter error and offset $offset msec > +/- $ocrit msec\n";
      } elsif (defined($offset) && abs($offset) > $owarn) {
          $answer = "Jitter error and offset $offset msec > +/- $owarn msec\n";
      } elsif (defined($jitter) && abs($jitter) > $jcrit) {
          $answer = "Jitter error and jitter $jitter msec > +/- $jcrit msec\n";
      } elsif (defined($jitter) && abs($jitter) > $jwarn) {
          $answer = "Jitter error and jitter $jitter msec > +/- $jwarn msec\n";
      }

  } else { # no errors from ntpdate or ntpq
      # Without ntpq there is no jitter reading; skip the jitter comparisons
      # and leave the jitter figure out of the message in that case.
      my $have_jitter = defined($jitter);
      my $jitter_note = $have_jitter ? ", jitter $jitter msec" : "";

      if (abs($offset) > $ocrit) {
          $state = $ERRORS{'CRITICAL'};
          $answer = "Offset $offset msec > +/- $ocrit msec$jitter_note\n";
      } elsif ($have_jitter && abs($jitter) > $jcrit ) {
          $state = $ERRORS{'CRITICAL'};
          $answer = "Jitter $jitter msec> +/- $jcrit msec, offset $offset msec \n";
      } elsif (abs($offset) > $owarn) {
          $state = $ERRORS{'WARNING'};
          $answer = "Offset $offset msec > +/- $owarn msec$jitter_note\n";
      } elsif ($have_jitter && abs($jitter) > $jwarn ) {
          $state = $ERRORS{'WARNING'};
          $answer = "Jitter $jitter msec> +/- $jwarn msec, offset $offset msec \n";
      } else {
          $state = $ERRORS{'OK'};
          $answer = "Offset $offset msecs$jitter_note\n";
      }
  }

  # Prefix the answer with the symbolic name of the state.
  foreach my $key (keys %ERRORS) {
      if ($state==$ERRORS{$key}) {
          print ("$key: $answer");
          last;
      }
  }
  exit $state;
  291. ####
  292. #### subs
  # Print the one-line command synopsis to standard output.
  sub print_usage () {
      my $synopsis = "Usage: " . $PROGNAME
          . " -H <host> [-w <warn>] [-c <crit>] [-j <warn>] [-k <crit>] [-v verbose]\n";
      print $synopsis;
  }
  # Print the full help text: revision banner, copyright, usage synopsis, a
  # description of each threshold option with its default, and the support
  # notice supplied by the utils module.
  sub print_help () {
      print_revision($PROGNAME,'$Revision$');
      print "Copyright (c) 2000 Bo Kersey/Karl DeBisschop\n";
      print "\n";
      print_usage();

      # One list element per output line; the leading and trailing empty
      # elements produce the blank line before the text and the final newline.
      print join("\n",
          "",
          "Checks the local timestamp offset versus <host> with ntpdate",
          "Checks the jitter/dispersion of clock signal between <host> and its sys.peer with ntpq",
          "",
          "-w ( --warning)",
          "Clock offset in milliseconds at which a warning message will be generated.",
          " Defaults to $DEFAULT_OFFSET_WARN.",
          "-c (--critical)",
          "Clock offset in milliseconds at which a critical message will be generated.",
          " Defaults to $DEFAULT_OFFSET_CRIT.",
          "-j (--jwarn)",
          "Clock jitter in milliseconds at which a warning message will be generated.",
          " Defaults to $DEFAULT_JITTER_WARN.",
          "-k (--jcrit)",
          # -k is the critical threshold; the text used to say "warning".
          "Clock jitter in milliseconds at which a critical message will be generated.",
          " Defaults to $DEFAULT_JITTER_CRIT.",
          "");
      support();
  }