#! /usr/bin/env python
#
# check_pcpmetric.py
#
# Nagios client for checking Performance Co-Pilot metrics
#
#
import getopt
import popen2
import string
import subprocess
import types
from sys import argv, exit
  8. DEBUG=0
  9. nagios_pcpclient_version = 0.01
  10. PMVAL='/usr/bin/pmval'
  11. COMMANDLINE=PMVAL + " -s 1"
  12. METRIC='undefined'
  13. CRITICAL=0
  14. WARNING=0
  15. def usage():
  16. print "Usage:", argv[0], "[options]"
  17. print "Options:"
  18. print "\t[-H host]\tHostname to contact"
  19. print "\t[-m metric]\tPCP metric to check"
  20. print "\t[-i instance]\tPCP metric instance"
  21. print "\t[-w warn]\tIssue warning alert if value is larger than this"
  22. print "\t[-c critical]\tIssue critical alert value is larger than this"
  23. print "\t[-V]\t\tProgram version"
  24. print "\t[-h]\t\tThis helptext"
  25. print ""
  26. print "F.ex. to check 5 minute loadaverage, warn if the load is above 2,"
  27. print "and give critical warning if it's above 10:"
  28. print "\n\t%", argv[0], " -i 5 -m kernel.all.load -w 2 -c 10"
  29. print ""
  30. print "A list of all PCP metrics can be found with the command 'pminfo'."
  31. print "A list of all instances within a metric can be found with 'pminfo -f metric'."
  32. print "F.ex. to see all available instances of 'filesys.full' execute:"
  33. print "\n\t% pminfo -f filesys.full"
  34. print "\tfilesys.full"
  35. print """\t\tinst [0 or "/dev/root"] value 45.35514044640914"""
  36. print """\t\tinst [1 or "/dev/sda1"] value 46.74285959344712"""
  37. print """\t\tinst [2 or "/dev/sdb1"] value 0.807766570678168"""
  38. print ""
  39. print "And the command to have nagios monitor the /dev/sda1 filesystem would be:"
  40. print "\n\t", argv[0], " -i /dev/sda1 -m filesys.full -w 70 -c 90"
  41. opts, args = getopt.getopt(argv[1:],'hH:c:w:m:i:V')
  42. for opt in opts:
  43. key,value = opt
  44. if key == '-H':
  45. COMMANDLINE = COMMANDLINE + " -h " + value
  46. elif key == '-m':
  47. METRIC=value
  48. elif key == '-i':
  49. COMMANDLINE = COMMANDLINE + " -i " + value
  50. elif key == '-c':
  51. CRITICAL = value
  52. elif key == '-w':
  53. WARNING = value
  54. elif key == '-h':
  55. usage()
  56. exit(0)
  57. elif key == '-V':
  58. print "Nagios Performance CoPilot client v%.2f" % nagios_pcpclient_version
  59. print "Written by Jan-Frode Myklebust <janfrode@parallab.uib.no>"
  60. exit(0)
  61. if METRIC == 'undefined':
  62. usage()
  63. exit(3)
  64. COMMANDLINE = COMMANDLINE + " " + METRIC
  65. if DEBUG: print COMMANDLINE
  66. p=popen2.Popen4(COMMANDLINE)
  67. exitcode=p.wait()
  68. # Get the last line of output from 'pmval':
  69. buffer = p.fromchild.readline()
  70. while (buffer != ''):
  71. output=buffer
  72. buffer = p.fromchild.readline()
  73. returndata = string.split(output)[0]
  74. # Confirm that we have gotten a float, and not
  75. # some errormessage in the returndata. If not,
  76. # print the error, and give the UNKNOWN exit code:
  77. try:
  78. retval = string.atof(returndata)
  79. except ValueError, e:
  80. print e
  81. exit(3)
  82. if (retval < WARNING):
  83. EXITCODE=0
  84. elif (retval > CRITICAL):
  85. EXITCODE=2
  86. elif (retval > WARNING):
  87. EXITCODE=1
  88. else:
  89. EXITCODE=3
  90. print retval
  91. exit(EXITCODE)