check_overcr.c 14 KB


  1. /******************************************************************************
  2. *
  3. * CHECK_OVERCR.C
  4. *
  5. * Program: Over-CR collector plugin for Nagios
  6. * License: GPL
  7. * Copyright (c) 1999 Ethan Galstad (nagios@nagios.org)
  8. *
  9. * $Id$
  10. *
  11. * Description:
  12. *
  13. * Notes:
  14. * - This plugin requires that Eric Molitors' Over-CR collector daemon
  15. * be running on any UNIX boxes you want to monitor. Over-CR
  16. * is available from http://www.molitor.org/overcr/
  17. *
  18. * Modifications:
  19. *
  20. * 08-11-1999 Jacob Lundqvist <jaclu@grm.se>
  21. * Load was presented as a one digit percentage - changed to a two digit
  22. * value. Before, a load of 11.2 was presented as "1.2%" (not very
  23. * high). Warning and Critical params were ints, not very good
  24. * for load, changed to doubles, so we can trap load limits like
  25. * 1.5. Also added more informative LOAD error messages.
  26. *
  27. * License Information:
  28. *
  29. * This program is free software; you can redistribute it and/or modify
  30. * it under the terms of the GNU General Public License as published by
  31. * the Free Software Foundation; either version 2 of the License, or
  32. * (at your option) any later version.
  33. *
  34. * This program is distributed in the hope that it will be useful,
  35. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  36. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  37. * GNU General Public License for more details.
  38. *
  39. * You should have received a copy of the GNU General Public License
  40. * along with this program; if not, write to the Free Software
  41. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  42. *
  43. *****************************************************************************/
  44. #include "config.h"
  45. #include "common.h"
  46. #include "netutils.h"
  47. #include "utils.h"
  48. #define CHECK_NONE 0
  49. #define CHECK_LOAD1 1
  50. #define CHECK_LOAD5 2
  51. #define CHECK_LOAD15 4
  52. #define CHECK_DPU 8
  53. #define CHECK_PROCS 16
  54. #define CHECK_NETSTAT 32
  55. #define CHECK_UPTIME 64
  56. #define PORT 2000
  57. const char *progname = "check_overcr";
  58. char *server_address = NULL;
  59. int server_port = PORT;
  60. double warning_value = 0L;
  61. double critical_value = 0L;
  62. int check_warning_value = FALSE;
  63. int check_critical_value = FALSE;
  64. int vars_to_check = CHECK_NONE;
  65. int cmd_timeout = 1;
  66. int netstat_port = 0;
  67. char *disk_name = NULL;
  68. char *process_name = NULL;
  69. int process_arguments (int, char **);
  70. void print_usage (void);
  71. void print_help (void);
  72. int
  73. main (int argc, char **argv)
  74. {
  75. int result;
  76. char send_buffer[MAX_INPUT_BUFFER];
  77. char recv_buffer[MAX_INPUT_BUFFER];
  78. char output_message[MAX_INPUT_BUFFER];
  79. char temp_buffer[MAX_INPUT_BUFFER];
  80. char *temp_ptr = NULL;
  81. int found_disk = FALSE;
  82. unsigned long percent_used_disk_space = 100;
  83. double load;
  84. double load_1min;
  85. double load_5min;
  86. double load_15min;
  87. int port_connections = 0;
  88. int processes = 0;
  89. double uptime_raw_hours;
  90. int uptime_raw_minutes = 0;
  91. int uptime_days = 0;
  92. int uptime_hours = 0;
  93. int uptime_minutes = 0;
  94. if (process_arguments (argc, argv) == ERROR)
  95. usage ("Could not parse arguments\n");
  96. /* initialize alarm signal handling */
  97. signal (SIGALRM, socket_timeout_alarm_handler);
  98. /* set socket timeout */
  99. alarm (socket_timeout);
  100. result = STATE_OK;
  101. if (vars_to_check == CHECK_LOAD1 || vars_to_check == CHECK_LOAD5
  102. || vars_to_check == CHECK_LOAD15) {
  103. strcpy (send_buffer, "LOAD\r\nQUIT\r\n");
  104. result =
  105. process_tcp_request2 (server_address, server_port, send_buffer,
  106. recv_buffer, sizeof (recv_buffer));
  107. if (result != STATE_OK)
  108. return result;
  109. temp_ptr = (char *) strtok (recv_buffer, "\r\n");
  110. if (temp_ptr == NULL) {
  111. printf ("Invalid response from server - no load information\n");
  112. return STATE_CRITICAL;
  113. }
  114. load_1min = strtod (temp_ptr, NULL);
  115. temp_ptr = (char *) strtok (NULL, "\r\n");
  116. if (temp_ptr == NULL) {
  117. printf ("Invalid response from server after load 1\n");
  118. return STATE_CRITICAL;
  119. }
  120. load_5min = strtod (temp_ptr, NULL);
  121. temp_ptr = (char *) strtok (NULL, "\r\n");
  122. if (temp_ptr == NULL) {
  123. printf ("Invalid response from server after load 5\n");
  124. return STATE_CRITICAL;
  125. }
  126. load_15min = strtod (temp_ptr, NULL);
  127. switch (vars_to_check) {
  128. case CHECK_LOAD1:
  129. strcpy (temp_buffer, "1");
  130. load = load_1min;
  131. break;
  132. case CHECK_LOAD5:
  133. strcpy (temp_buffer, "5");
  134. load = load_5min;
  135. break;
  136. default:
  137. strcpy (temp_buffer, "15");
  138. load = load_15min;
  139. break;
  140. }
  141. if (check_critical_value == TRUE && (load >= critical_value))
  142. result = STATE_CRITICAL;
  143. else if (check_warning_value == TRUE && (load >= warning_value))
  144. result = STATE_WARNING;
  145. sprintf (output_message, "Load %s - %s-min load average = %0.2f",
  146. (result == STATE_OK) ? "ok" : "problem", temp_buffer, load);
  147. }
  148. else if (vars_to_check == CHECK_DPU) {
  149. sprintf (send_buffer, "DISKSPACE\r\n");
  150. result =
  151. process_tcp_request2 (server_address, server_port, send_buffer,
  152. recv_buffer, sizeof (recv_buffer));
  153. if (result != STATE_OK)
  154. return result;
  155. for (temp_ptr = (char *) strtok (recv_buffer, " "); temp_ptr != NULL;
  156. temp_ptr = (char *) strtok (NULL, " ")) {
  157. if (!strcmp (temp_ptr, disk_name)) {
  158. found_disk = TRUE;
  159. temp_ptr = (char *) strtok (NULL, "%");
  160. if (temp_ptr == NULL) {
  161. printf ("Invalid response from server\n");
  162. return STATE_CRITICAL;
  163. }
  164. percent_used_disk_space = strtoul (temp_ptr, NULL, 10);
  165. break;
  166. }
  167. temp_ptr = (char *) strtok (NULL, "\r\n");
  168. }
  169. /* error if we couldn't find the info for the disk */
  170. if (found_disk == FALSE) {
  171. sprintf (output_message, "Error: Disk '%s' non-existent or not mounted",
  172. disk_name);
  173. result = STATE_CRITICAL;
  174. }
  175. /* else check the disk space used */
  176. else {
  177. if (check_critical_value == TRUE
  178. && (percent_used_disk_space >= critical_value)) result =
  179. STATE_CRITICAL;
  180. else if (check_warning_value == TRUE
  181. && (percent_used_disk_space >= warning_value)) result =
  182. STATE_WARNING;
  183. sprintf (output_message, "Disk %s - %lu%% used on %s",
  184. (result == STATE_OK) ? "ok" : "problem",
  185. percent_used_disk_space, disk_name);
  186. }
  187. }
  188. else if (vars_to_check == CHECK_NETSTAT) {
  189. sprintf (send_buffer, "NETSTAT %d\r\n", netstat_port);
  190. result =
  191. process_tcp_request2 (server_address, server_port, send_buffer,
  192. recv_buffer, sizeof (recv_buffer));
  193. if (result != STATE_OK)
  194. return result;
  195. port_connections = strtod (recv_buffer, NULL);
  196. if (check_critical_value == TRUE && (port_connections >= critical_value))
  197. result = STATE_CRITICAL;
  198. else if (check_warning_value == TRUE
  199. && (port_connections >= warning_value)) result = STATE_WARNING;
  200. sprintf (output_message, "Net %s - %d connection%s on port %d",
  201. (result == STATE_OK) ? "ok" : "problem", port_connections,
  202. (port_connections == 1) ? "" : "s", netstat_port);
  203. }
  204. else if (vars_to_check == CHECK_PROCS) {
  205. sprintf (send_buffer, "PROCESS %s\r\n", process_name);
  206. result =
  207. process_tcp_request2 (server_address, server_port, send_buffer,
  208. recv_buffer, sizeof (recv_buffer));
  209. if (result != STATE_OK)
  210. return result;
  211. temp_ptr = (char *) strtok (recv_buffer, "(");
  212. if (temp_ptr == NULL) {
  213. printf ("Invalid response from server\n");
  214. return STATE_CRITICAL;
  215. }
  216. temp_ptr = (char *) strtok (NULL, ")");
  217. if (temp_ptr == NULL) {
  218. printf ("Invalid response from server\n");
  219. return STATE_CRITICAL;
  220. }
  221. processes = strtod (temp_ptr, NULL);
  222. if (check_critical_value == TRUE && (processes >= critical_value))
  223. result = STATE_CRITICAL;
  224. else if (check_warning_value == TRUE && (processes >= warning_value))
  225. result = STATE_WARNING;
  226. sprintf (output_message, "Process %s - %d instance%s of %s running",
  227. (result == STATE_OK) ? "ok" : "problem", processes,
  228. (processes == 1) ? "" : "s", process_name);
  229. }
  230. else if (vars_to_check == CHECK_UPTIME) {
  231. sprintf (send_buffer, "UPTIME\r\n");
  232. result =
  233. process_tcp_request2 (server_address, server_port, send_buffer,
  234. recv_buffer, sizeof (recv_buffer));
  235. if (result != STATE_OK)
  236. return result;
  237. uptime_raw_hours = strtod (recv_buffer, NULL);
  238. uptime_raw_minutes = (unsigned long) (uptime_raw_hours * 60.0);
  239. if (check_critical_value == TRUE
  240. && (uptime_raw_minutes <= critical_value)) result = STATE_CRITICAL;
  241. else if (check_warning_value == TRUE
  242. && (uptime_raw_minutes <= warning_value)) result = STATE_WARNING;
  243. uptime_days = uptime_raw_minutes / 1440;
  244. uptime_raw_minutes %= 1440;
  245. uptime_hours = uptime_raw_minutes / 60;
  246. uptime_raw_minutes %= 60;
  247. uptime_minutes = uptime_raw_minutes;
  248. sprintf (output_message, "Uptime %s - Up %d days %d hours %d minutes",
  249. (result == STATE_OK) ? "ok" : "problem", uptime_days,
  250. uptime_hours, uptime_minutes);
  251. }
  252. else {
  253. strcpy (output_message, "Nothing to check!\n");
  254. result = STATE_UNKNOWN;
  255. }
  256. /* reset timeout */
  257. alarm (0);
  258. printf ("%s\n", output_message);
  259. return result;
  260. }
  261. /* process command-line arguments */
  262. int
  263. process_arguments (int argc, char **argv)
  264. {
  265. int c;
  266. int option_index = 0;
  267. static struct option long_options[] = {
  268. {"port", required_argument, 0, 'p'},
  269. {"timeout", required_argument, 0, 't'},
  270. {"critical", required_argument, 0, 'c'},
  271. {"warning", required_argument, 0, 'w'},
  272. {"variable", required_argument, 0, 'v'},
  273. {"hostname", required_argument, 0, 'H'},
  274. {"version", no_argument, 0, 'V'},
  275. {"help", no_argument, 0, 'h'},
  276. {0, 0, 0, 0}
  277. };
  278. /* no options were supplied */
  279. if (argc < 2)
  280. return ERROR;
  281. /* backwards compatibility */
  282. if (!is_option (argv[1])) {
  283. server_address = argv[1];
  284. argv[1] = argv[0];
  285. argv = &argv[1];
  286. argc--;
  287. }
  288. for (c = 1; c < argc; c++) {
  289. if (strcmp ("-to", argv[c]) == 0)
  290. strcpy (argv[c], "-t");
  291. else if (strcmp ("-wv", argv[c]) == 0)
  292. strcpy (argv[c], "-w");
  293. else if (strcmp ("-cv", argv[c]) == 0)
  294. strcpy (argv[c], "-c");
  295. }
  296. while (1) {
  297. c = getopt_long (argc, argv, "+hVH:t:c:w:p:v:", long_options,
  298. &option_index);
  299. if (c == -1 || c == EOF || c == 1)
  300. break;
  301. switch (c) {
  302. case '?': /* print short usage statement if args not parsable */
  303. printf ("%s: Unknown argument: %s\n\n", progname, optarg);
  304. print_usage ();
  305. exit (STATE_UNKNOWN);
  306. case 'h': /* help */
  307. print_help ();
  308. exit (STATE_OK);
  309. case 'V': /* version */
  310. print_revision (progname, "$Revision$");
  311. exit (STATE_OK);
  312. case 'H': /* hostname */
  313. server_address = optarg;
  314. break;
  315. case 'p': /* port */
  316. if (is_intnonneg (optarg))
  317. server_port = atoi (optarg);
  318. else
  319. terminate (STATE_UNKNOWN,
  320. "Server port an integer (seconds)\nType '%s -h' for additional help\n",
  321. progname);
  322. break;
  323. case 'v': /* variable */
  324. if (strcmp (optarg, "LOAD1") == 0)
  325. vars_to_check = CHECK_LOAD1;
  326. else if (strcmp (optarg, "LOAD5") == 0)
  327. vars_to_check = CHECK_LOAD5;
  328. else if (strcmp (optarg, "LOAD15") == 0)
  329. vars_to_check = CHECK_LOAD15;
  330. else if (strcmp (optarg, "UPTIME") == 0)
  331. vars_to_check = CHECK_UPTIME;
  332. else if (strstr (optarg, "PROC") == optarg) {
  333. vars_to_check = CHECK_PROCS;
  334. process_name = strscpy (process_name, optarg + 4);
  335. }
  336. else if (strstr (optarg, "NET") == optarg) {
  337. vars_to_check = CHECK_NETSTAT;
  338. netstat_port = atoi (optarg + 3);
  339. }
  340. else if (strstr (optarg, "DPU") == optarg) {
  341. vars_to_check = CHECK_DPU;
  342. disk_name = strscpy (disk_name, optarg + 3);
  343. }
  344. else
  345. return ERROR;
  346. break;
  347. case 'w': /* warning threshold */
  348. warning_value = strtoul (optarg, NULL, 10);
  349. check_warning_value = TRUE;
  350. break;
  351. case 'c': /* critical threshold */
  352. critical_value = strtoul (optarg, NULL, 10);
  353. check_critical_value = TRUE;
  354. break;
  355. case 't': /* timeout */
  356. socket_timeout = atoi (optarg);
  357. if (socket_timeout <= 0)
  358. return ERROR;
  359. }
  360. }
  361. return OK;
  362. }
  363. void
  364. print_usage (void)
  365. {
  366. printf
  367. ("Usage: %s -H host [-p port] [-v variable] [-w warning] [-c critical] [-t timeout]\n",
  368. progname);
  369. }
  370. void
  371. print_help (void)
  372. {
  373. print_revision (progname, "$Revision$");
  374. printf
  375. ("Copyright (c) 2000 Ethan Galstad/Karl DeBisschop\n\n"
  376. "This plugin attempts to contact the Over-CR collector daemon running on the\n"
  377. "remote UNIX server in order to gather the requested system information. This\n"
  378. "plugin requres that Eric Molitors' Over-CR collector daemon be running on the\n"
  379. "remote server. Over-CR can be downloaded from http://www.molitor.org/overcr\n"
  380. "(This plugin was tested with version 0.99.53 of the Over-CR collector)\n\n");
  381. print_usage ();
  382. printf
  383. ("\nOptions:\n"
  384. "-H, --hostname=HOST\n"
  385. " Name of the host to check\n"
  386. "-p, --port=INTEGER\n"
  387. " Optional port number (default: %d)\n"
  388. "-v, --variable=STRING\n"
  389. " Variable to check. Valid variables include:\n"
  390. " LOAD1 = 1 minute average CPU load\n"
  391. " LOAD5 = 5 minute average CPU load\n"
  392. " LOAD15 = 15 minute average CPU load\n"
  393. " DPU<filesys> = percent used disk space on filesystem <filesys>\n"
  394. " PROC<process> = number of running processes with name <process>\n"
  395. " NET<port> = number of active connections on TCP port <port>\n"
  396. " UPTIME = system uptime in seconds\n"
  397. " -w, --warning=INTEGER\n"
  398. " Threshold which will result in a warning status\n"
  399. " -c, --critical=INTEGER\n"
  400. " Threshold which will result in a critical status\n"
  401. " -t, --timeout=INTEGER\n"
  402. " Seconds before connection attempt times out (default: %d)\n"
  403. "-h, --help\n"
  404. " Print this help screen\n"
  405. "-V, --version\n"
  406. " Print version information\n\n"
  407. "Notes:\n"
  408. " - For the available options, the critical threshold value should always be\n"
  409. " higher than the warning threshold value, EXCEPT with the uptime variable\n"
  410. " (i.e. lower uptimes are worse).\n", PORT, DEFAULT_SOCKET_TIMEOUT);
  411. }