4
0

check_ide_smart.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. /*****************************************************************************
  2. *
  3. * Nagios check_ide_smart plugin
  4. * ide-smart 1.3 - IDE S.M.A.R.T. checking tool
  5. *
  6. * License: GPL
  7. * Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
  8. * 1998 Gadi Oxman <gadio@netvision.net.il>
  9. * Copyright (c) 2000 Robert Dale <rdale@digital-mission.com>
  10. * Copyright (c) 2000-2007 Nagios Plugins Development Team
  11. *
  12. * Description:
  13. *
  14. * This file contains the check_ide_smart plugin
  15. *
  16. * This plugin checks a local hard drive with the (Linux specific) SMART
  17. * interface
  18. *
  19. *
  20. * This program is free software: you can redistribute it and/or modify
  21. * it under the terms of the GNU General Public License as published by
  22. * the Free Software Foundation, either version 3 of the License, or
  23. * (at your option) any later version.
  24. *
  25. * This program is distributed in the hope that it will be useful,
  26. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  27. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  28. * GNU General Public License for more details.
  29. *
  30. * You should have received a copy of the GNU General Public License
  31. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  32. *
  33. *
  34. *****************************************************************************/
  /* Plugin identity strings: the program name plus the copyright years and
   * contact address that print_help() passes to the COPYRIGHT banner. */
  char const *progname = "check_ide_smart";
  char const *copyright = "1998-2007";
  char const *email = "nagiosplug-devel@lists.sourceforge.net";
  38. #include "common.h"
  39. #include "utils.h"
  /* Forward declarations of the usage/help printers defined at the end of
   * this file. */
  void print_usage (void);
  void print_help (void);
  42. #include <sys/stat.h>
  43. #include <sys/ioctl.h>
  44. #include <fcntl.h>
  45. #include <linux/hdreg.h>
  46. #include <linux/types.h>
  47. #include <errno.h>
  /* Number of entries in the values[] and thresholds[] attribute tables. */
  #define NR_ATTRIBUTES 30

  #ifndef TRUE
  #define TRUE 1
  #endif

  /* Drive health classes used while evaluating the attributes in nagios(). */
  #define PREFAILURE 2
  #define ADVISORY 1
  #define OPERATIONAL 0
  /* Parenthesised so the negative literal stays one operand wherever the
   * macro is expanded. */
  #define UNKNOWN (-1)
  /* One slot of the threshold table: an attribute id and the threshold that
   * attribute's value is compared against. Packed, so the slot occupies
   * exactly 12 bytes with no padding. */
  struct threshold_s
  {
    __u8 id;
    __u8 threshold;
    __u8 reserved[10];
  } __attribute__ ((packed));
  typedef struct threshold_s threshold_t;
  63. typedef struct thresholds_s
  64. {
  65. __u16 revision;
  66. threshold_t thresholds[NR_ATTRIBUTES];
  67. __u8 reserved[18];
  68. __u8 vendor[131];
  69. __u8 checksum;
  70. }
  71. __attribute__ ((packed)) thresholds_t;
  /* One slot of the attribute value table. Bit 0 of `status` is reported as
   * PreFailure (set) or Advisory (clear) and bit 1 as OnLine/OffLine by
   * print_value(). Packed, so the slot occupies exactly 12 bytes. */
  struct value_s
  {
    __u8 id;
    __u16 status;
    __u8 value;
    __u8 vendor[8];
  } __attribute__ ((packed));
  typedef struct value_s value_t;
  80. typedef struct values_s
  81. {
  82. __u16 revision;
  83. value_t values[NR_ATTRIBUTES];
  84. __u8 offline_status;
  85. __u8 vendor1;
  86. __u16 offline_timeout;
  87. __u8 vendor2;
  88. __u8 offline_capability;
  89. __u16 smart_capability;
  90. __u8 reserved[16];
  91. __u8 vendor[125];
  92. __u8 checksum;
  93. }
  94. __attribute__ ((packed)) values_t;
  /* Lookup table from an offline-test status code to its display name.
   * get_offline_text() scans it until it reaches the entry whose text is
   * null, so that sentinel must stay last. */
  struct
  {
    __u8 value;
    char *text;
  }
  offline_status_text[] =
  {
    { .value = 0x00, .text = "NeverStarted" },
    { .value = 0x02, .text = "Completed" },
    { .value = 0x04, .text = "Suspended" },
    { .value = 0x05, .text = "Aborted" },
    { .value = 0x06, .text = "Failed" },
    { .value = 0, .text = NULL }
  };
  /* SMART sub-commands that smart_cmd_simple() can issue, each paired with
   * the name used in its error message. Rows are indexed by
   * enum SmartCommand, so the row order must match that enum. */
  struct
  {
    __u8 value;
    char *text;
  }
  smart_command[] =
  {
    { .value = SMART_ENABLE, .text = "SMART_ENABLE" },
    { .value = SMART_DISABLE, .text = "SMART_DISABLE" },
    { .value = SMART_IMMEDIATE_OFFLINE, .text = "SMART_IMMEDIATE_OFFLINE" },
    { .value = SMART_AUTO_OFFLINE, .text = "SMART_AUTO_OFFLINE" }
  };
  /* Row indices into the smart_command[] table. The numeric values are
   * spelled out because they must match the order of the table rows. */
  enum SmartCommand
  {
    SMART_CMD_ENABLE = 0,
    SMART_CMD_DISABLE = 1,
    SMART_CMD_IMMEDIATE_OFFLINE = 2,
    SMART_CMD_AUTO_OFFLINE = 3
  };
  128. void print_values (values_t * p, thresholds_t * t);
  129. int smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error);
  130. int
  131. main (int argc, char *argv[])
  132. {
  133. char *device = NULL;
  134. int command = -1;
  135. int o, longindex;
  136. int retval = 0;
  137. thresholds_t thresholds;
  138. values_t values;
  139. int fd;
  140. /* Parse extra opts if any */
  141. argv=np_extra_opts (&argc, argv, progname);
  142. static struct option longopts[] = {
  143. {"device", required_argument, 0, 'd'},
  144. {"immediate", no_argument, 0, 'i'},
  145. {"quiet-check", no_argument, 0, 'q'},
  146. {"auto-on", no_argument, 0, '1'},
  147. {"auto-off", no_argument, 0, '0'},
  148. {"nagios", no_argument, 0, 'n'},
  149. {"help", no_argument, 0, 'h'},
  150. {"version", no_argument, 0, 'V'},
  151. {0, 0, 0, 0}
  152. };
  153. setlocale (LC_ALL, "");
  154. bindtextdomain (PACKAGE, LOCALEDIR);
  155. textdomain (PACKAGE);
  156. while (1) {
  157. o = getopt_long (argc, argv, "+d:iq10nhV", longopts, &longindex);
  158. if (o == -1 || o == EOF || o == 1)
  159. break;
  160. switch (o) {
  161. case 'd':
  162. device = optarg;
  163. break;
  164. case 'q':
  165. command = 3;
  166. break;
  167. case 'i':
  168. command = 2;
  169. break;
  170. case '1':
  171. command = 1;
  172. break;
  173. case '0':
  174. command = 0;
  175. break;
  176. case 'n':
  177. command = 4;
  178. break;
  179. case 'h':
  180. print_help ();
  181. return STATE_OK;
  182. case 'V':
  183. print_revision (progname, NP_VERSION);
  184. return STATE_OK;
  185. default:
  186. usage5 ();
  187. }
  188. }
  189. if (optind < argc) {
  190. device = argv[optind];
  191. }
  192. if (!device) {
  193. print_help ();
  194. return STATE_OK;
  195. }
  196. fd = open (device, O_RDONLY);
  197. if (fd < 0) {
  198. printf (_("CRITICAL - Couldn't open device %s: %s\n"), device, strerror (errno));
  199. return STATE_CRITICAL;
  200. }
  201. if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
  202. printf (_("CRITICAL - SMART_CMD_ENABLE\n"));
  203. return STATE_CRITICAL;
  204. }
  205. switch (command) {
  206. case 0:
  207. retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
  208. break;
  209. case 1:
  210. retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
  211. break;
  212. case 2:
  213. retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
  214. break;
  215. case 3:
  216. smart_read_values (fd, &values);
  217. smart_read_thresholds (fd, &thresholds);
  218. retval = values_not_passed (&values, &thresholds);
  219. break;
  220. case 4:
  221. smart_read_values (fd, &values);
  222. smart_read_thresholds (fd, &thresholds);
  223. retval = nagios (&values, &thresholds);
  224. break;
  225. default:
  226. smart_read_values (fd, &values);
  227. smart_read_thresholds (fd, &thresholds);
  228. print_values (&values, &thresholds);
  229. break;
  230. }
  231. close (fd);
  232. return retval;
  233. }
  234. char *
  235. get_offline_text (int status)
  236. {
  237. int i;
  238. for (i = 0; offline_status_text[i].text; i++) {
  239. if (offline_status_text[i].value == status) {
  240. return offline_status_text[i].text;
  241. }
  242. }
  243. return "UNKNOW";
  244. }
  245. int
  246. smart_read_values (int fd, values_t * values)
  247. {
  248. int e;
  249. __u8 args[4 + 512];
  250. args[0] = WIN_SMART;
  251. args[1] = 0;
  252. args[2] = SMART_READ_VALUES;
  253. args[3] = 1;
  254. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  255. e = errno;
  256. printf (_("CRITICAL - SMART_READ_VALUES: %s\n"), strerror (errno));
  257. return e;
  258. }
  259. memcpy (values, args + 4, 512);
  260. return 0;
  261. }
  262. int
  263. values_not_passed (values_t * p, thresholds_t * t)
  264. {
  265. value_t * value = p->values;
  266. threshold_t * threshold = t->thresholds;
  267. int failed = 0;
  268. int passed = 0;
  269. int i;
  270. for (i = 0; i < NR_ATTRIBUTES; i++) {
  271. if (value->id && threshold->id && value->id == threshold->id) {
  272. if (value->value <= threshold->threshold) {
  273. ++failed;
  274. }
  275. else {
  276. ++passed;
  277. }
  278. }
  279. ++value;
  280. ++threshold;
  281. }
  282. return (passed ? -failed : 2);
  283. }
  284. int
  285. nagios (values_t * p, thresholds_t * t)
  286. {
  287. value_t * value = p->values;
  288. threshold_t * threshold = t->thresholds;
  289. int status = OPERATIONAL;
  290. int prefailure = 0;
  291. int advisory = 0;
  292. int failed = 0;
  293. int passed = 0;
  294. int total = 0;
  295. int i;
  296. for (i = 0; i < NR_ATTRIBUTES; i++) {
  297. if (value->id && threshold->id && value->id == threshold->id) {
  298. if (value->value <= threshold->threshold) {
  299. ++failed;
  300. if (value->status & 1) {
  301. status = PREFAILURE;
  302. ++prefailure;
  303. }
  304. else {
  305. status = ADVISORY;
  306. ++advisory;
  307. }
  308. }
  309. else {
  310. ++passed;
  311. }
  312. ++total;
  313. }
  314. ++value;
  315. ++threshold;
  316. }
  317. switch (status) {
  318. case PREFAILURE:
  319. printf (_("CRITICAL - %d Harddrive PreFailure%cDetected! %d/%d tests failed.\n"),
  320. prefailure,
  321. prefailure > 1 ? 's' : ' ',
  322. failed,
  323. total);
  324. status=STATE_CRITICAL;
  325. break;
  326. case ADVISORY:
  327. printf (_("WARNING - %d Harddrive Advisor%s Detected. %d/%d tests failed.\n"),
  328. advisory,
  329. advisory > 1 ? "ies" : "y",
  330. failed,
  331. total);
  332. status=STATE_WARNING;
  333. break;
  334. case OPERATIONAL:
  335. printf (_("OK - Operational (%d/%d tests passed)\n"), passed, total);
  336. status=STATE_OK;
  337. break;
  338. default:
  339. printf (_("ERROR - Status '%d' unkown. %d/%d tests passed\n"), status,
  340. passed, total);
  341. status = STATE_UNKNOWN;
  342. break;
  343. }
  344. return status;
  345. }
  346. void
  347. print_value (value_t * p, threshold_t * t)
  348. {
  349. printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
  350. p->id, p->status, p->status & 1 ? "PreFailure" : "Advisory ",
  351. p->status & 2 ? "OnLine " : "OffLine", p->value, t->threshold,
  352. p->value > t->threshold ? "Passed" : "Failed");
  353. }
  354. void
  355. print_values (values_t * p, thresholds_t * t)
  356. {
  357. value_t * value = p->values;
  358. threshold_t * threshold = t->thresholds;
  359. int i;
  360. for (i = 0; i < NR_ATTRIBUTES; i++) {
  361. if (value->id && threshold->id && value->id == threshold->id) {
  362. print_value (value++, threshold++);
  363. }
  364. }
  365. printf
  366. (_("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n"),
  367. p->offline_status,
  368. get_offline_text (p->offline_status & 0x7f),
  369. (p->offline_status & 0x80 ? "Yes" : "No"),
  370. p->offline_timeout / 60);
  371. printf
  372. (_("OffLineCapability=%d {%s %s %s}\n"),
  373. p->offline_capability,
  374. p->offline_capability & 1 ? "Immediate" : "",
  375. p->offline_capability & 2 ? "Auto" : "",
  376. p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
  377. printf
  378. (_("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n"),
  379. p->revision,
  380. p->checksum,
  381. p->smart_capability,
  382. p->smart_capability & 1 ? "SaveOnStandBy" : "",
  383. p->smart_capability & 2 ? "AutoSave" : "");
  384. }
  385. int
  386. smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error)
  387. {
  388. int e = 0;
  389. __u8 args[4];
  390. args[0] = WIN_SMART;
  391. args[1] = val0;
  392. args[2] = smart_command[command].value;
  393. args[3] = 0;
  394. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  395. e = errno;
  396. if (show_error) {
  397. printf (_("CRITICAL - %s: %s\n"), smart_command[command].text, strerror (errno));
  398. }
  399. }
  400. return e;
  401. }
  402. int
  403. smart_read_thresholds (int fd, thresholds_t * thresholds)
  404. {
  405. int e;
  406. __u8 args[4 + 512];
  407. args[0] = WIN_SMART;
  408. args[1] = 0;
  409. args[2] = SMART_READ_THRESHOLDS;
  410. args[3] = 1;
  411. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  412. e = errno;
  413. printf (_("CRITICAL - SMART_READ_THRESHOLDS: %s\n"), strerror (errno));
  414. return e;
  415. }
  416. memcpy (thresholds, args + 4, 512);
  417. return 0;
  418. }
  419. void
  420. print_help (void)
  421. {
  422. print_revision (progname, NP_VERSION);
  423. printf ("Nagios feature - 1999 Robert Dale <rdale@digital-mission.com>\n");
  424. printf ("(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n");
  425. printf (COPYRIGHT, copyright, email);
  426. printf (_("This plugin checks a local hard drive with the (Linux specific) SMART interface [http://smartlinux.sourceforge.net/smart/index.php]."));
  427. printf ("\n\n");
  428. print_usage ();
  429. printf (_(UT_HELP_VRSN));
  430. printf (_(UT_EXTRA_OPTS));
  431. printf (" %s\n", "-d, --device=DEVICE");
  432. printf (" %s\n", _("Select device DEVICE"));
  433. printf (" %s\n", _("Note: if the device is selected with this option, _no_ other options are accepted"));
  434. printf (" %s\n", "-i, --immediate");
  435. printf (" %s\n", _("Perform immediately offline tests"));
  436. printf (" %s\n", "-q, --quiet-check");
  437. printf (" %s\n", _("Returns the number of failed tests"));
  438. printf (" %s\n", "-1, --auto-on");
  439. printf (" %s\n", _("Turn on automatic offline tests"));
  440. printf (" %s\n", "-0, --auto-off");
  441. printf (" %s\n", _("Turn off automatic offline tests"));
  442. printf (" %s\n", "-n, --nagios");
  443. printf (" %s\n", _("Output suitable for Nagios"));
  444. #ifdef NP_EXTRA_OPTS
  445. printf ("\n");
  446. printf ("%s\n", _("Notes:"));
  447. printf (_(UT_EXTRA_OPTS_NOTES));
  448. #endif
  449. printf (_(UT_SUPPORT));
  450. }
  /* TODO: add to the long manual as an example
   *
   * Run with: check_ide_smart --nagios [-d] <DRIVE>
   * Where DRIVE is an IDE drive, e.g. /dev/hda, /dev/hdb, /dev/hdc
   *
   * - Returns 0 on no errors
   * - Returns 1 on advisories
   * - Returns 2 on prefailure
   * - Returns -1 not too often
   */
  461. void
  462. print_usage (void)
  463. {
  464. printf (_("Usage:"));
  465. printf ("%s [-d <device>] [-i <immediate>] [-q quiet] [-1 <auto-on>]",progname);
  466. printf (" [-O <auto-off>] [-n <nagios>]\n");
  467. }