check_ide_smart.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. /******************************************************************************
  2. * Nagios check_ide_smart plugin
  3. *
  4. * License: GPL
  5. *
  6. * ide-smart 1.3 - IDE S.M.A.R.T. checking tool
  7. * Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
  8. * 1998 Gadi Oxman <gadio@netvision.net.il>
  9. *
  10. * Copyright (c) 2000 Robert Dale <rdale@digital-mission.com>
  11. * Copyright (c) 2000-2006 nagios-plugins team
  12. *
  13. * Last Modified: $Date$
  14. *
  15. * Description:
  16. *
  17. * This file contains the check_ide_smart plugin
  18. *
  19. * This plugin checks a local hard drive with the (Linux specific) SMART interface
  20. *
  21. *
  22. * License Information:
  23. *
  24. * This program is free software; you can redistribute it and/or modify
  25. * it under the terms of the GNU General Public License as published by
  26. * the Free Software Foundation; either version 2 of the License, or
  27. * (at your option) any later version.
  28. *
  29. * This program is distributed in the hope that it will be useful,
  30. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  31. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  32. * GNU General Public License for more details.
  33. *
  34. * You should have received a copy of the GNU General Public License
  35. * along with this program; if not, write to the Free Software
  36. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  37. *
  38. * $Id$
  39. */
  /* Plugin identification strings.  print_help() hands progname and revision
     to print_revision() and feeds copyright and email to the COPYRIGHT format. */
  const char *progname  = "check_ide_smart";
  const char *revision  = "$Revision$";
  const char *copyright = "2000-2006";
  const char *email     = "nagiosplug-devel@lists.sourceforge.net";
  44. #include "common.h"
  45. #include "utils.h"
  46. void print_help (void);
  47. void print_usage (void);
  48. #include <sys/stat.h>
  49. #include <sys/ioctl.h>
  50. #include <fcntl.h>
  51. #include <linux/hdreg.h>
  52. #include <linux/types.h>
  53. #include <errno.h>
  /* Number of attribute slots in the values_t and thresholds_t tables. */
  #define NR_ATTRIBUTES 30
  #ifndef TRUE
  #define TRUE 1
  #endif /* TRUE */
  /* Drive health classification used by nagios(). */
  #define PREFAILURE 2
  #define ADVISORY 1
  #define OPERATIONAL 0
  /* Parenthesized so the negative value survives any surrounding expression. */
  #define UNKNOWN (-1)
  62. typedef struct threshold_s
  63. {
  64. __u8 id;
  65. __u8 threshold;
  66. __u8 reserved[10];
  67. }
  68. __attribute__ ((packed)) threshold_t;
  69. typedef struct thresholds_s
  70. {
  71. __u16 revision;
  72. threshold_t thresholds[NR_ATTRIBUTES];
  73. __u8 reserved[18];
  74. __u8 vendor[131];
  75. __u8 checksum;
  76. }
  77. __attribute__ ((packed)) thresholds_t;
  78. typedef struct value_s
  79. {
  80. __u8 id;
  81. __u16 status;
  82. __u8 value;
  83. __u8 vendor[8];
  84. }
  85. __attribute__ ((packed)) value_t;
  86. typedef struct values_s
  87. {
  88. __u16 revision;
  89. value_t values[NR_ATTRIBUTES];
  90. __u8 offline_status;
  91. __u8 vendor1;
  92. __u16 offline_timeout;
  93. __u8 vendor2;
  94. __u8 offline_capability;
  95. __u16 smart_capability;
  96. __u8 reserved[16];
  97. __u8 vendor[125];
  98. __u8 checksum;
  99. }
  100. __attribute__ ((packed)) values_t;
  101. struct
  102. {
  103. __u8 value;
  104. char *text;
  105. }
  106. offline_status_text[] =
  107. {
  108. {0x00, "NeverStarted"},
  109. {0x02, "Completed"},
  110. {0x04, "Suspended"},
  111. {0x05, "Aborted"},
  112. {0x06, "Failed"},
  113. {0, 0}
  114. };
  115. struct
  116. {
  117. __u8 value;
  118. char *text;
  119. }
  120. smart_command[] =
  121. {
  122. {SMART_ENABLE, "SMART_ENABLE"},
  123. {SMART_DISABLE, "SMART_DISABLE"},
  124. {SMART_IMMEDIATE_OFFLINE, "SMART_IMMEDIATE_OFFLINE"},
  125. {SMART_AUTO_OFFLINE, "SMART_AUTO_OFFLINE"}
  126. };
  /* Row indices into the smart_command table; the numeric values must stay in
     step with the order of the rows there. */
  enum SmartCommand
  {
    SMART_CMD_ENABLE = 0,
    SMART_CMD_DISABLE = 1,
    SMART_CMD_IMMEDIATE_OFFLINE = 2,
    SMART_CMD_AUTO_OFFLINE = 3
  };
  134. void print_values (values_t * p, thresholds_t * t);
  135. int smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error);
  136. int
  137. main (int argc, char *argv[])
  138. {
  139. char *device = NULL;
  140. int command = -1;
  141. int o, longindex;
  142. int retval = 0;
  143. thresholds_t thresholds;
  144. values_t values;
  145. int fd;
  146. static struct option longopts[] = {
  147. {"device", required_argument, 0, 'd'},
  148. {"immediate", no_argument, 0, 'i'},
  149. {"quiet-check", no_argument, 0, 'q'},
  150. {"auto-on", no_argument, 0, '1'},
  151. {"auto-off", no_argument, 0, '0'},
  152. {"nagios", no_argument, 0, 'n'},
  153. {"help", no_argument, 0, 'h'},
  154. {"version", no_argument, 0, 'V'}, {0, 0, 0, 0}
  155. };
  156. setlocale (LC_ALL, "");
  157. bindtextdomain (PACKAGE, LOCALEDIR);
  158. textdomain (PACKAGE);
  159. while (1) {
  160. o = getopt_long (argc, argv, "+d:iq10nhV", longopts, &longindex);
  161. if (o == -1 || o == EOF || o == 1)
  162. break;
  163. switch (o) {
  164. case 'd':
  165. device = optarg;
  166. break;
  167. case 'q':
  168. command = 3;
  169. break;
  170. case 'i':
  171. command = 2;
  172. break;
  173. case '1':
  174. command = 1;
  175. break;
  176. case '0':
  177. command = 0;
  178. break;
  179. case 'n':
  180. command = 4;
  181. break;
  182. case 'h':
  183. print_help ();
  184. return STATE_OK;
  185. case 'V':
  186. print_revision (progname, revision);
  187. return STATE_OK;
  188. default:
  189. usage2 (_("Unknown argument"), optarg);
  190. }
  191. }
  192. if (optind < argc) {
  193. device = argv[optind];
  194. }
  195. if (!device) {
  196. print_help ();
  197. return STATE_OK;
  198. }
  199. fd = open (device, O_RDONLY);
  200. if (fd < 0) {
  201. printf (_("CRITICAL - Couldn't open device %s: %s\n"), device, strerror (errno));
  202. return STATE_CRITICAL;
  203. }
  204. if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
  205. printf (_("CRITICAL - SMART_CMD_ENABLE\n"));
  206. return STATE_CRITICAL;
  207. }
  208. switch (command) {
  209. case 0:
  210. retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
  211. break;
  212. case 1:
  213. retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
  214. break;
  215. case 2:
  216. retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
  217. break;
  218. case 3:
  219. smart_read_values (fd, &values);
  220. smart_read_thresholds (fd, &thresholds);
  221. retval = values_not_passed (&values, &thresholds);
  222. break;
  223. case 4:
  224. smart_read_values (fd, &values);
  225. smart_read_thresholds (fd, &thresholds);
  226. retval = nagios (&values, &thresholds);
  227. break;
  228. default:
  229. smart_read_values (fd, &values);
  230. smart_read_thresholds (fd, &thresholds);
  231. print_values (&values, &thresholds);
  232. break;
  233. }
  234. close (fd);
  235. return retval;
  236. }
  237. char *
  238. get_offline_text (int status)
  239. {
  240. int i;
  241. for (i = 0; offline_status_text[i].text; i++) {
  242. if (offline_status_text[i].value == status) {
  243. return offline_status_text[i].text;
  244. }
  245. }
  246. return "UNKNOW";
  247. }
  248. int
  249. smart_read_values (int fd, values_t * values)
  250. {
  251. int e;
  252. __u8 args[4 + 512];
  253. args[0] = WIN_SMART;
  254. args[1] = 0;
  255. args[2] = SMART_READ_VALUES;
  256. args[3] = 1;
  257. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  258. e = errno;
  259. printf (_("CRITICAL - SMART_READ_VALUES: %s\n"), strerror (errno));
  260. return e;
  261. }
  262. memcpy (values, args + 4, 512);
  263. return 0;
  264. }
  265. int
  266. values_not_passed (values_t * p, thresholds_t * t)
  267. {
  268. value_t * value = p->values;
  269. threshold_t * threshold = t->thresholds;
  270. int failed = 0;
  271. int passed = 0;
  272. int i;
  273. for (i = 0; i < NR_ATTRIBUTES; i++) {
  274. if (value->id && threshold->id && value->id == threshold->id) {
  275. if (value->value <= threshold->threshold) {
  276. ++failed;
  277. }
  278. else {
  279. ++passed;
  280. }
  281. }
  282. ++value;
  283. ++threshold;
  284. }
  285. return (passed ? -failed : 2);
  286. }
  287. int
  288. nagios (values_t * p, thresholds_t * t)
  289. {
  290. value_t * value = p->values;
  291. threshold_t * threshold = t->thresholds;
  292. int status = OPERATIONAL;
  293. int prefailure = 0;
  294. int advisory = 0;
  295. int failed = 0;
  296. int passed = 0;
  297. int total = 0;
  298. int i;
  299. for (i = 0; i < NR_ATTRIBUTES; i++) {
  300. if (value->id && threshold->id && value->id == threshold->id) {
  301. if (value->value <= threshold->threshold) {
  302. ++failed;
  303. if (value->status & 1) {
  304. status = PREFAILURE;
  305. ++prefailure;
  306. }
  307. else {
  308. status = ADVISORY;
  309. ++advisory;
  310. }
  311. }
  312. else {
  313. ++passed;
  314. }
  315. ++total;
  316. }
  317. ++value;
  318. ++threshold;
  319. }
  320. switch (status) {
  321. case PREFAILURE:
  322. printf (_("CRITICAL - %d Harddrive PreFailure%cDetected! %d/%d tests failed.\n"),
  323. prefailure,
  324. prefailure > 1 ? 's' : ' ',
  325. failed,
  326. total);
  327. status=STATE_CRITICAL;
  328. break;
  329. case ADVISORY:
  330. printf (_("WARNING - %d Harddrive Advisor%s Detected. %d/%d tests failed.\n"),
  331. advisory,
  332. advisory > 1 ? "ies" : "y",
  333. failed,
  334. total);
  335. status=STATE_WARNING;
  336. break;
  337. case OPERATIONAL:
  338. printf (_("OK - Operational (%d/%d tests passed)\n"), passed, total);
  339. status=STATE_OK;
  340. break;
  341. default:
  342. printf (_("ERROR - Status '%d' unkown. %d/%d tests passed\n"), status,
  343. passed, total);
  344. status = STATE_UNKNOWN;
  345. break;
  346. }
  347. return status;
  348. }
  349. void
  350. print_value (value_t * p, threshold_t * t)
  351. {
  352. printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
  353. p->id, p->status, p->status & 1 ? "PreFailure" : "Advisory ",
  354. p->status & 2 ? "OnLine " : "OffLine", p->value, t->threshold,
  355. p->value > t->threshold ? "Passed" : "Failed");
  356. }
  357. void
  358. print_values (values_t * p, thresholds_t * t)
  359. {
  360. value_t * value = p->values;
  361. threshold_t * threshold = t->thresholds;
  362. int i;
  363. for (i = 0; i < NR_ATTRIBUTES; i++) {
  364. if (value->id && threshold->id && value->id == threshold->id) {
  365. print_value (value++, threshold++);
  366. }
  367. }
  368. printf
  369. (_("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n"),
  370. p->offline_status,
  371. get_offline_text (p->offline_status & 0x7f),
  372. (p->offline_status & 0x80 ? "Yes" : "No"),
  373. p->offline_timeout / 60);
  374. printf
  375. (_("OffLineCapability=%d {%s %s %s}\n"),
  376. p->offline_capability,
  377. p->offline_capability & 1 ? "Immediate" : "",
  378. p->offline_capability & 2 ? "Auto" : "",
  379. p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
  380. printf
  381. (_("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n"),
  382. p->revision,
  383. p->checksum,
  384. p->smart_capability,
  385. p->smart_capability & 1 ? "SaveOnStandBy" : "",
  386. p->smart_capability & 2 ? "AutoSave" : "");
  387. }
  388. void
  389. print_thresholds (thresholds_t * p)
  390. {
  391. threshold_t * threshold = p->thresholds;
  392. int i;
  393. printf ("\n");
  394. printf ("SmartRevision=%d\n", p->revision);
  395. for (i = 0; i < NR_ATTRIBUTES; i++) {
  396. if (threshold->id) {
  397. printf ("Id=%3d, Threshold=%3d\n", threshold->id,
  398. threshold->threshold); }
  399. ++threshold;
  400. }
  401. printf ("CheckSum=%d\n", p->checksum);
  402. }
  403. int
  404. smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error)
  405. {
  406. int e = 0;
  407. __u8 args[4];
  408. args[0] = WIN_SMART;
  409. args[1] = val0;
  410. args[2] = smart_command[command].value;
  411. args[3] = 0;
  412. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  413. e = errno;
  414. if (show_error) {
  415. printf (_("CRITICAL - %s: %s\n"), smart_command[command].text, strerror (errno));
  416. }
  417. }
  418. return e;
  419. }
  420. int
  421. smart_read_thresholds (int fd, thresholds_t * thresholds)
  422. {
  423. int e;
  424. __u8 args[4 + 512];
  425. args[0] = WIN_SMART;
  426. args[1] = 0;
  427. args[2] = SMART_READ_THRESHOLDS;
  428. args[3] = 1;
  429. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  430. e = errno;
  431. printf (_("CRITICAL - SMART_READ_THRESHOLDS: %s\n"), strerror (errno));
  432. return e;
  433. }
  434. memcpy (thresholds, args + 4, 512);
  435. return 0;
  436. }
  437. void
  438. print_help (void)
  439. {
  440. print_revision (progname, revision);
  441. printf ("Nagios feature - 1999 Robert Dale <rdale@digital-mission.com>\n");
  442. printf ("(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n");
  443. printf (COPYRIGHT, copyright, email);
  444. printf (_("This plugin checks a local hard drive with the (Linux specific) SMART interface [http://smartlinux.sourceforge.net/smart/index.php]."));
  445. printf ("\n\n");
  446. print_usage ();
  447. printf (_(UT_HELP_VRSN));
  448. printf (" %s\n", "-d, --device=DEVICE");
  449. printf (" %s\n", _("Select device DEVICE"));
  450. printf (" %s\n", _("Note: if the device is selected with this option, _no_ other options are accepted"));
  451. printf (" %s\n", "-i, --immediate");
  452. printf (" %s\n", _("Perform immediately offline tests"));
  453. printf (" %s\n", "-q, --quiet-check");
  454. printf (" %s\n", _("Returns the number of failed tests"));
  455. printf (" %s\n", "-1, --auto-on");
  456. printf (" %s\n", _("Turn on automatic offline tests"));
  457. printf (" %s\n", "-0, --auto-off");
  458. printf (" %s\n", _("Turn off automatic offline tests"));
  459. printf (" %s\n", "-n, --nagios");
  460. printf (" %s\n", _("Output suitable for Nagios\n"));
  461. printf (_(UT_SUPPORT));
  462. }
  /* TODO: add to the long manual as an example
  *
  * Run with: check_ide_smart --nagios [-d] <DRIVE>
  466. * Where DRIVE is an IDE drive, ie. /dev/hda, /dev/hdb, /dev/hdc
  467. *
  468. * - Returns 0 on no errors
  469. * - Returns 1 on advisories
  470. * - Returns 2 on prefailure
  471. * - Returns -1 not too often
  472. */
  473. void
  474. print_usage (void)
  475. {
  476. printf (_("Usage:");
  477. printf ("%s [-d <device>] [-i <immediate>] [-q quiet] [-1 <auto-on>]",progname);
  478. pritnf (" [-O <auto-off>] [-n <nagios>]\n");
  479. }