check_ide_smart.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. /*
  2. * check_ide-smart v.1 - hacked version of ide-smart for Nagios
  3. * Copyright (C) 2000 Robert Dale <rdale@digital-mission.com>
  4. *
  5. * Nagios - http://www.nagios.org
  6. *
  7. * Notes:
  8. * ide-smart has the same functionality as before. Some return
  9. * values were changed, otherwise the --nagios option was added.
  10. *
  11. * Run with: check_ide-smart --nagios [-d] <DRIVE>
  12. * Where DRIVE is an IDE drive, ie. /dev/hda, /dev/hdb, /dev/hdc
  13. *
  14. * - Returns 0 on no errors
  15. * - Returns 1 on advisories
  16. * - Returns 2 on prefailure
  17. * - Returns -1 not too often
  18. *
  19. * ide-smart 1.3 - IDE S.M.A.R.T. checking tool
  20. * Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
  21. * 1998 Gadi Oxman <gadio@netvision.net.il>
  22. *
  23. * This program is free software; you can redistribute it and/or modify
  24. * it under the terms of the GNU General Public License as published by
  25. * the Free Software Foundation; either version 2 of the License, or
  26. * (at your option) any later version.
  27. *
  28. * This program is distributed in the hope that it will be useful,
  29. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  30. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  31. * GNU General Public License for more details.
  32. *
  33. * You should have received a copy of the GNU General Public License
  34. * along with this program; if not, write to the Free Software
  35. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  36. *
  37. * $Id$
  38. */
/*
 * Plugin identity strings.  progname and revision are handed to
 * print_revision(); copyright and email are the arguments for the
 * COPYRIGHT format printed by print_help().
 * "$Revision$" looks like a version-control keyword placeholder -- TODO confirm.
 */
const char *progname = "check_ide_smart";
const char *revision = "$Revision$";
const char *copyright = "2000-2004";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
#include "common.h"
#include "utils.h"
/* Help and usage printers, defined at the end of this file. */
void print_help (void);
void print_usage (void);
  47. #include <sys/stat.h>
  48. #include <sys/ioctl.h>
  49. #include <fcntl.h>
  50. #include <linux/hdreg.h>
  51. #include <linux/types.h>
  52. #include <errno.h>
  53. #define NR_ATTRIBUTES 30
  54. #ifndef TRUE
  55. #define TRUE 1
  56. #endif /* */
  57. #define PREFAILURE 2
  58. #define ADVISORY 1
  59. #define OPERATIONAL 0
  60. #define UNKNOWN -1
/*
 * One slot of the threshold table.  Declared packed so the members are
 * laid out without padding; the table is filled by a raw memcpy in
 * smart_read_thresholds().
 */
typedef struct threshold_s
{
	__u8 id;		/* attribute id; 0 is skipped as an empty slot by the scanning loops */
	__u8 threshold;		/* an attribute whose value is <= this counts as failed */
	__u8 reserved[10];	/* not read anywhere in this file */
}
__attribute__ ((packed)) threshold_t;
/*
 * Threshold block as returned by the drive.  smart_read_thresholds()
 * copies 512 bytes of ioctl payload straight into this structure, so the
 * packed layout is expected to add up to 512 bytes (assuming __u8/__u16
 * are 1 and 2 bytes wide).
 */
typedef struct thresholds_s
{
	__u16 revision;				/* printed as "SmartRevision" by print_thresholds() */
	threshold_t thresholds[NR_ATTRIBUTES];	/* one slot per attribute, parallel to values_t.values */
	__u8 reserved[18];			/* not read anywhere in this file */
	__u8 vendor[131];			/* not read anywhere in this file */
	__u8 checksum;				/* printed as "CheckSum"; never verified in this file */
}
__attribute__ ((packed)) thresholds_t;
/*
 * One slot of the attribute value table.  Declared packed so the members
 * are laid out without padding; the table is filled by a raw memcpy in
 * smart_read_values().
 */
typedef struct value_s
{
	__u8 id;		/* attribute id; 0 is skipped as an empty slot by the scanning loops */
	__u16 status;		/* bit 0: reported as PreFailure (else Advisory); bit 1: printed as OnLine (else OffLine) */
	__u8 value;		/* current value, compared against threshold_t.threshold */
	__u8 vendor[8];		/* not read anywhere in this file */
}
__attribute__ ((packed)) value_t;
/*
 * Attribute value block as returned by the drive.  smart_read_values()
 * copies 512 bytes of ioctl payload straight into this structure, so the
 * packed layout is expected to add up to 512 bytes (assuming __u8/__u16
 * are 1 and 2 bytes wide).
 */
typedef struct values_s
{
	__u16 revision;			/* printed as "SmartRevision" by print_values() */
	value_t values[NR_ATTRIBUTES];	/* one slot per attribute, parallel to thresholds_t.thresholds */
	__u8 offline_status;		/* low 7 bits looked up via get_offline_text(); bit 0x80 printed as AutoOffLine */
	__u8 vendor1;			/* not read anywhere in this file */
	__u16 offline_timeout;		/* print_values() divides by 60 and labels the result minutes */
	__u8 vendor2;			/* not read anywhere in this file */
	__u8 offline_capability;	/* bit 1: Immediate, bit 2: Auto, bit 4: AbortOnCmd (else SuspendOnCmd) */
	__u16 smart_capability;		/* bit 1: SaveOnStandBy, bit 2: AutoSave */
	__u8 reserved[16];		/* not read anywhere in this file */
	__u8 vendor[125];		/* not read anywhere in this file */
	__u8 checksum;			/* printed as "CheckSum"; never verified in this file */
}
__attribute__ ((packed)) values_t;
/*
 * Lookup table mapping an off-line status code to a printable label.
 * get_offline_text() scans it linearly and stops at the entry whose text
 * pointer is null, so the final {0, 0} terminator must stay last.
 */
struct
{
	__u8 value;	/* status code, matched against offline_status with the top bit masked off */
	char *text;	/* label to print; a null pointer marks the end of the table */
}
offline_status_text[] =
	{
		{0x00, "NeverStarted"},
		{0x02, "Completed"},
		{0x04, "Suspended"},
		{0x05, "Aborted"},
		{0x06, "Failed"},
		{0, 0}
	};
/*
 * Table of the simple SMART sub-commands issued by smart_cmd_simple().
 * It is indexed by enum SmartCommand, so the row order here must match
 * the enumerator order.  The SMART_* constants are not defined in this
 * file (presumably they come from <linux/hdreg.h> -- TODO confirm).
 */
struct
{
	__u8 value;	/* sub-command byte placed in args[2] of the ioctl request */
	char *text;	/* name used in the error message when the ioctl fails */
}
smart_command[] =
	{
		{SMART_ENABLE, "SMART_ENABLE"},
		{SMART_DISABLE, "SMART_DISABLE"},
		{SMART_IMMEDIATE_OFFLINE, "SMART_IMMEDIATE_OFFLINE"},
		{SMART_AUTO_OFFLINE, "SMART_AUTO_OFFLINE"}
	};
/*
 * Index into the smart_command table; keep the enumerators in the same
 * order as the table rows, because smart_cmd_simple() uses the
 * enumerator value directly as the array subscript.
 */
enum SmartCommand
	{ SMART_CMD_ENABLE,
		SMART_CMD_DISABLE,
		SMART_CMD_IMMEDIATE_OFFLINE,
		SMART_CMD_AUTO_OFFLINE
	};
  133. void print_values (values_t * p, thresholds_t * t);
  134. int smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error);
  135. int
  136. main (int argc, char *argv[])
  137. {
  138. char *device = NULL;
  139. int command = -1;
  140. int o, longindex;
  141. int retval = 0;
  142. thresholds_t thresholds;
  143. values_t values;
  144. int fd;
  145. static struct option longopts[] = {
  146. {"device", required_argument, 0, 'd'},
  147. {"immediate", no_argument, 0, 'i'},
  148. {"quiet-check", no_argument, 0, 'q'},
  149. {"auto-on", no_argument, 0, '1'},
  150. {"auto-off", no_argument, 0, '0'},
  151. {"nagios", no_argument, 0, 'n'},
  152. {"help", no_argument, 0, 'h'},
  153. {"version", no_argument, 0, 'V'}, {0, 0, 0, 0}
  154. };
  155. setlocale (LC_ALL, "");
  156. bindtextdomain (PACKAGE, LOCALEDIR);
  157. textdomain (PACKAGE);
  158. while (1) {
  159. o = getopt_long (argc, argv, "+d:iq10nhV", longopts, &longindex);
  160. if (o == -1 || o == EOF || o == 1)
  161. break;
  162. switch (o) {
  163. case 'd':
  164. device = optarg;
  165. break;
  166. case 'q':
  167. command = 3;
  168. break;
  169. case 'i':
  170. command = 2;
  171. break;
  172. case '1':
  173. command = 1;
  174. break;
  175. case '0':
  176. command = 0;
  177. break;
  178. case 'n':
  179. command = 4;
  180. break;
  181. case 'h':
  182. print_help ();
  183. return STATE_OK;
  184. case 'V':
  185. print_revision (progname, revision);
  186. return STATE_OK;
  187. default:
  188. usage2 (_("Unknown argument"), optarg);
  189. }
  190. }
  191. if (optind < argc) {
  192. device = argv[optind];
  193. }
  194. if (!device) {
  195. print_help ();
  196. return STATE_OK;
  197. }
  198. fd = open (device, O_RDONLY);
  199. if (fd < 0) {
  200. printf (_("CRITICAL - Couldn't open device %s: %s\n"), device, strerror (errno));
  201. return STATE_CRITICAL;
  202. }
  203. if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
  204. printf (_("CRITICAL - SMART_CMD_ENABLE\n"));
  205. return STATE_CRITICAL;
  206. }
  207. switch (command) {
  208. case 0:
  209. retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
  210. break;
  211. case 1:
  212. retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
  213. break;
  214. case 2:
  215. retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
  216. break;
  217. case 3:
  218. smart_read_values (fd, &values);
  219. smart_read_thresholds (fd, &thresholds);
  220. retval = values_not_passed (&values, &thresholds);
  221. break;
  222. case 4:
  223. smart_read_values (fd, &values);
  224. smart_read_thresholds (fd, &thresholds);
  225. retval = nagios (&values, &thresholds);
  226. break;
  227. default:
  228. smart_read_values (fd, &values);
  229. smart_read_thresholds (fd, &thresholds);
  230. print_values (&values, &thresholds);
  231. break;
  232. }
  233. close (fd);
  234. return retval;
  235. }
  236. char *
  237. get_offline_text (int status)
  238. {
  239. int i;
  240. for (i = 0; offline_status_text[i].text; i++) {
  241. if (offline_status_text[i].value == status) {
  242. return offline_status_text[i].text;
  243. }
  244. }
  245. return "UNKNOW";
  246. }
  247. int
  248. smart_read_values (int fd, values_t * values)
  249. {
  250. int e;
  251. __u8 args[4 + 512];
  252. args[0] = WIN_SMART;
  253. args[1] = 0;
  254. args[2] = SMART_READ_VALUES;
  255. args[3] = 1;
  256. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  257. e = errno;
  258. printf (_("CRITICAL - SMART_READ_VALUES: %s\n"), strerror (errno));
  259. return e;
  260. }
  261. memcpy (values, args + 4, 512);
  262. return 0;
  263. }
  264. int
  265. values_not_passed (values_t * p, thresholds_t * t)
  266. {
  267. value_t * value = p->values;
  268. threshold_t * threshold = t->thresholds;
  269. int failed = 0;
  270. int passed = 0;
  271. int i;
  272. for (i = 0; i < NR_ATTRIBUTES; i++) {
  273. if (value->id && threshold->id && value->id == threshold->id) {
  274. if (value->value <= threshold->threshold) {
  275. ++failed;
  276. }
  277. else {
  278. ++passed;
  279. }
  280. }
  281. ++value;
  282. ++threshold;
  283. }
  284. return (passed ? -failed : 2);
  285. }
  286. int
  287. nagios (values_t * p, thresholds_t * t)
  288. {
  289. value_t * value = p->values;
  290. threshold_t * threshold = t->thresholds;
  291. int status = OPERATIONAL;
  292. int prefailure = 0;
  293. int advisory = 0;
  294. int failed = 0;
  295. int passed = 0;
  296. int total = 0;
  297. int i;
  298. for (i = 0; i < NR_ATTRIBUTES; i++) {
  299. if (value->id && threshold->id && value->id == threshold->id) {
  300. if (value->value <= threshold->threshold) {
  301. ++failed;
  302. if (value->status & 1) {
  303. status = PREFAILURE;
  304. ++prefailure;
  305. }
  306. else {
  307. status = ADVISORY;
  308. ++advisory;
  309. }
  310. }
  311. else {
  312. ++passed;
  313. }
  314. ++total;
  315. }
  316. ++value;
  317. ++threshold;
  318. }
  319. switch (status) {
  320. case PREFAILURE:
  321. printf (_("CRITICAL - %d Harddrive PreFailure%cDetected! %d/%d tests failed.\n"),
  322. prefailure,
  323. prefailure > 1 ? 's' : ' ',
  324. failed,
  325. total);
  326. status=STATE_CRITICAL;
  327. break;
  328. case ADVISORY:
  329. printf (_("WARNING - %d Harddrive Advisor%s Detected. %d/%d tests failed.\n"),
  330. advisory,
  331. advisory > 1 ? "ies" : "y",
  332. failed,
  333. total);
  334. status=STATE_WARNING;
  335. break;
  336. case OPERATIONAL:
  337. printf (_("OK - Operational (%d/%d tests passed)\n"), passed, total);
  338. status=STATE_OK;
  339. break;
  340. default:
  341. printf (_("ERROR - Status '%d' unkown. %d/%d tests passed\n"), status,
  342. passed, total);
  343. status = STATE_UNKNOWN;
  344. break;
  345. }
  346. return status;
  347. }
  348. void
  349. print_value (value_t * p, threshold_t * t)
  350. {
  351. printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
  352. p->id, p->status, p->status & 1 ? "PreFailure" : "Advisory ",
  353. p->status & 2 ? "OnLine " : "OffLine", p->value, t->threshold,
  354. p->value > t->threshold ? "Passed" : "Failed");
  355. }
  356. void
  357. print_values (values_t * p, thresholds_t * t)
  358. {
  359. value_t * value = p->values;
  360. threshold_t * threshold = t->thresholds;
  361. int i;
  362. for (i = 0; i < NR_ATTRIBUTES; i++) {
  363. if (value->id && threshold->id && value->id == threshold->id) {
  364. print_value (value++, threshold++);
  365. }
  366. }
  367. printf
  368. (_("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n"),
  369. p->offline_status,
  370. get_offline_text (p->offline_status & 0x7f),
  371. (p->offline_status & 0x80 ? "Yes" : "No"),
  372. p->offline_timeout / 60);
  373. printf
  374. (_("OffLineCapability=%d {%s %s %s}\n"),
  375. p->offline_capability,
  376. p->offline_capability & 1 ? "Immediate" : "",
  377. p->offline_capability & 2 ? "Auto" : "",
  378. p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
  379. printf
  380. (_("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n"),
  381. p->revision,
  382. p->checksum,
  383. p->smart_capability,
  384. p->smart_capability & 1 ? "SaveOnStandBy" : "",
  385. p->smart_capability & 2 ? "AutoSave" : "");
  386. }
  387. void
  388. print_thresholds (thresholds_t * p)
  389. {
  390. threshold_t * threshold = p->thresholds;
  391. int i;
  392. printf ("\n");
  393. printf ("SmartRevision=%d\n", p->revision);
  394. for (i = 0; i < NR_ATTRIBUTES; i++) {
  395. if (threshold->id) {
  396. printf ("Id=%3d, Threshold=%3d\n", threshold->id,
  397. threshold->threshold); }
  398. ++threshold;
  399. }
  400. printf ("CheckSum=%d\n", p->checksum);
  401. }
  402. int
  403. smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error)
  404. {
  405. int e = 0;
  406. __u8 args[4];
  407. args[0] = WIN_SMART;
  408. args[1] = val0;
  409. args[2] = smart_command[command].value;
  410. args[3] = 0;
  411. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  412. e = errno;
  413. if (show_error) {
  414. printf (_("CRITICAL - %s: %s\n"), smart_command[command].text, strerror (errno));
  415. }
  416. }
  417. return e;
  418. }
  419. int
  420. smart_read_thresholds (int fd, thresholds_t * thresholds)
  421. {
  422. int e;
  423. __u8 args[4 + 512];
  424. args[0] = WIN_SMART;
  425. args[1] = 0;
  426. args[2] = SMART_READ_THRESHOLDS;
  427. args[3] = 1;
  428. if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
  429. e = errno;
  430. printf (_("CRITICAL - SMART_READ_THRESHOLDS: %s\n"), strerror (errno));
  431. return e;
  432. }
  433. memcpy (thresholds, args + 4, 512);
  434. return 0;
  435. }
  436. void
  437. print_help (void)
  438. {
  439. print_revision (progname, revision);
  440. printf ("Nagios feature - 1999 Robert Dale <rdale@digital-mission.com>\n");
  441. printf ("(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n");
  442. printf (COPYRIGHT, copyright, email);
  443. printf(_("This plugin checks a local hard drive with the (Linux specific) SMART interface [http://smartlinux.sourceforge.net/smart/index.php].\n\n"));
  444. printf ("\
  445. Usage: %s [OPTION] [DEVICE]\n\
  446. -d, --device=DEVICE\n\
  447. Select device DEVICE\n\
  448. Note: if the device is selected with this option, _no_ other options are accepted\n\
  449. -i, --immediate\n\
  450. Perform immediately offline tests\n\
  451. -q, --quiet-check\n\
  452. Returns the number of failed tests\n\
  453. -1, --auto-on\n\
  454. Turn on automatic offline tests\n\
  455. -0, --auto-off\n\
  456. Turn off automatic offline tests\n\
  457. -n, --nagios\n\
  458. Output suitable for Nagios\n", progname);
  459. }
  460. void
  461. print_usage (void)
  462. {
  463. printf ("\
  464. Usage: %s [-d <device>] [-i <immediate>] [-q quiet] [-1 <auto-on>]\n\
  465. [-O <auto-off>] [-n <nagios>]\n", progname);
  466. }