mon.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635
  1. /*
  2. * Copyright (c) 2010 Red Hat, Inc.
  3. *
  4. * All rights reserved.
  5. *
  6. * Author: Angus Salkeld <asalkeld@redhat.com>
  7. *
  8. * This software licensed under BSD license, the text of which follows:
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions are met:
  12. *
  13. * - Redistributions of source code must retain the above copyright notice,
  14. * this list of conditions and the following disclaimer.
  15. * - Redistributions in binary form must reproduce the above copyright notice,
  16. * this list of conditions and the following disclaimer in the documentation
  17. * and/or other materials provided with the distribution.
  18. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  19. * contributors may be used to endorse or promote products derived from this
  20. * software without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. * THE POSSIBILITY OF SUCH DAMAGE.
  33. */
  34. #include <config.h>
  35. #include <unistd.h>
  36. #if defined(HAVE_LIBSTATGRAB)
  37. #include <statgrab.h>
  38. #endif
  39. #include <corosync/corotypes.h>
  40. #include <corosync/corodefs.h>
  41. #include <corosync/lcr/lcr_comp.h>
  42. #include <corosync/engine/coroapi.h>
  43. #include <corosync/list.h>
  44. #include <corosync/totem/coropoll.h>
  45. #include <corosync/engine/logsys.h>
  46. #include "../exec/fsm.h"
  47. LOGSYS_DECLARE_SUBSYS ("MON");
  48. #undef ENTER
  49. #define ENTER() log_printf (LOGSYS_LEVEL_INFO, "%s", __func__)
  50. /*
  51. * Service Interfaces required by service_message_handler struct
  52. */
/*
 * Forward declaration of the executive init hook; it is referenced by
 * mon_service_engine below and defined at the end of the file.
 */
static int mon_exec_init_fn (
struct corosync_api_v1 *corosync_api);
/*
 * Poll instance used for the statistics timers. It stays 0 until
 * mon_thread_handler() assigns the result of poll_create().
 */
hdb_handle_t mon_poll = 0;
/* API table handed to mon_exec_init_fn() and used for all objdb calls. */
static struct corosync_api_v1 *api;
/* Handle of the "resources" object found or created in mon_exec_init_fn(). */
static hdb_handle_t resources_obj;
/* Thread created by mon_exec_init_fn() to run mon_thread_handler(). */
static pthread_t mon_poll_thread;
/*
 * Default poll period. The period is multiplied by 1000 before it is given
 * to poll_timer_add(), so this is presumably in seconds -- confirm.
 */
#define MON_DEFAULT_PERIOD 3
/*
 * Service engine descriptor returned by mon_get_service_engine_ver0().
 * Only the executive init hook is provided; every library, executive
 * message, configuration-change and dump hook is left NULL/0.
 */
struct corosync_service_engine mon_service_engine = {
.name = "corosync resource monitoring service",
.id = MON_SERVICE,
.priority = 1,
.private_data_size = 0,
.flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
.lib_init_fn = NULL,
.lib_exit_fn = NULL,
.lib_engine = NULL,
.lib_engine_count = 0,
.exec_engine = NULL,
.exec_engine_count = 0,
.confchg_fn = NULL,
.exec_init_fn = mon_exec_init_fn,
.exec_dump_fn = NULL,
.sync_mode = CS_SYNC_V2
};
/* NOTE(review): this list is not referenced anywhere in the visible code. */
static DECLARE_LIST_INIT (confchg_notify);
/*
 * Per-resource monitoring state: one instance exists for each monitored
 * value (memory_used_inst and load_15min_inst below).
 */
struct resource_instance {
/* Object handle found or created by mon_instance_init(); target of the key updates. */
hdb_handle_t handle;
/* Object name; also used as the name of the state machine. */
const char *name;
/* Filled in by poll_timer_add() each time the statistics timer is armed. */
poll_timer_handle timer_handle;
/* Timer callback; it is called with a pointer to this instance as data. */
void (*update_stats_fn) (void *data);
/* State machine driven by the MON_E_* events. */
struct cs_fsm fsm;
/* Poll period; multiplied by 1000 when the timer is armed. */
int32_t period;
/* Selects which member of the max union is in use. */
objdb_value_types_t max_type;
/* Threshold; a reading above it raises MON_E_FAILURE. */
union {
int32_t int32;
double dbl;
} max;
};
/* Timer callbacks, defined further down. */
static void mem_update_stats_fn (void *data);
static void load_update_stats_fn (void *data);
/*
 * Memory usage monitor: integer percentage compared against max.int32.
 * The INT32_MAX default threshold can never be exceeded by an int32 reading.
 */
static struct resource_instance memory_used_inst = {
.name = "memory_used",
.update_stats_fn = mem_update_stats_fn,
.max_type = OBJDB_VALUETYPE_INT32,
.max.int32 = INT32_MAX,
.period = MON_DEFAULT_PERIOD,
};
/*
 * 15 minute load average monitor: double compared against max.dbl, which
 * defaults to INT32_MAX.
 */
static struct resource_instance load_15min_inst = {
.name = "load_15min",
.update_stats_fn = load_update_stats_fn,
.max_type = OBJDB_VALUETYPE_DOUBLE,
.max.dbl = INT32_MAX,
.period = MON_DEFAULT_PERIOD,
};
  107. /*
  108. * F S M
  109. */
/* State machine handlers referenced by mon_fsm_table. */
static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
static void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
/*
 * Names returned by mon_res_state_to_str() and mon_res_event_to_str().
 * The state names are also the values written to each resource's "state" key.
 */
const char * mon_ok_str = "ok";
const char * mon_failed_str = "failed";
const char * mon_failure_str = "failure";
const char * mon_disabled_str = "disabled";
const char * mon_config_changed_str = "config_changed";
/* States of a monitored resource. */
enum mon_resource_state {
/* Initial state; also entered when no "max" key can be read. */
MON_S_DISABLED,
/* A "max" threshold was read successfully. */
MON_S_OK,
/* Entered from MON_S_OK when a failure event is processed. */
MON_S_FAILED
};
/* Events fed to a resource's state machine. */
enum mon_resource_event {
/* Raised at instance init and when the "max" or "poll_period" key changes. */
MON_E_CONFIG_CHANGED,
/* Raised by the stats callbacks when a reading exceeds the threshold. */
MON_E_FAILURE
};
/*
 * State machine table: one row per (state, event) pair, giving the handler
 * to run. The trailing list, terminated by -1, is presumably the set of
 * states the handler may move to -- confirm against ../exec/fsm.h.
 * Failure events have no handler in the DISABLED and FAILED states.
 */
struct cs_fsm_entry mon_fsm_table[] = {
{ MON_S_DISABLED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_DISABLED, MON_S_OK, -1} },
{ MON_S_DISABLED, MON_E_FAILURE, NULL, {-1} },
{ MON_S_OK, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_OK, MON_S_DISABLED, -1} },
{ MON_S_OK, MON_E_FAILURE, mon_resource_failed, {MON_S_FAILED, -1} },
{ MON_S_FAILED, MON_E_CONFIG_CHANGED, mon_config_changed, {MON_S_OK, MON_S_DISABLED, -1} },
{ MON_S_FAILED, MON_E_FAILURE, NULL, {-1} },
};
  134. /*
  135. * Dynamic loading descriptor
  136. */
/* Accessor defined below; exposed through the interface structure. */
static struct corosync_service_engine *mon_get_service_engine_ver0 (void);
/*
 * Interface object attached to corosync_mon_ver0[0] by
 * corosync_lcr_component_register().
 */
static struct corosync_service_engine_iface_ver0 mon_service_engine_iface = {
.corosync_get_service_engine_ver0 = mon_get_service_engine_ver0
};
/*
 * LCR interface description for this component. The interfaces pointer is
 * NULL here and is set by lcr_interfaces_set() in
 * corosync_lcr_component_register().
 */
static struct lcr_iface corosync_mon_ver0[1] = {
{
.name = "corosync_mon",
.version = 0,
.versions_replace = 0,
.versions_replace_count = 0,
.dependencies = 0,
.dependency_count = 0,
.constructor = NULL,
.destructor = NULL,
.interfaces = NULL,
}
};
/* Component record passed to lcr_component_register(); it has one interface. */
static struct lcr_comp mon_comp_ver0 = {
.iface_count = 1,
.ifaces = corosync_mon_ver0
};
  158. static struct corosync_service_engine *mon_get_service_engine_ver0 (void)
  159. {
  160. return (&mon_service_engine);
  161. }
/*
 * Attach the service engine interface to the LCR interface description and
 * register the component.
 *
 * On non-Solaris builds the function is static and marked as a constructor.
 * On Solaris builds it is a plain external function instead; who calls it
 * there is not visible in this file.
 */
#ifdef COROSYNC_SOLARIS
void corosync_lcr_component_register (void);
void corosync_lcr_component_register (void) {
#else
__attribute__ ((constructor)) static void corosync_lcr_component_register (void) {
#endif
lcr_interfaces_set (&corosync_mon_ver0[0], &mon_service_engine_iface);
lcr_component_register (&mon_comp_ver0);
}
  171. static const char * mon_res_state_to_str(struct cs_fsm* fsm,
  172. int32_t state)
  173. {
  174. switch (state) {
  175. case MON_S_DISABLED:
  176. return mon_disabled_str;
  177. break;
  178. case MON_S_OK:
  179. return mon_ok_str;
  180. break;
  181. case MON_S_FAILED:
  182. return mon_failed_str;
  183. break;
  184. }
  185. return NULL;
  186. }
  187. static const char * mon_res_event_to_str(struct cs_fsm* fsm,
  188. int32_t event)
  189. {
  190. switch (event) {
  191. case MON_E_CONFIG_CHANGED:
  192. return mon_config_changed_str;
  193. break;
  194. case MON_E_FAILURE:
  195. return mon_failure_str;
  196. break;
  197. }
  198. return NULL;
  199. }
  200. static void mon_fsm_state_set (struct cs_fsm* fsm,
  201. enum mon_resource_state next_state, struct resource_instance* inst)
  202. {
  203. enum mon_resource_state prev_state = fsm->curr_state;
  204. const char *state_str;
  205. ENTER();
  206. cs_fsm_state_set(fsm, next_state, inst);
  207. if (prev_state == fsm->curr_state) {
  208. return;
  209. }
  210. state_str = mon_res_state_to_str(fsm, fsm->curr_state);
  211. api->object_key_replace (inst->handle,
  212. "state", strlen ("state"),
  213. state_str, strlen (state_str));
  214. }
  215. static void mon_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
  216. {
  217. struct resource_instance * inst = (struct resource_instance *)data;
  218. char *str;
  219. size_t str_len;
  220. objdb_value_types_t type;
  221. int32_t tmp_value;
  222. int32_t res;
  223. ENTER();
  224. res = api->object_key_get_typed (inst->handle,
  225. "poll_period",
  226. (void**)&str, &str_len,
  227. &type);
  228. if (res == 0) {
  229. tmp_value = strtol (str, NULL, 0);
  230. if (tmp_value > 0 && tmp_value < 120) {
  231. if (inst->period != tmp_value) {
  232. inst->period = tmp_value;
  233. }
  234. }
  235. }
  236. res = api->object_key_get_typed (inst->handle, "max",
  237. (void**)&str, &str_len, &type);
  238. if (res != 0) {
  239. if (inst->max_type == OBJDB_VALUETYPE_INT32) {
  240. inst->max.int32 = INT32_MAX;
  241. } else
  242. if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) {
  243. inst->max.dbl = INT32_MAX;
  244. }
  245. mon_fsm_state_set (fsm, MON_S_DISABLED, inst);
  246. } else {
  247. if (inst->max_type == OBJDB_VALUETYPE_INT32) {
  248. inst->max.int32 = strtol (str, NULL, 0);
  249. } else
  250. if (inst->max_type == OBJDB_VALUETYPE_DOUBLE) {
  251. inst->max.dbl = strtod (str, NULL);
  252. }
  253. mon_fsm_state_set (fsm, MON_S_OK, inst);
  254. }
  255. if (mon_poll == 0) {
  256. return;
  257. }
  258. poll_timer_delete (mon_poll, inst->timer_handle);
  259. /*
  260. * run the updater, incase the period has shortened
  261. */
  262. inst->update_stats_fn (inst);
  263. poll_timer_add (mon_poll,
  264. inst->period * 1000, NULL,
  265. inst->update_stats_fn,
  266. &inst->timer_handle);
  267. }
  268. void mon_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
  269. {
  270. struct resource_instance * inst = (struct resource_instance *)data;
  271. ENTER();
  272. mon_fsm_state_set (fsm, MON_S_FAILED, inst);
  273. }
  274. static int32_t percent_mem_used_get(void)
  275. {
  276. #if defined(HAVE_LIBSTATGRAB)
  277. sg_mem_stats *mem_stats;
  278. sg_swap_stats *swap_stats;
  279. long long total, freemem;
  280. mem_stats = sg_get_mem_stats();
  281. swap_stats = sg_get_swap_stats();
  282. if (mem_stats == NULL || swap_stats != NULL) {
  283. log_printf (LOGSYS_LEVEL_ERROR, "Unable to get memory stats: %s\n",
  284. sg_str_error(sg_get_error()));
  285. return -1;
  286. }
  287. total = mem_stats->total + swap_stats->total;
  288. freemem = mem_stats->free + swap_stats->free;
  289. return ((total - freemem) * 100) / total;
  290. #else
  291. #if defined(COROSYNC_LINUX)
  292. char *line_ptr;
  293. char line[512];
  294. unsigned long long value;
  295. FILE *f;
  296. long long total = 0;
  297. long long freemem = 0;
  298. if ((f = fopen("/proc/meminfo", "r")) == NULL) {
  299. return -1;
  300. }
  301. while ((line_ptr = fgets(line, sizeof(line), f)) != NULL) {
  302. if (sscanf(line_ptr, "%*s %llu kB", &value) != 1) {
  303. continue;
  304. }
  305. value *= 1024;
  306. if (strncmp(line_ptr, "MemTotal:", 9) == 0) {
  307. total += value;
  308. } else if (strncmp(line_ptr, "MemFree:", 8) == 0) {
  309. freemem += value;
  310. } else if (strncmp(line_ptr, "SwapTotal:", 10) == 0) {
  311. total += value;
  312. } else if (strncmp(line_ptr, "SwapFree:", 9) == 0) {
  313. freemem += value;
  314. }
  315. }
  316. fclose(f);
  317. return ((total - freemem) * 100) / total;
  318. #else
  319. #error need libstatgrab or linux.
  320. #endif /* COROSYNC_LINUX */
  321. #endif /* HAVE_LIBSTATGRAB */
  322. }
  323. static void mem_update_stats_fn (void *data)
  324. {
  325. struct resource_instance * inst = (struct resource_instance *)data;
  326. int32_t new_value;
  327. uint64_t timestamp;
  328. new_value = percent_mem_used_get();
  329. if (new_value > 0) {
  330. api->object_key_replace (inst->handle,
  331. "current", strlen("current"),
  332. &new_value, sizeof(new_value));
  333. timestamp = time (NULL);
  334. api->object_key_replace (inst->handle,
  335. "last_updated", strlen("last_updated"),
  336. &timestamp, sizeof(time_t));
  337. if (new_value > inst->max.int32) {
  338. cs_fsm_process (&inst->fsm, MON_E_FAILURE, inst);
  339. }
  340. }
  341. poll_timer_add (mon_poll,
  342. inst->period * 1000, inst,
  343. inst->update_stats_fn,
  344. &inst->timer_handle);
  345. }
  346. static double min15_loadavg_get(void)
  347. {
  348. #if defined(HAVE_LIBSTATGRAB)
  349. sg_load_stats *load_stats;
  350. load_stats = sg_get_load_stats ();
  351. if (load_stats == NULL) {
  352. log_printf (LOGSYS_LEVEL_ERROR, "Unable to get load stats: %s\n",
  353. sg_str_error (sg_get_error()));
  354. return -1;
  355. }
  356. return load_stats->min15;
  357. #else
  358. #if defined(COROSYNC_LINUX)
  359. double loadav[3];
  360. if (getloadavg(loadav,3) < 0) {
  361. return -1;
  362. }
  363. return loadav[2];
  364. #else
  365. #error need libstatgrab or linux.
  366. #endif /* COROSYNC_LINUX */
  367. #endif /* HAVE_LIBSTATGRAB */
  368. }
  369. static void load_update_stats_fn (void *data)
  370. {
  371. struct resource_instance * inst = (struct resource_instance *)data;
  372. uint64_t timestamp;
  373. int32_t res = 0;
  374. double min15 = min15_loadavg_get();
  375. if (min15 < 0) {
  376. }
  377. res = api->object_key_replace (inst->handle,
  378. "current", strlen("current"),
  379. &min15, sizeof (min15));
  380. if (res != 0)
  381. log_printf (LOGSYS_LEVEL_ERROR, "replace current failed: %d", res);
  382. timestamp = cs_timestamp_get();
  383. res = api->object_key_replace (inst->handle,
  384. "last_updated", strlen("last_updated"),
  385. &timestamp, sizeof(uint64_t));
  386. if (res != 0)
  387. log_printf (LOGSYS_LEVEL_ERROR, "replace last_updated failed: %d", res);
  388. if (min15 > inst->max.dbl) {
  389. cs_fsm_process (&inst->fsm, MON_E_FAILURE, &inst);
  390. }
  391. poll_timer_add (mon_poll,
  392. inst->period * 1000, inst,
  393. inst->update_stats_fn,
  394. &inst->timer_handle);
  395. }
  396. static void *mon_thread_handler (void * unused)
  397. {
  398. #ifdef HAVE_LIBSTATGRAB
  399. sg_init();
  400. #endif /* HAVE_LIBSTATGRAB */
  401. mon_poll = poll_create ();
  402. poll_timer_add (mon_poll,
  403. memory_used_inst.period * 1000,
  404. &memory_used_inst,
  405. memory_used_inst.update_stats_fn,
  406. &memory_used_inst.timer_handle);
  407. poll_timer_add (mon_poll,
  408. load_15min_inst.period * 1000,
  409. &load_15min_inst,
  410. load_15min_inst.update_stats_fn,
  411. &load_15min_inst.timer_handle);
  412. poll_run (mon_poll);
  413. return NULL;
  414. }
  415. static int object_find_or_create (
  416. hdb_handle_t parent_object_handle,
  417. hdb_handle_t *object_handle,
  418. const void *object_name,
  419. size_t object_name_len)
  420. {
  421. hdb_handle_t obj_finder;
  422. hdb_handle_t obj;
  423. int ret = -1;
  424. api->object_find_create (
  425. parent_object_handle,
  426. object_name,
  427. object_name_len,
  428. &obj_finder);
  429. if (api->object_find_next (obj_finder, &obj) == 0) {
  430. /* found it */
  431. *object_handle = obj;
  432. ret = 0;
  433. }
  434. else {
  435. ret = api->object_create (parent_object_handle,
  436. object_handle,
  437. object_name, object_name_len);
  438. }
  439. api->object_find_destroy (obj_finder);
  440. return ret;
  441. }
  442. static void mon_key_change_notify (object_change_type_t change_type,
  443. hdb_handle_t parent_object_handle,
  444. hdb_handle_t object_handle,
  445. const void *object_name_pt, size_t object_name_len,
  446. const void *key_name_pt, size_t key_len,
  447. const void *key_value_pt, size_t key_value_len,
  448. void *priv_data_pt)
  449. {
  450. struct resource_instance* inst = (struct resource_instance*)priv_data_pt;
  451. if ((strcmp ((char*)key_name_pt, "max") == 0) ||
  452. (strcmp ((char*)key_name_pt, "poll_period") == 0)) {
  453. ENTER();
  454. cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
  455. }
  456. }
  457. static void mon_instance_init (hdb_handle_t parent, struct resource_instance* inst)
  458. {
  459. int32_t res;
  460. char mon_period_str[32];
  461. size_t mon_period_len;
  462. objdb_value_types_t mon_period_type;
  463. int32_t tmp_value;
  464. int32_t zero_32 = 0;
  465. time_t zero_64 = 0;
  466. double zero_double = 0;
  467. ENTER();
  468. object_find_or_create (parent,
  469. &inst->handle,
  470. inst->name, strlen (inst->name));
  471. if (inst->max_type == OBJDB_VALUETYPE_INT32) {
  472. api->object_key_create_typed (inst->handle,
  473. "current", &zero_32,
  474. sizeof (zero_32), inst->max_type);
  475. } else {
  476. api->object_key_create_typed (inst->handle,
  477. "current", &zero_double,
  478. sizeof (zero_double), inst->max_type);
  479. }
  480. api->object_key_create_typed (inst->handle,
  481. "last_updated", &zero_64,
  482. sizeof (time_t), OBJDB_VALUETYPE_INT64);
  483. api->object_key_create_typed (inst->handle,
  484. "state", mon_disabled_str, strlen (mon_disabled_str),
  485. OBJDB_VALUETYPE_STRING);
  486. inst->fsm.name = inst->name;
  487. inst->fsm.curr_entry = 0;
  488. inst->fsm.curr_state = MON_S_DISABLED;
  489. inst->fsm.table = mon_fsm_table;
  490. inst->fsm.entries = sizeof(mon_fsm_table) / sizeof(struct cs_fsm_entry);
  491. inst->fsm.state_to_str = mon_res_state_to_str;
  492. inst->fsm.event_to_str = mon_res_event_to_str;
  493. res = api->object_key_get_typed (inst->handle,
  494. "poll_period",
  495. (void**)&mon_period_str, &mon_period_len,
  496. &mon_period_type);
  497. if (res != 0) {
  498. mon_period_len = snprintf (mon_period_str, 32, "%d",
  499. inst->period);
  500. api->object_key_create_typed (inst->handle,
  501. "poll_period", &mon_period_str,
  502. mon_period_len,
  503. OBJDB_VALUETYPE_STRING);
  504. }
  505. else {
  506. tmp_value = strtol (mon_period_str, NULL, 0);
  507. if (tmp_value > 0 && tmp_value < 120)
  508. inst->period = tmp_value;
  509. }
  510. cs_fsm_process (&inst->fsm, MON_E_CONFIG_CHANGED, inst);
  511. poll_timer_add (mon_poll,
  512. inst->period * 1000, inst,
  513. inst->update_stats_fn,
  514. &inst->timer_handle);
  515. api->object_track_start (inst->handle, OBJECT_TRACK_DEPTH_ONE,
  516. mon_key_change_notify,
  517. NULL, NULL, NULL, NULL);
  518. }
  519. static int mon_exec_init_fn (
  520. struct corosync_api_v1 *corosync_api)
  521. {
  522. hdb_handle_t obj;
  523. hdb_handle_t parent;
  524. #ifdef COROSYNC_SOLARIS
  525. logsys_subsys_init();
  526. #endif
  527. api = corosync_api;
  528. ENTER();
  529. object_find_or_create (OBJECT_PARENT_HANDLE,
  530. &resources_obj,
  531. "resources", strlen ("resources"));
  532. object_find_or_create (resources_obj,
  533. &obj,
  534. "system", strlen ("system"));
  535. parent = obj;
  536. mon_instance_init (parent, &memory_used_inst);
  537. mon_instance_init (parent, &load_15min_inst);
  538. pthread_create (&mon_poll_thread, NULL, mon_thread_handler, NULL);
  539. return 0;
  540. }