qdevice-instance.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. /*
  2. * Copyright (c) 2015-2020 Red Hat, Inc.
  3. *
  4. * All rights reserved.
  5. *
  6. * Author: Jan Friesse (jfriesse@redhat.com)
  7. *
  8. * This software licensed under BSD license, the text of which follows:
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions are met:
  12. *
  13. * - Redistributions of source code must retain the above copyright notice,
  14. * this list of conditions and the following disclaimer.
  15. * - Redistributions in binary form must reproduce the above copyright notice,
  16. * this list of conditions and the following disclaimer in the documentation
  17. * and/or other materials provided with the distribution.
  18. * - Neither the name of the Red Hat, Inc. nor the names of its
  19. * contributors may be used to endorse or promote products derived from this
  20. * software without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. * THE POSSIBILITY OF SUCH DAMAGE.
  33. */
  34. #include <string.h>
  35. #include <stdio.h>
  36. #include "log.h"
  37. #include "qdevice-config.h"
  38. #include "qdevice-instance.h"
  39. #include "qdevice-heuristics-exec-list.h"
  40. /*TODO Remove this 3 line includes when porting on pr-poll-loop */
  41. #include "qdevice-heuristics.h"
  42. #include "qdevice-heuristics-cmd.h"
  43. #include "qdevice-votequorum.h"
  44. #include "qdevice-model.h"
  45. #include "utils.h"
  46. int
  47. qdevice_instance_init(struct qdevice_instance *instance,
  48. const struct qdevice_advanced_settings *advanced_settings)
  49. {
  50. memset(instance, 0, sizeof(*instance));
  51. node_list_init(&instance->config_node_list);
  52. instance->vq_last_poll = ((time_t) -1);
  53. instance->advanced_settings = advanced_settings;
  54. return (0);
  55. }
  56. int
  57. qdevice_instance_destroy(struct qdevice_instance *instance)
  58. {
  59. node_list_free(&instance->config_node_list);
  60. return (0);
  61. }
  62. int
  63. qdevice_instance_configure_from_cmap_heuristics(struct qdevice_instance *instance)
  64. {
  65. char *str;
  66. long long int lli;
  67. int i;
  68. int res;
  69. cs_error_t cs_err;
  70. cmap_iter_handle_t iter_handle;
  71. char key_name[CMAP_KEYNAME_MAXLEN + 1];
  72. size_t value_len;
  73. cmap_value_types_t type;
  74. struct qdevice_heuristics_exec_list tmp_exec_list;
  75. struct qdevice_heuristics_exec_list *exec_list;
  76. char *command;
  77. char exec_name[CMAP_KEYNAME_MAXLEN + 1];
  78. char tmp_key[CMAP_KEYNAME_MAXLEN + 1];
  79. size_t no_execs;
  80. int send_exec_list;
  81. instance->heuristics_instance.timeout = instance->heartbeat_interval / 2;
  82. if (cmap_get_string(instance->cmap_handle,
  83. "quorum.device.heuristics.timeout", &str) == CS_OK) {
  84. if (utils_strtonum(str, instance->advanced_settings->heuristics_min_timeout,
  85. instance->advanced_settings->heuristics_max_timeout, &lli) == -1) {
  86. log(LOG_ERR, "heuristics.timeout must be valid number in "
  87. "range <%"PRIu32",%"PRIu32">",
  88. instance->advanced_settings->heuristics_min_timeout,
  89. instance->advanced_settings->heuristics_max_timeout);
  90. free(str);
  91. return (-1);
  92. } else {
  93. instance->heuristics_instance.timeout = lli;
  94. }
  95. free(str);
  96. }
  97. instance->heuristics_instance.sync_timeout = instance->sync_heartbeat_interval / 2;
  98. if (cmap_get_string(instance->cmap_handle,
  99. "quorum.device.heuristics.sync_timeout", &str) == CS_OK) {
  100. if (utils_strtonum(str, instance->advanced_settings->heuristics_min_timeout,
  101. instance->advanced_settings->heuristics_max_timeout, &lli) == -1) {
  102. log(LOG_ERR, "heuristics.sync_timeout must be valid number in "
  103. "range <%"PRIu32",%"PRIu32">",
  104. instance->advanced_settings->heuristics_min_timeout,
  105. instance->advanced_settings->heuristics_max_timeout);
  106. free(str);
  107. return (-1);
  108. } else {
  109. instance->heuristics_instance.sync_timeout = lli;
  110. }
  111. free(str);
  112. }
  113. instance->heuristics_instance.interval = instance->heartbeat_interval * 3;
  114. if (cmap_get_string(instance->cmap_handle,
  115. "quorum.device.heuristics.interval", &str) == CS_OK) {
  116. if (utils_strtonum(str, instance->advanced_settings->heuristics_min_interval,
  117. instance->advanced_settings->heuristics_max_interval, &lli) == -1) {
  118. log(LOG_ERR, "heuristics.interval must be valid number in "
  119. "range <%"PRIu32",%"PRIu32">",
  120. instance->advanced_settings->heuristics_min_interval,
  121. instance->advanced_settings->heuristics_max_interval);
  122. free(str);
  123. return (-1);
  124. } else {
  125. instance->heuristics_instance.interval = lli;
  126. }
  127. free(str);
  128. }
  129. instance->heuristics_instance.mode = QDEVICE_DEFAULT_HEURISTICS_MODE;
  130. if (cmap_get_string(instance->cmap_handle, "quorum.device.heuristics.mode", &str) == CS_OK) {
  131. if ((i = utils_parse_bool_str(str)) == -1) {
  132. if (strcasecmp(str, "sync") != 0) {
  133. log(LOG_ERR, "quorum.device.heuristics.mode value is not valid.");
  134. free(str);
  135. return (-1);
  136. } else {
  137. instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_SYNC;
  138. }
  139. } else {
  140. if (i == 1) {
  141. instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_ENABLED;
  142. } else {
  143. instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_DISABLED;
  144. }
  145. }
  146. free(str);
  147. }
  148. send_exec_list = 0;
  149. exec_list = NULL;
  150. qdevice_heuristics_exec_list_init(&tmp_exec_list);
  151. if (instance->heuristics_instance.mode == QDEVICE_HEURISTICS_MODE_DISABLED) {
  152. exec_list = NULL;
  153. send_exec_list = 1;
  154. } else if (instance->heuristics_instance.mode == QDEVICE_HEURISTICS_MODE_ENABLED ||
  155. instance->heuristics_instance.mode == QDEVICE_HEURISTICS_MODE_SYNC) {
  156. /*
  157. * Walk thru list of commands to exec
  158. */
  159. cs_err = cmap_iter_init(instance->cmap_handle, "quorum.device.heuristics.exec_", &iter_handle);
  160. if (cs_err != CS_OK) {
  161. log(LOG_ERR, "Can't iterate quorum.device.heuristics.exec_ keys. "
  162. "Error %s", cs_strerror(cs_err));
  163. return (-1);
  164. }
  165. while ((cs_err = cmap_iter_next(instance->cmap_handle, iter_handle, key_name,
  166. &value_len, &type)) == CS_OK) {
  167. if (type != CMAP_VALUETYPE_STRING) {
  168. log(LOG_WARNING, "%s key is not of string type. Ignoring", key_name);
  169. continue ;
  170. }
  171. res = sscanf(key_name, "quorum.device.heuristics.exec_%[^.]%s", exec_name, tmp_key);
  172. if (res != 1) {
  173. log(LOG_WARNING, "%s key is not correct heuristics exec name. Ignoring", key_name);
  174. continue ;
  175. }
  176. cs_err = cmap_get_string(instance->cmap_handle, key_name, &command);
  177. if (cs_err != CS_OK) {
  178. log(LOG_WARNING, "Can't get value of %s key. Ignoring", key_name);
  179. continue ;
  180. }
  181. if (qdevice_heuristics_exec_list_add(&tmp_exec_list, exec_name, command) == NULL) {
  182. log(LOG_WARNING, "Can't store value of %s key into list. Ignoring", key_name);
  183. }
  184. free(command);
  185. }
  186. no_execs = qdevice_heuristics_exec_list_size(&tmp_exec_list);
  187. if (no_execs == 0) {
  188. log(LOG_INFO, "No valid heuristics execs defined. Disabling heuristics.");
  189. instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_DISABLED;
  190. exec_list = NULL;
  191. send_exec_list = 1;
  192. } else if (no_execs > instance->advanced_settings->heuristics_max_execs) {
  193. log(LOG_ERR, "Too much (%zu) heuristics execs defined (max is %zu)."
  194. " Disabling heuristics.", no_execs,
  195. instance->advanced_settings->heuristics_max_execs);
  196. instance->heuristics_instance.mode = QDEVICE_HEURISTICS_MODE_DISABLED;
  197. exec_list = NULL;
  198. send_exec_list = 1;
  199. } else if (qdevice_heuristics_exec_list_eq(&tmp_exec_list,
  200. &instance->heuristics_instance.exec_list) == 1) {
  201. log(LOG_DEBUG, "Heuristics list is unchanged");
  202. send_exec_list = 0;
  203. } else {
  204. log(LOG_DEBUG, "Heuristics list changed");
  205. exec_list = &tmp_exec_list;
  206. send_exec_list = 1;
  207. }
  208. } else {
  209. log(LOG_CRIT, "Undefined heuristics mode");
  210. exit(EXIT_FAILURE);
  211. }
  212. if (send_exec_list) {
  213. if (qdevice_heuristics_change_exec_list(&instance->heuristics_instance,
  214. exec_list, instance->sync_in_progress) != 0) {
  215. return (-1);
  216. }
  217. }
  218. qdevice_heuristics_exec_list_free(&tmp_exec_list);
  219. return (0);
  220. }
  221. int
  222. qdevice_instance_configure_from_cmap(struct qdevice_instance *instance)
  223. {
  224. char *str;
  225. if (cmap_get_string(instance->cmap_handle, "quorum.device.model", &str) != CS_OK) {
  226. log(LOG_ERR, "Can't read quorum.device.model cmap key.");
  227. return (-1);
  228. }
  229. if (qdevice_model_str_to_type(str, &instance->model_type) != 0) {
  230. log(LOG_ERR, "Configured device model %s is not supported.", str);
  231. free(str);
  232. return (-1);
  233. }
  234. free(str);
  235. if (cmap_get_uint32(instance->cmap_handle, "runtime.votequorum.this_node_id",
  236. &instance->node_id) != CS_OK) {
  237. log(LOG_ERR, "Unable to retrieve this node nodeid.");
  238. return (-1);
  239. }
  240. if (cmap_get_uint32(instance->cmap_handle, "quorum.device.timeout", &instance->heartbeat_interval) != CS_OK) {
  241. instance->heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
  242. }
  243. if (cmap_get_uint32(instance->cmap_handle, "quorum.device.sync_timeout",
  244. &instance->sync_heartbeat_interval) != CS_OK) {
  245. instance->sync_heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
  246. }
  247. if (qdevice_instance_configure_from_cmap_heuristics(instance) != 0) {
  248. return (-1);
  249. }
  250. return (0);
  251. }
  252. #define QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS 5
  253. int
  254. qdevice_instance_wait_for_initial_heuristics_exec_result(struct qdevice_instance *instance)
  255. {
  256. struct pollfd pfds[QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS];
  257. int no_pfds;
  258. int poll_res;
  259. int timeout;
  260. int i;
  261. int case_processed;
  262. int res;
  263. while (!instance->vq_node_list_initial_heuristics_finished) {
  264. no_pfds = 0;
  265. assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
  266. pfds[no_pfds].fd = instance->heuristics_instance.pipe_log_recv;
  267. pfds[no_pfds].events = POLLIN;
  268. pfds[no_pfds].revents = 0;
  269. no_pfds++;
  270. assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
  271. pfds[no_pfds].fd = instance->heuristics_instance.pipe_cmd_recv;
  272. pfds[no_pfds].events = POLLIN;
  273. pfds[no_pfds].revents = 0;
  274. no_pfds++;
  275. assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
  276. pfds[no_pfds].fd = instance->votequorum_poll_fd;
  277. pfds[no_pfds].events = POLLIN;
  278. pfds[no_pfds].revents = 0;
  279. no_pfds++;
  280. if (!send_buffer_list_empty(&instance->heuristics_instance.cmd_out_buffer_list)) {
  281. assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
  282. pfds[no_pfds].fd = instance->heuristics_instance.pipe_cmd_send;
  283. pfds[no_pfds].events = POLLOUT;
  284. pfds[no_pfds].revents = 0;
  285. no_pfds++;
  286. }
  287. /*
  288. * We know this is never larger than QDEVICE_DEFAULT_HEURISTICS_MAX_TIMEOUT * 2
  289. */
  290. timeout = (int)instance->heuristics_instance.sync_timeout * 2;
  291. poll_res = poll(pfds, no_pfds, timeout);
  292. if (poll_res > 0) {
  293. for (i = 0; i < no_pfds; i++) {
  294. if (pfds[i].revents & POLLIN) {
  295. case_processed = 0;
  296. switch (i) {
  297. case 0:
  298. case_processed = 1;
  299. res = qdevice_heuristics_log_read_from_pipe(&instance->heuristics_instance);
  300. if (res == -1) {
  301. return (-1);
  302. }
  303. break;
  304. case 1:
  305. case_processed = 1;
  306. res = qdevice_heuristics_cmd_read_from_pipe(&instance->heuristics_instance);
  307. if (res == -1) {
  308. return (-1);
  309. }
  310. break;
  311. case 2:
  312. case_processed = 1;
  313. res = qdevice_votequorum_dispatch(instance);
  314. if (res == -1) {
  315. return (-1);
  316. }
  317. case 3:
  318. /*
  319. * Read on heuristics cmd send fs shouldn't happen
  320. */
  321. break;
  322. }
  323. if (!case_processed) {
  324. log(LOG_CRIT, "Unhandled read on poll descriptor %u", i);
  325. exit(EXIT_FAILURE);
  326. }
  327. }
  328. if (pfds[i].revents & POLLOUT) {
  329. case_processed = 0;
  330. switch (i) {
  331. case 0:
  332. case 1:
  333. case 2:
  334. /*
  335. * Write on heuristics log, cmd recv or vq shouldn't happen
  336. */
  337. break;
  338. case 3:
  339. case_processed = 1;
  340. res = qdevice_heuristics_cmd_write(&instance->heuristics_instance);
  341. if (res == -1) {
  342. return (-1);
  343. }
  344. break;
  345. }
  346. if (!case_processed) {
  347. log(LOG_CRIT, "Unhandled write on poll descriptor %u", i);
  348. exit(EXIT_FAILURE);
  349. }
  350. }
  351. if ((pfds[i].revents & (POLLERR|POLLHUP|POLLNVAL)) &&
  352. !(pfds[i].revents & (POLLIN|POLLOUT))) {
  353. switch (i) {
  354. case 0:
  355. case 1:
  356. case 3:
  357. /*
  358. * Closed pipe doesn't mean return of POLLIN. To display
  359. * better log message, we call read log as if POLLIN would
  360. * be set.
  361. */
  362. res = qdevice_heuristics_log_read_from_pipe(&instance->heuristics_instance);
  363. if (res == -1) {
  364. return (-1);
  365. }
  366. log(LOG_ERR, "POLLERR (%u) on heuristics pipe. Exiting",
  367. pfds[i].revents);
  368. return (-1);
  369. break;
  370. case 2:
  371. log(LOG_ERR, "POLLERR (%u) on corosync socket. Exiting",
  372. pfds[i].revents);
  373. return (-1);
  374. break;
  375. }
  376. }
  377. }
  378. } else if (poll_res == 0) {
  379. log(LOG_ERR, "Timeout waiting for initial heuristics exec result");
  380. return (-1);
  381. } else {
  382. log_err(LOG_ERR, "Initial heuristics exec result poll failed");
  383. return (-1);
  384. }
  385. }
  386. return (0);
  387. }