sam.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. /*
  2. * Copyright (c) 2009-2010 Red Hat, Inc.
  3. *
  4. * All rights reserved.
  5. *
  6. * Author: Jan Friesse (jfriesse@redhat.com)
  7. *
  8. * This software licensed under BSD license, the text of which follows:
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions are met:
  12. *
  13. * - Redistributions of source code must retain the above copyright notice,
  14. * this list of conditions and the following disclaimer.
  15. * - Redistributions in binary form must reproduce the above copyright notice,
  16. * this list of conditions and the following disclaimer in the documentation
  17. * and/or other materials provided with the distribution.
  18. * - Neither the name of the Red Hat, Inc. nor the names of its
  19. * contributors may be used to endorse or promote products derived from this
  20. * software without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. * THE POSSIBILITY OF SUCH DAMAGE.
  33. */
  34. /*
  35. * Provides a SAM API
  36. */
  37. #include <config.h>
  38. #include <stdlib.h>
  39. #include <string.h>
  40. #include <unistd.h>
  41. #include <sys/types.h>
  42. #include <sys/socket.h>
  43. #include <errno.h>
  44. #include <corosync/corotypes.h>
  45. #include <corosync/coroipc_types.h>
  46. #include <corosync/coroipcc.h>
  47. #include <corosync/corodefs.h>
  48. #include <corosync/hdb.h>
  49. #include <corosync/sam.h>
  50. #include "util.h"
  51. #include <stdio.h>
  52. #include <sys/wait.h>
  53. #include <signal.h>
  /*
   * Lifecycle state of the library, stored in sam_internal_data.internal_status.
   */
  enum sam_internal_status_t {
      /* Zero value of the static data; the only state sam_initialize accepts */
      SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0,
      /* Set by sam_initialize */
      SAM_INTERNAL_STATUS_INITIALIZED     = 1,
      /* Set in the child by sam_register, and again by sam_stop */
      SAM_INTERNAL_STATUS_REGISTERED      = 2,
      /* Set by sam_start */
      SAM_INTERNAL_STATUS_STARTED         = 3,
      /* Set by sam_finalize */
      SAM_INTERNAL_STATUS_FINALIZED       = 4
  };
  /*
   * One-byte commands written into the pipes by sam_start, sam_stop and
   * sam_hc_send.
   */
  enum sam_command_t {
      SAM_COMMAND_START = 0,   /* written by sam_start */
      SAM_COMMAND_STOP  = 1,   /* written by sam_stop */
      SAM_COMMAND_HB    = 2    /* heartbeat, written by sam_hc_send */
  };
  /*
   * Verdict of sam_parent_handler, acted upon by sam_register.
   */
  enum sam_parent_action_t {
      SAM_PARENT_ACTION_ERROR    = 0,   /* poll/read failed; sam_register returns CS_ERR_LIBRARY */
      SAM_PARENT_ACTION_RECOVERY = 1,   /* child failed while started; apply the recovery policy */
      SAM_PARENT_ACTION_QUIT     = 2,   /* parent exits with the child's exit status */
      SAM_PARENT_ACTION_CONTINUE = 3    /* keep supervising */
  };
  72. static struct {
  73. int time_interval;
  74. sam_recovery_policy_t recovery_policy;
  75. enum sam_internal_status_t internal_status;
  76. unsigned int instance_id;
  77. int parent_fd;
  78. int term_send;
  79. int warn_signal;
  80. sam_hc_callback_t hc_callback;
  81. pthread_t cb_thread;
  82. int cb_rpipe_fd, cb_wpipe_fd;
  83. int cb_registered;
  84. } sam_internal_data;
  85. cs_error_t sam_initialize (
  86. int time_interval,
  87. sam_recovery_policy_t recovery_policy)
  88. {
  89. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_NOT_INITIALIZED) {
  90. return (CS_ERR_BAD_HANDLE);
  91. }
  92. if (recovery_policy != SAM_RECOVERY_POLICY_QUIT && recovery_policy != SAM_RECOVERY_POLICY_RESTART) {
  93. return (CS_ERR_INVALID_PARAM);
  94. }
  95. sam_internal_data.recovery_policy = recovery_policy;
  96. sam_internal_data.time_interval = time_interval;
  97. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_INITIALIZED;
  98. sam_internal_data.warn_signal = SIGTERM;
  99. return (CS_OK);
  100. }
  /*
   * Wrapper around write(2) that keeps writing until the whole buffer has been
   * sent. A write failing with EAGAIN or EINTR is simply retried.
   *
   * d     - file descriptor to write to
   * buf   - data to send
   * nbyte - number of bytes to send
   *
   * Returns nbyte on success. On any other write error returns (size_t)-1,
   * which is what callers test for with "== -1".
   */
  static size_t sam_safe_write (
      int d,
      const void *buf,
      size_t nbyte)
  {
      const char *cursor = buf;
      size_t remaining = nbyte;
      ssize_t chunk;

      for (;;) {
          chunk = write (d, cursor, remaining);

          if (chunk == -1) {
              if (errno != EAGAIN && errno != EINTR) {
                  return ((size_t)-1);
              }
          } else {
              cursor += chunk;
              remaining -= (size_t)chunk;
          }

          if (remaining == 0) {
              break;
          }
      }

      return (nbyte);
  }
  123. cs_error_t sam_start (void)
  124. {
  125. char command;
  126. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
  127. return (CS_ERR_BAD_HANDLE);
  128. }
  129. command = SAM_COMMAND_START;
  130. if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
  131. return (CS_ERR_LIBRARY);
  132. if (sam_internal_data.hc_callback)
  133. if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, 1) == -1)
  134. return (CS_ERR_LIBRARY);
  135. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_STARTED;
  136. return (CS_OK);
  137. }
  138. cs_error_t sam_stop (void)
  139. {
  140. char command;
  141. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
  142. return (CS_ERR_BAD_HANDLE);
  143. }
  144. command = SAM_COMMAND_STOP;
  145. if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
  146. return (CS_ERR_LIBRARY);
  147. if (sam_internal_data.hc_callback)
  148. if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, 1) == -1)
  149. return (CS_ERR_LIBRARY);
  150. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
  151. return (CS_OK);
  152. }
  153. cs_error_t sam_hc_send (void)
  154. {
  155. char command;
  156. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
  157. return (CS_ERR_BAD_HANDLE);
  158. }
  159. command = SAM_COMMAND_HB;
  160. if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
  161. return (CS_ERR_LIBRARY);
  162. return (CS_OK);
  163. }
  164. cs_error_t sam_finalize (void)
  165. {
  166. cs_error_t error;
  167. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED &&
  168. sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED &&
  169. sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
  170. return (CS_ERR_BAD_HANDLE);
  171. }
  172. if (sam_internal_data.internal_status == SAM_INTERNAL_STATUS_STARTED) {
  173. error = sam_stop ();
  174. if (error != CS_OK)
  175. goto exit_error;
  176. }
  177. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_FINALIZED;
  178. exit_error:
  179. return (CS_OK);
  180. }
  181. cs_error_t sam_warn_signal_set (int warn_signal)
  182. {
  183. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED &&
  184. sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED &&
  185. sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
  186. return (CS_ERR_BAD_HANDLE);
  187. }
  188. sam_internal_data.warn_signal = warn_signal;
  189. return (CS_OK);
  190. }
  191. static enum sam_parent_action_t sam_parent_handler (int pipe_fd, pid_t child_pid)
  192. {
  193. int poll_error;
  194. int action;
  195. int status;
  196. ssize_t bytes_read;
  197. char command;
  198. int time_interval;
  199. struct pollfd pfds;
  200. status = 0;
  201. action = SAM_PARENT_ACTION_CONTINUE;
  202. while (action == SAM_PARENT_ACTION_CONTINUE) {
  203. pfds.fd = pipe_fd;
  204. pfds.events = POLLIN;
  205. pfds.revents = 0;
  206. if (status == 1 && sam_internal_data.time_interval != 0) {
  207. time_interval = sam_internal_data.time_interval;
  208. } else {
  209. time_interval = -1;
  210. }
  211. poll_error = poll (&pfds, 1, time_interval);
  212. if (poll_error == -1) {
  213. /*
  214. * Error in poll
  215. * If it is EINTR, continue, otherwise QUIT
  216. */
  217. if (errno != EINTR) {
  218. action = SAM_PARENT_ACTION_ERROR;
  219. }
  220. }
  221. if (poll_error == 0) {
  222. /*
  223. * Time limit expires
  224. */
  225. if (status == 0) {
  226. action = SAM_PARENT_ACTION_QUIT;
  227. } else {
  228. /*
  229. * Kill child process
  230. */
  231. if (!sam_internal_data.term_send) {
  232. /*
  233. * We didn't send warn_signal yet.
  234. */
  235. kill (child_pid, sam_internal_data.warn_signal);
  236. sam_internal_data.term_send = 1;
  237. } else {
  238. /*
  239. * We sent child warning. Now, we will not be so nice
  240. */
  241. kill (child_pid, SIGKILL);
  242. action = SAM_PARENT_ACTION_RECOVERY;
  243. }
  244. }
  245. }
  246. if (poll_error > 0) {
  247. /*
  248. * We have EOF or command in pipe
  249. */
  250. bytes_read = read (pipe_fd, &command, 1);
  251. if (bytes_read == 0) {
  252. /*
  253. * Handle EOF -> Take recovery action or quit if sam_start wasn't called
  254. */
  255. if (status == 0)
  256. action = SAM_PARENT_ACTION_QUIT;
  257. else
  258. action = SAM_PARENT_ACTION_RECOVERY;
  259. continue;
  260. }
  261. if (bytes_read == -1) {
  262. /*
  263. * Something really bad happened in read side
  264. */
  265. if (errno == EAGAIN || errno == EINTR) {
  266. continue;
  267. }
  268. action = SAM_PARENT_ACTION_ERROR;
  269. goto action_exit;
  270. }
  271. /*
  272. * We have read command -> take status
  273. */
  274. switch (status) {
  275. case 0:
  276. /*
  277. * Not started yet
  278. */
  279. if (command == SAM_COMMAND_START)
  280. status = 1;
  281. break;
  282. case 1:
  283. /*
  284. * Started
  285. */
  286. if (command == SAM_COMMAND_STOP)
  287. status = 0;
  288. break;
  289. }
  290. } /* select_error > 0 */
  291. } /* action == SAM_PARENT_ACTION_CONTINUE */
  292. action_exit:
  293. return action;
  294. }
  295. cs_error_t sam_register (
  296. unsigned int *instance_id)
  297. {
  298. cs_error_t error;
  299. pid_t pid;
  300. int pipe_error;
  301. int pipe_fd[2];
  302. enum sam_parent_action_t action;
  303. int child_status;
  304. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED) {
  305. return (CS_ERR_BAD_HANDLE);
  306. }
  307. error = CS_OK;
  308. while (1) {
  309. pipe_error = pipe (pipe_fd);
  310. if (pipe_error != 0) {
  311. /*
  312. * Pipe creation error
  313. */
  314. error = CS_ERR_LIBRARY;
  315. goto error_exit;
  316. }
  317. sam_internal_data.instance_id++;
  318. sam_internal_data.term_send = 0;
  319. pid = fork ();
  320. if (pid == -1) {
  321. /*
  322. * Fork error
  323. */
  324. sam_internal_data.instance_id--;
  325. error = CS_ERR_LIBRARY;
  326. goto error_exit;
  327. }
  328. if (pid == 0) {
  329. /*
  330. * Child process
  331. */
  332. close (pipe_fd[0]);
  333. sam_internal_data.parent_fd = pipe_fd[1];
  334. if (instance_id)
  335. *instance_id = sam_internal_data.instance_id;
  336. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
  337. goto error_exit;
  338. } else {
  339. /*
  340. * Parent process
  341. */
  342. close (pipe_fd[1]);
  343. action = sam_parent_handler (pipe_fd[0], pid);
  344. close (pipe_fd[0]);
  345. if (action == SAM_PARENT_ACTION_ERROR) {
  346. error = CS_ERR_LIBRARY;
  347. goto error_exit;
  348. }
  349. /*
  350. * We really don't like zombies
  351. */
  352. while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR)
  353. ;
  354. if (action == SAM_PARENT_ACTION_RECOVERY) {
  355. if (sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUIT)
  356. action = SAM_PARENT_ACTION_QUIT;
  357. }
  358. if (action == SAM_PARENT_ACTION_QUIT) {
  359. exit (WEXITSTATUS (child_status));
  360. }
  361. }
  362. }
  363. error_exit:
  364. return (error);
  365. }
  366. static void *hc_callback_thread (void *unused_param)
  367. {
  368. int poll_error;
  369. int status;
  370. ssize_t bytes_readed;
  371. char command;
  372. int time_interval, tmp_time_interval;
  373. int counter;
  374. struct pollfd pfds;
  375. status = 0;
  376. counter = 0;
  377. time_interval = sam_internal_data.time_interval >> 2;
  378. while (1) {
  379. pfds.fd = sam_internal_data.cb_rpipe_fd;
  380. pfds.events = POLLIN;
  381. pfds.revents = 0;
  382. if (status == 1) {
  383. tmp_time_interval = time_interval;
  384. } else {
  385. tmp_time_interval = -1;
  386. }
  387. poll_error = poll (&pfds, 1, tmp_time_interval);
  388. if (poll_error == 0) {
  389. sam_hc_send ();
  390. counter++;
  391. if (counter >= 4) {
  392. if (sam_internal_data.hc_callback () != 0) {
  393. status = 3;
  394. }
  395. counter = 0;
  396. }
  397. }
  398. if (poll_error > 0) {
  399. bytes_readed = read (sam_internal_data.cb_rpipe_fd, &command, 1);
  400. if (bytes_readed > 0) {
  401. if (status == 0 && command == SAM_COMMAND_START)
  402. status = 1;
  403. if (status == 1 && command == SAM_COMMAND_STOP)
  404. status = 0;
  405. }
  406. }
  407. }
  408. /*
  409. * This makes compiler happy, it's same as return (NULL);
  410. */
  411. return (unused_param);
  412. }
  413. cs_error_t sam_hc_callback_register (sam_hc_callback_t cb)
  414. {
  415. cs_error_t error = CS_OK;
  416. pthread_attr_t thread_attr;
  417. int pipe_error;
  418. int pipe_fd[2];
  419. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
  420. return (CS_ERR_BAD_HANDLE);
  421. }
  422. if (sam_internal_data.time_interval == 0) {
  423. return (CS_ERR_INVALID_PARAM);
  424. }
  425. if (sam_internal_data.cb_registered) {
  426. sam_internal_data.hc_callback = cb;
  427. return (CS_OK);
  428. }
  429. /*
  430. * We know, this is first registration
  431. */
  432. if (cb == NULL) {
  433. return (CS_ERR_INVALID_PARAM);
  434. }
  435. pipe_error = pipe (pipe_fd);
  436. if (pipe_error != 0) {
  437. /*
  438. * Pipe creation error
  439. */
  440. error = CS_ERR_LIBRARY;
  441. goto error_exit;
  442. }
  443. sam_internal_data.cb_rpipe_fd = pipe_fd[0];
  444. sam_internal_data.cb_wpipe_fd = pipe_fd[1];
  445. /*
  446. * Create thread attributes
  447. */
  448. error = pthread_attr_init (&thread_attr);
  449. if (error != 0) {
  450. error = CS_ERR_LIBRARY;
  451. goto error_close_fd_exit;
  452. }
  453. pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
  454. pthread_attr_setstacksize (&thread_attr, 32768);
  455. /*
  456. * Create thread
  457. */
  458. error = pthread_create (&sam_internal_data.cb_thread, &thread_attr, hc_callback_thread, NULL);
  459. if (error != 0) {
  460. error = CS_ERR_LIBRARY;
  461. goto error_attr_destroy_exit;
  462. }
  463. /*
  464. * Cleanup
  465. */
  466. pthread_attr_destroy(&thread_attr);
  467. sam_internal_data.cb_registered = 1;
  468. sam_internal_data.hc_callback = cb;
  469. return (CS_OK);
  470. error_attr_destroy_exit:
  471. pthread_attr_destroy(&thread_attr);
  472. error_close_fd_exit:
  473. sam_internal_data.cb_rpipe_fd = sam_internal_data.cb_wpipe_fd = 0;
  474. close (pipe_fd[0]);
  475. close (pipe_fd[1]);
  476. error_exit:
  477. return (error);
  478. }