sam.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. /*
  2. * Copyright (c) 2009 Red Hat, Inc.
  3. *
  4. * All rights reserved.
  5. *
  6. * Author: Jan Friesse (jfriesse@redhat.com)
  7. *
  8. * This software licensed under BSD license, the text of which follows:
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions are met:
  12. *
  13. * - Redistributions of source code must retain the above copyright notice,
  14. * this list of conditions and the following disclaimer.
  15. * - Redistributions in binary form must reproduce the above copyright notice,
  16. * this list of conditions and the following disclaimer in the documentation
  17. * and/or other materials provided with the distribution.
  18. * - Neither the name of the Red Hat, Inc. nor the names of its
  19. * contributors may be used to endorse or promote products derived from this
  20. * software without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. * THE POSSIBILITY OF SUCH DAMAGE.
  33. */
  34. /*
  35. * Provides a SAM API
  36. */
  37. #include <config.h>
  38. #include <stdlib.h>
  39. #include <string.h>
  40. #include <unistd.h>
  41. #include <sys/types.h>
  42. #include <sys/socket.h>
  43. #include <errno.h>
  44. #include <corosync/corotypes.h>
  45. #include <corosync/coroipc_types.h>
  46. #include <corosync/coroipcc.h>
  47. #include <corosync/corodefs.h>
  48. #include <corosync/hdb.h>
  49. #include <corosync/sam.h>
  50. #include "util.h"
  51. #include <stdio.h>
  52. #include <sys/wait.h>
  53. #include <signal.h>
  /*
   * Life-cycle state of the SAM library inside the calling process.
   * The numeric values are spelled out; NOT_INITIALIZED must stay 0 because
   * the zero-initialized static state relies on it as the starting state.
   */
  enum sam_internal_status_t {
      /* Initial state; the only state in which sam_initialize is accepted */
      SAM_INTERNAL_STATUS_NOT_INITIALIZED = 0,
      /* Set by sam_initialize; required by sam_register */
      SAM_INTERNAL_STATUS_INITIALIZED = 1,
      /* Set in the forked child by sam_register, and again by sam_stop */
      SAM_INTERNAL_STATUS_REGISTERED = 2,
      /* Set by sam_start; required by sam_stop and sam_hc_send */
      SAM_INTERNAL_STATUS_STARTED = 3,
      /* Set by sam_finalize */
      SAM_INTERNAL_STATUS_FINALIZED = 4
  };
  /*
   * One-byte commands written into the pipes by sam_start, sam_stop and
   * sam_hc_send and read back by the parent handler / callback thread.
   */
  enum sam_command_t {
      SAM_COMMAND_START = 0,  /* health checking begins */
      SAM_COMMAND_STOP = 1,   /* health checking is suspended */
      SAM_COMMAND_HB = 2      /* heartbeat */
  };
  /*
   * Outcome of one supervision round in the parent process
   * (value returned by sam_parent_handler and acted upon by sam_register).
   */
  enum sam_parent_action_t {
      SAM_PARENT_ACTION_ERROR = 0,     /* poll/read failed unexpectedly */
      SAM_PARENT_ACTION_RECOVERY = 1,  /* child failed while started */
      SAM_PARENT_ACTION_QUIT = 2,      /* parent should exit */
      SAM_PARENT_ACTION_CONTINUE = 3   /* keep supervising */
  };
  72. static struct {
  73. int time_interval;
  74. sam_recovery_policy_t recovery_policy;
  75. enum sam_internal_status_t internal_status;
  76. unsigned int instance_id;
  77. int parent_fd;
  78. int term_send;
  79. sam_hc_callback_t hc_callback;
  80. pthread_t cb_thread;
  81. int cb_rpipe_fd, cb_wpipe_fd;
  82. int cb_registered;
  83. } sam_internal_data;
  84. cs_error_t sam_initialize (
  85. int time_interval,
  86. sam_recovery_policy_t recovery_policy)
  87. {
  88. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_NOT_INITIALIZED) {
  89. return (CS_ERR_BAD_HANDLE);
  90. }
  91. if (recovery_policy != SAM_RECOVERY_POLICY_QUIT && recovery_policy != SAM_RECOVERY_POLICY_RESTART) {
  92. return (CS_ERR_INVALID_PARAM);
  93. }
  94. sam_internal_data.recovery_policy = recovery_policy;
  95. sam_internal_data.time_interval = time_interval;
  96. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_INITIALIZED;
  97. return (CS_OK);
  98. }
  /*
   * Wrapper around write(2) that keeps writing until the whole buffer has
   * been sent. EAGAIN and EINTR are not treated as errors; the write is
   * simply retried.
   *
   * d     - file descriptor to write to
   * buf   - data to send
   * nbyte - number of bytes to send
   *
   * Returns nbyte on success and -1 on any other write(2) error (errno is
   * left as set by write). The return type is signed so that -1 is a real
   * value instead of a wrapped unsigned one; callers comparing the result
   * with -1 keep working unchanged.
   */
  static ssize_t sam_safe_write (
      int d,
      const void *buf,
      size_t nbyte)
  {
      const char *cursor = buf;
      size_t remaining = nbyte;

      do {
          ssize_t written = write (d, cursor, remaining);

          if (written == -1) {
              if (errno != EAGAIN && errno != EINTR) {
                  return (-1);
              }
              /* Transient condition: retry with the same position */
              continue;
          }

          cursor += written;
          remaining -= (size_t)written;
      } while (remaining != 0);

      return ((ssize_t)nbyte);
  }
  121. cs_error_t sam_start (void)
  122. {
  123. char command;
  124. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
  125. return (CS_ERR_BAD_HANDLE);
  126. }
  127. command = SAM_COMMAND_START;
  128. if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
  129. return (CS_ERR_LIBRARY);
  130. if (sam_internal_data.hc_callback)
  131. if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, 1) == -1)
  132. return (CS_ERR_LIBRARY);
  133. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_STARTED;
  134. return (CS_OK);
  135. }
  136. cs_error_t sam_stop (void)
  137. {
  138. char command;
  139. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
  140. return (CS_ERR_BAD_HANDLE);
  141. }
  142. command = SAM_COMMAND_STOP;
  143. if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
  144. return (CS_ERR_LIBRARY);
  145. if (sam_internal_data.hc_callback)
  146. if (sam_safe_write (sam_internal_data.cb_wpipe_fd, &command, 1) == -1)
  147. return (CS_ERR_LIBRARY);
  148. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
  149. return (CS_OK);
  150. }
  151. cs_error_t sam_hc_send (void)
  152. {
  153. char command;
  154. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
  155. return (CS_ERR_BAD_HANDLE);
  156. }
  157. command = SAM_COMMAND_HB;
  158. if (sam_safe_write (sam_internal_data.parent_fd, &command, 1) == -1)
  159. return (CS_ERR_LIBRARY);
  160. return (CS_OK);
  161. }
  162. cs_error_t sam_finalize (void)
  163. {
  164. cs_error_t error;
  165. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED &&
  166. sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED &&
  167. sam_internal_data.internal_status != SAM_INTERNAL_STATUS_STARTED) {
  168. return (CS_ERR_BAD_HANDLE);
  169. }
  170. if (sam_internal_data.internal_status == SAM_INTERNAL_STATUS_STARTED) {
  171. error = sam_stop ();
  172. if (error != CS_OK)
  173. goto exit_error;
  174. }
  175. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_FINALIZED;
  176. exit_error:
  177. return (CS_OK);
  178. }
  179. static enum sam_parent_action_t sam_parent_handler (int pipe_fd, pid_t child_pid)
  180. {
  181. int poll_error;
  182. int action;
  183. int status;
  184. ssize_t bytes_read;
  185. char command;
  186. int time_interval;
  187. struct pollfd pfds;
  188. status = 0;
  189. action = SAM_PARENT_ACTION_CONTINUE;
  190. while (action == SAM_PARENT_ACTION_CONTINUE) {
  191. pfds.fd = pipe_fd;
  192. pfds.events = POLLIN;
  193. pfds.revents = 0;
  194. if (status == 1 && sam_internal_data.time_interval != 0) {
  195. time_interval = sam_internal_data.time_interval;
  196. } else {
  197. time_interval = -1;
  198. }
  199. poll_error = poll (&pfds, 1, time_interval);
  200. if (poll_error == -1) {
  201. /*
  202. * Error in poll
  203. * If it is EINTR, continue, otherwise QUIT
  204. */
  205. if (errno != EINTR) {
  206. action = SAM_PARENT_ACTION_ERROR;
  207. }
  208. }
  209. if (poll_error == 0) {
  210. /*
  211. * Time limit expires
  212. */
  213. if (status == 0) {
  214. action = SAM_PARENT_ACTION_QUIT;
  215. } else {
  216. /*
  217. * Kill child process
  218. */
  219. if (!sam_internal_data.term_send) {
  220. /*
  221. * We didn't send SIGTERM (warning) yet.
  222. */
  223. kill (child_pid, SIGTERM);
  224. sam_internal_data.term_send = 1;
  225. } else {
  226. /*
  227. * We sent child warning. Now, we will not be so nice
  228. */
  229. kill (child_pid, SIGKILL);
  230. action = SAM_PARENT_ACTION_RECOVERY;
  231. }
  232. }
  233. }
  234. if (poll_error > 0) {
  235. /*
  236. * We have EOF or command in pipe
  237. */
  238. bytes_read = read (pipe_fd, &command, 1);
  239. if (bytes_read == 0) {
  240. /*
  241. * Handle EOF -> Take recovery action or quit if sam_start wasn't called
  242. */
  243. if (status == 0)
  244. action = SAM_PARENT_ACTION_QUIT;
  245. else
  246. action = SAM_PARENT_ACTION_RECOVERY;
  247. continue;
  248. }
  249. if (bytes_read == -1) {
  250. /*
  251. * Something really bad happened in read side
  252. */
  253. if (errno == EAGAIN || errno == EINTR) {
  254. continue;
  255. }
  256. action = SAM_PARENT_ACTION_ERROR;
  257. goto action_exit;
  258. }
  259. /*
  260. * We have read command -> take status
  261. */
  262. switch (status) {
  263. case 0:
  264. /*
  265. * Not started yet
  266. */
  267. if (command == SAM_COMMAND_START)
  268. status = 1;
  269. break;
  270. case 1:
  271. /*
  272. * Started
  273. */
  274. if (command == SAM_COMMAND_STOP)
  275. status = 0;
  276. break;
  277. }
  278. } /* select_error > 0 */
  279. } /* action == SAM_PARENT_ACTION_CONTINUE */
  280. action_exit:
  281. return action;
  282. }
  283. cs_error_t sam_register (
  284. unsigned int *instance_id)
  285. {
  286. cs_error_t error;
  287. pid_t pid;
  288. int pipe_error;
  289. int pipe_fd[2];
  290. enum sam_parent_action_t action;
  291. int child_status;
  292. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_INITIALIZED) {
  293. return (CS_ERR_BAD_HANDLE);
  294. }
  295. error = CS_OK;
  296. while (1) {
  297. pipe_error = pipe (pipe_fd);
  298. if (pipe_error != 0) {
  299. /*
  300. * Pipe creation error
  301. */
  302. error = CS_ERR_LIBRARY;
  303. goto error_exit;
  304. }
  305. sam_internal_data.instance_id++;
  306. sam_internal_data.term_send = 0;
  307. pid = fork ();
  308. if (pid == -1) {
  309. /*
  310. * Fork error
  311. */
  312. sam_internal_data.instance_id--;
  313. error = CS_ERR_LIBRARY;
  314. goto error_exit;
  315. }
  316. if (pid == 0) {
  317. /*
  318. * Child process
  319. */
  320. close (pipe_fd[0]);
  321. sam_internal_data.parent_fd = pipe_fd[1];
  322. if (instance_id)
  323. *instance_id = sam_internal_data.instance_id;
  324. sam_internal_data.internal_status = SAM_INTERNAL_STATUS_REGISTERED;
  325. goto error_exit;
  326. } else {
  327. /*
  328. * Parent process
  329. */
  330. close (pipe_fd[1]);
  331. action = sam_parent_handler (pipe_fd[0], pid);
  332. close (pipe_fd[0]);
  333. if (action == SAM_PARENT_ACTION_ERROR) {
  334. error = CS_ERR_LIBRARY;
  335. goto error_exit;
  336. }
  337. /*
  338. * We really don't like zombies
  339. */
  340. while (waitpid (pid, &child_status, 0) == -1 && errno == EINTR)
  341. ;
  342. if (action == SAM_PARENT_ACTION_RECOVERY) {
  343. if (sam_internal_data.recovery_policy == SAM_RECOVERY_POLICY_QUIT)
  344. action = SAM_PARENT_ACTION_QUIT;
  345. }
  346. if (action == SAM_PARENT_ACTION_QUIT) {
  347. exit (WEXITSTATUS (child_status));
  348. }
  349. }
  350. }
  351. error_exit:
  352. return (error);
  353. }
  354. static void *hc_callback_thread (void *unused_param)
  355. {
  356. int poll_error;
  357. int status;
  358. ssize_t bytes_readed;
  359. char command;
  360. int time_interval, tmp_time_interval;
  361. int counter;
  362. struct pollfd pfds;
  363. status = 0;
  364. counter = 0;
  365. time_interval = sam_internal_data.time_interval >> 2;
  366. while (1) {
  367. pfds.fd = sam_internal_data.cb_rpipe_fd;
  368. pfds.events = POLLIN;
  369. pfds.revents = 0;
  370. if (status == 1) {
  371. tmp_time_interval = time_interval;
  372. } else {
  373. tmp_time_interval = -1;
  374. }
  375. poll_error = poll (&pfds, 1, tmp_time_interval);
  376. if (poll_error == 0) {
  377. sam_hc_send ();
  378. counter++;
  379. if (counter >= 4) {
  380. if (sam_internal_data.hc_callback () != 0) {
  381. status = 3;
  382. }
  383. counter = 0;
  384. }
  385. }
  386. if (poll_error > 0) {
  387. bytes_readed = read (sam_internal_data.cb_rpipe_fd, &command, 1);
  388. if (bytes_readed > 0) {
  389. if (status == 0 && command == SAM_COMMAND_START)
  390. status = 1;
  391. if (status == 1 && command == SAM_COMMAND_STOP)
  392. status = 0;
  393. }
  394. }
  395. }
  396. /*
  397. * This makes compiler happy, it's same as return (NULL);
  398. */
  399. return (unused_param);
  400. }
  401. cs_error_t sam_hc_callback_register (sam_hc_callback_t cb)
  402. {
  403. cs_error_t error = CS_OK;
  404. pthread_attr_t thread_attr;
  405. int pipe_error;
  406. int pipe_fd[2];
  407. if (sam_internal_data.internal_status != SAM_INTERNAL_STATUS_REGISTERED) {
  408. return (CS_ERR_BAD_HANDLE);
  409. }
  410. if (sam_internal_data.time_interval == 0) {
  411. return (CS_ERR_INVALID_PARAM);
  412. }
  413. if (sam_internal_data.cb_registered) {
  414. sam_internal_data.hc_callback = cb;
  415. return (CS_OK);
  416. }
  417. /*
  418. * We know, this is first registration
  419. */
  420. if (cb == NULL) {
  421. return (CS_ERR_INVALID_PARAM);
  422. }
  423. pipe_error = pipe (pipe_fd);
  424. if (pipe_error != 0) {
  425. /*
  426. * Pipe creation error
  427. */
  428. error = CS_ERR_LIBRARY;
  429. goto error_exit;
  430. }
  431. sam_internal_data.cb_rpipe_fd = pipe_fd[0];
  432. sam_internal_data.cb_wpipe_fd = pipe_fd[1];
  433. /*
  434. * Create thread attributes
  435. */
  436. error = pthread_attr_init (&thread_attr);
  437. if (error != 0) {
  438. error = CS_ERR_LIBRARY;
  439. goto error_close_fd_exit;
  440. }
  441. pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
  442. pthread_attr_setstacksize (&thread_attr, 32768);
  443. /*
  444. * Create thread
  445. */
  446. error = pthread_create (&sam_internal_data.cb_thread, &thread_attr, hc_callback_thread, NULL);
  447. if (error != 0) {
  448. error = CS_ERR_LIBRARY;
  449. goto error_attr_destroy_exit;
  450. }
  451. /*
  452. * Cleanup
  453. */
  454. pthread_attr_destroy(&thread_attr);
  455. sam_internal_data.cb_registered = 1;
  456. sam_internal_data.hc_callback = cb;
  457. return (CS_OK);
  458. error_attr_destroy_exit:
  459. pthread_attr_destroy(&thread_attr);
  460. error_close_fd_exit:
  461. sam_internal_data.cb_rpipe_fd = sam_internal_data.cb_wpipe_fd = 0;
  462. close (pipe_fd[0]);
  463. close (pipe_fd[1]);
  464. error_exit:
  465. return (error);
  466. }