qnetd-algo-ffsplit.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792
  1. /*
  2. * Copyright (c) 2015-2016 Red Hat, Inc.
  3. *
  4. * All rights reserved.
  5. *
  6. * Author: Jan Friesse (jfriesse@redhat.com)
  7. *
  8. * This software licensed under BSD license, the text of which follows:
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions are met:
  12. *
  13. * - Redistributions of source code must retain the above copyright notice,
  14. * this list of conditions and the following disclaimer.
  15. * - Redistributions in binary form must reproduce the above copyright notice,
  16. * this list of conditions and the following disclaimer in the documentation
  17. * and/or other materials provided with the distribution.
  18. * - Neither the name of the Red Hat, Inc. nor the names of its
  19. * contributors may be used to endorse or promote products derived from this
  20. * software without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. * THE POSSIBILITY OF SUCH DAMAGE.
  33. */
  34. #include <sys/types.h>
  35. #include <string.h>
  36. #include "qnetd-algo-ffsplit.h"
  37. #include "qnetd-log.h"
  38. #include "qnetd-log-debug.h"
  39. #include "qnetd-cluster-list.h"
  40. #include "qnetd-cluster.h"
  41. #include "qnetd-client-send.h"
  /*
   * Phase of the vote exchange for one cluster. The enumerator order (and
   * therefore every numeric value) is the same as before.
   */
  enum qnetd_algo_ffsplit_cluster_state {
      /* Initial state; also entered when no vote (or no further vote) is pending */
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE,
      /* Entered at the start of every algorithm run, before the stability test */
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP,
      /* NACK votes were sent and their replies are awaited */
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS,
      /* ACK votes were sent and their replies are awaited */
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS,
  };
  48. struct qnetd_algo_ffsplit_cluster_data {
  49. enum qnetd_algo_ffsplit_cluster_state cluster_state;
  50. const struct node_list *quorate_partition_node_list;
  51. };
  /*
   * What the algorithm currently owes to one client. The enumerator order
   * (and therefore every numeric value) is the same as before.
   */
  enum qnetd_algo_ffsplit_client_state {
      /* No vote is pending for the client */
      QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE,
      /* Client is outside of the selected partition and gets a NACK */
      QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK,
      /* Client is inside of the selected partition and gets an ACK */
      QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK,
  };
  57. struct qnetd_algo_ffsplit_client_data {
  58. enum qnetd_algo_ffsplit_client_state client_state;
  59. uint32_t vote_info_expected_seq_num;
  60. };
  61. enum tlv_reply_error_code
  62. qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
  63. {
  64. struct qnetd_algo_ffsplit_cluster_data *cluster_data;
  65. struct qnetd_algo_ffsplit_client_data *client_data;
  66. if (qnetd_cluster_size(client->cluster) == 1) {
  67. cluster_data = malloc(sizeof(*cluster_data));
  68. if (cluster_data == NULL) {
  69. qnetd_log(LOG_ERR, "ffsplit: Can't initialize cluster data for client %s",
  70. client->addr_str);
  71. return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
  72. }
  73. memset(cluster_data, 0, sizeof(*cluster_data));
  74. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  75. cluster_data->quorate_partition_node_list = NULL;
  76. client->cluster->algorithm_data = cluster_data;
  77. }
  78. client_data = malloc(sizeof(*client_data));
  79. if (client_data == NULL) {
  80. qnetd_log(LOG_ERR, "ffsplit: Can't initialize node data for client %s",
  81. client->addr_str);
  82. return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
  83. }
  84. memset(client_data, 0, sizeof(*client_data));
  85. client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
  86. client->algorithm_data = client_data;
  87. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  88. }
  89. static int
  90. qnetd_algo_ffsplit_is_prefered_partition(const struct qnetd_client *client,
  91. const struct node_list *config_node_list, const struct node_list *membership_node_list)
  92. {
  93. uint32_t prefered_node_id;
  94. struct node_list_entry *node_entry;
  95. switch (client->tie_breaker.mode) {
  96. case TLV_TIE_BREAKER_MODE_LOWEST:
  97. node_entry = TAILQ_FIRST(config_node_list);
  98. prefered_node_id = node_entry->node_id;
  99. TAILQ_FOREACH(node_entry, config_node_list, entries) {
  100. if (node_entry->node_id < prefered_node_id) {
  101. prefered_node_id = node_entry->node_id;
  102. }
  103. }
  104. break;
  105. case TLV_TIE_BREAKER_MODE_HIGHEST:
  106. node_entry = TAILQ_FIRST(config_node_list);
  107. prefered_node_id = node_entry->node_id;
  108. TAILQ_FOREACH(node_entry, config_node_list, entries) {
  109. if (node_entry->node_id > prefered_node_id) {
  110. prefered_node_id = node_entry->node_id;
  111. }
  112. }
  113. break;
  114. case TLV_TIE_BREAKER_MODE_NODE_ID:
  115. prefered_node_id = client->tie_breaker.node_id;
  116. break;
  117. }
  118. return (node_list_find_node_id(membership_node_list, prefered_node_id) != NULL);
  119. }
  120. static int
  121. qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client *client, int client_leaving,
  122. const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
  123. const struct node_list *membership_node_list)
  124. {
  125. const struct qnetd_client *iter_client1, *iter_client2;
  126. const struct node_list *config_node_list1, *config_node_list2;
  127. const struct node_list *membership_node_list1, *membership_node_list2;
  128. const struct node_list_entry *iter_node1, *iter_node2;
  129. const struct node_list_entry *iter_node3, *iter_node4;
  130. const struct tlv_ring_id *ring_id1, *ring_id2;
  131. /*
  132. * Test if all active clients share same config list.
  133. */
  134. TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
  135. TAILQ_FOREACH(iter_client2, &client->cluster->client_list, cluster_entries) {
  136. if (iter_client1 == iter_client2) {
  137. continue;
  138. }
  139. if (iter_client1->node_id == client->node_id) {
  140. if (client_leaving) {
  141. continue;
  142. }
  143. config_node_list1 = config_node_list;
  144. } else {
  145. config_node_list1 = &iter_client1->configuration_node_list;
  146. }
  147. if (iter_client2->node_id == client->node_id) {
  148. if (client_leaving) {
  149. continue;
  150. }
  151. config_node_list2 = config_node_list;
  152. } else {
  153. config_node_list2 = &iter_client2->configuration_node_list;
  154. }
  155. /*
  156. * Walk thru all node ids in given config node list...
  157. */
  158. TAILQ_FOREACH(iter_node1, config_node_list1, entries) {
  159. /*
  160. * ... and try to find given node id in other list
  161. */
  162. iter_node2 = node_list_find_node_id(config_node_list2, iter_node1->node_id);
  163. if (iter_node2 == NULL) {
  164. /*
  165. * Node with iter_node1->node_id was not found in
  166. * config_node_list2 -> lists doesn't match
  167. */
  168. return (0);
  169. }
  170. }
  171. }
  172. }
  173. /*
  174. * Test if same partitions share same ring ids and membership node list
  175. */
  176. TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
  177. if (iter_client1->node_id == client->node_id) {
  178. if (client_leaving) {
  179. continue;
  180. }
  181. membership_node_list1 = membership_node_list;
  182. ring_id1 = ring_id;
  183. } else {
  184. membership_node_list1 = &iter_client1->last_membership_node_list;
  185. ring_id1 = &iter_client1->last_ring_id;
  186. }
  187. /*
  188. * Walk thru all memberships nodes
  189. */
  190. TAILQ_FOREACH(iter_node1, membership_node_list1, entries) {
  191. /*
  192. * try to find client with given node id
  193. */
  194. iter_client2 = qnetd_cluster_find_client_by_node_id(client->cluster,
  195. iter_node1->node_id);
  196. if (iter_client2 == NULL) {
  197. /*
  198. * Client with given id is not connected
  199. */
  200. continue;
  201. }
  202. if (iter_client2->node_id == client->node_id) {
  203. if (client_leaving) {
  204. continue;
  205. }
  206. membership_node_list2 = membership_node_list;
  207. ring_id2 = ring_id;
  208. } else {
  209. membership_node_list2 = &iter_client2->last_membership_node_list;
  210. ring_id2 = &iter_client2->last_ring_id;
  211. }
  212. /*
  213. * Compare ring ids
  214. */
  215. if (!tlv_ring_id_eq(ring_id1, ring_id2)) {
  216. return (0);
  217. }
  218. /*
  219. * Now compare that membership node list equals, so walk thru all
  220. * members ...
  221. */
  222. TAILQ_FOREACH(iter_node3, membership_node_list1, entries) {
  223. /*
  224. * ... and try to find given node id in other membership node list
  225. */
  226. iter_node4 = node_list_find_node_id(membership_node_list2, iter_node3->node_id);
  227. if (iter_node4 == NULL) {
  228. /*
  229. * Node with iter_node3->node_id was not found in
  230. * membership_node_list2 -> lists doesn't match
  231. */
  232. return (0);
  233. }
  234. }
  235. }
  236. }
  237. return (1);
  238. }
  239. static size_t
  240. qnetd_algo_ffsplit_no_active_clients_in_partition(const struct qnetd_client *client,
  241. const struct node_list *membership_node_list)
  242. {
  243. const struct node_list_entry *iter_node;
  244. const struct qnetd_client *iter_client;
  245. size_t res;
  246. res = 0;
  247. if (client == NULL || membership_node_list == NULL) {
  248. return (0);
  249. }
  250. TAILQ_FOREACH(iter_node, membership_node_list, entries) {
  251. iter_client = qnetd_cluster_find_client_by_node_id(client->cluster,
  252. iter_node->node_id);
  253. if (iter_client != NULL) {
  254. res++;
  255. }
  256. }
  257. return (res);
  258. }
  /*
   * Compares two partitions. Returns 1 if partition 1 (client1,
   * config_node_list1, membership_node_list1) is "better" than partition 2
   * (client2, config_node_list2, membership_node_list2), 0 otherwise.
   *
   * Decision order:
   *  1. Membership of partition 1 has more than half of the configured nodes
   *     -> 1.
   *  2. Number of configured nodes is odd (no 50:50 split is possible), or
   *     membership has less than half of the configured nodes -> 0.
   *  3. Exact 50:50 split: partition with more active (connected) clients
   *     wins.
   *  4. Same number of active clients: tie-breaker of client1 decides.
   *
   * client2 and membership_node_list2 are used only in step 3 (both may be
   * NULL, partition 2 then has no active client). config_node_list2 is not
   * used at all.
   *
   * The log + exit(1) tail of the previous version could never be reached
   * (every branch returned before it), so it was removed.
   */
  static int
  qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
      const struct node_list *config_node_list1, const struct node_list *membership_node_list1,
      const struct qnetd_client *client2,
      const struct node_list *config_node_list2, const struct node_list *membership_node_list2)
  {
      size_t config_size, membership_size;
      size_t part1_active_clients, part2_active_clients;

      config_size = node_list_size(config_node_list1);
      membership_size = node_list_size(membership_node_list1);

      if (membership_size > config_size / 2) {
          /*
           * Strict majority
           */
          return (1);
      }

      if (config_size % 2 != 0 || membership_size < config_size / 2) {
          /*
           * Odd clusters never split into 50:50, so no majority means
           * minority. Even cluster with less than half of nodes is a minority
           * as well.
           */
          return (0);
      }

      /*
       * 50:50 split. Check how many active clients are in partitions.
       */
      part1_active_clients = qnetd_algo_ffsplit_no_active_clients_in_partition(
          client1, membership_node_list1);
      part2_active_clients = qnetd_algo_ffsplit_no_active_clients_in_partition(
          client2, membership_node_list2);

      if (part1_active_clients != part2_active_clients) {
          return (part1_active_clients > part2_active_clients);
      }

      /*
       * Number of active clients in both partitions equals. Use tie-breaker.
       */
      if (qnetd_algo_ffsplit_is_prefered_partition(client1, config_node_list1,
          membership_node_list1)) {
          return (1);
      }

      return (0);
  }
  314. /*
  315. * Select best partition for given client->cluster.
  316. * If there is no partition which could become quorate, NULL is returned
  317. */
  318. static const struct node_list *
  319. qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving,
  320. const struct node_list *config_node_list, const struct node_list *membership_node_list)
  321. {
  322. const struct qnetd_client *iter_client;
  323. const struct qnetd_client *best_client;
  324. const struct node_list *best_config_node_list, *best_membership_node_list;
  325. const struct node_list *iter_config_node_list, *iter_membership_node_list;
  326. best_client = NULL;
  327. best_config_node_list = best_membership_node_list = NULL;
  328. /*
  329. * Get highest score
  330. */
  331. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  332. if (iter_client->node_id == client->node_id) {
  333. if (client_leaving) {
  334. continue;
  335. }
  336. iter_config_node_list = config_node_list;
  337. iter_membership_node_list = membership_node_list;
  338. } else {
  339. iter_config_node_list = &iter_client->configuration_node_list;
  340. iter_membership_node_list = &iter_client->last_membership_node_list;
  341. }
  342. if (qnetd_algo_ffsplit_partition_cmp(iter_client, iter_config_node_list,
  343. iter_membership_node_list, best_client, best_config_node_list,
  344. best_membership_node_list) > 0) {
  345. best_client = iter_client;
  346. best_config_node_list = iter_config_node_list;
  347. best_membership_node_list = iter_membership_node_list;
  348. }
  349. }
  350. return (best_membership_node_list);
  351. }
  352. /*
  353. * Update state of all nodes to match quorate_partition_node_list
  354. */
  355. static void
  356. qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client *client, int client_leaving,
  357. const struct node_list *quorate_partition_node_list)
  358. {
  359. const struct qnetd_client *iter_client;
  360. struct qnetd_algo_ffsplit_client_data *iter_client_data;
  361. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  362. iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
  363. if (iter_client->node_id == client->node_id && client_leaving) {
  364. iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
  365. continue;
  366. }
  367. if (quorate_partition_node_list == NULL ||
  368. node_list_find_node_id(quorate_partition_node_list, iter_client->node_id) == NULL) {
  369. iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK;
  370. } else {
  371. iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK;
  372. }
  373. }
  374. }
  375. /*
  376. * Send vote info. If client_leaving is set, client is ignored. if send_acks
  377. * is set, only ACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state),
  378. * otherwise only NACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state)
  379. *
  380. * Returns number of send votes
  381. */
  382. static size_t
  383. qnetd_algo_ffsplit_send_votes(struct qnetd_client *client, int client_leaving,
  384. const struct tlv_ring_id *ring_id, int send_acks)
  385. {
  386. size_t sent_votes;
  387. struct qnetd_client *iter_client;
  388. struct qnetd_algo_ffsplit_client_data *iter_client_data;
  389. const struct tlv_ring_id *ring_id_to_send;
  390. enum tlv_vote vote_to_send;
  391. sent_votes = 0;
  392. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  393. if (iter_client->node_id == client->node_id) {
  394. if (client_leaving) {
  395. continue;
  396. }
  397. ring_id_to_send = ring_id;
  398. } else {
  399. ring_id_to_send = &iter_client->last_ring_id;
  400. }
  401. iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
  402. vote_to_send = TLV_VOTE_UNDEFINED;
  403. if (send_acks) {
  404. if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
  405. vote_to_send = TLV_VOTE_ACK;
  406. }
  407. } else {
  408. if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
  409. vote_to_send = TLV_VOTE_NACK;
  410. }
  411. }
  412. if (vote_to_send != TLV_VOTE_UNDEFINED) {
  413. iter_client_data->vote_info_expected_seq_num++;
  414. sent_votes++;
  415. if (qnetd_client_send_vote_info(iter_client,
  416. iter_client_data->vote_info_expected_seq_num, ring_id_to_send,
  417. vote_to_send) == -1) {
  418. client->schedule_disconnect = 1;
  419. }
  420. }
  421. }
  422. return (sent_votes);
  423. }
  424. /*
  425. * Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is
  426. * set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is
  427. * not set
  428. */
  429. static size_t
  430. qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int sending_acks)
  431. {
  432. size_t no_clients;
  433. struct qnetd_client *iter_client;
  434. struct qnetd_algo_ffsplit_client_data *iter_client_data;
  435. no_clients = 0;
  436. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  437. iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
  438. if (sending_acks &&
  439. iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
  440. no_clients++;
  441. }
  442. if (!sending_acks &&
  443. iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
  444. no_clients++;
  445. }
  446. }
  447. return (no_clients);
  448. }
  449. static enum tlv_vote
  450. qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
  451. const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
  452. const struct node_list *membership_node_list)
  453. {
  454. struct qnetd_algo_ffsplit_cluster_data *cluster_data;
  455. const struct node_list *quorate_partition_node_list;
  456. cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
  457. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP;
  458. if (!qnetd_algo_ffsplit_is_membership_stable(client, client_leaving,
  459. ring_id, config_node_list, membership_node_list)) {
  460. /*
  461. * Wait until membership is stable
  462. */
  463. qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name);
  464. return (TLV_VOTE_WAIT_FOR_REPLY);
  465. }
  466. qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name);
  467. quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving,
  468. config_node_list, membership_node_list);
  469. cluster_data->quorate_partition_node_list = quorate_partition_node_list;
  470. if (quorate_partition_node_list == NULL) {
  471. qnetd_log(LOG_DEBUG, "ffsplit: No quorate partition was selected");
  472. } else {
  473. qnetd_log(LOG_DEBUG, "ffsplit: Quorate partition selected");
  474. qnetd_log_debug_dump_node_list(client, quorate_partition_node_list);
  475. }
  476. qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list);
  477. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS;
  478. if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 0) == 0) {
  479. qnetd_log(LOG_DEBUG, "ffsplit: No client gets NACK");
  480. /*
  481. * No one gets nack -> send acks
  482. */
  483. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
  484. if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 1) == 0) {
  485. qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK");
  486. /*
  487. * No one gets acks -> finished
  488. */
  489. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  490. }
  491. }
  492. return (TLV_VOTE_NO_CHANGE);
  493. }
  494. enum tlv_reply_error_code
  495. qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
  496. uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
  497. const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
  498. {
  499. if (node_list_size(nodes) == 0) {
  500. /*
  501. * Empty node list shouldn't happen
  502. */
  503. qnetd_log(LOG_ERR, "ffsplit: Received empty config node list for client %s",
  504. client->addr_str);
  505. return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
  506. }
  507. if (node_list_find_node_id(nodes, client->node_id) == NULL) {
  508. /*
  509. * Current node is not in node list
  510. */
  511. qnetd_log(LOG_ERR, "ffsplit: Received config node list without client %s",
  512. client->addr_str);
  513. return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
  514. }
  515. if (initial || node_list_size(&client->last_membership_node_list) == 0) {
  516. /*
  517. * Initial node list -> membership is going to be send by client
  518. */
  519. *result_vote = TLV_VOTE_ASK_LATER;
  520. } else {
  521. *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
  522. nodes, &client->last_membership_node_list);
  523. }
  524. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  525. }
  526. /*
  527. * Called after client sent membership node list.
  528. * All client fields are already set. Nodes is actual node list.
  529. * msg_seq_num is 32-bit number set by client. If client sent config file version,
  530. * config_version_set is set to 1 and config_version contains valid config file version.
  531. * ring_id and quorate are copied from client votequorum callback.
  532. *
  533. * Function has to return result_vote. This can be one of ack/nack, ask_later (client
  534. * should ask later for a vote) or wait_for_reply (client should wait for reply).
  535. *
  536. * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
  537. * on failure (error is send back to client)
  538. */
  539. enum tlv_reply_error_code
  540. qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
  541. uint32_t msg_seq_num, const struct tlv_ring_id *ring_id,
  542. const struct node_list *nodes, enum tlv_vote *result_vote)
  543. {
  544. if (node_list_size(nodes) == 0) {
  545. /*
  546. * Empty node list shouldn't happen
  547. */
  548. qnetd_log(LOG_ERR, "ffsplit: Received empty membership node list for client %s",
  549. client->addr_str);
  550. return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
  551. }
  552. if (node_list_find_node_id(nodes, client->node_id) == NULL) {
  553. /*
  554. * Current node is not in node list
  555. */
  556. qnetd_log(LOG_ERR, "ffsplit: Received membership node list without client %s",
  557. client->addr_str);
  558. return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
  559. }
  560. if (node_list_size(&client->configuration_node_list) == 0) {
  561. /*
  562. * Config node list not received -> it's going to be sent later
  563. */
  564. *result_vote = TLV_VOTE_ASK_LATER;
  565. } else {
  566. *result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id,
  567. &client->configuration_node_list, nodes);
  568. }
  569. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  570. }
  571. enum tlv_reply_error_code
  572. qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
  573. uint32_t msg_seq_num, enum tlv_quorate quorate, const struct node_list *nodes,
  574. enum tlv_vote *result_vote)
  575. {
  576. /*
  577. * Quorum node list is informative -> no change
  578. */
  579. *result_vote = TLV_VOTE_NO_CHANGE;
  580. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  581. }
  582. void
  583. qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
  584. {
  585. (void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id,
  586. &client->configuration_node_list, &client->last_membership_node_list);
  587. free(client->algorithm_data);
  588. if (qnetd_cluster_size(client->cluster) == 1) {
  589. /*
  590. * Last client in the cluster
  591. */
  592. free(client->cluster->algorithm_data);
  593. }
  594. }
  595. enum tlv_reply_error_code
  596. qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client *client, uint32_t msg_seq_num,
  597. enum tlv_vote *result_vote)
  598. {
  599. /*
  600. * Ask for vote is not supported in current algorithm
  601. */
  602. return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE);
  603. }
  604. enum tlv_reply_error_code
  605. qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client *client, uint32_t msg_seq_num)
  606. {
  607. struct qnetd_algo_ffsplit_cluster_data *cluster_data;
  608. struct qnetd_algo_ffsplit_client_data *client_data;
  609. cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
  610. client_data = (struct qnetd_algo_ffsplit_client_data *)client->algorithm_data;
  611. if (client_data->vote_info_expected_seq_num != msg_seq_num) {
  612. qnetd_log(LOG_DEBUG, "ffsplit: Received old vote info reply from client %s",
  613. client->addr_str);
  614. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  615. }
  616. client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
  617. if (cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS &&
  618. cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS) {
  619. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  620. }
  621. if (cluster_data->cluster_state == QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS) {
  622. if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 0) == 0) {
  623. qnetd_log(LOG_DEBUG, "ffsplit: All NACK votes sent for cluster %s",
  624. client->cluster_name);
  625. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
  626. if (qnetd_algo_ffsplit_send_votes(client, 0, &client->last_ring_id, 1) == 0) {
  627. qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK");
  628. /*
  629. * No one gets acks -> finished
  630. */
  631. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  632. }
  633. }
  634. } else {
  635. if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 1) == 0) {
  636. qnetd_log(LOG_DEBUG, "ffsplit: All ACK votes sent for cluster %s",
  637. client->cluster_name);
  638. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  639. }
  640. }
  641. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  642. }
  643. enum tlv_reply_error_code
  644. qnetd_algo_ffsplit_timer_callback(struct qnetd_client *client, int *reschedule_timer,
  645. int *send_vote, enum tlv_vote *result_vote)
  646. {
  647. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  648. }
  649. static struct qnetd_algorithm qnetd_algo_ffsplit = {
  650. .init = qnetd_algo_ffsplit_client_init,
  651. .config_node_list_received = qnetd_algo_ffsplit_config_node_list_received,
  652. .membership_node_list_received = qnetd_algo_ffsplit_membership_node_list_received,
  653. .quorum_node_list_received = qnetd_algo_ffsplit_quorum_node_list_received,
  654. .client_disconnect = qnetd_algo_ffsplit_client_disconnect,
  655. .ask_for_vote_received = qnetd_algo_ffsplit_ask_for_vote_received,
  656. .vote_info_reply_received = qnetd_algo_ffsplit_vote_info_reply_received,
  657. .timer_callback = qnetd_algo_ffsplit_timer_callback,
  658. };
  659. enum tlv_reply_error_code qnetd_algo_ffsplit_register()
  660. {
  661. return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT, &qnetd_algo_ffsplit));
  662. }