/* qnetd-algo-ffsplit.c */
  /*
   * Copyright (c) 2015-2016 Red Hat, Inc.
   *
   * All rights reserved.
   *
   * Author: Jan Friesse (jfriesse@redhat.com)
   *
   * This software licensed under BSD license, the text of which follows:
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
   *
   * - Redistributions of source code must retain the above copyright notice,
   *   this list of conditions and the following disclaimer.
   * - Redistributions in binary form must reproduce the above copyright notice,
   *   this list of conditions and the following disclaimer in the documentation
   *   and/or other materials provided with the distribution.
   * - Neither the name of the Red Hat, Inc. nor the names of its
   *   contributors may be used to endorse or promote products derived from this
   *   software without specific prior written permission.
   *
   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
   * THE POSSIBILITY OF SUCH DAMAGE.
   */
  #include <sys/types.h>

  #include <stdlib.h>
  #include <string.h>

  #include "qnetd-algo-ffsplit.h"
  #include "qnetd-log.h"
  #include "qnetd-log-debug.h"
  #include "qnetd-cluster-list.h"
  #include "qnetd-cluster.h"
  #include "qnetd-client-send.h"
  /*
   * Phase of the vote distribution for one cluster.
   *
   * WAITING_FOR_CHANGE            - nothing in progress (state after init and
   *                                 after the last outstanding vote was handled)
   * WAITING_FOR_STABLE_MEMBERSHIP - an evaluation was started, membership of the
   *                                 connected clients is being checked
   * SENDING_NACKS                 - NACK votes are being delivered
   * SENDING_ACKS                  - ACK votes are being delivered
   */
  enum qnetd_algo_ffsplit_cluster_state {
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE = 0,
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP = 1,
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS = 2,
      QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS = 3,
  };
  48. struct qnetd_algo_ffsplit_cluster_data {
  49. enum qnetd_algo_ffsplit_cluster_state cluster_state;
  50. const struct node_list *quorate_partition_node_list;
  51. };
  /*
   * Per client vote state.
   *
   * WAITING_FOR_CHANGE - no vote is pending for the client
   * SENDING_NACK       - client has to receive (and confirm) a NACK vote
   * SENDING_ACK        - client has to receive (and confirm) an ACK vote
   */
  enum qnetd_algo_ffsplit_client_state {
      QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE = 0,
      QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK = 1,
      QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK = 2,
  };
  57. struct qnetd_algo_ffsplit_client_data {
  58. enum qnetd_algo_ffsplit_client_state client_state;
  59. uint32_t vote_info_expected_seq_num;
  60. };
  61. enum tlv_reply_error_code
  62. qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
  63. {
  64. struct qnetd_algo_ffsplit_cluster_data *cluster_data;
  65. struct qnetd_algo_ffsplit_client_data *client_data;
  66. if (qnetd_cluster_size(client->cluster) == 1) {
  67. cluster_data = malloc(sizeof(*cluster_data));
  68. if (cluster_data == NULL) {
  69. qnetd_log(LOG_ERR, "ffsplit: Can't initialize cluster data for client %s",
  70. client->addr_str);
  71. return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
  72. }
  73. memset(cluster_data, 0, sizeof(*cluster_data));
  74. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  75. cluster_data->quorate_partition_node_list = NULL;
  76. client->cluster->algorithm_data = cluster_data;
  77. }
  78. client_data = malloc(sizeof(*client_data));
  79. if (client_data == NULL) {
  80. qnetd_log(LOG_ERR, "ffsplit: Can't initialize node data for client %s",
  81. client->addr_str);
  82. return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
  83. }
  84. memset(client_data, 0, sizeof(*client_data));
  85. client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
  86. client->algorithm_data = client_data;
  87. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  88. }
  89. static int
  90. qnetd_algo_ffsplit_is_prefered_partition(const struct qnetd_client *client,
  91. const struct node_list *config_node_list, const struct node_list *membership_node_list)
  92. {
  93. uint32_t prefered_node_id;
  94. struct node_list_entry *node_entry;
  95. switch (client->tie_breaker.mode) {
  96. case TLV_TIE_BREAKER_MODE_LOWEST:
  97. node_entry = TAILQ_FIRST(config_node_list);
  98. prefered_node_id = node_entry->node_id;
  99. TAILQ_FOREACH(node_entry, config_node_list, entries) {
  100. if (node_entry->node_id < prefered_node_id) {
  101. prefered_node_id = node_entry->node_id;
  102. }
  103. }
  104. break;
  105. case TLV_TIE_BREAKER_MODE_HIGHEST:
  106. node_entry = TAILQ_FIRST(config_node_list);
  107. prefered_node_id = node_entry->node_id;
  108. TAILQ_FOREACH(node_entry, config_node_list, entries) {
  109. if (node_entry->node_id > prefered_node_id) {
  110. prefered_node_id = node_entry->node_id;
  111. }
  112. }
  113. break;
  114. case TLV_TIE_BREAKER_MODE_NODE_ID:
  115. prefered_node_id = client->tie_breaker.node_id;
  116. break;
  117. }
  118. return (node_list_find_node_id(membership_node_list, prefered_node_id) != NULL);
  119. }
  120. static int
  121. qnetd_algo_ffsplit_is_membership_stable(const struct qnetd_client *client, int client_leaving,
  122. const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
  123. const struct node_list *membership_node_list)
  124. {
  125. const struct qnetd_client *iter_client1, *iter_client2;
  126. const struct node_list *config_node_list1, *config_node_list2;
  127. const struct node_list *membership_node_list1, *membership_node_list2;
  128. const struct node_list_entry *iter_node1, *iter_node2;
  129. const struct node_list_entry *iter_node3, *iter_node4;
  130. const struct tlv_ring_id *ring_id1, *ring_id2;
  131. /*
  132. * Test if all active clients share same config list.
  133. */
  134. TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
  135. TAILQ_FOREACH(iter_client2, &client->cluster->client_list, cluster_entries) {
  136. if (iter_client1 == iter_client2) {
  137. continue;
  138. }
  139. if (iter_client1->node_id == client->node_id) {
  140. if (client_leaving) {
  141. continue;
  142. }
  143. config_node_list1 = config_node_list;
  144. } else {
  145. config_node_list1 = &iter_client1->configuration_node_list;
  146. }
  147. if (iter_client2->node_id == client->node_id) {
  148. if (client_leaving) {
  149. continue;
  150. }
  151. config_node_list2 = config_node_list;
  152. } else {
  153. config_node_list2 = &iter_client2->configuration_node_list;
  154. }
  155. /*
  156. * Walk thru all node ids in given config node list...
  157. */
  158. TAILQ_FOREACH(iter_node1, config_node_list1, entries) {
  159. /*
  160. * ... and try to find given node id in other list
  161. */
  162. iter_node2 = node_list_find_node_id(config_node_list2, iter_node1->node_id);
  163. if (iter_node2 == NULL) {
  164. /*
  165. * Node with iter_node1->node_id was not found in
  166. * config_node_list2 -> lists doesn't match
  167. */
  168. return (0);
  169. }
  170. }
  171. }
  172. }
  173. /*
  174. * Test if same partitions share same ring ids and membership node list
  175. */
  176. TAILQ_FOREACH(iter_client1, &client->cluster->client_list, cluster_entries) {
  177. if (iter_client1->node_id == client->node_id) {
  178. if (client_leaving) {
  179. continue;
  180. }
  181. membership_node_list1 = membership_node_list;
  182. ring_id1 = ring_id;
  183. } else {
  184. membership_node_list1 = &iter_client1->last_membership_node_list;
  185. ring_id1 = &iter_client1->last_ring_id;
  186. }
  187. /*
  188. * Walk thru all memberships nodes
  189. */
  190. TAILQ_FOREACH(iter_node1, membership_node_list1, entries) {
  191. /*
  192. * try to find client with given node id
  193. */
  194. iter_client2 = qnetd_cluster_find_client_by_node_id(client->cluster,
  195. iter_node1->node_id);
  196. if (iter_client2 == NULL) {
  197. /*
  198. * Client with given id is not connected
  199. */
  200. continue;
  201. }
  202. if (iter_client2->node_id == client->node_id) {
  203. if (client_leaving) {
  204. continue;
  205. }
  206. membership_node_list2 = membership_node_list;
  207. ring_id2 = ring_id;
  208. } else {
  209. membership_node_list2 = &iter_client2->last_membership_node_list;
  210. ring_id2 = &iter_client2->last_ring_id;
  211. }
  212. /*
  213. * Compare ring ids
  214. */
  215. if (!tlv_ring_id_eq(ring_id1, ring_id2)) {
  216. return (0);
  217. }
  218. /*
  219. * Now compare that membership node list equals, so walk thru all
  220. * members ...
  221. */
  222. TAILQ_FOREACH(iter_node3, membership_node_list1, entries) {
  223. /*
  224. * ... and try to find given node id in other membership node list
  225. */
  226. iter_node4 = node_list_find_node_id(membership_node_list2, iter_node3->node_id);
  227. if (iter_node4 == NULL) {
  228. /*
  229. * Node with iter_node3->node_id was not found in
  230. * membership_node_list2 -> lists doesn't match
  231. */
  232. return (0);
  233. }
  234. }
  235. }
  236. }
  237. return (1);
  238. }
  /*
   * Compute score of partition formed by membership_node_list in cluster
   * configured as config_node_list.
   *
   * 0 - Not quorate
   * 1 - Quorate with more than 50% of nodes, or exactly 50% of nodes but not
   *     fulfilling tie breaker
   * 2 - Exactly 50% of nodes and fulfilling tie breaker
   *
   * Tie breaker of the client is used only for exact 50:50 split, which is
   * possible only if number of configured nodes is even.
   */
  static unsigned int
  qnetd_algo_ffsplit_get_partition_score(const struct qnetd_client *client,
      const struct node_list *config_node_list, const struct node_list *membership_node_list)
  {
      size_t config_size, membership_size, half_size;

      config_size = node_list_size(config_node_list);
      membership_size = node_list_size(membership_node_list);
      half_size = config_size / 2;

      if (membership_size > half_size) {
          /*
           * More than 50%
           */
          return (1);
      }

      if (config_size % 2 != 0 || membership_size < half_size) {
          /*
           * Odd clusters never split into 50:50 so half (rounded down) or
           * less is not enough. For even clusters less than half is not enough.
           */
          return (0);
      }

      /*
       * 50:50 split
       */
      if (qnetd_algo_ffsplit_is_prefered_partition(client, config_node_list,
          membership_node_list)) {
          return (2);
      }

      return (1);
  }
  278. static const struct node_list *
  279. qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving,
  280. const struct node_list *config_node_list, const struct node_list *membership_node_list)
  281. {
  282. unsigned int highest_score, iter_score;
  283. const struct qnetd_client *iter_client;
  284. const struct node_list *result_node_list;
  285. const struct node_list *iter_config_node_list;
  286. const struct node_list *iter_membership_node_list;
  287. highest_score = 0;
  288. result_node_list = NULL;
  289. /*
  290. * Get highest score
  291. */
  292. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  293. if (iter_client->node_id == client->node_id) {
  294. if (client_leaving) {
  295. continue;
  296. }
  297. iter_config_node_list = config_node_list;
  298. iter_membership_node_list = membership_node_list;
  299. } else {
  300. iter_config_node_list = &iter_client->configuration_node_list;
  301. iter_membership_node_list = &iter_client->last_membership_node_list;
  302. }
  303. if ((iter_score = qnetd_algo_ffsplit_get_partition_score(iter_client, iter_config_node_list,
  304. iter_membership_node_list)) > highest_score) {
  305. highest_score = iter_score;
  306. result_node_list = iter_membership_node_list;
  307. }
  308. }
  309. return (result_node_list);
  310. }
  311. /*
  312. * Update state of all nodes to match quorate_partition_node_list
  313. */
  314. static void
  315. qnetd_algo_ffsplit_update_nodes_state(struct qnetd_client *client, int client_leaving,
  316. const struct node_list *quorate_partition_node_list)
  317. {
  318. const struct qnetd_client *iter_client;
  319. struct qnetd_algo_ffsplit_client_data *iter_client_data;
  320. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  321. iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
  322. if (iter_client->node_id == client->node_id && client_leaving) {
  323. iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
  324. continue;
  325. }
  326. if (quorate_partition_node_list == NULL ||
  327. node_list_find_node_id(quorate_partition_node_list, iter_client->node_id) == NULL) {
  328. iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK;
  329. } else {
  330. iter_client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK;
  331. }
  332. }
  333. }
  334. /*
  335. * Send vote info. If client_leaving is set, client is ignored. if send_acks
  336. * is set, only ACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state),
  337. * otherwise only NACK votes are send (nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state)
  338. *
  339. * Returns number of send votes
  340. */
  341. static size_t
  342. qnetd_algo_ffsplit_send_votes(struct qnetd_client *client, int client_leaving,
  343. const struct tlv_ring_id *ring_id, int send_acks)
  344. {
  345. size_t sent_votes;
  346. struct qnetd_client *iter_client;
  347. struct qnetd_algo_ffsplit_client_data *iter_client_data;
  348. const struct tlv_ring_id *ring_id_to_send;
  349. enum tlv_vote vote_to_send;
  350. sent_votes = 0;
  351. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  352. if (iter_client->node_id == client->node_id) {
  353. if (client_leaving) {
  354. continue;
  355. }
  356. ring_id_to_send = ring_id;
  357. } else {
  358. ring_id_to_send = &iter_client->last_ring_id;
  359. }
  360. iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
  361. vote_to_send = TLV_VOTE_UNDEFINED;
  362. if (send_acks) {
  363. if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
  364. vote_to_send = TLV_VOTE_ACK;
  365. }
  366. } else {
  367. if (iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
  368. vote_to_send = TLV_VOTE_NACK;
  369. }
  370. }
  371. if (vote_to_send != TLV_VOTE_UNDEFINED) {
  372. iter_client_data->vote_info_expected_seq_num++;
  373. sent_votes++;
  374. if (qnetd_client_send_vote_info(iter_client,
  375. iter_client_data->vote_info_expected_seq_num, ring_id_to_send,
  376. vote_to_send) == -1) {
  377. client->schedule_disconnect = 1;
  378. }
  379. }
  380. }
  381. return (sent_votes);
  382. }
  383. /*
  384. * Return number of clients in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK state if sending_acks is
  385. * set or number of nodes in QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK state if sending_acks is
  386. * not set
  387. */
  388. static size_t
  389. qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int sending_acks)
  390. {
  391. size_t no_clients;
  392. struct qnetd_client *iter_client;
  393. struct qnetd_algo_ffsplit_client_data *iter_client_data;
  394. no_clients = 0;
  395. TAILQ_FOREACH(iter_client, &client->cluster->client_list, cluster_entries) {
  396. iter_client_data = (struct qnetd_algo_ffsplit_client_data *)iter_client->algorithm_data;
  397. if (sending_acks &&
  398. iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_ACK) {
  399. no_clients++;
  400. }
  401. if (!sending_acks &&
  402. iter_client_data->client_state == QNETD_ALGO_FFSPLIT_CLIENT_STATE_SENDING_NACK) {
  403. no_clients++;
  404. }
  405. }
  406. return (no_clients);
  407. }
  408. static enum tlv_vote
  409. qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
  410. const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
  411. const struct node_list *membership_node_list)
  412. {
  413. struct qnetd_algo_ffsplit_cluster_data *cluster_data;
  414. const struct node_list *quorate_partition_node_list;
  415. cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
  416. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_STABLE_MEMBERSHIP;
  417. if (!qnetd_algo_ffsplit_is_membership_stable(client, client_leaving,
  418. ring_id, config_node_list, membership_node_list)) {
  419. /*
  420. * Wait until membership is stable
  421. */
  422. qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name);
  423. return (TLV_VOTE_WAIT_FOR_REPLY);
  424. }
  425. qnetd_log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name);
  426. quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving,
  427. config_node_list, membership_node_list);
  428. cluster_data->quorate_partition_node_list = quorate_partition_node_list;
  429. if (quorate_partition_node_list == NULL) {
  430. qnetd_log(LOG_DEBUG, "ffsplit: No quorate partition was selected");
  431. } else {
  432. qnetd_log(LOG_DEBUG, "ffsplit: Quorate partition selected");
  433. qnetd_log_debug_dump_node_list(client, quorate_partition_node_list);
  434. }
  435. qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list);
  436. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS;
  437. if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 0) == 0) {
  438. qnetd_log(LOG_DEBUG, "ffsplit: No client gets NACK");
  439. /*
  440. * No one gets nack -> send acks
  441. */
  442. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
  443. if (qnetd_algo_ffsplit_send_votes(client, client_leaving, ring_id, 1) == 0) {
  444. qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK");
  445. /*
  446. * No one gets acks -> finished
  447. */
  448. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  449. }
  450. }
  451. return (TLV_VOTE_NO_CHANGE);
  452. }
  453. enum tlv_reply_error_code
  454. qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
  455. uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
  456. const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
  457. {
  458. if (node_list_size(nodes) == 0) {
  459. /*
  460. * Empty node list shouldn't happen
  461. */
  462. qnetd_log(LOG_ERR, "ffsplit: Received empty config node list for client %s",
  463. client->addr_str);
  464. return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
  465. }
  466. if (node_list_find_node_id(nodes, client->node_id) == NULL) {
  467. /*
  468. * Current node is not in node list
  469. */
  470. qnetd_log(LOG_ERR, "ffsplit: Received config node list without client %s",
  471. client->addr_str);
  472. return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
  473. }
  474. if (initial || node_list_size(&client->last_membership_node_list) == 0) {
  475. /*
  476. * Initial node list -> membership is going to be send by client
  477. */
  478. *result_vote = TLV_VOTE_ASK_LATER;
  479. } else {
  480. *result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
  481. nodes, &client->last_membership_node_list);
  482. }
  483. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  484. }
  485. /*
  486. * Called after client sent membership node list.
  487. * All client fields are already set. Nodes is actual node list.
  488. * msg_seq_num is 32-bit number set by client. If client sent config file version,
  489. * config_version_set is set to 1 and config_version contains valid config file version.
  490. * ring_id and quorate are copied from client votequorum callback.
  491. *
  492. * Function has to return result_vote. This can be one of ack/nack, ask_later (client
  493. * should ask later for a vote) or wait_for_reply (client should wait for reply).
  494. *
  495. * Return TLV_REPLY_ERROR_CODE_NO_ERROR on success, different TLV_REPLY_ERROR_CODE_*
  496. * on failure (error is send back to client)
  497. */
  498. enum tlv_reply_error_code
  499. qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
  500. uint32_t msg_seq_num, const struct tlv_ring_id *ring_id,
  501. const struct node_list *nodes, enum tlv_vote *result_vote)
  502. {
  503. if (node_list_size(nodes) == 0) {
  504. /*
  505. * Empty node list shouldn't happen
  506. */
  507. qnetd_log(LOG_ERR, "ffsplit: Received empty membership node list for client %s",
  508. client->addr_str);
  509. return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
  510. }
  511. if (node_list_find_node_id(nodes, client->node_id) == NULL) {
  512. /*
  513. * Current node is not in node list
  514. */
  515. qnetd_log(LOG_ERR, "ffsplit: Received membership node list without client %s",
  516. client->addr_str);
  517. return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
  518. }
  519. if (node_list_size(&client->configuration_node_list) == 0) {
  520. /*
  521. * Config node list not received -> it's going to be sent later
  522. */
  523. *result_vote = TLV_VOTE_ASK_LATER;
  524. } else {
  525. *result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id,
  526. &client->configuration_node_list, nodes);
  527. }
  528. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  529. }
  530. enum tlv_reply_error_code
  531. qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
  532. uint32_t msg_seq_num, enum tlv_quorate quorate, const struct node_list *nodes,
  533. enum tlv_vote *result_vote)
  534. {
  535. /*
  536. * Quorum node list is informative -> no change
  537. */
  538. *result_vote = TLV_VOTE_NO_CHANGE;
  539. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  540. }
  541. void
  542. qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
  543. {
  544. (void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id,
  545. &client->configuration_node_list, &client->last_membership_node_list);
  546. free(client->algorithm_data);
  547. if (qnetd_cluster_size(client->cluster) == 1) {
  548. /*
  549. * Last client in the cluster
  550. */
  551. free(client->cluster->algorithm_data);
  552. }
  553. }
  554. enum tlv_reply_error_code
  555. qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client *client, uint32_t msg_seq_num,
  556. enum tlv_vote *result_vote)
  557. {
  558. /*
  559. * Ask for vote is not supported in current algorithm
  560. */
  561. return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE);
  562. }
  563. enum tlv_reply_error_code
  564. qnetd_algo_ffsplit_vote_info_reply_received(struct qnetd_client *client, uint32_t msg_seq_num)
  565. {
  566. struct qnetd_algo_ffsplit_cluster_data *cluster_data;
  567. struct qnetd_algo_ffsplit_client_data *client_data;
  568. cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
  569. client_data = (struct qnetd_algo_ffsplit_client_data *)client->algorithm_data;
  570. if (client_data->vote_info_expected_seq_num != msg_seq_num) {
  571. qnetd_log(LOG_DEBUG, "ffsplit: Received old vote info reply from client %s",
  572. client->addr_str);
  573. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  574. }
  575. client_data->client_state = QNETD_ALGO_FFSPLIT_CLIENT_STATE_WAITING_FOR_CHANGE;
  576. if (cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS &&
  577. cluster_data->cluster_state != QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS) {
  578. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  579. }
  580. if (cluster_data->cluster_state == QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_NACKS) {
  581. if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 0) == 0) {
  582. qnetd_log(LOG_DEBUG, "ffsplit: All NACK votes sent for cluster %s",
  583. client->cluster_name);
  584. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_SENDING_ACKS;
  585. if (qnetd_algo_ffsplit_send_votes(client, 0, &client->last_ring_id, 1) == 0) {
  586. qnetd_log(LOG_DEBUG, "ffsplit: No client gets ACK");
  587. /*
  588. * No one gets acks -> finished
  589. */
  590. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  591. }
  592. }
  593. } else {
  594. if (qnetd_algo_ffsplit_no_clients_in_sending_state(client, 1) == 0) {
  595. qnetd_log(LOG_DEBUG, "ffsplit: All ACK votes sent for cluster %s",
  596. client->cluster_name);
  597. cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
  598. }
  599. }
  600. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  601. }
  602. enum tlv_reply_error_code
  603. qnetd_algo_ffsplit_timer_callback(struct qnetd_client *client, int *reschedule_timer,
  604. int *send_vote, enum tlv_vote *result_vote)
  605. {
  606. return (TLV_REPLY_ERROR_CODE_NO_ERROR);
  607. }
  608. static struct qnetd_algorithm qnetd_algo_ffsplit = {
  609. .init = qnetd_algo_ffsplit_client_init,
  610. .config_node_list_received = qnetd_algo_ffsplit_config_node_list_received,
  611. .membership_node_list_received = qnetd_algo_ffsplit_membership_node_list_received,
  612. .quorum_node_list_received = qnetd_algo_ffsplit_quorum_node_list_received,
  613. .client_disconnect = qnetd_algo_ffsplit_client_disconnect,
  614. .ask_for_vote_received = qnetd_algo_ffsplit_ask_for_vote_received,
  615. .vote_info_reply_received = qnetd_algo_ffsplit_vote_info_reply_received,
  616. .timer_callback = qnetd_algo_ffsplit_timer_callback,
  617. };
  618. enum tlv_reply_error_code qnetd_algo_ffsplit_register()
  619. {
  620. return (qnetd_algorithm_register(TLV_DECISION_ALGORITHM_TYPE_FFSPLIT, &qnetd_algo_ffsplit));
  621. }