Parcourir la source

qnetd: Add support for keep active partition vote

When a tie happens, prefer the partition that contains members of
the previously active (quorate) partition. This is hard-coded
behavior of the LMS algorithm, so this setting affects only the
FFSplit algorithm. It is disabled by default for backwards
compatibility.

This solves a problem with FFSplit where node A (with the lowest id) is
killed, node B gets the vote, and then node A starts up, creates a
single-node membership and gets the vote.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Jan Friesse il y a 5 ans
Parent
commit
7a0201a5c6

+ 5 - 1
man/corosync-qnetd.8

@@ -31,7 +31,7 @@
 .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 .\" * THE POSSIBILITY OF SUCH DAMAGE.
 .\" */
-.TH COROSYNC-QNETD 8 2020-09-15
+.TH COROSYNC-QNETD 8 2020-09-22
 .SH NAME
 corosync-qnetd \- QNet daemon
 .SH SYNOPSIS
@@ -236,6 +236,10 @@ Maximum size of a message received by IPC client. (4096)
 .TP
 .B ipc_max_send_size
 Maximum size of a message sent to an IPC client. (10485760)
+.TP
+.B keep_active_partition_tie_breaker
+When a tie happens, prefer the partition that contains members of the previously active (quorate) partition.
+This is hard-coded behavior of the LMS algorithm, so this setting affects only the FFSplit algorithm. (off)
 .SH SEE ALSO
 .BR corosync-qnetd-tool (8)
 .BR corosync-qnetd-certutil (8)

+ 2 - 0
qdevices/qnet-config.h

@@ -85,6 +85,8 @@ extern "C" {
 #define QNETD_DEFAULT_IPC_MAX_SEND_SIZE			(10*1024*1024)
 #define QNETD_MIN_IPC_RECEIVE_SEND_SIZE			1024
 
+#define QNETD_DEFAULT_KEEP_ACTIVE_PARTITION_TIE_BREAKER	0
+
 #define QNETD_TOOL_PROGRAM_NAME				"corosync-qnetd-tool"
 
 #define QDEVICE_NET_DEFAULT_NSS_DB_DIR			COROSYSCONFDIR "/qdevice/net/nssdb"

+ 9 - 1
qdevices/qnetd-advanced-settings.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016 Red Hat, Inc.
+ * Copyright (c) 2015-2020 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -75,6 +75,8 @@ qnetd_advanced_settings_init(struct qnetd_advanced_settings *settings)
 	settings->ipc_max_receive_size = QNETD_DEFAULT_IPC_MAX_RECEIVE_SIZE;
 	settings->ipc_max_send_size = QNETD_DEFAULT_IPC_MAX_SEND_SIZE;
 
+	settings->keep_active_partition_tie_breaker = QNETD_DEFAULT_KEEP_ACTIVE_PARTITION_TIE_BREAKER;
+
 	return (0);
 }
 
@@ -195,6 +197,12 @@ qnetd_advanced_settings_set(struct qnetd_advanced_settings *settings,
 		}
 
 		settings->ipc_max_send_size = (size_t)tmpll;
+	} else if (strcasecmp(option, "keep_active_partition_tie_breaker") == 0) {
+		if ((tmpll = utils_parse_bool_str(value)) == -1) {
+			return (-2);
+		}
+
+		settings->keep_active_partition_tie_breaker = (uint8_t)tmpll;
 	} else {
 		return (-1);
 	}

+ 2 - 1
qdevices/qnetd-advanced-settings.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016 Red Hat, Inc.
+ * Copyright (c) 2015-2020 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -56,6 +56,7 @@ struct qnetd_advanced_settings {
 	size_t ipc_max_clients;
 	size_t ipc_max_send_size;
 	size_t ipc_max_receive_size;
+	uint8_t keep_active_partition_tie_breaker;
 };
 
 extern int		qnetd_advanced_settings_init(struct qnetd_advanced_settings *settings);

+ 78 - 22
qdevices/qnetd-algo-ffsplit.c

@@ -53,7 +53,7 @@ enum qnetd_algo_ffsplit_cluster_state {
 
 struct qnetd_algo_ffsplit_cluster_data {
 	enum qnetd_algo_ffsplit_cluster_state cluster_state;
-	const struct node_list *quorate_partition_node_list;
+	struct node_list quorate_partition_node_list;
 };
 
 enum qnetd_algo_ffsplit_client_state {
@@ -83,7 +83,7 @@ qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
 		}
 		memset(cluster_data, 0, sizeof(*cluster_data));
 		cluster_data->cluster_state = QNETD_ALGO_FFSPLIT_CLUSTER_STATE_WAITING_FOR_CHANGE;
-		cluster_data->quorate_partition_node_list = NULL;
+		node_list_init(&cluster_data->quorate_partition_node_list);
 
 		client->cluster->algorithm_data = cluster_data;
 	}
@@ -337,12 +337,17 @@ qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
     enum tlv_heuristics heuristics_1,
     const struct qnetd_client *client2,
     const struct node_list *config_node_list2, const struct node_list *membership_node_list2,
-    enum tlv_heuristics heuristics_2)
+    enum tlv_heuristics heuristics_2,
+    const struct node_list *quorate_partition_node_list)
 {
 	size_t part1_active_clients, part2_active_clients;
 	size_t part1_no_heuristics_pass, part2_no_heuristics_pass;
 	size_t part1_no_heuristics_fail, part2_no_heuristics_fail;
 	size_t part1_score, part2_score;
+	/* Result of node_list_find_node_id of client 1 node id in quorate_partition_node_list */
+	struct node_list_entry *qpnl_client1;
+	/* Result of node_list_find_node_id of client 2 node id in quorate_partition_node_list */
+	struct node_list_entry *qpnl_client2;
 
 	int res;
 
@@ -407,6 +412,26 @@ qnetd_algo_ffsplit_partition_cmp(const struct qnetd_client *client1,
 			res = 0; goto exit_res;
 		}
 
+		/*
+		 * Use keep active partition tie-breaker if enabled for both clients
+		 */
+		if (client1->keep_active_partition_tie_breaker &&
+		    client2->keep_active_partition_tie_breaker) {
+			qpnl_client1 = node_list_find_node_id(quorate_partition_node_list, client1->node_id);
+			qpnl_client2 = node_list_find_node_id(quorate_partition_node_list, client2->node_id);
+
+			/*
+			 * Client 1 is in the quorate partition and client 2 isn't, or vice versa.
+			 * If both clients are in the quorate partition, or neither is, fall
+			 * through to the next tie-breaker.
+			 */
+			if (qpnl_client1 != NULL && qpnl_client2 == NULL) {
+				res = 1; goto exit_res;
+			} else if (qpnl_client1 == NULL && qpnl_client2 != NULL) {
+				res = 0; goto exit_res;
+			}
+		}
+
 		/*
 		 * Number of active clients in both partitions equals. Use tie-breaker.
 		 */
@@ -436,7 +461,7 @@ exit_res:
 static const struct node_list *
 qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int client_leaving,
     const struct node_list *config_node_list, const struct node_list *membership_node_list,
-    enum tlv_heuristics client_heuristics)
+    const struct node_list *quorate_partition_node_list, enum tlv_heuristics client_heuristics)
 {
 	const struct qnetd_client *iter_client;
 	const struct qnetd_client *best_client;
@@ -468,7 +493,8 @@ qnetd_algo_ffsplit_select_partition(const struct qnetd_client *client, int clien
 
 		if (qnetd_algo_ffsplit_partition_cmp(iter_client, iter_config_node_list,
 		    iter_membership_node_list, iter_heuristics, best_client, best_config_node_list,
-		    best_membership_node_list, best_heuristics) > 0) {
+		    best_membership_node_list, best_heuristics,
+		    quorate_partition_node_list) > 0) {
 			best_client = iter_client;
 			best_config_node_list = iter_config_node_list;
 			best_membership_node_list = iter_membership_node_list;
@@ -596,10 +622,11 @@ qnetd_algo_ffsplit_no_clients_in_sending_state(struct qnetd_client *client, int
 	return (no_clients);
 }
 
-static enum tlv_vote
+static enum tlv_reply_error_code
 qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
     const struct tlv_ring_id *ring_id, const struct node_list *config_node_list,
-    const struct node_list *membership_node_list, enum tlv_heuristics client_heuristics)
+    const struct node_list *membership_node_list, enum tlv_heuristics client_heuristics,
+    enum tlv_vote *result_vote)
 {
 	struct qnetd_algo_ffsplit_cluster_data *cluster_data;
 	const struct node_list *quorate_partition_node_list;
@@ -614,21 +641,31 @@ qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
 		 * Wait until membership is stable
 		 */
 		log(LOG_DEBUG, "ffsplit: Membership for cluster %s is not yet stable", client->cluster_name);
+		*result_vote = TLV_VOTE_WAIT_FOR_REPLY;
 
-		return (TLV_VOTE_WAIT_FOR_REPLY);
+		return (TLV_REPLY_ERROR_CODE_NO_ERROR);
 	}
 
 	log(LOG_DEBUG, "ffsplit: Membership for cluster %s is now stable", client->cluster_name);
 
 	quorate_partition_node_list = qnetd_algo_ffsplit_select_partition(client, client_leaving,
-	    config_node_list, membership_node_list, client_heuristics);
-	cluster_data->quorate_partition_node_list = quorate_partition_node_list;
+	    config_node_list, membership_node_list, &cluster_data->quorate_partition_node_list,
+	    client_heuristics);
+
+	node_list_free(&cluster_data->quorate_partition_node_list);
 
 	if (quorate_partition_node_list == NULL) {
 		log(LOG_DEBUG, "ffsplit: No quorate partition was selected");
 	} else {
 		log(LOG_DEBUG, "ffsplit: Quorate partition selected");
 		log_common_debug_dump_node_list(quorate_partition_node_list);
+
+		if (node_list_clone(&cluster_data->quorate_partition_node_list,
+		    quorate_partition_node_list) != 0) {
+			log(LOG_ERR, "ffsplit: Can't clone quourate partition node list");
+
+			return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
+		}
 	}
 
 	qnetd_algo_ffsplit_update_nodes_state(client, client_leaving, quorate_partition_node_list);
@@ -651,7 +688,9 @@ qnetd_algo_ffsplit_do(struct qnetd_client *client, int client_leaving,
 		}
 	}
 
-	return (TLV_VOTE_NO_CHANGE);
+	*result_vote = TLV_VOTE_NO_CHANGE;
+
+	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
 }
 
 enum tlv_reply_error_code
@@ -659,6 +698,9 @@ qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
     uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
     const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
 {
+	enum tlv_reply_error_code reply_error_code;
+
+	reply_error_code = TLV_REPLY_ERROR_CODE_NO_ERROR;
 
 	if (node_list_size(nodes) == 0) {
 		/*
@@ -686,11 +728,12 @@ qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
 		 */
 		*result_vote = TLV_VOTE_ASK_LATER;
 	} else {
-		*result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
-		    nodes, &client->last_membership_node_list, client->last_heuristics);
+		reply_error_code = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
+		    nodes, &client->last_membership_node_list, client->last_heuristics,
+		    result_vote);
 	}
 
-	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
+	return (reply_error_code);
 }
 
 /*
@@ -712,6 +755,9 @@ qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
     uint32_t msg_seq_num, const struct tlv_ring_id *ring_id,
     const struct node_list *nodes, enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
 {
+	enum tlv_reply_error_code reply_error_code;
+
+	reply_error_code = TLV_REPLY_ERROR_CODE_NO_ERROR;
 
 	if (node_list_size(nodes) == 0) {
 		/*
@@ -739,11 +785,12 @@ qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
 		 */
 		*result_vote = TLV_VOTE_ASK_LATER;
 	} else {
-		*result_vote = qnetd_algo_ffsplit_do(client, 0, ring_id,
-		    &client->configuration_node_list, nodes, heuristics);
+		reply_error_code = qnetd_algo_ffsplit_do(client, 0, ring_id,
+		    &client->configuration_node_list, nodes, heuristics,
+		    result_vote);
 	}
 
-	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
+	return (reply_error_code);
 }
 
 enum tlv_reply_error_code
@@ -763,11 +810,15 @@ qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
 void
 qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
 {
+	enum tlv_vote result_vote;
+	struct qnetd_algo_ffsplit_cluster_data *cluster_data;
+
+	cluster_data = (struct qnetd_algo_ffsplit_cluster_data *)client->cluster->algorithm_data;
 
 	if (!server_going_down) {
 		(void)qnetd_algo_ffsplit_do(client, 1, &client->last_ring_id,
 		    &client->configuration_node_list, &client->last_membership_node_list,
-		    client->last_heuristics);
+		    client->last_heuristics, &result_vote);
 	}
 
 	free(client->algorithm_data);
@@ -776,7 +827,9 @@ qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_goi
 		/*
 		 * Last client in the cluster
 		 */
-		 free(client->cluster->algorithm_data);
+		node_list_free(&cluster_data->quorate_partition_node_list);
+
+		free(client->cluster->algorithm_data);
 	}
 }
 
@@ -845,6 +898,9 @@ enum tlv_reply_error_code
 qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client *client, uint32_t msg_seq_num,
     enum tlv_heuristics heuristics, enum tlv_vote *result_vote)
 {
+	enum tlv_reply_error_code reply_error_code;
+
+	reply_error_code = TLV_REPLY_ERROR_CODE_NO_ERROR;
 
 	if (node_list_size(&client->configuration_node_list) == 0 ||
 	    node_list_size(&client->last_membership_node_list) == 0) {
@@ -853,12 +909,12 @@ qnetd_algo_ffsplit_heuristics_change_received(struct qnetd_client *client, uint3
 		 */
 		*result_vote = TLV_VOTE_ASK_LATER;
 	} else {
-		*result_vote = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
+		reply_error_code = qnetd_algo_ffsplit_do(client, 0, &client->last_ring_id,
 		    &client->configuration_node_list, &client->last_membership_node_list,
-		    heuristics);
+		    heuristics, result_vote);
 	}
 
-	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
+	return (reply_error_code);
 }
 
 enum tlv_reply_error_code

+ 8 - 0
qdevices/qnetd-client-msg-received.c

@@ -444,6 +444,14 @@ qnetd_client_msg_received_init(struct qnetd_instance *instance, struct qnetd_cli
 		client->decision_algorithm = msg->decision_algorithm;
 	}
 
+	if (reply_error_code == TLV_REPLY_ERROR_CODE_NO_ERROR) {
+		/*
+		 * Preset keep_active_partition_tie_breaker from default config
+		 */
+		client->keep_active_partition_tie_breaker =
+		    instance->advanced_settings->keep_active_partition_tie_breaker;
+	}
+
 	if (reply_error_code == TLV_REPLY_ERROR_CODE_NO_ERROR) {
 		reply_error_code = qnetd_client_msg_received_init_check_new_client(instance,
 		    client);

+ 2 - 1
qdevices/qnetd-client.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017 Red Hat, Inc.
+ * Copyright (c) 2015-2020 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -90,6 +90,7 @@ struct qnetd_client {
 	enum tlv_heuristics last_membership_heuristics; /* Passed in membership node list */
 	enum tlv_heuristics last_regular_heuristics; /* Passed in heuristics change callback */
 	enum tlv_heuristics last_heuristics; /* Latest heuristics both membership and regular */
+	uint8_t keep_active_partition_tie_breaker;
 	TAILQ_ENTRY(qnetd_client) entries;
 	TAILQ_ENTRY(qnetd_client) cluster_entries;
 };