Просмотр исходного кода

Qdevice: Add initial version of ffsplit

For now it does the same job as atb.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Jan Friesse 9 лет назад
Родитель
Сommit
486b4754e9
3 измененных файлов с 248 добавлено и 4 удалено
  1. 86 0
      qdevices/qdevice-net-algo-ffsplit.c
  2. 160 4
      qdevices/qnetd-algo-ffsplit.c
  3. 2 0
      qdevices/tlv.h

+ 86 - 0
qdevices/qdevice-net-algo-ffsplit.c

@@ -40,11 +40,81 @@
 #include "qdevice-log.h"
 #include "qdevice-net-send.h"
 #include "qdevice-net-cast-vote-timer.h"
+#include "qdevice-votequorum.h"
+#include "utils.h"
+
+/*
+ * Check the votequorum information of every node in the configuration
+ * node list.
+ *
+ * Returns 0 when every node that could be looked up has exactly one node vote
+ * and reports exactly one qdevice vote. Returns -1 (after logging at LOG_CRIT)
+ * when one of these checks fails or when votequorum_getinfo() fails with
+ * anything other than CS_ERR_NOT_EXIST.
+ */
+static int
+check_vqinfo_validity(struct qdevice_net_instance *instance)
+{
+	struct qdevice_instance *qdev;
+	struct node_list_entry *entry;
+	struct votequorum_info info;
+	cs_error_t err;
+	uint32_t node_id;
+
+	qdev = instance->qdevice_instance_ptr;
+
+	TAILQ_FOREACH(entry, &qdev->config_node_list, entries) {
+		node_id = entry->node_id;
+		err = votequorum_getinfo(qdev->votequorum_handle, node_id, &info);
+
+		/*
+		 * Nodes for which the lookup returns CS_ERR_NOT_EXIST are skipped,
+		 * not treated as an error.
+		 */
+		if (err == CS_ERR_NOT_EXIST) {
+			continue;
+		}
+
+		if (err != CS_OK) {
+			qdevice_log(LOG_CRIT, "Can't get votequorum information for node "
+			    UTILS_PRI_NODE_ID ". Error %s", node_id, cs_strerror(err));
+
+			return (-1);
+		}
+
+		if (info.node_votes != 1) {
+			qdevice_log(LOG_CRIT, "50:50 split algorithm works only if all nodes have "
+			    "exactly 1 vote. Node " UTILS_PRI_NODE_ID " has %u votes!",
+			    node_id, info.node_votes);
+
+			return (-1);
+		}
+
+		if (info.qdevice_votes != 1) {
+			qdevice_log(LOG_CRIT, "50:50 split algorithm works only if qdevice has "
+			    "exactly 1 vote. Node "UTILS_PRI_NODE_ID" has %u votes!",
+			    node_id, info.qdevice_votes);
+
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Check that the cmap key quorum.device.votes can be read and equals 1.
+ *
+ * Returns 0 when it does; otherwise logs at LOG_CRIT and returns -1.
+ */
+static int
+check_cmap_validity(struct qdevice_net_instance *instance)
+{
+	uint32_t votes;
+	cs_error_t err;
+
+	err = cmap_get_uint32(instance->qdevice_instance_ptr->cmap_handle,
+	    "quorum.device.votes", &votes);
+
+	/*
+	 * votes is only inspected when the read itself succeeded.
+	 */
+	if (err == CS_OK && votes == 1) {
+		return (0);
+	}
+
+	qdevice_log(LOG_CRIT, "50:50 split algorithm works only if quorum.device.votes"
+	    " configuration key is set to 1!");
+
+	return (-1);
+}
 
 int
 qdevice_net_algo_ffsplit_init(struct qdevice_net_instance *instance)
 {
 
+	if (check_cmap_validity(instance) != 0 ||
+	    check_vqinfo_validity(instance) != 0) {
+		return (-1);
+	}
 
 	return (0);
 }
@@ -62,6 +132,11 @@ qdevice_net_algo_ffsplit_config_node_list_changed(struct qdevice_net_instance *i
     const struct node_list *nlist, int config_version_set, uint64_t config_version,
     int *send_node_list, enum tlv_vote *vote)
 {
+
+	if (check_vqinfo_validity(instance) != 0) {
+		return (-1);
+	}
+
 	return (0);
 }
 
@@ -88,6 +163,10 @@ qdevice_net_algo_ffsplit_votequorum_expected_votes_notify(struct qdevice_net_ins
     uint32_t expected_votes, enum tlv_vote *vote)
 {
 
+	if (check_vqinfo_validity(instance) != 0) {
+		return (-1);
+	}
+
 	return (0);
 }
 
@@ -151,6 +230,13 @@ qdevice_net_algo_ffsplit_disconnected(struct qdevice_net_instance *instance,
     enum qdevice_net_disconnect_reason disconnect_reason, int *try_reconnect, enum tlv_vote *vote)
 {
 
+	/*
+	 * We cannot depend on the default behavior (no change -> keep using the old vote).
+	 * This could create two quorate clusters (2:2 -> first half gets ACK -> first half
+	 * disconnects from qnetd -> second half gets ACK -> two quorate clusters).
+	 */
+	*vote = TLV_VOTE_NACK;
+
 	return (0);
 }
 

+ 160 - 4
qdevices/qnetd-algo-ffsplit.c

@@ -38,21 +38,143 @@
 
 #include "qnetd-algo-ffsplit.h"
 #include "qnetd-log.h"
+#include "qnetd-cluster-list.h"
+#include "qnetd-cluster.h"
+
+/*
+ * Per-cluster state of the ffsplit algorithm. qnetd_algo_ffsplit_client_init()
+ * allocates one zero-filled instance and stores it in cluster->algorithm_data.
+ * NOTE(review): neither field is read in the code visible here; presumably
+ * leader_set flags whether leader_id holds a valid node id -- confirm once
+ * the fields are actually used.
+ */
+struct ffsplit_cluster_data {
+	uint8_t leader_set;
+	uint32_t leader_id;
+};
 
 enum tlv_reply_error_code
 qnetd_algo_ffsplit_client_init(struct qnetd_client *client)
 {
+	struct ffsplit_cluster_data *cluster_data;
+
+	if (qnetd_cluster_size(client->cluster) == 1) {
+		cluster_data = malloc(sizeof(struct ffsplit_cluster_data));
+		if (cluster_data == NULL) {
+			qnetd_log(LOG_ERR, "ffsplit: Can't initialize cluster data for client %s",
+			    client->addr_str);
+
+			return (TLV_REPLY_ERROR_CODE_INTERNAL_ERROR);
+		}
+		memset(cluster_data, 0, sizeof(*cluster_data));
+
+		client->cluster->algorithm_data = cluster_data;
+	}
 
 	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
 }
 
+/*
+ * Decide whether the partition described by membership_node_list is the one
+ * preferred by the client's tie-breaker.
+ *
+ * The preferred node id is the lowest or the highest node id found in
+ * config_node_list (TLV_TIE_BREAKER_MODE_LOWEST / TLV_TIE_BREAKER_MODE_HIGHEST)
+ * or client->tie_breaker.node_id (TLV_TIE_BREAKER_MODE_NODE_ID).
+ *
+ * Returns non-zero when membership_node_list contains the preferred node and
+ * 0 otherwise. 0 is also returned when config_node_list is empty or the
+ * tie-breaker mode is none of the three handled values, so prefered_node_id
+ * is never read uninitialized and no NULL list head is dereferenced.
+ */
+static int
+qnetd_algo_ffsplit_is_prefered_partition(struct qnetd_client *client,
+    const struct node_list *config_node_list, const struct node_list *membership_node_list)
+{
+	uint32_t prefered_node_id;
+	struct node_list_entry *node_entry;
+
+	switch (client->tie_breaker.mode) {
+	case TLV_TIE_BREAKER_MODE_LOWEST:
+		node_entry = TAILQ_FIRST(config_node_list);
+		if (node_entry == NULL) {
+			/*
+			 * Empty config node list -> there is no preferred node
+			 */
+			return (0);
+		}
+
+		prefered_node_id = node_entry->node_id;
+
+		TAILQ_FOREACH(node_entry, config_node_list, entries) {
+			if (node_entry->node_id < prefered_node_id) {
+				prefered_node_id = node_entry->node_id;
+			}
+		}
+		break;
+	case TLV_TIE_BREAKER_MODE_HIGHEST:
+		node_entry = TAILQ_FIRST(config_node_list);
+		if (node_entry == NULL) {
+			/*
+			 * Empty config node list -> there is no preferred node
+			 */
+			return (0);
+		}
+
+		prefered_node_id = node_entry->node_id;
+
+		TAILQ_FOREACH(node_entry, config_node_list, entries) {
+			if (node_entry->node_id > prefered_node_id) {
+				prefered_node_id = node_entry->node_id;
+			}
+		}
+		break;
+	case TLV_TIE_BREAKER_MODE_NODE_ID:
+		prefered_node_id = client->tie_breaker.node_id;
+		break;
+	default:
+		/*
+		 * Unhandled tie-breaker mode -> no partition is preferred
+		 */
+		return (0);
+	}
+
+	return (node_list_find_node_id(membership_node_list, prefered_node_id) != NULL);
+}
+
+/*
+ * Compute the vote for the partition the client belongs to.
+ *
+ * A partition holding more than half of the configured nodes gets
+ * TLV_VOTE_ACK and one holding less than half gets TLV_VOTE_NACK. When the
+ * configured node count is even and the partition holds exactly half of it,
+ * TLV_VOTE_ACK is returned only for the partition selected by
+ * qnetd_algo_ffsplit_is_prefered_partition().
+ */
+static enum tlv_vote
+qnetd_algo_ffsplit_do(struct qnetd_client *client, const struct node_list *config_node_list,
+    const struct node_list *membership_node_list)
+{
+	struct ffsplit_cluster_data *cluster_data;
+
+	/*
+	 * Per-cluster algorithm state. It is fetched here but not consulted by
+	 * the decision below yet.
+	 */
+	cluster_data = (struct ffsplit_cluster_data *)client->cluster->algorithm_data;
+
+	if (node_list_size(config_node_list) % 2 != 0) {
+		/*
+		 * Odd clusters never split into 50:50.
+		 */
+		if (node_list_size(membership_node_list) > node_list_size(config_node_list) / 2) {
+			return (TLV_VOTE_ACK);
+		} else {
+			return (TLV_VOTE_NACK);
+		}
+	} else {
+		if (node_list_size(membership_node_list) > node_list_size(config_node_list) / 2) {
+			return (TLV_VOTE_ACK);
+		} else if (node_list_size(membership_node_list) < node_list_size(config_node_list) / 2) {
+			return (TLV_VOTE_NACK);
+		} else {
+			/*
+			 * 50:50 split -> the tie-breaker decides
+			 */
+			if (qnetd_algo_ffsplit_is_prefered_partition(client, config_node_list,
+			    membership_node_list)) {
+				return (TLV_VOTE_ACK);
+			} else {
+				return (TLV_VOTE_NACK);
+			}
+		}
+	}
+}
+
 enum tlv_reply_error_code
 qnetd_algo_ffsplit_config_node_list_received(struct qnetd_client *client,
     uint32_t msg_seq_num, int config_version_set, uint64_t config_version,
     const struct node_list *nodes, int initial, enum tlv_vote *result_vote)
 {
 
-	*result_vote = TLV_VOTE_NO_CHANGE;
+	if (node_list_size(nodes) == 0) {
+		/*
+		 * Empty node list shouldn't happen
+		 */
+		qnetd_log(LOG_ERR, "ffsplit: Received empty config node list for client %s",
+			    client->addr_str);
+
+		return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
+	}
+
+	if (node_list_find_node_id(nodes, client->node_id) == NULL) {
+		/*
+		 * Current node is not in node list
+		 */
+		qnetd_log(LOG_ERR, "ffsplit: Received config node list without client %s",
+			    client->addr_str);
+
+		return (TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST);
+	}
+
+	if (initial || node_list_size(&client->last_membership_node_list) == 0) {
+		/*
+		 * Initial node list -> membership is going to be send by client
+		 */
+		*result_vote = TLV_VOTE_ASK_LATER;
+	} else {
+		*result_vote = qnetd_algo_ffsplit_do(client, nodes, &client->last_membership_node_list);
+	}
 
 	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
 }
@@ -77,7 +199,34 @@ qnetd_algo_ffsplit_membership_node_list_received(struct qnetd_client *client,
     const struct node_list *nodes, enum tlv_vote *result_vote)
 {
 
-	*result_vote = TLV_VOTE_ASK_LATER;
+	if (node_list_size(nodes) == 0) {
+		/*
+		 * Empty node list shouldn't happen
+		 */
+		qnetd_log(LOG_ERR, "ffsplit: Received empty membership node list for client %s",
+			    client->addr_str);
+
+		return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
+	}
+
+	if (node_list_find_node_id(nodes, client->node_id) == NULL) {
+		/*
+		 * Current node is not in node list
+		 */
+		qnetd_log(LOG_ERR, "ffsplit: Received membership node list without client %s",
+			    client->addr_str);
+
+		return (TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST);
+	}
+
+	if (node_list_size(&client->configuration_node_list) == 0) {
+		/*
+		 * Config node list not received -> it's going to be sent later
+		 */
+		*result_vote = TLV_VOTE_ASK_LATER;
+	} else {
+		*result_vote = qnetd_algo_ffsplit_do(client, &client->configuration_node_list, nodes);
+	}
 
 	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
 }
@@ -88,6 +237,9 @@ qnetd_algo_ffsplit_quorum_node_list_received(struct qnetd_client *client,
     enum tlv_vote *result_vote)
 {
 
+	/*
+	 * Quorum node list is informative -> no change
+	 */
 	*result_vote = TLV_VOTE_NO_CHANGE;
 
 	return (TLV_REPLY_ERROR_CODE_NO_ERROR);
@@ -97,6 +249,12 @@ void
 qnetd_algo_ffsplit_client_disconnect(struct qnetd_client *client, int server_going_down)
 {
 
+	if (qnetd_cluster_size(client->cluster) == 1) {
+		/*
+		 * Last client in the cluster
+		 */
+		 free(client->cluster->algorithm_data);
+	}
 }
 
 enum tlv_reply_error_code
@@ -104,8 +262,6 @@ qnetd_algo_ffsplit_ask_for_vote_received(struct qnetd_client *client, uint32_t m
     enum tlv_vote *result_vote)
 {
 
-	*result_vote = TLV_VOTE_ASK_LATER;
-
 	return (TLV_REPLY_ERROR_CODE_UNSUPPORTED_DECISION_ALGORITHM_MESSAGE);
 }
 

+ 2 - 0
qdevices/tlv.h

@@ -94,6 +94,8 @@ enum tlv_reply_error_code {
 	TLV_REPLY_ERROR_CODE_TIE_BREAKER_DIFFERS_FROM_OTHER_NODES = 15,
 	TLV_REPLY_ERROR_CODE_ALGORITHM_DIFFERS_FROM_OTHER_NODES = 16,
 	TLV_REPLY_ERROR_CODE_DUPLICATE_NODE_ID = 17,
+	TLV_REPLY_ERROR_CODE_INVALID_CONFIG_NODE_LIST = 18,
+	TLV_REPLY_ERROR_CODE_INVALID_MEMBERSHIP_NODE_LIST = 19,
 };
 
 enum tlv_decision_algorithm_type {