Просмотр исходного кода

cpg: Inform clients about left nodes during pause

Patch tries to fix incorrect behaviour during following test-case:
- 3 nodes
- Node 1 is paused
- Node 2 and 3 detects node 1 as failed and informs CPG clients
- Node 1 is unpaused
- Node 1 clients are informed about new membership, but not about Node 1
  being paused, so from Node 1 point-of-view, Node 2 and 3 failure

Solution is to:
- Remove downlist master choose and always choose local node downlist.
  For Node 1 in example above, downlist contains Node 2 and 3.
- Keep code which informs clients about left nodes
- Use joinlist as a authoritative source of nodes/clients which exists
  in membership

This patch doesn't break backwards compatibility.

I've walked thru all the patches which changed behavior of cpg to ensure
patch does not break CPG behavior. Most important were:
- 058f50314cd20abe67f5e8fb3c029a63b0e10cdc - Base. Code was significantly
  changed to handle double free by split group_info into two structures
  cpg_pd (local node clients) and process_info (all clients). Joinlist
  was
- 97c28ea756cdf59316b2f609103122cc678329bd - This patch removed
  confchg_fn and made CPG sync correct
- feff0e8542463773207a3b2c1f6004afba1f58d5 - I've tested described
  behavior without any issues
- 6bbbfcb6b4af72cf35ab9fdb4412fa6c6bdacc12 - Added idea of using
  heuristics to choose same downlist on all nodes. Sadly this idea
  was beginning of the problems described in
  040fda8872a4a20340d73fa1c240b86afb2489f8,
  ac1d79ea7c14997353427e962865781d0836d9fa,
  559d4083ed8355fe83f275e53b9c8f52a91694b2,
  02c5dffa5bb8579c223006fa1587de9ba7409a3d,
  64d0e5ace025cc929e42896c5d6beb3ef75b8244 and
  b55f32fe2e1538db33a1ec584b67744c724328c6
- 02c5dffa5bb8579c223006fa1587de9ba7409a3d - Made joinlist as
  authoritative source of nodes/clients but left downlist_master_choose
  as a source of information about left nodes

Long story made short. This patch basically reverts
idea of using heuristics to choose same downlist on all nodes.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
Jan Friesse 7 лет назад
Родитель
Сommit
9c2a97f4f9
1 измененных файлов с 11 добавлено и 153 удалено
  1. 11 153
      exec/cpg.c

+ 11 - 153
exec/cpg.c

@@ -139,13 +139,6 @@ enum cpg_sync_state {
 	CPGSYNC_JOINLIST
 };
 
-enum cpg_downlist_state_e {
-       CPG_DOWNLIST_NONE,
-       CPG_DOWNLIST_WAITING_FOR_MESSAGES,
-       CPG_DOWNLIST_APPLYING,
-};
-static enum cpg_downlist_state_e downlist_state;
-static struct list_head downlist_messages_head;
 static struct list_head joinlist_messages_head;
 
 struct cpg_pd {
@@ -295,9 +288,7 @@ static int cpg_exec_send_downlist(void);
 
 static int cpg_exec_send_joinlist(void);
 
-static void downlist_messages_delete (void);
-
-static void downlist_master_choose_and_send (void);
+static void downlist_inform_clients (void);
 
 static void joinlist_inform_clients (void);
 
@@ -499,14 +490,6 @@ struct req_exec_cpg_downlist {
 	mar_uint32_t nodeids[PROCESSOR_COUNT_MAX]  __attribute__((aligned(8)));
 };
 
-struct downlist_msg {
-	mar_uint32_t sender_nodeid;
-	mar_uint32_t old_members __attribute__((aligned(8)));
-	mar_uint32_t left_nodes __attribute__((aligned(8)));
-	mar_uint32_t nodeids[PROCESSOR_COUNT_MAX]  __attribute__((aligned(8)));
-	struct list_head list;
-};
-
 struct joinlist_msg {
 	mar_uint32_t sender_nodeid;
 	uint32_t pid;
@@ -566,8 +549,6 @@ static void cpg_sync_init (
 	last_sync_ring_id.nodeid = ring_id->rep.nodeid;
 	last_sync_ring_id.seq = ring_id->seq;
 
-	downlist_state = CPG_DOWNLIST_WAITING_FOR_MESSAGES;
-
 	entries = 0;
 	/*
 	 * Determine list of nodeids for downlist message
@@ -611,14 +592,10 @@ static void cpg_sync_activate (void)
 		my_member_list_entries * sizeof (unsigned int));
 	my_old_member_list_entries = my_member_list_entries;
 
-	if (downlist_state == CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
-		downlist_master_choose_and_send ();
-	}
+	downlist_inform_clients ();
 
 	joinlist_inform_clients ();
 
-	downlist_messages_delete ();
-	downlist_state = CPG_DOWNLIST_NONE;
 	joinlist_messages_delete ();
 
 	notify_lib_totem_membership (NULL, my_member_list_entries, my_member_list);
@@ -626,8 +603,7 @@ static void cpg_sync_activate (void)
 
 static void cpg_sync_abort (void)
 {
-	downlist_state = CPG_DOWNLIST_NONE;
-	downlist_messages_delete ();
+
 	joinlist_messages_delete ();
 }
 
@@ -800,76 +776,17 @@ static int notify_lib_joinlist(
 	return CS_OK;
 }
 
-static void downlist_log(const char *msg, struct downlist_msg* dl)
+static void downlist_log(const char *msg, struct req_exec_cpg_downlist *dl)
 {
 	log_printf (LOG_DEBUG,
-		    "%s: sender %s; members(old:%d left:%d)",
+		    "%s: members(old:%d left:%d)",
 		    msg,
-		    api->totem_ifaces_print(dl->sender_nodeid),
 		    dl->old_members,
 		    dl->left_nodes);
 }
 
-static struct downlist_msg* downlist_master_choose (void)
+static void downlist_inform_clients (void)
 {
-	struct downlist_msg *cmp;
-	struct downlist_msg *best = NULL;
-	struct list_head *iter;
-	uint32_t cmp_members;
-	uint32_t best_members;
-	uint32_t i;
-	int ignore_msg;
-
-	for (iter = downlist_messages_head.next;
-		iter != &downlist_messages_head;
-		iter = iter->next) {
-
-		cmp = list_entry(iter, struct downlist_msg, list);
-		downlist_log("comparing", cmp);
-
-		ignore_msg = 0;
-		for (i = 0; i < cmp->left_nodes; i++) {
-			if (cmp->nodeids[i] == api->totem_nodeid_get()) {
-				log_printf (LOG_DEBUG, "Ignoring this entry because I'm in the left list\n");
-
-				ignore_msg = 1;
-				break;
-			}
-		}
-
-		if (ignore_msg) {
-			continue ;
-		}
-
-		if (best == NULL) {
-			best = cmp;
-			continue;
-		}
-
-		best_members = best->old_members - best->left_nodes;
-		cmp_members = cmp->old_members - cmp->left_nodes;
-
-		if (cmp_members > best_members) {
-			best = cmp;
-		} else if (cmp_members == best_members) {
-			if (cmp->old_members > best->old_members) {
-				best = cmp;
-			} else if (cmp->old_members == best->old_members) {
-				if (cmp->sender_nodeid < best->sender_nodeid) {
-					best = cmp;
-				}
-			}
-		}
-	}
-
-	assert (best != NULL);
-
-	return best;
-}
-
-static void downlist_master_choose_and_send (void)
-{
-	struct downlist_msg *stored_msg;
 	struct list_head *iter;
 	struct process_info *left_pi;
 	qb_map_t *group_map;
@@ -884,14 +801,7 @@ static void downlist_master_choose_and_send (void)
 	qb_map_iter_t *miter;
 	int i, size;
 
-	downlist_state = CPG_DOWNLIST_APPLYING;
-
-	stored_msg = downlist_master_choose ();
-	if (!stored_msg) {
-		log_printf (LOGSYS_LEVEL_DEBUG, "NO chosen downlist");
-		return;
-	}
-	downlist_log("chosen downlist", stored_msg);
+	downlist_log("my downlist", &g_req_exec_cpg_downlist);
 
 	group_map = qb_skiplist_create();
 
@@ -905,9 +815,9 @@ static void downlist_master_choose_and_send (void)
 		iter = iter->next;
 
 		left_pi = NULL;
-		for (i = 0; i < stored_msg->left_nodes; i++) {
+		for (i = 0; i < g_req_exec_cpg_downlist.left_nodes; i++) {
 
-			if (pi->nodeid == stored_msg->nodeids[i]) {
+			if (pi->nodeid == g_req_exec_cpg_downlist.nodeids[i]) {
 				left_pi = pi;
 				break;
 			}
@@ -1039,23 +949,6 @@ static void joinlist_inform_clients (void)
 	joinlist_remove_zombie_pi_entries ();
 }
 
-static void downlist_messages_delete (void)
-{
-	struct downlist_msg *stored_msg;
-	struct list_head *iter, *iter_next;
-
-	for (iter = downlist_messages_head.next;
-		iter != &downlist_messages_head;
-		iter = iter_next) {
-
-		iter_next = iter->next;
-
-		stored_msg = list_entry(iter, struct downlist_msg, list);
-		list_del (&stored_msg->list);
-		free (stored_msg);
-	}
-}
-
 static void joinlist_messages_delete (void)
 {
 	struct joinlist_msg *stored_msg;
@@ -1076,7 +969,6 @@ static void joinlist_messages_delete (void)
 
 static char *cpg_exec_init_fn (struct corosync_api_v1 *corosync_api)
 {
-	list_init (&downlist_messages_head);
 	list_init (&joinlist_messages_head);
 	api = corosync_api;
 	return (NULL);
@@ -1338,43 +1230,9 @@ static void message_handler_req_exec_cpg_downlist(
 	unsigned int nodeid)
 {
 	const struct req_exec_cpg_downlist *req_exec_cpg_downlist = message;
-	int i;
-	struct list_head *iter;
-	struct downlist_msg *stored_msg;
-	int found;
-
-	if (downlist_state != CPG_DOWNLIST_WAITING_FOR_MESSAGES) {
-		log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received in state %d",
-			req_exec_cpg_downlist->left_nodes, downlist_state);
-		return;
-	}
-
-	stored_msg = malloc (sizeof (struct downlist_msg));
-	stored_msg->sender_nodeid = nodeid;
-	stored_msg->old_members = req_exec_cpg_downlist->old_members;
-	stored_msg->left_nodes = req_exec_cpg_downlist->left_nodes;
-	memcpy (stored_msg->nodeids, req_exec_cpg_downlist->nodeids,
-		req_exec_cpg_downlist->left_nodes * sizeof (mar_uint32_t));
-	list_init (&stored_msg->list);
-	list_add (&stored_msg->list, &downlist_messages_head);
-
-	for (i = 0; i < my_member_list_entries; i++) {
-		found = 0;
-		for (iter = downlist_messages_head.next;
-			iter != &downlist_messages_head;
-			iter = iter->next) {
-
-			stored_msg = list_entry(iter, struct downlist_msg, list);
-			if (my_member_list[i] == stored_msg->sender_nodeid) {
-				found = 1;
-			}
-		}
-		if (!found) {
-			return;
-		}
-	}
 
-	downlist_master_choose_and_send ();
+	log_printf (LOGSYS_LEVEL_WARNING, "downlist left_list: %d received",
+			req_exec_cpg_downlist->left_nodes);
 }