Răsfoiți Sursa

RRP: redundant ring automatic recovery

This patch automatically recovers redundant ring failures.

Please note that this patch introduces rrp_autorecovery_check_timeout
in the totem config and therefore breaks the internal ABI. Internal
users of totem.h need to rebuild their binaries.

Signed-off-by: Jiaju Zhang <jjzhang@suse.de>
Signed-off-by: Steven Dake <sdake@redhat.com>
Tested-by: Jan Friesse <jfriesse@redhat.com>
Tested-by: Florian Haas <florian.haas@linbit.com>
Tested-by: Jiaju Zhang <jjzhang@suse.de>
Jiaju Zhang 14 ani în urmă
părinte
comite
5dc33c2824
6 a modificat fișierele cu 226 adăugiri și 41 ștergeri
  1. 7 0
      exec/totemconfig.c
  2. 203 39
      exec/totemrrp.c
  3. 2 1
      exec/totemrrp.h
  4. 5 1
      exec/totemsrp.c
  5. 2 0
      include/corosync/totem/totem.h
  6. 7 0
      man/corosync.conf.5

+ 7 - 0
exec/totemconfig.c

@@ -83,6 +83,7 @@
 #define RRP_PROBLEM_COUNT_TIMEOUT		2000
 #define RRP_PROBLEM_COUNT_TIMEOUT		2000
 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT	10
 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT	10
 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN		5
 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN		5
+#define RRP_AUTORECOVERY_CHECK_TIMEOUT		1000
 
 
 static char error_string_response[512];
 static char error_string_response[512];
 static struct objdb_iface_ver0 *global_objdb;
 static struct objdb_iface_ver0 *global_objdb;
@@ -212,6 +213,8 @@ static void totem_volatile_config_read (
 
 
 	objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold);
 	objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold);
 
 
+	objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout);
+
 	objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed);
 	objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed);
 
 
 	objdb_get_int (objdb,object_totem_handle, "max_network_delay", &totem_config->max_network_delay);
 	objdb_get_int (objdb,object_totem_handle, "max_network_delay", &totem_config->max_network_delay);
@@ -682,6 +685,10 @@ int totem_config_validate (
 		goto parse_error;
 		goto parse_error;
 	}
 	}
 
 
+	if (totem_config->rrp_autorecovery_check_timeout == 0) {
+		totem_config->rrp_autorecovery_check_timeout = RRP_AUTORECOVERY_CHECK_TIMEOUT;
+	}
+
 	if (strcmp (totem_config->rrp_mode, "none") == 0) {
 	if (strcmp (totem_config->rrp_mode, "none") == 0) {
 		interface_max = 1;
 		interface_max = 1;
 	}
 	}

+ 203 - 39
exec/totemrrp.c

@@ -159,7 +159,8 @@ struct rrp_algo {
 		unsigned int iface_no);
 		unsigned int iface_no);
 
 
 	void (*ring_reenable) (
 	void (*ring_reenable) (
-		struct totemrrp_instance *instance);
+		struct totemrrp_instance *instance,
+		unsigned int iface_no);
 
 
 	int (*mcast_recv_empty) (
 	int (*mcast_recv_empty) (
 		struct totemrrp_instance *instance);
 		struct totemrrp_instance *instance);
@@ -237,7 +238,13 @@ struct totemrrp_instance {
 
 
 	int processor_count;
 	int processor_count;
 
 
+	int my_nodeid;
+
 	struct totem_config *totem_config;
 	struct totem_config *totem_config;
+
+	void *deliver_fn_context[INTERFACE_MAX];
+
+	poll_timer_handle timer_active_test_ring_timeout[INTERFACE_MAX];
 };
 };
 
 
 /*
 /*
@@ -289,7 +296,8 @@ static void none_token_target_set (
 	unsigned int iface_no);
 	unsigned int iface_no);
 
 
 static void none_ring_reenable (
 static void none_ring_reenable (
-	struct totemrrp_instance *instance);
+	struct totemrrp_instance *instance,
+	unsigned int iface_no);
 
 
 static int none_mcast_recv_empty (
 static int none_mcast_recv_empty (
 	struct totemrrp_instance *instance);
 	struct totemrrp_instance *instance);
@@ -356,7 +364,8 @@ static void passive_token_target_set (
 	unsigned int iface_no);
 	unsigned int iface_no);
 
 
 static void passive_ring_reenable (
 static void passive_ring_reenable (
-	struct totemrrp_instance *instance);
+	struct totemrrp_instance *instance,
+	unsigned int iface_no);
 
 
 static int passive_mcast_recv_empty (
 static int passive_mcast_recv_empty (
 	struct totemrrp_instance *instance);
 	struct totemrrp_instance *instance);
@@ -423,7 +432,8 @@ static void active_token_target_set (
 	unsigned int iface_no);
 	unsigned int iface_no);
 
 
 static void active_ring_reenable (
 static void active_ring_reenable (
-	struct totemrrp_instance *instance);
+	struct totemrrp_instance *instance,
+	unsigned int iface_no);
 
 
 static int active_mcast_recv_empty (
 static int active_mcast_recv_empty (
 	struct totemrrp_instance *instance);
 	struct totemrrp_instance *instance);
@@ -450,6 +460,28 @@ static void active_timer_problem_decrementer_start (
 static void active_timer_problem_decrementer_cancel (
 static void active_timer_problem_decrementer_cancel (
 	struct active_instance *active_instance);
 	struct active_instance *active_instance);
 
 
+/*
+ * 0-5 reserved for totemsrp.c
+ */
+#define MESSAGE_TYPE_RING_TEST_ACTIVE		6
+#define MESSAGE_TYPE_RING_TEST_ACTIVATE		7
+
+#define ENDIAN_LOCAL				0xff22
+
+struct message_header {
+	char type;
+	char encapsulated;
+	unsigned short endian_detector;
+	int ring_number;
+	int nodeid_activator;
+} __attribute__((packed));
+
+struct deliver_fn_context {
+	struct totemrrp_instance *instance;
+	void *context;
+	int iface_no;
+};
+
 struct rrp_algo none_algo = {
 struct rrp_algo none_algo = {
 	.name			= "none",
 	.name			= "none",
 	.initialize		= NULL,
 	.initialize		= NULL,
@@ -522,6 +554,47 @@ do {									\
 		format, ##args);					\
 		format, ##args);					\
 } while (0);
 } while (0);
 
 
+static void test_active_msg_endian_convert(const struct message_header *in, struct message_header *out)
+{
+	out->type = in->type;
+	out->encapsulated = in->encapsulated;
+	out->endian_detector = ENDIAN_LOCAL;
+	out->ring_number = swab32 (in->ring_number);
+	out->nodeid_activator = swab32(in->nodeid_activator);
+}
+
+static void timer_function_test_ring_timeout (void *context)
+{
+	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
+	struct totemrrp_instance *rrp_instance = deliver_fn_context->instance;
+	unsigned int *faulty = NULL;
+	int iface_no = deliver_fn_context->iface_no;
+	struct message_header msg = {
+		.type = MESSAGE_TYPE_RING_TEST_ACTIVE,
+		.endian_detector = ENDIAN_LOCAL,
+	};
+
+	if (strcmp(rrp_instance->totem_config->rrp_mode, "active") == 0)
+		faulty = ((struct active_instance *)(rrp_instance->rrp_algo_instance))->faulty;
+	if (strcmp(rrp_instance->totem_config->rrp_mode, "passive") == 0)
+		faulty = ((struct passive_instance *)(rrp_instance->rrp_algo_instance))->faulty;
+
+	assert (faulty != NULL);
+
+	if (faulty[iface_no] == 1) {
+		msg.ring_number = iface_no;
+		msg.nodeid_activator = rrp_instance->my_nodeid;
+		totemnet_token_send (
+			rrp_instance->net_handles[iface_no],
+			&msg, sizeof (struct message_header));
+		poll_timer_add (rrp_instance->poll_handle,
+			rrp_instance->totem_config->rrp_autorecovery_check_timeout,
+			(void *)deliver_fn_context,
+			timer_function_test_ring_timeout,
+			&rrp_instance->timer_active_test_ring_timeout[iface_no]);
+	}
+}
+
 /*
 /*
  * None Replication Implementation
  * None Replication Implementation
  */
  */
@@ -606,7 +679,8 @@ static void none_token_target_set (
 }
 }
 
 
 static void none_ring_reenable (
 static void none_ring_reenable (
-	struct totemrrp_instance *instance)
+	struct totemrrp_instance *instance,
+	unsigned int iface_no)
 {
 {
 	/*
 	/*
 	 * No operation
 	 * No operation
@@ -797,8 +871,14 @@ static void passive_mcast_recv (
 			(max - passive_instance->mcast_recv_count[i] >
 			(max - passive_instance->mcast_recv_count[i] >
 			rrp_instance->totem_config->rrp_problem_count_threshold)) {
 			rrp_instance->totem_config->rrp_problem_count_threshold)) {
 			passive_instance->faulty[i] = 1;
 			passive_instance->faulty[i] = 1;
+			poll_timer_add (rrp_instance->poll_handle,
+				rrp_instance->totem_config->rrp_autorecovery_check_timeout,
+				rrp_instance->deliver_fn_context[i],
+				timer_function_test_ring_timeout,
+				&rrp_instance->timer_active_test_ring_timeout[i]);
+
 			sprintf (rrp_instance->status[i],
 			sprintf (rrp_instance->status[i],
-				"Marking ringid %u interface %s FAULTY - administrative intervention required.",
+				"Marking ringid %u interface %s FAULTY",
 				i,
 				i,
 				totemnet_iface_print (rrp_instance->net_handles[i]));
 				totemnet_iface_print (rrp_instance->net_handles[i]));
 			log_printf (
 			log_printf (
@@ -880,8 +960,14 @@ static void passive_token_recv (
 			(max - passive_instance->token_recv_count[i] >
 			(max - passive_instance->token_recv_count[i] >
 			rrp_instance->totem_config->rrp_problem_count_threshold)) {
 			rrp_instance->totem_config->rrp_problem_count_threshold)) {
 			passive_instance->faulty[i] = 1;
 			passive_instance->faulty[i] = 1;
+			poll_timer_add (rrp_instance->poll_handle,
+				rrp_instance->totem_config->rrp_autorecovery_check_timeout,
+				rrp_instance->deliver_fn_context[i],
+				timer_function_test_ring_timeout,
+				&rrp_instance->timer_active_test_ring_timeout[i]);
+
 			sprintf (rrp_instance->status[i],
 			sprintf (rrp_instance->status[i],
-				"Marking seqid %d ringid %u interface %s FAULTY - administrative intervention required.",
+				"Marking seqid %d ringid %u interface %s FAULTY",
 				token_seq,
 				token_seq,
 				i,
 				i,
 				totemnet_iface_print (rrp_instance->net_handles[i]));
 				totemnet_iface_print (rrp_instance->net_handles[i]));
@@ -1002,7 +1088,8 @@ static int passive_member_remove (
 
 
 
 
 static void passive_ring_reenable (
 static void passive_ring_reenable (
-	struct totemrrp_instance *instance)
+	struct totemrrp_instance *instance,
+	unsigned int iface_no)
 {
 {
 	struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance;
 	struct passive_instance *rrp_algo_instance = (struct passive_instance *)instance->rrp_algo_instance;
 
 
@@ -1010,8 +1097,13 @@ static void passive_ring_reenable (
 		instance->interface_count);
 		instance->interface_count);
 	memset (rrp_algo_instance->token_recv_count, 0, sizeof (unsigned int) *
 	memset (rrp_algo_instance->token_recv_count, 0, sizeof (unsigned int) *
 		instance->interface_count);
 		instance->interface_count);
-	memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) *
-		instance->interface_count);
+
+	if (iface_no == instance->interface_count) {
+		memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) *
+			instance->interface_count);
+	} else {
+		rrp_algo_instance->faulty[iface_no] = 0;
+	}
 }
 }
 
 
 /*
 /*
@@ -1128,8 +1220,14 @@ static void timer_function_active_token_expired (void *context)
 		if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold)
 		if (active_instance->counter_problems[i] >= rrp_instance->totem_config->rrp_problem_count_threshold)
 		{
 		{
 			active_instance->faulty[i] = 1;
 			active_instance->faulty[i] = 1;
+			poll_timer_add (rrp_instance->poll_handle,
+				rrp_instance->totem_config->rrp_autorecovery_check_timeout,
+				rrp_instance->deliver_fn_context[i],
+				timer_function_test_ring_timeout,
+				&rrp_instance->timer_active_test_ring_timeout[i]);
+
 			sprintf (rrp_instance->status[i],
 			sprintf (rrp_instance->status[i],
-				"Marking seqid %d ringid %u interface %s FAULTY - administrative intervention required.",
+				"Marking seqid %d ringid %u interface %s FAULTY",
 				active_instance->last_token_seq,
 				active_instance->last_token_seq,
 				i,
 				i,
 				totemnet_iface_print (rrp_instance->net_handles[i]));
 				totemnet_iface_print (rrp_instance->net_handles[i]));
@@ -1233,7 +1331,7 @@ static void active_mcast_noflush_send (
 }
 }
 
 
 static void active_token_recv (
 static void active_token_recv (
-	struct totemrrp_instance *instance,
+	struct totemrrp_instance *rrp_instance,
 	unsigned int iface_no,
 	unsigned int iface_no,
 	void *context,
 	void *context,
 	const void *msg,
 	const void *msg,
@@ -1241,13 +1339,13 @@ static void active_token_recv (
 	unsigned int token_seq)
 	unsigned int token_seq)
 {
 {
 	int i;
 	int i;
-	struct active_instance *active_instance = (struct active_instance *)instance->rrp_algo_instance;
+	struct active_instance *active_instance = (struct active_instance *)rrp_instance->rrp_algo_instance;
 
 
-	active_instance->totemrrp_context = context; // this should be in totemrrp_instance ?
+	active_instance->totemrrp_context = context;
 	if (token_seq > active_instance->last_token_seq) {
 	if (token_seq > active_instance->last_token_seq) {
 		memcpy (active_instance->token, msg, msg_len);
 		memcpy (active_instance->token, msg, msg_len);
 		active_instance->token_len = msg_len;
 		active_instance->token_len = msg_len;
-		for (i = 0; i < instance->interface_count; i++) {
+		for (i = 0; i < rrp_instance->interface_count; i++) {
 			active_instance->last_token_recv[i] = 0;
 			active_instance->last_token_recv[i] = 0;
 		}
 		}
 
 
@@ -1259,7 +1357,7 @@ static void active_token_recv (
 
 
 	if (token_seq == active_instance->last_token_seq) {
 	if (token_seq == active_instance->last_token_seq) {
 		active_instance->last_token_recv[iface_no] = 1;
 		active_instance->last_token_recv[iface_no] = 1;
-		for (i = 0; i < instance->interface_count; i++) {
+		for (i = 0; i < rrp_instance->interface_count; i++) {
 			if ((active_instance->last_token_recv[i] == 0) &&
 			if ((active_instance->last_token_recv[i] == 0) &&
 				active_instance->faulty[i] == 0) {
 				active_instance->faulty[i] == 0) {
 				return; /* don't deliver token */
 				return; /* don't deliver token */
@@ -1267,7 +1365,7 @@ static void active_token_recv (
 		}
 		}
 		active_timer_expired_token_cancel (active_instance);
 		active_timer_expired_token_cancel (active_instance);
 
 
-		instance->totemrrp_deliver_fn (
+		rrp_instance->totemrrp_deliver_fn (
 			context,
 			context,
 			msg,
 			msg,
 			msg_len);
 			msg_len);
@@ -1383,24 +1481,25 @@ static int active_mcast_recv_empty (
 }
 }
 
 
 static void active_ring_reenable (
 static void active_ring_reenable (
-	struct totemrrp_instance *instance)
+	struct totemrrp_instance *instance,
+	unsigned int iface_no)
 {
 {
 	struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
 	struct active_instance *rrp_algo_instance = (struct active_instance *)instance->rrp_algo_instance;
 
 
-	memset (rrp_algo_instance->last_token_recv, 0, sizeof (unsigned int) *
-		instance->interface_count);
-	memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) *
-		instance->interface_count);
-	memset (rrp_algo_instance->counter_problems, 0, sizeof (unsigned int) *
-		instance->interface_count);
+	if (iface_no == instance->interface_count) {
+		memset (rrp_algo_instance->last_token_recv, 0, sizeof (unsigned int) *
+			instance->interface_count);
+		memset (rrp_algo_instance->faulty, 0, sizeof (unsigned int) *
+			instance->interface_count);
+		memset (rrp_algo_instance->counter_problems, 0, sizeof (unsigned int) *
+			instance->interface_count);
+	} else {
+		rrp_algo_instance->last_token_recv[iface_no] = 0;
+		rrp_algo_instance->faulty[iface_no] = 0;
+		rrp_algo_instance->counter_problems[iface_no] = 0;
+	}
 }
 }
 
 
-struct deliver_fn_context {
-	struct totemrrp_instance *instance;
-	void *context;
-	int iface_no;
-};
-
 static void totemrrp_instance_initialize (struct totemrrp_instance *instance)
 static void totemrrp_instance_initialize (struct totemrrp_instance *instance)
 {
 {
 	memset (instance, 0, sizeof (struct totemrrp_instance));
 	memset (instance, 0, sizeof (struct totemrrp_instance));
@@ -1441,18 +1540,71 @@ void rrp_deliver_fn (
 	unsigned int token_is;
 	unsigned int token_is;
 
 
 	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
 	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
+	struct totemrrp_instance *rrp_instance = deliver_fn_context->instance;
+	const struct message_header *hdr = msg;
+	struct message_header tmp_msg, activate_msg;
 
 
-	deliver_fn_context->instance->totemrrp_token_seqid_get (
+	memset(&tmp_msg, 0, sizeof(struct message_header));
+	memset(&activate_msg, 0, sizeof(struct message_header));
+
+	rrp_instance->totemrrp_token_seqid_get (
 		msg,
 		msg,
 		&token_seqid,
 		&token_seqid,
 		&token_is);
 		&token_is);
 
 
+	if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVE) {
+		log_printf (
+			rrp_instance->totemrrp_log_level_debug,
+			"received message requesting test of ring now active\n");
+
+		if (hdr->endian_detector != ENDIAN_LOCAL) {
+			test_active_msg_endian_convert(hdr, &tmp_msg);
+			hdr = &tmp_msg;
+		}
+
+		if (hdr->nodeid_activator == rrp_instance->my_nodeid) {
+			/*
+			 * Send an activate message
+			 */
+			activate_msg.type = MESSAGE_TYPE_RING_TEST_ACTIVATE;
+			activate_msg.endian_detector = ENDIAN_LOCAL;
+			activate_msg.ring_number = hdr->ring_number;
+			activate_msg.nodeid_activator = rrp_instance->my_nodeid;
+			totemnet_token_send (
+				rrp_instance->net_handles[deliver_fn_context->iface_no],
+				&activate_msg, sizeof (struct message_header));
+		} else {
+			/*
+			 * Send a ring test message
+			 */
+			totemnet_token_send (
+				rrp_instance->net_handles[deliver_fn_context->iface_no],
+				msg, msg_len);
+		}
+	} else 
+	if (hdr->type == MESSAGE_TYPE_RING_TEST_ACTIVATE) {
+		log_printf (
+			rrp_instance->totemrrp_log_level_notice,
+			"Automatically recovered ring %d\n", hdr->ring_number);
+
+		if (hdr->endian_detector != ENDIAN_LOCAL) {
+			test_active_msg_endian_convert(hdr, &tmp_msg);
+			hdr = &tmp_msg;
+		}
+
+		totemrrp_ring_reenable (rrp_instance, deliver_fn_context->iface_no);
+		if (hdr->nodeid_activator != rrp_instance->my_nodeid) {
+			totemnet_token_send (
+				rrp_instance->net_handles[deliver_fn_context->iface_no],
+				msg, msg_len);
+		}
+	} else 
 	if (token_is) {
 	if (token_is) {
 		/*
 		/*
 		 * Deliver to the token receiver for this rrp algorithm
 		 * Deliver to the token receiver for this rrp algorithm
 		 */
 		 */
-		deliver_fn_context->instance->rrp_algo->token_recv (
-			deliver_fn_context->instance,
+		rrp_instance->rrp_algo->token_recv (
+			rrp_instance,
 			deliver_fn_context->iface_no,
 			deliver_fn_context->iface_no,
 			deliver_fn_context->context,
 			deliver_fn_context->context,
 			msg,
 			msg,
@@ -1462,8 +1614,8 @@ void rrp_deliver_fn (
 		/*
 		/*
 		 * Deliver to the mcast receiver for this rrp algorithm
 		 * Deliver to the mcast receiver for this rrp algorithm
 		 */
 		 */
-		deliver_fn_context->instance->rrp_algo->mcast_recv (
-			deliver_fn_context->instance,
+		rrp_instance->rrp_algo->mcast_recv (
+			rrp_instance,
 			deliver_fn_context->iface_no,
 			deliver_fn_context->iface_no,
 			deliver_fn_context->context,
 			deliver_fn_context->context,
 			msg,
 			msg,
@@ -1477,6 +1629,7 @@ void rrp_iface_change_fn (
 {
 {
 	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
 	struct deliver_fn_context *deliver_fn_context = (struct deliver_fn_context *)context;
 
 
+	deliver_fn_context->instance->my_nodeid = iface_addr->nodeid;
 	deliver_fn_context->instance->totemrrp_iface_change_fn (
 	deliver_fn_context->instance->totemrrp_iface_change_fn (
 		deliver_fn_context->context,
 		deliver_fn_context->context,
 		iface_addr,
 		iface_addr,
@@ -1597,6 +1750,7 @@ int totemrrp_initialize (
 		deliver_fn_context->instance = instance;
 		deliver_fn_context->instance = instance;
 		deliver_fn_context->context = context;
 		deliver_fn_context->context = context;
 		deliver_fn_context->iface_no = i;
 		deliver_fn_context->iface_no = i;
+		instance->deliver_fn_context[i] = (void *)deliver_fn_context;
 
 
 		totemnet_initialize (
 		totemnet_initialize (
 			poll_handle,
 			poll_handle,
@@ -1746,17 +1900,27 @@ int totemrrp_crypto_set (
 }
 }
 
 
 
 
+/*
+ * iface_no is the interface number [0, ..., interface_count-1] of the
+ * specific ring to reenable. Passing iface_no == interface_count
+ * reenables all the rings.
+ */
 int totemrrp_ring_reenable (
 int totemrrp_ring_reenable (
-        void *rrp_context)
+        void *rrp_context,
+	unsigned int iface_no)
 {
 {
 	struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
 	struct totemrrp_instance *instance = (struct totemrrp_instance *)rrp_context;
 	int res = 0;
 	int res = 0;
 	unsigned int i;
 	unsigned int i;
 
 
-	instance->rrp_algo->ring_reenable (instance);
+	instance->rrp_algo->ring_reenable (instance, iface_no);
 
 
-	for (i = 0; i < instance->interface_count; i++) {
-		sprintf (instance->status[i], "ring %d active with no faults", i);
+	if (iface_no == instance->interface_count) {
+		for (i = 0; i < instance->interface_count; i++) {
+			sprintf (instance->status[i], "ring %d active with no faults", i);
+		}
+	} else {
+		sprintf (instance->status[iface_no], "ring %d active with no faults", iface_no);
 	}
 	}
 
 
 	return (res);
 	return (res);

+ 2 - 1
exec/totemrrp.h

@@ -129,7 +129,8 @@ extern int totemrrp_crypto_set (
 	unsigned int type);
 	unsigned int type);
 
 
 extern int totemrrp_ring_reenable (
 extern int totemrrp_ring_reenable (
-	void *rrp_context);
+	void *rrp_context,
+	unsigned int iface_no);
 
 
 extern int totemrrp_mcast_recv_empty (
 extern int totemrrp_mcast_recv_empty (
 	void *rrp_context);
 	void *rrp_context);

+ 5 - 1
exec/totemsrp.c

@@ -863,6 +863,9 @@ int totemsrp_initialize (
 	log_printf (instance->totemsrp_log_level_debug,
 	log_printf (instance->totemsrp_log_level_debug,
 		"RRP threshold (%d problem count)\n",
 		"RRP threshold (%d problem count)\n",
 		totem_config->rrp_problem_count_threshold);
 		totem_config->rrp_problem_count_threshold);
+	log_printf (instance->totemsrp_log_level_debug,
+		"RRP automatic recovery check timeout (%d ms)\n",
+		totem_config->rrp_autorecovery_check_timeout);
 	log_printf (instance->totemsrp_log_level_debug,
 	log_printf (instance->totemsrp_log_level_debug,
 		"RRP mode set to %s.\n", instance->totem_config->rrp_mode);
 		"RRP mode set to %s.\n", instance->totem_config->rrp_mode);
 
 
@@ -1054,7 +1057,8 @@ int totemsrp_ring_reenable (
 {
 {
 	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
 	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
 
 
-	totemrrp_ring_reenable (instance->totemrrp_context);
+	totemrrp_ring_reenable (instance->totemrrp_context,
+		instance->totem_config->interface_count);
 
 
 	return (0);
 	return (0);
 }
 }

+ 2 - 0
include/corosync/totem/totem.h

@@ -142,6 +142,8 @@ struct totem_config {
 
 
 	unsigned int rrp_problem_count_threshold;
 	unsigned int rrp_problem_count_threshold;
 
 
+	unsigned int rrp_autorecovery_check_timeout;
+
 	char rrp_mode[TOTEM_RRP_MODE_BYTES];
 	char rrp_mode[TOTEM_RRP_MODE_BYTES];
 
 
 	struct totem_logging_configuration totem_logging_configuration;
 	struct totem_logging_configuration totem_logging_configuration;

+ 7 - 0
man/corosync.conf.5

@@ -483,6 +483,13 @@ override this value without guidance from the corosync community.
 
 
 The default is 47 milliseconds.
 The default is 47 milliseconds.
 
 
+.TP
+rrp_autorecovery_check_timeout
+This specifies the interval in milliseconds at which a failed ring is checked
+to see whether it can be automatically recovered.
+
+The default is 1000 milliseconds.
+
 .PP
 .PP
 Within the
 Within the
 .B logging
 .B logging