Browse Source

Display warning when not possible to form cluster

This typically happens when a local firewall is enabled. The patch adds a
new statistics item called continuous_gather, which counts how many times
the gather state has been entered consecutively. If this number is greater
than MAX_NO_CONT_GATHER, a warning message is displayed. The counter is
also checked on exit, so corosync can now be stopped even when a firewall
is enabled.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Steven Dake <sdake@redhat.com>
Jan Friesse 15 years ago
parent
commit
b9df4424b1
3 changed files with 36 additions and 0 deletions
  1. 15 0
      exec/main.c
  2. 15 0
      exec/totemsrp.c
  3. 6 0
      include/corosync/totem/totem.h

+ 15 - 0
exec/main.c

@@ -198,8 +198,17 @@ void corosync_shutdown_request (void)
 
 static void *corosync_exit_thread_handler (void *arg)
 {
+	totempg_stats_t * stats;
+
 	sem_wait (&corosync_exit_sem);
 
+	stats = api->totem_get_stats();
+	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
+	    stats->mrp->srp->operational_entered == 0) {
+		unlink_all_completed ();
+		/* NOTREACHED */
+	}
+
 	corosync_service_unlink_all (api, unlink_all_completed);
 
 	return arg;
@@ -626,6 +635,9 @@ static void corosync_totem_stats_updater (void *data)
 	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
 		"rx_msg_dropped", strlen("rx_msg_dropped"),
 		&stats->mrp->srp->rx_msg_dropped, sizeof (stats->mrp->srp->rx_msg_dropped));
+	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
+		"continuous_gather", strlen("continuous_gather"),
+		&stats->mrp->srp->continuous_gather, sizeof (stats->mrp->srp->continuous_gather));
 
 	total_mtt_rx_token = 0;
 	total_token_holdtime = 0;
@@ -784,6 +796,9 @@ static void corosync_totem_stats_init (void)
 		objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
 			"rx_msg_dropped", &zero_64,
 			sizeof (zero_64), OBJDB_VALUETYPE_UINT64);
+		objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
+			"continuous_gather", &zero_32,
+			sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
 
 	}
 	/* start stats timer */

+ 15 - 0
exec/totemsrp.c

@@ -502,6 +502,7 @@ struct totemsrp_instance {
 	struct memb_commit_token *commit_token;
 
 	totemsrp_stats_t stats;
+
 	void * token_recv_event_handle;
 	void * token_sent_event_handle;
 	char commit_token_storage[9000];
@@ -1789,6 +1790,8 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance)
 	instance->memb_state = MEMB_STATE_OPERATIONAL;
 
 	instance->stats.operational_entered++;
+	instance->stats.continuous_gather = 0;
+
 	instance->my_received_flg = 1;
 
 	reset_pause_timeout (instance);
@@ -1853,6 +1856,15 @@ static void memb_state_gather_enter (
 
 	instance->memb_state = MEMB_STATE_GATHER;
 	instance->stats.gather_entered++;
+	instance->stats.continuous_gather++;
+
+	if (instance->stats.continuous_gather > MAX_NO_CONT_GATHER) {
+		log_printf (instance->totemsrp_log_level_warning,
+			"Totem is unable to form a cluster because of an "
+			"operating system or network fault. The most common "
+			"cause of this message is that the local firewall is "
+			"configured improperly.\n");
+	}
 
 	return;
 }
@@ -1897,6 +1909,7 @@ static void memb_state_commit_enter (
 	reset_token_timeout (instance); // REVIEWED
 
 	instance->stats.commit_entered++;
+	instance->stats.continuous_gather = 0;
 
 	/*
 	 * reset all flow control variables since we are starting a new ring
@@ -2093,6 +2106,8 @@ originated:
 
 	instance->memb_state = MEMB_STATE_RECOVERY;
 	instance->stats.recovery_entered++;
+	instance->stats.continuous_gather = 0;
+
 	return;
 }
 

+ 6 - 0
include/corosync/totem/totem.h

@@ -52,6 +52,11 @@
 #define SEND_THREADS_MAX	16
 #define INTERFACE_MAX		2
 
+/*
+ * Maximum number of continuous gather states
+ */
+#define MAX_NO_CONT_GATHER	3
+
 struct totem_interface {
 	struct totem_ip_address bindnet;
 	struct totem_ip_address boundto;
@@ -250,6 +255,7 @@ typedef struct {
 	uint64_t recovery_token_lost;
 	uint64_t consensus_timeouts;
 	uint64_t rx_msg_dropped;
+	uint32_t continuous_gather;
 
 	int earliest_token;
 	int latest_token;