Просмотр исходного кода

Restore the "Totem is unable to form..." message

This patch restores the SUBJ functionality. It relies on the fact that
sendmsg will return an error, and if such an error is returned for a long
time, it is probably because of a firewall.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto@redhat.com>
(backported to flatiron)
Jan Friesse 13 лет назад
Родитель
Commit
b343668bbf
13 измененных файлов с 47 добавлено и 6 удалено
  1. 14 5
      exec/main.c
  2. 4 0
      exec/totemiba.c
  3. 1 0
      exec/totemiba.h
  4. 3 1
      exec/totemnet.c
  5. 1 0
      exec/totemnet.h
  6. 2 0
      exec/totemrrp.c
  7. 1 0
      exec/totemrrp.h
  8. 1 0
      exec/totemsrp.c
  9. 8 0
      exec/totemudp.c
  10. 1 0
      exec/totemudp.h
  11. 5 0
      exec/totemudpu.c
  12. 1 0
      exec/totemudpu.h
  13. 5 0
      include/corosync/totem/totem.h

+ 14 - 5
exec/main.c

@@ -640,20 +640,26 @@ static void corosync_totem_stats_updater (void *data)
 	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
 	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
 		"continuous_gather", strlen("continuous_gather"),
 		"continuous_gather", strlen("continuous_gather"),
 		&stats->mrp->srp->continuous_gather, sizeof (stats->mrp->srp->continuous_gather));
 		&stats->mrp->srp->continuous_gather, sizeof (stats->mrp->srp->continuous_gather));
-
-	firewall_enabled_or_nic_failure = (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
 	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
 	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
-		"firewall_enabled_or_nic_failure", strlen("firewall_enabled_or_nic_failure"),
-		&firewall_enabled_or_nic_failure, sizeof (firewall_enabled_or_nic_failure));
+		"continuous_sendmsg_failures", strlen("continuous_sendmsg_failures"),
+		&stats->mrp->srp->continuous_sendmsg_failures, sizeof (stats->mrp->srp->continuous_sendmsg_failures));
 
 
-	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER) {
+	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
+	    stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
 		log_printf (LOGSYS_LEVEL_WARNING,
 		log_printf (LOGSYS_LEVEL_WARNING,
 			"Totem is unable to form a cluster because of an "
 			"Totem is unable to form a cluster because of an "
 			"operating system or network fault. The most common "
 			"operating system or network fault. The most common "
 			"cause of this message is that the local firewall is "
 			"cause of this message is that the local firewall is "
 			"configured improperly.");
 			"configured improperly.");
+		firewall_enabled_or_nic_failure = 1;
+	} else {
+		firewall_enabled_or_nic_failure = 0;
 	}
 	}
 
 
+	objdb->object_key_replace (stats->mrp->srp->hdr.handle,
+		"firewall_enabled_or_nic_failure", strlen("firewall_enabled_or_nic_failure"),
+		&firewall_enabled_or_nic_failure, sizeof (firewall_enabled_or_nic_failure));
+
 	total_mtt_rx_token = 0;
 	total_mtt_rx_token = 0;
 	total_token_holdtime = 0;
 	total_token_holdtime = 0;
 	total_backlog_calc = 0;
 	total_backlog_calc = 0;
@@ -821,6 +827,9 @@ static void corosync_totem_stats_init (void)
 		objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
 		objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
 			"continuous_gather", &zero_32,
 			"continuous_gather", &zero_32,
 			sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
 			sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
+		objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
+			"continuous_sendmsg_failures", &zero_32,
+			sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
 		objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
 		objdb->object_key_create_typed (stats->mrp->srp->hdr.handle,
 			"firewall_enabled_or_nic_failure", &zero_32,
 			"firewall_enabled_or_nic_failure", &zero_32,
 			sizeof (zero_32), OBJDB_VALUETYPE_UINT32);
 			sizeof (zero_32), OBJDB_VALUETYPE_UINT32);

+ 4 - 0
exec/totemiba.c

@@ -97,6 +97,8 @@ struct totemiba_instance {
 
 
 	struct totem_config *totem_config;
 	struct totem_config *totem_config;
 
 
+	totemsrp_stats_t *stats;
+
 	void (*totemiba_iface_change_fn) (
 	void (*totemiba_iface_change_fn) (
 		void *context,
 		void *context,
 		const struct totem_ip_address *iface_address);
 		const struct totem_ip_address *iface_address);
@@ -1267,6 +1269,7 @@ int totemiba_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **iba_context,
 	void **iba_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 
@@ -1305,6 +1308,7 @@ int totemiba_initialize (
 	instance->totemiba_iface_change_fn = iface_change_fn;
 	instance->totemiba_iface_change_fn = iface_change_fn;
 
 
 	instance->totem_config = totem_config;
 	instance->totem_config = totem_config;
+	instance->stats = stats;
 
 
 	instance->rrp_context = context;
 	instance->rrp_context = context;
 
 

+ 1 - 0
exec/totemiba.h

@@ -47,6 +47,7 @@ extern int totemiba_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **iba_handle,
 	void **iba_handle,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 3 - 1
exec/totemnet.c

@@ -52,6 +52,7 @@ struct transport {
 		hdb_handle_t poll_handle,
 		hdb_handle_t poll_handle,
 		void **transport_instance,
 		void **transport_instance,
 		struct totem_config *totem_config,
 		struct totem_config *totem_config,
+		totemsrp_stats_t *stats,
 		int interface_no,
 		int interface_no,
 		void *context,
 		void *context,
 
 
@@ -256,6 +257,7 @@ int totemnet_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **net_context,
 	void **net_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 
@@ -281,7 +283,7 @@ int totemnet_initialize (
 	totemnet_instance_initialize (instance, totem_config);
 	totemnet_instance_initialize (instance, totem_config);
 
 
 	res = instance->transport->initialize (poll_handle,
 	res = instance->transport->initialize (poll_handle,
-		&instance->transport_context, totem_config,
+		&instance->transport_context, totem_config, stats,
 		interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
 		interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
 
 
 	if (res == -1) {
 	if (res == -1) {

+ 1 - 0
exec/totemnet.h

@@ -54,6 +54,7 @@ extern int totemnet_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **net_context,
 	void **net_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 2 - 0
exec/totemrrp.c

@@ -1798,6 +1798,7 @@ int totemrrp_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **rrp_context,
 	void **rrp_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	void *context,
 	void *context,
 
 
 	void (*deliver_fn) (
 	void (*deliver_fn) (
@@ -1886,6 +1887,7 @@ int totemrrp_initialize (
 			poll_handle,
 			poll_handle,
 			&instance->net_handles[i],
 			&instance->net_handles[i],
 			totem_config,
 			totem_config,
+			stats,
 			i,
 			i,
 			(void *)deliver_fn_context,
 			(void *)deliver_fn_context,
 			rrp_deliver_fn,
 			rrp_deliver_fn,

+ 1 - 0
exec/totemrrp.h

@@ -55,6 +55,7 @@ extern int totemrrp_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **rrp_context,
 	void **rrp_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	void *context,
 	void *context,
 
 
 	void (*deliver_fn) (
 	void (*deliver_fn) (

+ 1 - 0
exec/totemsrp.c

@@ -930,6 +930,7 @@ int totemsrp_initialize (
 		poll_handle,
 		poll_handle,
 		&instance->totemrrp_context,
 		&instance->totemrrp_context,
 		totem_config,
 		totem_config,
+		stats->srp,
 		instance,
 		instance,
 		main_deliver_fn,
 		main_deliver_fn,
 		main_iface_change_fn,
 		main_iface_change_fn,

+ 8 - 0
exec/totemudp.c

@@ -215,6 +215,8 @@ struct totemudp_instance {
 
 
 	struct totem_config *totem_config;
 	struct totem_config *totem_config;
 
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 	struct totem_ip_address token_target;
 };
 };
 
 
@@ -1067,6 +1069,9 @@ static inline void mcast_sendmsg (
 	if (res < 0) {
 	if (res < 0) {
 		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
 		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
 			"sendmsg(mcast) failed (non-critical)");
 			"sendmsg(mcast) failed (non-critical)");
+		instance->stats->continuous_sendmsg_failures++;
+	} else {
+		instance->stats->continuous_sendmsg_failures = 0;
 	}
 	}
 
 
 	/*
 	/*
@@ -1861,6 +1866,7 @@ int totemudp_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **udp_context,
 	void **udp_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 
@@ -1886,6 +1892,8 @@ int totemudp_initialize (
 	totemudp_instance_initialize (instance);
 	totemudp_instance_initialize (instance);
 
 
 	instance->totem_config = totem_config;
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	/*
 	* Configure logging
 	* Configure logging
 	*/
 	*/

+ 1 - 0
exec/totemudp.h

@@ -48,6 +48,7 @@ extern int totemudp_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **udp_context,
 	void **udp_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 5 - 0
exec/totemudpu.c

@@ -189,6 +189,8 @@ struct totemudpu_instance {
 
 
 	struct totem_config *totem_config;
 	struct totem_config *totem_config;
 
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 	struct totem_ip_address token_target;
 
 
 	int token_socket;
 	int token_socket;
@@ -1413,6 +1415,7 @@ int totemudpu_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **udpu_context,
 	void **udpu_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 
@@ -1438,6 +1441,8 @@ int totemudpu_initialize (
 	totemudpu_instance_initialize (instance);
 	totemudpu_instance_initialize (instance);
 
 
 	instance->totem_config = totem_config;
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	/*
 	* Configure logging
 	* Configure logging
 	*/
 	*/

+ 1 - 0
exec/totemudpu.h

@@ -48,6 +48,7 @@ extern int totemudpu_initialize (
 	hdb_handle_t poll_handle,
 	hdb_handle_t poll_handle,
 	void **udpu_context,
 	void **udpu_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 5 - 0
include/corosync/totem/totem.h

@@ -56,6 +56,10 @@
  * Maximum number of continuous gather states
  * Maximum number of continuous gather states
  */
  */
 #define MAX_NO_CONT_GATHER	3
 #define MAX_NO_CONT_GATHER	3
+/*
+ * Maximum number of continuous failures returned by the sendmsg call
+ */
+#define MAX_NO_CONT_SENDMSG_FAILURES	30
 
 
 struct totem_interface {
 struct totem_interface {
 	struct totem_ip_address bindnet;
 	struct totem_ip_address bindnet;
@@ -262,6 +266,7 @@ typedef struct {
 	uint64_t consensus_timeouts;
 	uint64_t consensus_timeouts;
 	uint64_t rx_msg_dropped;
 	uint64_t rx_msg_dropped;
 	uint32_t continuous_gather;
 	uint32_t continuous_gather;
+	uint32_t continuous_sendmsg_failures;
 
 
 	int earliest_token;
 	int earliest_token;
 	int latest_token;
 	int latest_token;