Parcourir la source

Return back "Totem is unable to form..." message

This patch restores the functionality named in the subject. It relies on the
fact that sendmsg will return an error, and if such an error keeps being
returned for a long time, it is probably because of a firewall.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto@redhat.com>
Jan Friesse il y a 13 ans
Parent
commit
b7635ab9f7
11 fichiers modifiés avec 37 ajouts et 2 suppressions
  1. 8 1
      exec/main.c
  2. 3 0
      exec/totemiba.c
  3. 1 0
      exec/totemiba.h
  4. 3 1
      exec/totemnet.c
  5. 1 0
      exec/totemnet.h
  6. 1 0
      exec/totemrrp.c
  7. 8 0
      exec/totemudp.c
  8. 1 0
      exec/totemudp.h
  9. 5 0
      exec/totemudpu.c
  10. 1 0
      exec/totemudpu.h
  11. 5 0
      include/corosync/totem/totem.h

+ 8 - 1
exec/main.c

@@ -491,15 +491,22 @@ static void corosync_totem_stats_updater (void *data)
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
 	icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
+	icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures",
+	    stats->mrp->srp->continuous_sendmsg_failures);
+
 	icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
 		stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
 
-	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER) {
+	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
+	    stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
 		log_printf (LOGSYS_LEVEL_WARNING,
 			"Totem is unable to form a cluster because of an "
 			"operating system or network fault. The most common "
 			"cause of this message is that the local firewall is "
 			"configured improperly.");
+		icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
+	} else {
+		icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
 	}
 
 	for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {

+ 3 - 0
exec/totemiba.c

@@ -98,6 +98,8 @@ struct totemiba_instance {
 
 	struct totem_config *totem_config;
 
+	totemsrp_stats_t *stats;
+
 	void (*totemiba_iface_change_fn) (
 		void *context,
 		const struct totem_ip_address *iface_address);
@@ -1320,6 +1322,7 @@ int totemiba_initialize (
 	instance->totemiba_iface_change_fn = iface_change_fn;
 
 	instance->totem_config = totem_config;
+	instance->stats = stats;
 
 	instance->rrp_context = context;
 

+ 1 - 0
exec/totemiba.h

@@ -48,6 +48,7 @@ extern int totemiba_initialize (
 	qb_loop_t* qb_poll_handle,
 	void **iba_handle,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 

+ 3 - 1
exec/totemnet.c

@@ -55,6 +55,7 @@ struct transport {
 		qb_loop_t *loop_pt,
 		void **transport_instance,
 		struct totem_config *totem_config,
+		totemsrp_stats_t *stats,
 		int interface_no,
 		void *context,
 
@@ -272,6 +273,7 @@ int totemnet_initialize (
 	qb_loop_t *loop_pt,
 	void **net_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
@@ -297,7 +299,7 @@ int totemnet_initialize (
 	totemnet_instance_initialize (instance, totem_config);
 
 	res = instance->transport->initialize (loop_pt,
-		&instance->transport_context, totem_config,
+		&instance->transport_context, totem_config, stats,
 		interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
 
 	if (res == -1) {

+ 1 - 0
exec/totemnet.h

@@ -58,6 +58,7 @@ extern int totemnet_initialize (
 	qb_loop_t *poll_handle,
 	void **net_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 

+ 1 - 0
exec/totemrrp.c

@@ -1937,6 +1937,7 @@ int totemrrp_initialize (
 			poll_handle,
 			&instance->net_handles[i],
 			totem_config,
+			stats,
 			i,
 			(void *)deliver_fn_context,
 			rrp_deliver_fn,

+ 8 - 0
exec/totemudp.c

@@ -189,6 +189,8 @@ struct totemudp_instance {
 
 	struct totem_config *totem_config;
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 };
 
@@ -387,6 +389,9 @@ static inline void mcast_sendmsg (
 	if (res < 0) {
 		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
 			"sendmsg(mcast) failed (non-critical)");
+		instance->stats->continuous_sendmsg_failures++;
+	} else {
+		instance->stats->continuous_sendmsg_failures = 0;
 	}
 
 	/*
@@ -1097,6 +1102,7 @@ int totemudp_initialize (
 	qb_loop_t *poll_handle,
 	void **udp_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
@@ -1122,6 +1128,8 @@ int totemudp_initialize (
 	totemudp_instance_initialize (instance);
 
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	* Configure logging
 	*/

+ 1 - 0
exec/totemudp.h

@@ -48,6 +48,7 @@ extern int totemudp_initialize (
 	qb_loop_t* poll_handle,
 	void **udp_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 

+ 5 - 0
exec/totemudpu.c

@@ -172,6 +172,8 @@ struct totemudpu_instance {
 
 	struct totem_config *totem_config;
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 
 	int token_socket;
@@ -731,6 +733,7 @@ int totemudpu_initialize (
 	qb_loop_t *poll_handle,
 	void **udpu_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 
@@ -756,6 +759,8 @@ int totemudpu_initialize (
 	totemudpu_instance_initialize (instance);
 
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	* Configure logging
 	*/

+ 1 - 0
exec/totemudpu.h

@@ -48,6 +48,7 @@ extern int totemudpu_initialize (
 	qb_loop_t *poll_handle,
 	void **udpu_context,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	void *context,
 

+ 5 - 0
include/corosync/totem/totem.h

@@ -56,6 +56,10 @@
  * Maximum number of continuous gather states
  */
 #define MAX_NO_CONT_GATHER	3
+/*
+ * Maximum number of consecutive failures returned by the sendmsg call
+ */
+#define MAX_NO_CONT_SENDMSG_FAILURES	30
 
 struct totem_interface {
 	struct totem_ip_address bindnet;
@@ -252,6 +256,7 @@ typedef struct {
 	uint64_t consensus_timeouts;
 	uint64_t rx_msg_dropped;
 	uint32_t continuous_gather;
+	uint32_t continuous_sendmsg_failures;
 
 	int earliest_token;
 	int latest_token;