Sfoglia il codice sorgente

Return back "Totem is unable to form..." message

This patch restores the SUBJ functionality. It relies on the fact that
sendmsg will return an error, and if such an error is returned for a long
time, it's probably because of a firewall.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto@redhat.com>
Jan Friesse 13 anni fa
parent
commit
b7635ab9f7

+ 8 - 1
exec/main.c

@@ -491,15 +491,22 @@ static void corosync_totem_stats_updater (void *data)
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.consensus_timeouts", stats->mrp->srp->consensus_timeouts);
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
 	icmap_set_uint64("runtime.totem.pg.mrp.srp.rx_msg_dropped", stats->mrp->srp->rx_msg_dropped);
 	icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
 	icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_gather", stats->mrp->srp->continuous_gather);
+	icmap_set_uint32("runtime.totem.pg.mrp.srp.continuous_sendmsg_failures",
+	    stats->mrp->srp->continuous_sendmsg_failures);
+
 	icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
 	icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure",
 		stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
 		stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ? 1 : 0);
 
 
-	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER) {
+	if (stats->mrp->srp->continuous_gather > MAX_NO_CONT_GATHER ||
+	    stats->mrp->srp->continuous_sendmsg_failures > MAX_NO_CONT_SENDMSG_FAILURES) {
 		log_printf (LOGSYS_LEVEL_WARNING,
 		log_printf (LOGSYS_LEVEL_WARNING,
 			"Totem is unable to form a cluster because of an "
 			"Totem is unable to form a cluster because of an "
 			"operating system or network fault. The most common "
 			"operating system or network fault. The most common "
 			"cause of this message is that the local firewall is "
 			"cause of this message is that the local firewall is "
 			"configured improperly.");
 			"configured improperly.");
+		icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 1);
+	} else {
+		icmap_set_uint8("runtime.totem.pg.mrp.srp.firewall_enabled_or_nic_failure", 0);
 	}
 	}
 
 
 	for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {
 	for (i = 0; i < stats->mrp->srp->rrp->interface_count; i++) {

+ 3 - 0
exec/totemiba.c

@@ -98,6 +98,8 @@ struct totemiba_instance {
 
 
 	struct totem_config *totem_config;
 	struct totem_config *totem_config;
 
 
+	totemsrp_stats_t *stats;
+
 	void (*totemiba_iface_change_fn) (
 	void (*totemiba_iface_change_fn) (
 		void *context,
 		void *context,
 		const struct totem_ip_address *iface_address);
 		const struct totem_ip_address *iface_address);
@@ -1320,6 +1322,7 @@ int totemiba_initialize (
 	instance->totemiba_iface_change_fn = iface_change_fn;
 	instance->totemiba_iface_change_fn = iface_change_fn;
 
 
 	instance->totem_config = totem_config;
 	instance->totem_config = totem_config;
+	instance->stats = stats;
 
 
 	instance->rrp_context = context;
 	instance->rrp_context = context;
 
 

+ 1 - 0
exec/totemiba.h

@@ -48,6 +48,7 @@ extern int totemiba_initialize (
 	qb_loop_t* qb_poll_handle,
 	qb_loop_t* qb_poll_handle,
 	void **iba_handle,
 	void **iba_handle,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 3 - 1
exec/totemnet.c

@@ -55,6 +55,7 @@ struct transport {
 		qb_loop_t *loop_pt,
 		qb_loop_t *loop_pt,
 		void **transport_instance,
 		void **transport_instance,
 		struct totem_config *totem_config,
 		struct totem_config *totem_config,
+		totemsrp_stats_t *stats,
 		int interface_no,
 		int interface_no,
 		void *context,
 		void *context,
 
 
@@ -272,6 +273,7 @@ int totemnet_initialize (
 	qb_loop_t *loop_pt,
 	qb_loop_t *loop_pt,
 	void **net_context,
 	void **net_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 
@@ -297,7 +299,7 @@ int totemnet_initialize (
 	totemnet_instance_initialize (instance, totem_config);
 	totemnet_instance_initialize (instance, totem_config);
 
 
 	res = instance->transport->initialize (loop_pt,
 	res = instance->transport->initialize (loop_pt,
-		&instance->transport_context, totem_config,
+		&instance->transport_context, totem_config, stats,
 		interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
 		interface_no, context, deliver_fn, iface_change_fn, target_set_completed);
 
 
 	if (res == -1) {
 	if (res == -1) {

+ 1 - 0
exec/totemnet.h

@@ -58,6 +58,7 @@ extern int totemnet_initialize (
 	qb_loop_t *poll_handle,
 	qb_loop_t *poll_handle,
 	void **net_context,
 	void **net_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 1 - 0
exec/totemrrp.c

@@ -1937,6 +1937,7 @@ int totemrrp_initialize (
 			poll_handle,
 			poll_handle,
 			&instance->net_handles[i],
 			&instance->net_handles[i],
 			totem_config,
 			totem_config,
+			stats,
 			i,
 			i,
 			(void *)deliver_fn_context,
 			(void *)deliver_fn_context,
 			rrp_deliver_fn,
 			rrp_deliver_fn,

+ 8 - 0
exec/totemudp.c

@@ -189,6 +189,8 @@ struct totemudp_instance {
 
 
 	struct totem_config *totem_config;
 	struct totem_config *totem_config;
 
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 	struct totem_ip_address token_target;
 };
 };
 
 
@@ -387,6 +389,9 @@ static inline void mcast_sendmsg (
 	if (res < 0) {
 	if (res < 0) {
 		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
 		LOGSYS_PERROR (errno, instance->totemudp_log_level_debug,
 			"sendmsg(mcast) failed (non-critical)");
 			"sendmsg(mcast) failed (non-critical)");
+		instance->stats->continuous_sendmsg_failures++;
+	} else {
+		instance->stats->continuous_sendmsg_failures = 0;
 	}
 	}
 
 
 	/*
 	/*
@@ -1097,6 +1102,7 @@ int totemudp_initialize (
 	qb_loop_t *poll_handle,
 	qb_loop_t *poll_handle,
 	void **udp_context,
 	void **udp_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 
@@ -1122,6 +1128,8 @@ int totemudp_initialize (
 	totemudp_instance_initialize (instance);
 	totemudp_instance_initialize (instance);
 
 
 	instance->totem_config = totem_config;
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	/*
 	* Configure logging
 	* Configure logging
 	*/
 	*/

+ 1 - 0
exec/totemudp.h

@@ -48,6 +48,7 @@ extern int totemudp_initialize (
 	qb_loop_t* poll_handle,
 	qb_loop_t* poll_handle,
 	void **udp_context,
 	void **udp_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 5 - 0
exec/totemudpu.c

@@ -172,6 +172,8 @@ struct totemudpu_instance {
 
 
 	struct totem_config *totem_config;
 	struct totem_config *totem_config;
 
 
+	totemsrp_stats_t *stats;
+
 	struct totem_ip_address token_target;
 	struct totem_ip_address token_target;
 
 
 	int token_socket;
 	int token_socket;
@@ -731,6 +733,7 @@ int totemudpu_initialize (
 	qb_loop_t *poll_handle,
 	qb_loop_t *poll_handle,
 	void **udpu_context,
 	void **udpu_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 
@@ -756,6 +759,8 @@ int totemudpu_initialize (
 	totemudpu_instance_initialize (instance);
 	totemudpu_instance_initialize (instance);
 
 
 	instance->totem_config = totem_config;
 	instance->totem_config = totem_config;
+	instance->stats = stats;
+
 	/*
 	/*
 	* Configure logging
 	* Configure logging
 	*/
 	*/

+ 1 - 0
exec/totemudpu.h

@@ -48,6 +48,7 @@ extern int totemudpu_initialize (
 	qb_loop_t *poll_handle,
 	qb_loop_t *poll_handle,
 	void **udpu_context,
 	void **udpu_context,
 	struct totem_config *totem_config,
 	struct totem_config *totem_config,
+	totemsrp_stats_t *stats,
 	int interface_no,
 	int interface_no,
 	void *context,
 	void *context,
 
 

+ 5 - 0
include/corosync/totem/totem.h

@@ -56,6 +56,10 @@
  * Maximum number of continuous gather states
  * Maximum number of continuous gather states
  */
  */
 #define MAX_NO_CONT_GATHER	3
 #define MAX_NO_CONT_GATHER	3
+/*
+ * Maximum number of continuous failures returned by the sendmsg call
+ */
+#define MAX_NO_CONT_SENDMSG_FAILURES	30
 
 
 struct totem_interface {
 struct totem_interface {
 	struct totem_ip_address bindnet;
 	struct totem_ip_address bindnet;
@@ -252,6 +256,7 @@ typedef struct {
 	uint64_t consensus_timeouts;
 	uint64_t consensus_timeouts;
 	uint64_t rx_msg_dropped;
 	uint64_t rx_msg_dropped;
 	uint32_t continuous_gather;
 	uint32_t continuous_gather;
+	uint32_t continuous_sendmsg_failures;
 
 
 	int earliest_token;
 	int earliest_token;
 	int latest_token;
 	int latest_token;