Browse Source

Handle delayed multicast packets that occur with switches

Some switches delay multicast packets relative to the unicast token.  This patch
works around that problem by providing a new tunable called miss_count_const.
This tunable counts the number of times a message is found missing and, once the
count reaches the configured value, marks the message as missing in the retransmit list.

This improves performance and avoids displaying warning messages about missed
multicast messages when operating in these switching environments.

Signed-off-by: Steven Dake <sdake@redhat.com>
Reviewed-by: Angus Salkeld <asalkeld@redhat.com>
Steven Dake 15 years ago
parent
commit
b2205fa86c
5 changed files with 65 additions and 1 deletions
  1. 7 0
      exec/totemconfig.c
  2. 19 1
      exec/totemsrp.c
  3. 24 0
      include/corosync/sq.h
  4. 5 0
      include/corosync/totem/totem.h
  5. 10 0
      man/corosync.conf.5

+ 7 - 0
exec/totemconfig.c

@@ -79,6 +79,7 @@
 #define MAX_NETWORK_DELAY			50
 #define MAX_NETWORK_DELAY			50
 #define WINDOW_SIZE				50
 #define WINDOW_SIZE				50
 #define MAX_MESSAGES				17
 #define MAX_MESSAGES				17
+#define MISS_COUNT_CONST			5
 #define RRP_PROBLEM_COUNT_TIMEOUT		2000
 #define RRP_PROBLEM_COUNT_TIMEOUT		2000
 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT	10
 #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT	10
 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN		5
 #define RRP_PROBLEM_COUNT_THRESHOLD_MIN		5
@@ -219,6 +220,8 @@ static void totem_volatile_config_read (
 	objdb_get_string (objdb, object_totem_handle, "vsftype", &totem_config->vsf_type);
 	objdb_get_string (objdb, object_totem_handle, "vsftype", &totem_config->vsf_type);
 
 
 	objdb_get_int (objdb,object_totem_handle, "max_messages", &totem_config->max_messages);
 	objdb_get_int (objdb,object_totem_handle, "max_messages", &totem_config->max_messages);
+
+	objdb_get_int (objdb,object_totem_handle, "miss_count_const", &totem_config->miss_count_const);
 }
 }
 
 
 
 
@@ -538,6 +541,10 @@ int totem_config_validate (
 		totem_config->max_messages = MAX_MESSAGES;
 		totem_config->max_messages = MAX_MESSAGES;
 	}
 	}
 
 
+	if (totem_config->miss_count_const == 0) {
+		totem_config->miss_count_const = MISS_COUNT_CONST;
+	}
+
 	if (totem_config->token_timeout < MINIMUM_TIMEOUT) {
 	if (totem_config->token_timeout < MINIMUM_TIMEOUT) {
 		snprintf (local_error_reason, sizeof(local_error_reason),
 		snprintf (local_error_reason, sizeof(local_error_reason),
 			"The token timeout parameter (%d ms) may not be less then (%d ms).",
 			"The token timeout parameter (%d ms) may not be less then (%d ms).",

+ 19 - 1
exec/totemsrp.c

@@ -840,6 +840,10 @@ int totemsrp_initialize (
 		"window size per rotation (%d messages) maximum messages per rotation (%d messages)\n",
 		"window size per rotation (%d messages) maximum messages per rotation (%d messages)\n",
 		totem_config->window_size, totem_config->max_messages);
 		totem_config->window_size, totem_config->max_messages);
 
 
+	log_printf (instance->totemsrp_log_level_debug,
+		"missed count const (%d messages)\n",
+		totem_config->miss_count_const);
+
 	log_printf (instance->totemsrp_log_level_debug,
 	log_printf (instance->totemsrp_log_level_debug,
 		"send threads (%d threads)\n", totem_config->threads);
 		"send threads (%d threads)\n", totem_config->threads);
 	log_printf (instance->totemsrp_log_level_debug,
 	log_printf (instance->totemsrp_log_level_debug,
@@ -2435,7 +2439,7 @@ static int orf_token_rtr (
 			strcat (retransmit_msg, value);
 			strcat (retransmit_msg, value);
 		}
 		}
 		strcat (retransmit_msg, "\n");
 		strcat (retransmit_msg, "\n");
-		log_printf (instance->totemsrp_log_level_debug,
+		log_printf (instance->totemsrp_log_level_notice,
 			"%s", retransmit_msg);
 			"%s", retransmit_msg);
 	}
 	}
 
 
@@ -2500,6 +2504,20 @@ static int orf_token_rtr (
 		 */
 		 */
 		res = sq_item_inuse (sort_queue, instance->my_aru + i);
 		res = sq_item_inuse (sort_queue, instance->my_aru + i);
 		if (res == 0) {
 		if (res == 0) {
+			/*
+			 * Determine how many times we have missed receiving
+			 * this sequence number.  sq_item_miss_count increments
+			 * a counter for the sequence number.  The miss count
+			 * will be returned and compared.  This allows time for
+			 * delayed multicast messages to be received before
+			 * declaring the message is missing and requesting a
+			 * retransmit.
+			 */
+			res = sq_item_miss_count (sort_queue, instance->my_aru + i);
+			if (res < instance->totem_config->miss_count_const) {
+				continue;
+			}
+
 			/*
 			/*
 			 * Determine if missing message is already in retransmit list
 			 * Determine if missing message is already in retransmit list
 			 */
 			 */

+ 24 - 0
include/corosync/sq.h

@@ -42,6 +42,7 @@ struct sq {
 	unsigned int size;
 	unsigned int size;
 	void *items;
 	void *items;
 	unsigned int *items_inuse;
 	unsigned int *items_inuse;
+	unsigned int *items_miss_count;
 	unsigned int size_per_item;
 	unsigned int size_per_item;
 	unsigned int head_seqid;
 	unsigned int head_seqid;
 	unsigned int item_count;
 	unsigned int item_count;
@@ -112,7 +113,12 @@ static inline int sq_init (
 	    == NULL) {
 	    == NULL) {
 		return (-ENOMEM);
 		return (-ENOMEM);
 	}
 	}
+	if ((sq->items_miss_count = malloc (item_count * sizeof (unsigned int)))
+	    == NULL) {
+		return (-ENOMEM);
+	}
 	memset (sq->items_inuse, 0, item_count * sizeof (unsigned int));
 	memset (sq->items_inuse, 0, item_count * sizeof (unsigned int));
+	memset (sq->items_miss_count, 0, item_count * sizeof (unsigned int));
 	return (0);
 	return (0);
 }
 }
 
 
@@ -124,6 +130,7 @@ static inline void sq_reinit (struct sq *sq, unsigned int head_seqid)
 
 
 	memset (sq->items, 0, sq->item_count * sq->size_per_item);
 	memset (sq->items, 0, sq->item_count * sq->size_per_item);
 	memset (sq->items_inuse, 0, sq->item_count * sizeof (unsigned int));
 	memset (sq->items_inuse, 0, sq->item_count * sizeof (unsigned int));
+	memset (sq->items_miss_count, 0, sq->item_count * sizeof (unsigned int));
 }
 }
 
 
 static inline void sq_assert (const struct sq *sq, unsigned int pos)
 static inline void sq_assert (const struct sq *sq, unsigned int pos)
@@ -149,11 +156,14 @@ static inline void sq_copy (struct sq *sq_dest, const struct sq *sq_src)
 		sq_src->item_count * sq_src->size_per_item);
 		sq_src->item_count * sq_src->size_per_item);
 	memcpy (sq_dest->items_inuse, sq_src->items_inuse,
 	memcpy (sq_dest->items_inuse, sq_src->items_inuse,
 		sq_src->item_count * sizeof (unsigned int));
 		sq_src->item_count * sizeof (unsigned int));
+	memcpy (sq_dest->items_miss_count, sq_src->items_miss_count,
+		sq_src->item_count * sizeof (unsigned int));
 }
 }
 
 
 static inline void sq_free (struct sq *sq) {
 static inline void sq_free (struct sq *sq) {
 	free (sq->items);
 	free (sq->items);
 	free (sq->items_inuse);
 	free (sq->items_inuse);
+	free (sq->items_miss_count);
 }
 }
 
 
 static inline void *sq_item_add (
 static inline void *sq_item_add (
@@ -178,6 +188,7 @@ static inline void *sq_item_add (
 	} else {
 	} else {
 		sq->items_inuse[sq_position] = seqid;
 		sq->items_inuse[sq_position] = seqid;
 	}
 	}
+	sq->items_miss_count[sq_position] = 0;
 
 
 	return (sq_item);
 	return (sq_item);
 }
 }
@@ -204,6 +215,17 @@ static inline unsigned int sq_item_inuse (
 	return (sq->items_inuse[sq_position] != 0);
 	return (sq->items_inuse[sq_position] != 0);
 }
 }
 
 
+static inline unsigned int sq_item_miss_count (
+	const struct sq *sq,
+	unsigned int seq_id)
+{
+	unsigned int sq_position;
+
+	sq_position = (sq->head - sq->head_seqid + seq_id) % sq->size;
+	sq->items_miss_count[sq_position]++;
+	return (sq->items_miss_count[sq_position]);
+}
+
 static inline unsigned int sq_size_get (
 static inline unsigned int sq_size_get (
 	const struct sq *sq)
 	const struct sq *sq)
 {
 {
@@ -286,6 +308,8 @@ static inline void sq_items_release (struct sq *sq, unsigned int seqid)
 //		printf ("releasing %d for %d\n", oldhead, seqid - sq->head_seqid + 1);
 //		printf ("releasing %d for %d\n", oldhead, seqid - sq->head_seqid + 1);
 		memset (&sq->items_inuse[oldhead], 0,
 		memset (&sq->items_inuse[oldhead], 0,
 			(seqid - sq->head_seqid + 1) * sizeof (unsigned int));
 			(seqid - sq->head_seqid + 1) * sizeof (unsigned int));
+		memset (&sq->items_miss_count[oldhead], 0,
+			(seqid - sq->head_seqid + 1) * sizeof (unsigned int));
 	}
 	}
 	sq->head_seqid = seqid + 1;
 	sq->head_seqid = seqid + 1;
 }
 }

+ 5 - 0
include/corosync/totem/totem.h

@@ -174,6 +174,8 @@ struct totem_config {
 	int crypto_sign_type;
 	int crypto_sign_type;
 
 
 	totem_transport_t transport_number;
 	totem_transport_t transport_number;
+
+	unsigned int miss_count_const;
 };
 };
 
 
 #define TOTEM_CONFIGURATION_TYPE
 #define TOTEM_CONFIGURATION_TYPE
@@ -257,6 +259,9 @@ typedef struct {
 
 
 } totemsrp_stats_t;
 } totemsrp_stats_t;
 
 
+ 
+ #define TOTEM_CONFIGURATION_TYPE
+
 typedef struct {
 typedef struct {
 	totem_stats_header_t hdr;
 	totem_stats_header_t hdr;
 	totemsrp_stats_t *srp;
 	totemsrp_stats_t *srp;

+ 10 - 0
man/corosync.conf.5

@@ -431,6 +431,16 @@ processor on receipt of the token.  The max_messages parameter is limited to
 
 
 The default is 17 messages.
 The default is 17 messages.
 
 
+.TP
+miss_count_const
+This constant defines the maximum number of times that a message is checked
+for retransmission, on receipt of a token, before a retransmission is requested.  This
+parameter is useful to modify for switches that delay multicast packets
+compared to unicast packets.  The default setting works well for nearly all
+modern switches.
+
+The default is 5.
+
 .TP
 .TP
 rrp_problem_count_timeout
 rrp_problem_count_timeout
 This specifies the time in milliseconds to wait before decrementing the
 This specifies the time in milliseconds to wait before decrementing the