Ver código fonte

totemiba: Add multicast recovery

Totemiba wasn't able to survive a SubnetManager handover or
restart. If the SM was migrated to another node, corosync logged
"multicast error" and lost connectivity.

This commit should resolve that situation.

Signed-off-by: Yevheniy Demchenko <zheka@uvt.cz>
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
Yevheniy Demchenko 11 anos atrás
pai
commit
0ea20a3d54
1 arquivos alterados com 54 adições e 8 exclusões
  1. 54 8
      exec/totemiba.c

+ 54 - 8
exec/totemiba.c

@@ -82,6 +82,8 @@
 
 #define MAX_MTU_SIZE 4096
 
+#define MCAST_REJOIN_MSEC	100
+
 struct totemiba_instance {
 	struct sockaddr bind_addr;
 
@@ -206,6 +208,10 @@ struct totemiba_instance {
 	struct list_head token_send_buf_head;
 
 	struct list_head recv_token_recv_buf_head;
+
+	int mcast_seen_joined;
+
+	poll_timer_handle mcast_rejoin;
 };
 union u {
 	uint64_t wr_id;
@@ -517,6 +523,31 @@ static int mcast_cq_recv_event_fn (hdb_handle_t poll_handle, int fd, int events,
 	return (0);
 }
 
+/*
+ * Timer callback: leave the group, drop the old AH, rejoin; re-arm on failure.
+ */
+static void mcast_rejoin (void *data)
+{
+	int res;
+	struct totemiba_instance *instance = (struct totemiba_instance *)data;
+
+	/* Result deliberately ignored: the join below is attempted regardless */
+	(void)rdma_leave_multicast (instance->mcast_cma_id, &instance->mcast_addr);
+	if (instance->mcast_ah) {
+		ibv_destroy_ah (instance->mcast_ah);
+		instance->mcast_ah = 0;
+	}
+
+	res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
+	if (res != 0) {
+		log_printf (LOGSYS_LEVEL_DEBUG,
+		    "rdma_join_multicast failed, errno=%d, rejoining in %u ms\n",
+		    errno, MCAST_REJOIN_MSEC);
+		poll_timer_add (instance->totemiba_poll_handle, MCAST_REJOIN_MSEC,
+			(void *)instance, mcast_rejoin, &instance->mcast_rejoin);
+	}
+}
+
 static int mcast_rdma_event_fn (hdb_handle_t poll_handle, int fd, int events, void *context)
 {
 	struct totemiba_instance *instance = (struct totemiba_instance *)context;
@@ -534,8 +565,16 @@ static int mcast_rdma_event_fn (hdb_handle_t poll_handle, int fd, int events, vo
 	 * occurs when we resolve the multicast address
 	 */
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
-		rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
+		res = rdma_join_multicast (instance->mcast_cma_id, &instance->mcast_addr, instance);
 		usleep(1000);
+		if (res == 0) break;
+	case RDMA_CM_EVENT_MULTICAST_ERROR:
+		log_printf (LOGSYS_LEVEL_ERROR, "multicast error, trying to rejoin in %u ms\n", MCAST_REJOIN_MSEC);
+		poll_timer_add (instance->totemiba_poll_handle,
+			MCAST_REJOIN_MSEC,
+			(void *)instance,
+			mcast_rejoin,
+			&instance->mcast_rejoin);
 		break;
 	/*
 	 * occurs when the CM joins the multicast group
@@ -544,14 +583,15 @@ static int mcast_rdma_event_fn (hdb_handle_t poll_handle, int fd, int events, vo
 		instance->mcast_qpn = event->param.ud.qp_num;
 		instance->mcast_qkey = event->param.ud.qkey;
 		instance->mcast_ah = ibv_create_ah (instance->mcast_pd, &event->param.ud.ah_attr);
-
-		instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id);
+		if (instance->mcast_seen_joined == 0) {
+			log_printf (LOGSYS_LEVEL_DEBUG, "joining mcast 1st time, running callbacks\n");
+			instance->totemiba_iface_change_fn (instance->rrp_context, &instance->my_id);
+			instance->mcast_seen_joined=1;
+		}
+		log_printf (LOGSYS_LEVEL_DEBUG, "Joined multicast!\n");
 		break;
 	case RDMA_CM_EVENT_ADDR_ERROR:
 	case RDMA_CM_EVENT_ROUTE_ERROR:
-	case RDMA_CM_EVENT_MULTICAST_ERROR:
-		log_printf (LOGSYS_LEVEL_ERROR, "multicast error\n");
-		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		break;
 	default:
@@ -1454,7 +1494,10 @@ int totemiba_mcast_flush_send (
 	sge.lkey = send_buf->mr->lkey;
 	sge.addr = (uintptr_t)msg;
 
-	res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
+	if (instance->mcast_ah != 0) {
+		res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
+	}
+
 	return (res);
 }
 
@@ -1492,7 +1535,10 @@ int totemiba_mcast_noflush_send (
 	sge.lkey = send_buf->mr->lkey;
 	sge.addr = (uintptr_t)msg;
 
-	res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
+	if (instance->mcast_ah != 0) {
+		res = ibv_post_send (instance->mcast_cma_id->qp, &send_wr, &failed_send_wr);
+	}
+
 	return (res);
 }