Просмотр исходного кода

totemiba: Properly allocate RDMA buffers

1. In UD mode the receiving side of an RDMA application should have enough
space in its buffer to hold both the data and the GRH. Also, sge.length on
the receiving side should be set to max_msg_size + sizeof (struct ibv_grh).
Current corosync doesn't take the GRH into account and does not work if mtu
is set to the real mtu of the IB port (it works if netmtu is set to < 2048-40).
2. ibv_wc.byte_len is the actual length of the received packet, i.e.
msg_len + GRH. The GRH length should be subtracted before further processing.
If not, it might cause problems when messages get retransmitted, as
their apparent size will constantly grow.
3. Current corosync will not work with rdma and mtus > 2048. Most modern
IB HW supports 4096 mtu.

Signed-off-by: Yevheniy Demchenko <zheka@uvt.cz>
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
Yevheniy Demchenko 12 лет назад
Родитель
Commit
795b04780f
1 измененных файлов с 8 добавлено и 7 удалено
  1. 8 7
      exec/totemiba.c

+ 8 - 7
exec/totemiba.c

@@ -227,7 +227,7 @@ struct recv_buf {
 	struct ibv_recv_wr recv_wr;
 	struct ibv_sge sge;
 	struct ibv_mr *mr;
-	char buffer[MAX_MTU_SIZE];
+	char buffer[MAX_MTU_SIZE + sizeof (struct ibv_grh)];
 };
 
 struct send_buf {
@@ -270,7 +270,7 @@ static inline struct send_buf *mcast_send_buf_get (
 	}
 	send_buf->mr = ibv_reg_mr (instance->mcast_pd,
 		send_buf->buffer,
-		2048, IBV_ACCESS_LOCAL_WRITE);
+		MAX_MTU_SIZE, IBV_ACCESS_LOCAL_WRITE);
 	if (send_buf->mr == NULL) {
 		log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range\n");
 		free (send_buf);
@@ -307,7 +307,7 @@ static inline struct send_buf *token_send_buf_get (
 	}
 	send_buf->mr = ibv_reg_mr (instance->send_token_pd,
 		send_buf->buffer,
-		2048, IBV_ACCESS_LOCAL_WRITE);
+		MAX_MTU_SIZE, IBV_ACCESS_LOCAL_WRITE);
 	if (send_buf->mr == NULL) {
 		log_printf (LOGSYS_LEVEL_ERROR, "couldn't register memory range\n");
 		free (send_buf);
@@ -354,7 +354,7 @@ static inline struct recv_buf *recv_token_recv_buf_create (
 	}
 
 	recv_buf->mr = ibv_reg_mr (instance->recv_token_pd, &recv_buf->buffer,
-		2048,
+		MAX_MTU_SIZE + sizeof (struct ibv_grh),
 		IBV_ACCESS_LOCAL_WRITE);
 
 	recv_buf->recv_wr.next = NULL;
@@ -362,7 +362,7 @@ static inline struct recv_buf *recv_token_recv_buf_create (
 	recv_buf->recv_wr.num_sge = 1;
 	recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
 
-	recv_buf->sge.length = 2048;
+	recv_buf->sge.length = MAX_MTU_SIZE + sizeof (struct ibv_grh);
 	recv_buf->sge.lkey = recv_buf->mr->lkey;
 	recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
 
@@ -421,7 +421,7 @@ static inline struct recv_buf *mcast_recv_buf_create (struct totemiba_instance *
 	}
 
 	mr = ibv_reg_mr (instance->mcast_pd, &recv_buf->buffer,
-		2048,
+		MAX_MTU_SIZE + sizeof (struct ibv_grh),
 		IBV_ACCESS_LOCAL_WRITE);
 
 	recv_buf->recv_wr.next = NULL;
@@ -429,7 +429,7 @@ static inline struct recv_buf *mcast_recv_buf_create (struct totemiba_instance *
 	recv_buf->recv_wr.num_sge = 1;
 	recv_buf->recv_wr.wr_id = (uintptr_t)recv_buf;
 
-	recv_buf->sge.length = 2048;
+	recv_buf->sge.length = MAX_MTU_SIZE + sizeof (struct ibv_grh);
 	recv_buf->sge.lkey = mr->lkey;
 	recv_buf->sge.addr = (uintptr_t)recv_buf->buffer;
 
@@ -466,6 +466,7 @@ static inline void iba_deliver_fn (struct totemiba_instance *instance, uint64_t
 	recv_buf = wrid2void(wr_id);
 	addr = &recv_buf->buffer[sizeof (struct ibv_grh)];
 
+	bytes -= sizeof (struct ibv_grh);
 	instance->totemiba_deliver_fn (instance->rrp_context, addr, bytes);
 }