Просмотр исходного кода

cpg: Handle errors from totem_mcast

totem_mcast function can return -1 if corosync is overloaded. Sadly in
many calls of this functions was error code ether not handled at all, or
handled by assert.

Commit changes behaviour to ether return CS_ERR_TRY_AGAIN or put error
code to later layers to handle it.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Steven Dake <sdake@redhat.com>
Jan Friesse 14 лет назад
Родитель
Сommit
eef5827dbb
1 измененных файлов с 34 добавлено и 5 удалено
  1. 34 5
      services/cpg.c

+ 34 - 5
services/cpg.c

@@ -865,12 +865,19 @@ static void cpg_pd_finalize (struct cpg_pd *cpd)
 static int cpg_lib_exit_fn (void *conn)
 static int cpg_lib_exit_fn (void *conn)
 {
 {
 	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
 	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
+	int result;
 
 
 	log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p\n", conn);
 	log_printf(LOGSYS_LEVEL_DEBUG, "exit_fn for conn=%p\n", conn);
 
 
 	if (cpd->group_name.length > 0) {
 	if (cpd->group_name.length > 0) {
-		cpg_node_joinleave_send (cpd->pid, &cpd->group_name,
+		result = cpg_node_joinleave_send (cpd->pid, &cpd->group_name,
 				MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN);
 				MESSAGE_REQ_EXEC_CPG_PROCLEAVE, CONFCHG_CPG_REASON_PROCDOWN);
+		if (result == -1) {
+			/*
+			 * Call this function again later
+			 */
+			return (result);
+		}
 	}
 	}
 
 
 	cpg_pd_finalize (cpd);
 	cpg_pd_finalize (cpd);
@@ -1286,9 +1293,11 @@ static void message_handler_req_lib_cpg_join (void *conn, const void *message)
 {
 {
 	const struct req_lib_cpg_join *req_lib_cpg_join = message;
 	const struct req_lib_cpg_join *req_lib_cpg_join = message;
 	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
 	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
+	struct cpg_pd tmp_cpd;
 	struct res_lib_cpg_join res_lib_cpg_join;
 	struct res_lib_cpg_join res_lib_cpg_join;
 	cs_error_t error = CPG_OK;
 	cs_error_t error = CPG_OK;
 	struct list_head *iter;
 	struct list_head *iter;
+	int result;
 
 
 	/* Test, if we don't have same pid and group name joined */
 	/* Test, if we don't have same pid and group name joined */
 	for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
 	for (iter = cpg_pd_list_head.next; iter != &cpg_pd_list_head; iter = iter->next) {
@@ -1321,15 +1330,28 @@ static void message_handler_req_lib_cpg_join (void *conn, const void *message)
 	switch (cpd->cpd_state) {
 	switch (cpd->cpd_state) {
 	case CPD_STATE_UNJOINED:
 	case CPD_STATE_UNJOINED:
 		error = CPG_OK;
 		error = CPG_OK;
+		/*
+		 * Make copy of cpd, to restore if cpg_node_joinleave_send fails
+		 */
+		memcpy (&tmp_cpd, cpd, sizeof(tmp_cpd));
 		cpd->cpd_state = CPD_STATE_JOIN_STARTED;
 		cpd->cpd_state = CPD_STATE_JOIN_STARTED;
 		cpd->pid = req_lib_cpg_join->pid;
 		cpd->pid = req_lib_cpg_join->pid;
 		cpd->flags = req_lib_cpg_join->flags;
 		cpd->flags = req_lib_cpg_join->flags;
 		memcpy (&cpd->group_name, &req_lib_cpg_join->group_name,
 		memcpy (&cpd->group_name, &req_lib_cpg_join->group_name,
 			sizeof (cpd->group_name));
 			sizeof (cpd->group_name));
 
 
-		cpg_node_joinleave_send (req_lib_cpg_join->pid,
+		result = cpg_node_joinleave_send (req_lib_cpg_join->pid,
 			&req_lib_cpg_join->group_name,
 			&req_lib_cpg_join->group_name,
 			MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN);
 			MESSAGE_REQ_EXEC_CPG_PROCJOIN, CONFCHG_CPG_REASON_JOIN);
+
+		if (result == -1) {
+			error = CPG_ERR_TRY_AGAIN;
+			/*
+			 * Restore cpd
+			 */
+			memcpy (cpd, &tmp_cpd, sizeof(tmp_cpd));
+			goto response_send;
+		}
 		break;
 		break;
 	case CPD_STATE_LEAVE_STARTED:
 	case CPD_STATE_LEAVE_STARTED:
 		error = CPG_ERR_BUSY;
 		error = CPG_ERR_BUSY;
@@ -1356,6 +1378,7 @@ static void message_handler_req_lib_cpg_leave (void *conn, const void *message)
 	cs_error_t error = CPG_OK;
 	cs_error_t error = CPG_OK;
 	struct req_lib_cpg_leave  *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message;
 	struct req_lib_cpg_leave  *req_lib_cpg_leave = (struct req_lib_cpg_leave *)message;
 	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
 	struct cpg_pd *cpd = (struct cpg_pd *)api->ipc_private_data_get (conn);
+	int result;
 
 
 	log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p\n", conn);
 	log_printf(LOGSYS_LEVEL_DEBUG, "got leave request on %p\n", conn);
 
 
@@ -1372,10 +1395,14 @@ static void message_handler_req_lib_cpg_leave (void *conn, const void *message)
 	case CPD_STATE_JOIN_COMPLETED:
 	case CPD_STATE_JOIN_COMPLETED:
 		error = CPG_OK;
 		error = CPG_OK;
 		cpd->cpd_state = CPD_STATE_LEAVE_STARTED;
 		cpd->cpd_state = CPD_STATE_LEAVE_STARTED;
-		cpg_node_joinleave_send (req_lib_cpg_leave->pid,
+		result = cpg_node_joinleave_send (req_lib_cpg_leave->pid,
 			&req_lib_cpg_leave->group_name,
 			&req_lib_cpg_leave->group_name,
 			MESSAGE_REQ_EXEC_CPG_PROCLEAVE,
 			MESSAGE_REQ_EXEC_CPG_PROCLEAVE,
 			CONFCHG_CPG_REASON_LEAVE);
 			CONFCHG_CPG_REASON_LEAVE);
+		if (result == -1) {
+			error = CPG_ERR_TRY_AGAIN;
+			cpd->cpd_state = CPD_STATE_JOIN_COMPLETED;
+		}
 		break;
 		break;
 	}
 	}
 
 
@@ -1458,8 +1485,10 @@ static void message_handler_req_lib_cpg_mcast (void *conn, const void *message)
  		req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message;
  		req_exec_cpg_iovec[1].iov_base = (char *)&req_lib_cpg_mcast->message;
  		req_exec_cpg_iovec[1].iov_len = msglen;
  		req_exec_cpg_iovec[1].iov_len = msglen;
 
 
- 		result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
- 		assert(result == 0);
+		result = api->totem_mcast (req_exec_cpg_iovec, 2, TOTEM_AGREED);
+		if (result == -1) {
+			error = CPG_ERR_TRY_AGAIN;
+		}
  	}
  	}
 
 
 	res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast);
 	res_lib_cpg_mcast.header.size = sizeof(res_lib_cpg_mcast);