Kaynağa Gözat

Fix malloc deadlock in signal handler

This patch fixes the deadlock that can occur when malloc is called
inside a signal handler. It creates a thread that waits for a
semaphore to be posted and then starts the shutdown sequence.

RHBZ#547511


git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@2644 fd59a12c-fef9-0310-b244-a6a79926bd2f
Jan Friesse 16 yıl önce
ebeveyn
işleme
64c6c6f324
2 değiştirilmiş dosya ile 38 ekleme ve 4 silme
  1. 7 3
      exec/coroipcs.c
  2. 31 1
      exec/main.c

+ 7 - 3
exec/coroipcs.c

@@ -1054,6 +1054,11 @@ void coroipcs_ipc_exit (void)
 
 		conn_info = list_entry (list, struct conn_info, list);
 
+		if (conn_info->state != CONN_STATE_THREAD_ACTIVE)
+			continue;
+
+		ipc_disconnect (conn_info);
+
 #if _POSIX_THREAD_PROCESS_SHARED > 0
 		sem_destroy (&conn_info->control_buffer->sem0);
 		sem_destroy (&conn_info->control_buffer->sem1);
@@ -1073,8 +1078,6 @@ void coroipcs_ipc_exit (void)
 			conn_info->response_size);
 		res = circular_memory_unmap (conn_info->dispatch_buffer,
 			conn_info->dispatch_size);
-
-		sem_post_exit_thread (conn_info);
 	}
 }
 
@@ -1656,7 +1659,6 @@ int coroipcs_handler_dispatch (
 		 * ipc thread is the only reference at startup
 		 */
 		conn_info->refcount = 1;
-		conn_info->state = CONN_STATE_THREAD_ACTIVE;
 
 		conn_info->private_data = api->malloc (api->private_data_size_get (conn_info->service));
 		memset (conn_info->private_data, 0,
@@ -1690,6 +1692,8 @@ int coroipcs_handler_dispatch (
 		if (conn_info->service == SOCKET_SERVICE_INIT) {
 			conn_info->service = -1;
 		}
+
+		conn_info->state = CONN_STATE_THREAD_ACTIVE;
 	} else
 	if (revent & POLLIN) {
 		coroipcs_refcount_inc (conn_info);

+ 31 - 1
exec/main.c

@@ -56,6 +56,7 @@
 #include <signal.h>
 #include <sched.h>
 #include <time.h>
+#include <semaphore.h>
 
 #include <corosync/swab.h>
 #include <corosync/corotypes.h>
@@ -133,6 +134,10 @@ static hdb_handle_t object_connection_handle;
 
 static corosync_timer_handle_t corosync_stats_timer_handle;
 
+static pthread_t corosync_exit_thread;
+
+static sem_t corosync_exit_sem;
+
 hdb_handle_t corosync_poll_handle_get (void)
 {
 	return (corosync_poll_handle);
@@ -152,8 +157,8 @@ void corosync_state_dump (void)
 static void unlink_all_completed (void)
 {
 	poll_stop (0);
-	totempg_finalize ();
 	coroipcs_ipc_exit ();
+	totempg_finalize ();
 
 	corosync_exit_error (AIS_DONE_EXIT);
 }
@@ -167,7 +172,17 @@ void corosync_shutdown_request (void)
 	if (called == 0) {
 		called = 1;
 	}
+
+	sem_post (&corosync_exit_sem);
+}
+
+static void *corosync_exit_thread_handler (void *arg)
+{
+	sem_wait (&corosync_exit_sem);
+
 	corosync_service_unlink_all (api, unlink_all_completed);
+
+	return arg;
 }
 
 static void sigusr2_handler (int num)
@@ -1420,6 +1435,21 @@ int main (int argc, char **argv)
 
 // TODO what is this hack for?	usleep(totem_config.token_timeout * 2000);
 
+	/*
+	 * Create semaphore and start "exit" thread
+	 */
+	res = sem_init (&corosync_exit_sem, 0, 0);
+	if (res != 0) {
+		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create exit thread.\n");
+		corosync_exit_error (AIS_DONE_FATAL_ERR);
+	}
+
+	res = pthread_create (&corosync_exit_thread, NULL, corosync_exit_thread_handler, NULL);
+	if (res != 0) {
+		log_printf (LOGSYS_LEVEL_ERROR, "Corosync Executive couldn't create exit thread.\n");
+		corosync_exit_error (AIS_DONE_FATAL_ERR);
+	}
+
 	/*
 	 * if totempg_initialize doesn't have root priveleges, it cannot
 	 * bind to a specific interface.  This only matters if