Просмотр исходного кода

This patch adds two extra features to the CFG service in corosync, these
are taken (conceptually at least) from cman.

1. corosync_cfg_killnode() this will tell a remote node to exit.
2. corosync_cfg_tryshutdown() this will do a semi-controlled shutdown in
that it will consult any interested attached daemons if they are willing
to let corosync be shut down. If they all agree then the rest of the
cluster will be informed before the node dies.



git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@1683 fd59a12c-fef9-0310-b244-a6a79926bd2f

Christine Caulfield 17 лет назад
Родитель
Сommit
39b505682d
5 измененных файлов с 807 добавлено и 19 удалено
  1. 39 0
      include/corosync/cfg.h
  2. 44 2
      include/corosync/ipc_cfg.h
  3. 114 6
      lib/cfg.c
  4. 520 7
      services/cfg.c
  5. 90 4
      tools/corosync-cfgtool.c

+ 39 - 0
include/corosync/cfg.h

@@ -82,6 +82,22 @@ typedef enum {
 	COROSYNC_CFG_STATETYPE_PRESENCE = 4
 } CorosyncCfgStateTypeT;
 
+/* Shutdown types.
+   REQUEST is the normal shutdown. other daemons will be consulted
+   REGARDLESS will tell other daemons but ignore their opinions
+   IMMEDIATE will shut down straight away (but still tell other nodes)
+*/
+typedef enum {
+	COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST = 0,
+	COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS = 1,
+	COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE = 2,
+} CorosyncCfgShutdownFlagsT;
+
+typedef enum {
+	COROSYNC_CFG_SHUTDOWN_FLAG_NO = 0,
+	COROSYNC_CFG_SHUTDOWN_FLAG_YES = 1,
+} CorosyncCfgShutdownReplyFlagsT;
+
 typedef struct {
 	SaNameT name;
 	CorosyncCfgStateTypeT stateType;
@@ -97,9 +113,15 @@ typedef void (*CorosyncCfgStateTrackCallbackT) (
 	CorosyncCfgStateNotificationBufferT *notificationBuffer,
 	SaAisErrorT error);
 
+typedef void (*CorosyncCfgShutdownCallbackT) (
+	corosync_cfg_handle_t cfg_handle,
+	CorosyncCfgShutdownFlagsT flags);
+
 typedef struct {
 	CorosyncCfgStateTrackCallbackT
 		corosyncCfgStateTrackCallback;
+	CorosyncCfgShutdownCallbackT
+		corosyncCfgShutdownCallback;
 } CorosyncCfgCallbacksT;
 
 /*
@@ -163,6 +185,23 @@ corosync_cfg_administrative_state_set (
 	CorosyncCfgAdministrativeTargetT administrativeTarget,
 	CorosyncCfgAdministrativeStateT administrativeState);
 
+SaAisErrorT
+corosync_cfg_kill_node (
+	corosync_cfg_handle_t cfg_handle,
+	unsigned int nodeid,
+	char *reason);
+
+SaAisErrorT
+corosync_cfg_try_shutdown (
+	corosync_cfg_handle_t cfg_handle,
+	CorosyncCfgShutdownFlagsT flags);
+
+
+SaAisErrorT
+corosync_cfg_replyto_shutdown (
+	corosync_cfg_handle_t cfg_handle,
+	CorosyncCfgShutdownReplyFlagsT flags);
+
 SaAisErrorT
 corosync_cfg_state_track (
         corosync_cfg_handle_t cfg_handle,

+ 44 - 2
include/corosync/ipc_cfg.h

@@ -47,7 +47,10 @@ enum req_lib_cfg_types {
         MESSAGE_REQ_CFG_ADMINISTRATIVESTATESET = 4,
         MESSAGE_REQ_CFG_ADMINISTRATIVESTATEGET = 5,
         MESSAGE_REQ_CFG_SERVICELOAD = 6,
-        MESSAGE_REQ_CFG_SERVICEUNLOAD = 7
+        MESSAGE_REQ_CFG_SERVICEUNLOAD = 7,
+	MESSAGE_REQ_CFG_KILLNODE = 8,
+	MESSAGE_REQ_CFG_TRYSHUTDOWN = 9,
+	MESSAGE_REQ_CFG_REPLYTOSHUTDOWN = 10
 };
 
 enum res_lib_cfg_types {
@@ -58,7 +61,10 @@ enum res_lib_cfg_types {
         MESSAGE_RES_CFG_ADMINISTRATIVESTATESET = 4,
         MESSAGE_RES_CFG_ADMINISTRATIVESTATEGET = 5,
         MESSAGE_RES_CFG_SERVICELOAD = 6,
-        MESSAGE_RES_CFG_SERVICEUNLOAD = 7
+        MESSAGE_RES_CFG_SERVICEUNLOAD = 7,
+	MESSAGE_RES_CFG_KILLNODE = 8,
+	MESSAGE_RES_CFG_TRYSHUTDOWN = 9,
+	MESSAGE_RES_CFG_TESTSHUTDOWN = 10
 };
 
 struct req_lib_cfg_statetrack {
@@ -140,6 +146,35 @@ struct res_lib_cfg_serviceunload {
 	mar_res_header_t header __attribute__((aligned(8)));
 };
 
+struct req_lib_cfg_killnode {
+	mar_req_header_t header __attribute__((aligned(8)));
+	unsigned int nodeid __attribute__((aligned(8)));
+	SaNameT reason __attribute__((aligned(8)));
+};
+
+struct res_lib_cfg_killnode {
+	mar_res_header_t header __attribute__((aligned(8)));
+};
+
+struct req_lib_cfg_tryshutdown {
+	mar_req_header_t header __attribute__((aligned(8)));
+	unsigned int flags;
+};
+
+struct res_lib_cfg_tryshutdown {
+	mar_res_header_t header __attribute__((aligned(8)));
+};
+
+struct req_lib_cfg_replytoshutdown {
+	mar_res_header_t header __attribute__((aligned(8)));
+	unsigned int response;
+};
+
+struct res_lib_cfg_testshutdown {
+	mar_res_header_t header __attribute__((aligned(8)));
+	unsigned int flags;
+};
+
 typedef enum {
 	AIS_AMF_ADMINISTRATIVETARGET_SERVICEUNIT = 0,
 	AIS_AMF_ADMINISTRATIVETARGET_SERVICEGROUP = 1,
@@ -153,4 +188,11 @@ typedef enum {
 	AIS_AMF_ADMINISTRATIVESTATE_STOPPING = 2
 } corosyncAdministrativeState;
 
+typedef enum {
+	CFG_SHUTDOWN_FLAG_REQUEST = 0,
+	CFG_SHUTDOWN_FLAG_REGARDLESS = 1,
+	CFG_SHUTDOWN_FLAG_IMMEDIATE = 2,
+} CorosyncShutdownFlagsT;
+
+
 #endif /* AIS_IPC_CFG_H_DEFINED */

+ 114 - 6
lib/cfg.c

@@ -7,7 +7,7 @@
  * Author: Steven Dake (sdake@redhat.com)
  *
  * This software licensed under BSD license, the text of which follows:
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
@@ -115,7 +115,7 @@ corosync_cfg_initialize (
 	cfg_instance->response_fd = -1;
 
 	cfg_instance->dispatch_fd = -1;
-	
+
 	error = saServiceConnect (&cfg_instance->response_fd,
 		&cfg_instance->dispatch_fd, CFG_SERVICE);
 	if (error != SA_AIS_OK) {
@@ -172,6 +172,7 @@ corosync_cfg_dispatch (
 	int cont = 1; /* always continue do loop except when set to 0 */
 	int dispatch_avail;
 	struct cfg_instance *cfg_instance;
+	struct res_lib_cfg_testshutdown *res_lib_cfg_testshutdown;
 #ifdef COMPILE_OUT
 	struct res_lib_corosync_healthcheckcallback *res_lib_corosync_healthcheckcallback;
 	struct res_lib_corosync_readinessstatesetcallback *res_lib_corosync_readinessstatesetcallback;
@@ -267,9 +268,14 @@ corosync_cfg_dispatch (
 		 * Dispatch incoming response
 		 */
 		switch (dispatch_data.header.id) {
-
+		case MESSAGE_RES_CFG_TESTSHUTDOWN:
+			if (callbacks.corosyncCfgShutdownCallback) {
+				res_lib_cfg_testshutdown = (struct res_lib_cfg_testshutdown *)&dispatch_data;
+				callbacks.corosyncCfgShutdownCallback(cfg_handle, res_lib_cfg_testshutdown->flags);
+			}
+			break;
 		default:
-			error = SA_AIS_ERR_LIBRARY;	
+			error = SA_AIS_ERR_LIBRARY;
 			goto error_nounlock;
 			break;
 		}
@@ -416,10 +422,10 @@ error_free_contents:
 	}
 
 	free (*status);
-	
+
 error_free_interface_names:
 	free (*interface_names);
-	
+
 no_error:
 	saHandleInstancePut (&cfg_hdb, cfg_handle);
 
@@ -674,3 +680,105 @@ corosync_cfg_admin_state_set (
 
         return (error == SA_AIS_OK ? res_lib_cfg_administrativestateset.header.error : error);
 }
+
+SaAisErrorT
+corosync_cfg_kill_node (
+	corosync_cfg_handle_t cfg_handle,
+	unsigned int nodeid,
+	char *reason)
+{
+	struct cfg_instance *cfg_instance;
+	struct req_lib_cfg_killnode req_lib_cfg_killnode;
+	struct res_lib_cfg_killnode res_lib_cfg_killnode;
+	SaAisErrorT error;
+
+	if (strlen(reason) >= SA_MAX_NAME_LENGTH)
+		return SA_AIS_ERR_NAME_TOO_LONG;
+
+	error = saHandleInstanceGet (&cfg_hdb, cfg_handle,
+		(void *)&cfg_instance);
+	if (error != SA_AIS_OK) {
+		return (error);
+	}
+
+	req_lib_cfg_killnode.header.id = MESSAGE_REQ_CFG_KILLNODE;
+	req_lib_cfg_killnode.header.size = sizeof (struct req_lib_cfg_killnode);
+	req_lib_cfg_killnode.nodeid = nodeid;
+	strcpy((char *)req_lib_cfg_killnode.reason.value, reason);
+	req_lib_cfg_killnode.reason.length = strlen(reason)+1;
+
+	error = saSendReceiveReply (cfg_instance->response_fd,
+		&req_lib_cfg_killnode,
+		sizeof (struct req_lib_cfg_killnode),
+		&res_lib_cfg_killnode,
+		sizeof (struct res_lib_cfg_killnode));
+
+	error = res_lib_cfg_killnode.header.error;
+
+	pthread_mutex_unlock (&cfg_instance->response_mutex);
+
+	saHandleInstancePut (&cfg_hdb, cfg_handle);
+
+        return (error == SA_AIS_OK ? res_lib_cfg_killnode.header.error : error);
+}
+
+SaAisErrorT
+corosync_cfg_try_shutdown (
+	corosync_cfg_handle_t cfg_handle,
+	CorosyncCfgShutdownFlagsT flags)
+{
+	struct cfg_instance *cfg_instance;
+	struct req_lib_cfg_tryshutdown req_lib_cfg_tryshutdown;
+	struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
+	SaAisErrorT error;
+
+	error = saHandleInstanceGet (&cfg_hdb, cfg_handle,
+		(void *)&cfg_instance);
+	if (error != SA_AIS_OK) {
+		return (error);
+	}
+
+	req_lib_cfg_tryshutdown.header.id = MESSAGE_REQ_CFG_TRYSHUTDOWN;
+	req_lib_cfg_tryshutdown.header.size = sizeof (struct req_lib_cfg_tryshutdown);
+	req_lib_cfg_tryshutdown.flags = flags;
+
+	error = saSendReceiveReply (cfg_instance->response_fd,
+		&req_lib_cfg_tryshutdown,
+		sizeof (struct req_lib_cfg_tryshutdown),
+		&res_lib_cfg_tryshutdown,
+		sizeof (struct res_lib_cfg_tryshutdown));
+
+	pthread_mutex_unlock (&cfg_instance->response_mutex);
+
+	saHandleInstancePut (&cfg_hdb, cfg_handle);
+
+        return (error == SA_AIS_OK ? res_lib_cfg_tryshutdown.header.error : error);
+}
+
+SaAisErrorT
+corosync_cfg_replyto_shutdown (
+	corosync_cfg_handle_t cfg_handle,
+	CorosyncCfgShutdownReplyFlagsT response)
+{
+	struct cfg_instance *cfg_instance;
+	struct req_lib_cfg_replytoshutdown req_lib_cfg_replytoshutdown;
+	struct iovec iov;
+	SaAisErrorT error;
+
+	error = saHandleInstanceGet (&cfg_hdb, cfg_handle,
+		(void *)&cfg_instance);
+	if (error != SA_AIS_OK) {
+		return (error);
+	}
+
+	req_lib_cfg_replytoshutdown.header.id = MESSAGE_REQ_CFG_REPLYTOSHUTDOWN;
+	req_lib_cfg_replytoshutdown.header.size = sizeof (struct req_lib_cfg_replytoshutdown);
+	req_lib_cfg_replytoshutdown.response = response;
+
+	iov.iov_base = &req_lib_cfg_replytoshutdown;
+	iov.iov_len = sizeof (struct req_lib_cfg_replytoshutdown);
+	error = saSendMsgRetry (cfg_instance->response_fd,
+				&iov, 1);
+
+        return (error);
+}

+ 520 - 7
services/cfg.c

@@ -7,7 +7,7 @@
  * Author: Steven Dake (sdake@redhat.com)
  *
  * This software licensed under BSD license, the text of which follows:
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
@@ -60,7 +60,31 @@
 LOGSYS_DECLARE_SUBSYS ("CFG", LOG_INFO);
 
 enum cfg_message_req_types {
-        MESSAGE_REQ_EXEC_CFG_RINGREENABLE = 0
+        MESSAGE_REQ_EXEC_CFG_RINGREENABLE = 0,
+	MESSAGE_REQ_EXEC_CFG_KILLNODE = 1,
+	MESSAGE_REQ_EXEC_CFG_SHUTDOWN = 2
+};
+
+#define DEFAULT_SHUTDOWN_TIMEOUT 5
+
+static struct list_head trackers_list;
+
+/*
+ * Variables controlling a requested shutdown
+ */
+static corosync_timer_handle_t shutdown_timer;
+static struct cfg_info *shutdown_con;
+static uint32_t shutdown_flags;
+static int shutdown_yes;
+static int shutdown_no;
+static int shutdown_expected;
+
+struct cfg_info
+{
+	struct list_head list;
+	void *conn;
+	void *tracker_conn;
+	enum {SHUTDOWN_REPLY_UNKNOWN, SHUTDOWN_REPLY_YES, SHUTDOWN_REPLY_NO} shutdown_reply;
 };
 
 static void cfg_confchg_fn (
@@ -82,6 +106,16 @@ static void message_handler_req_exec_cfg_ringreenable (
         void *message,
         unsigned int nodeid);
 
+static void message_handler_req_exec_cfg_killnode (
+        void *message,
+        unsigned int nodeid);
+
+static void message_handler_req_exec_cfg_shutdown (
+        void *message,
+        unsigned int nodeid);
+
+static void exec_cfg_killnode_endian_convert (void *msg);
+
 static void message_handler_req_lib_cfg_ringstatusget (
 	void *conn,
 	void *msg);
@@ -114,6 +148,18 @@ static void message_handler_req_lib_cfg_serviceunload (
 	void *conn,
 	void *msg);
 
+static void message_handler_req_lib_cfg_killnode (
+	void *conn,
+	void *msg);
+
+static void message_handler_req_lib_cfg_tryshutdown (
+	void *conn,
+	void *msg);
+
+static void message_handler_req_lib_cfg_replytoshutdown (
+	void *conn,
+	void *msg);
+
 /*
  * Service Handler Definition
  */
@@ -166,13 +212,38 @@ static struct corosync_lib_handler cfg_lib_engine[] =
 		.response_size		= sizeof (struct res_lib_cfg_serviceunload),
 		.response_id		= MESSAGE_RES_CFG_SERVICEUNLOAD,
 		.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
+	},
+	{ /* 8 */
+		.lib_handler_fn		= message_handler_req_lib_cfg_killnode,
+		.response_size		= sizeof (struct res_lib_cfg_killnode),
+		.response_id		= MESSAGE_RES_CFG_KILLNODE,
+		.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
+	},
+	{ /* 9 */
+		.lib_handler_fn		= message_handler_req_lib_cfg_tryshutdown,
+		.response_size		= sizeof (struct res_lib_cfg_tryshutdown),
+		.response_id		= MESSAGE_RES_CFG_TRYSHUTDOWN,
+		.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
+	},
+	{ /* 10 */
+		.lib_handler_fn		= message_handler_req_lib_cfg_replytoshutdown,
+		.response_size		= 0,
+		.response_id		= 0,
+		.flow_control		= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
 	}
 };
 
 static struct corosync_exec_handler cfg_exec_engine[] =
 {
-	{
-		message_handler_req_exec_cfg_ringreenable
+	{ /* 0 */
+		.exec_handler_fn = message_handler_req_exec_cfg_ringreenable,
+	},
+	{ /* 1 */
+		.exec_handler_fn = message_handler_req_exec_cfg_killnode,
+		.exec_endian_convert_fn	= exec_cfg_killnode_endian_convert
+	},
+	{ /* 2 */
+		.exec_handler_fn = message_handler_req_exec_cfg_shutdown,
 	}
 };
 
@@ -182,8 +253,9 @@ static struct corosync_exec_handler cfg_exec_engine[] =
 struct corosync_service_engine cfg_service_engine = {
 	.name					= "corosync configuration service",
 	.id					= CFG_SERVICE,
-	.private_data_size			= 0,
-	.flow_control				= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED, 
+	.private_data_size			= sizeof(struct cfg_info),
+	.flow_control				= COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED,
+	.allow_inquorate			= COROSYNC_LIB_ALLOW_INQUORATE,
 	.lib_init_fn				= cfg_lib_init_fn,
 	.lib_exit_fn				= cfg_lib_exit_fn,
 	.lib_engine				= cfg_lib_engine,
@@ -238,12 +310,24 @@ struct req_exec_cfg_ringreenable {
         mar_message_source_t source __attribute__((aligned(8)));
 };
 
+struct req_exec_cfg_killnode {
+	mar_req_header_t header __attribute__((aligned(8)));
+        mar_uint32_t nodeid __attribute__((aligned(8)));
+	mar_name_t reason __attribute__((aligned(8)));
+};
+
+struct req_exec_cfg_shutdown {
+	mar_req_header_t header __attribute__((aligned(8)));
+};
+
 /* IMPL */
 
 static int cfg_exec_init_fn (
 	struct corosync_api_v1 *corosync_api_v1)
 {
 	api = corosync_api_v1;
+
+	list_init(&trackers_list);
 	return (0);
 }
 
@@ -256,15 +340,192 @@ static void cfg_confchg_fn (
 {
 }
 
+/*
+ * Tell other nodes we are shutting down
+ */
+static int send_shutdown()
+{
+	struct req_exec_cfg_shutdown req_exec_cfg_shutdown;
+	struct iovec iovec;
+
+	ENTER();
+	req_exec_cfg_shutdown.header.size =
+		sizeof (struct req_exec_cfg_shutdown);
+	req_exec_cfg_shutdown.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
+		MESSAGE_REQ_EXEC_CFG_SHUTDOWN);
+
+	iovec.iov_base = (char *)&req_exec_cfg_shutdown;
+	iovec.iov_len = sizeof (struct req_exec_cfg_shutdown);
+
+	assert (api->totem_mcast (&iovec, 1, TOTEM_SAFE) == 0);
+
+	LEAVE();
+	return 0;
+}
+
+static void send_test_shutdown(void * conn, int status)
+{
+	struct res_lib_cfg_testshutdown res_lib_cfg_testshutdown;
+	struct list_head *iter;
+
+	ENTER();
+	res_lib_cfg_testshutdown.header.size = sizeof(struct res_lib_cfg_testshutdown);
+	res_lib_cfg_testshutdown.header.id = MESSAGE_RES_CFG_TESTSHUTDOWN;
+	res_lib_cfg_testshutdown.header.error = status;
+	res_lib_cfg_testshutdown.flags = shutdown_flags;
+
+	if (conn) {
+		TRACE1("sending testshutdown to %p", conn);
+		api->ipc_conn_send_response(conn, &res_lib_cfg_testshutdown,
+					    sizeof(res_lib_cfg_testshutdown));
+	} else {
+		for (iter = trackers_list.next; iter != &trackers_list; iter = iter->next) {
+			struct cfg_info *ci = list_entry(iter, struct cfg_info, list);
+
+			TRACE1("sending testshutdown to %p", ci->tracker_conn);
+			api->ipc_conn_send_response(ci->tracker_conn, &res_lib_cfg_testshutdown,
+						    sizeof(res_lib_cfg_testshutdown));
+		}
+	}
+	LEAVE();
+}
+
+static void check_shutdown_status()
+{
+	ENTER();
+
+	/*
+	 * Shutdown client might have gone away
+	 */
+	if (!shutdown_con) {
+		LEAVE();
+		return;
+	}
+
+	/*
+	 * All replies safely gathered in ?
+	 */
+	if (shutdown_yes + shutdown_no >= shutdown_expected) {
+		struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
+
+		api->timer_delete(shutdown_timer);
+
+		if (shutdown_yes >= shutdown_expected ||
+		    shutdown_flags == CFG_SHUTDOWN_FLAG_REGARDLESS) {
+			TRACE1("shutdown confirmed");
+
+			/*
+			 * Tell other nodes we are going down
+			 */
+			send_shutdown();
+
+			res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
+			res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
+			res_lib_cfg_tryshutdown.header.error = SA_AIS_OK;
+
+			/*
+			 * Tell originator that shutdown was confirmed
+			 */
+			api->ipc_conn_send_response(shutdown_con->conn, &res_lib_cfg_tryshutdown,
+						    sizeof(res_lib_cfg_tryshutdown));
+			shutdown_con = NULL;
+		}
+		else {
+
+			TRACE1("shutdown cancelled");
+			res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
+			res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
+			res_lib_cfg_tryshutdown.header.error = SA_AIS_ERR_BUSY;
+
+			/*
+			 * Tell originator that shutdown was cancelled
+			 */
+			api->ipc_conn_send_response(shutdown_con->conn, &res_lib_cfg_tryshutdown,
+						    sizeof(res_lib_cfg_tryshutdown));
+			shutdown_con = NULL;
+		}
+
+		log_printf(LOG_DEBUG, "shutdown decision is: (yes count: %d, no count: %d) flags=%x\n", shutdown_yes, shutdown_no, shutdown_flags);
+	}
+	LEAVE();
+}
+
+
+/*
+ * Not all nodes responded to the shutdown (in time)
+ */
+static void shutdown_timer_fn(void *arg)
+{
+	ENTER();
+
+	/*
+	 * Mark undecideds as "NO"
+	 */
+	shutdown_no = shutdown_expected;
+	check_shutdown_status();
+
+	send_test_shutdown(NULL, SA_AIS_ERR_TIMEOUT);
+	LEAVE();
+}
+
+static void remove_ci_from_shutdown(struct cfg_info *ci)
+{
+	ENTER();
+
+	/*
+	 * If the controlling shutdown process has quit, then cancel the
+	 * shutdown session
+	 */
+	if (ci == shutdown_con) {
+		shutdown_con = NULL;
+		api->timer_delete(shutdown_timer);
+	}
+
+	if (!list_empty(&ci->list)) {
+		list_del(&ci->list);
+		list_init(&ci->list);
+
+		/*
+		 * Remove our option
+		 */
+		if (shutdown_con) {
+			if (ci->shutdown_reply == SHUTDOWN_REPLY_YES)
+				shutdown_yes--;
+			if (ci->shutdown_reply == SHUTDOWN_REPLY_NO)
+				shutdown_no--;
+		}
+
+		/*
+		 * If we are leaving, then that's an implicit YES to shutdown
+		 */
+		ci->shutdown_reply = SHUTDOWN_REPLY_YES;
+		shutdown_yes++;
+
+		check_shutdown_status();
+	}
+	LEAVE();
+}
+
+
 int cfg_lib_exit_fn (void *conn)
 {
+	struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
+
+	ENTER();
+	if (!list_empty(&ci->list)) {
+		list_del(&ci->list);
+		remove_ci_from_shutdown(ci);
+	}
+	LEAVE();
 	return (0);
 }
 
 static int cfg_lib_init_fn (void *conn)
 {
-	
+	struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
+
 	ENTER();
+	list_init(&ci->list);
 	LEAVE();
 
         return (0);
@@ -295,6 +556,52 @@ static void message_handler_req_exec_cfg_ringreenable (
 	LEAVE();
 }
 
+static void exec_cfg_killnode_endian_convert (void *msg)
+{
+	struct req_exec_cfg_killnode *req_exec_cfg_killnode =
+		(struct req_exec_cfg_killnode *)msg;
+	ENTER();
+
+	swab_mar_name_t(&req_exec_cfg_killnode->reason);
+	LEAVE();
+}
+
+
+static void message_handler_req_exec_cfg_killnode (
+        void *message,
+        unsigned int nodeid)
+{
+	struct req_exec_cfg_killnode *req_exec_cfg_killnode =
+		(struct req_exec_cfg_killnode *)message;
+	SaNameT reason;
+
+	ENTER();
+	log_printf(LOG_DEBUG, "request to kill node %d(us=%d): %s\n",  req_exec_cfg_killnode->nodeid, api->totem_nodeid_get(), reason.value);
+        if (req_exec_cfg_killnode->nodeid == api->totem_nodeid_get()) {
+		marshall_from_mar_name_t(&reason, &req_exec_cfg_killnode->reason);
+		log_printf(LOG_NOTICE, "Killed by node %d: %s\n",
+			   nodeid, reason.value);
+		corosync_fatal_error(COROSYNC_FATAL_ERROR_EXIT);
+	}
+	LEAVE();
+}
+
+/*
+ * Self shutdown
+ */
+static void message_handler_req_exec_cfg_shutdown (
+        void *message,
+        unsigned int nodeid)
+{
+	ENTER();
+
+	log_printf(LOG_NOTICE, "Node %d was shut down by sysadmin\n", nodeid);
+	if (nodeid == api->totem_nodeid_get()) {
+		corosync_fatal_error(COROSYNC_FATAL_ERROR_EXIT);
+	}
+	LEAVE();
+}
+
 
 /*
  * Library Interface Implementation
@@ -365,9 +672,36 @@ static void message_handler_req_lib_cfg_statetrack (
 	void *conn,
 	void *msg)
 {
+	struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
 //	struct req_lib_cfg_statetrack *req_lib_cfg_statetrack = (struct req_lib_cfg_statetrack *)message;
+	struct res_lib_cfg_statetrack res_lib_cfg_statetrack;
 
 	ENTER();
+
+	/*
+	 * We only do shutdown tracking at the moment
+	 */
+	if (list_empty(&ci->list)) {
+		list_add(&ci->list, &trackers_list);
+		ci->tracker_conn = api->ipc_conn_partner_get (conn);
+
+		if (shutdown_con) {
+			/*
+			 * Shutdown already in progress, ask the newcomer's opinion
+			 */
+			ci->shutdown_reply = SHUTDOWN_REPLY_UNKNOWN;
+			shutdown_expected++;
+			send_test_shutdown(ci->tracker_conn, SA_AIS_OK);
+		}
+	}
+
+	res_lib_cfg_statetrack.header.size = sizeof(struct res_lib_cfg_statetrack);
+	res_lib_cfg_statetrack.header.id = MESSAGE_RES_CFG_STATETRACKSTART;
+	res_lib_cfg_statetrack.header.error = SA_AIS_OK;
+
+	api->ipc_conn_send_response(conn, &res_lib_cfg_statetrack,
+				    sizeof(res_lib_cfg_statetrack));
+
 	LEAVE();
 }
 
@@ -375,9 +709,11 @@ static void message_handler_req_lib_cfg_statetrackstop (
 	void *conn,
 	void *msg)
 {
+	struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
 //	struct req_lib_cfg_statetrackstop *req_lib_cfg_statetrackstop = (struct req_lib_cfg_statetrackstop *)message;
 
 	ENTER();
+	remove_ci_from_shutdown(ci);
 	LEAVE();
 }
 
@@ -386,6 +722,7 @@ static void message_handler_req_lib_cfg_administrativestateset (
 	void *msg)
 {
 //	struct req_lib_cfg_administrativestateset *req_lib_cfg_administrativestateset = (struct req_lib_cfg_administrativestateset *)message;
+
 	ENTER();
 	LEAVE();
 }
@@ -444,3 +781,179 @@ static void message_handler_req_lib_cfg_serviceunload (
 		sizeof (struct res_lib_cfg_serviceunload));
 	LEAVE();
 }
+
+
+static void message_handler_req_lib_cfg_killnode (
+	void *conn,
+	void *msg)
+{
+	struct req_lib_cfg_killnode *req_lib_cfg_killnode = (struct req_lib_cfg_killnode *)msg;
+	struct res_lib_cfg_killnode res_lib_cfg_killnode;
+	struct req_exec_cfg_killnode req_exec_cfg_killnode;
+	struct iovec iovec;
+	int res;
+
+	ENTER();
+	req_exec_cfg_killnode.header.size =
+		sizeof (struct req_exec_cfg_killnode);
+	req_exec_cfg_killnode.header.id = SERVICE_ID_MAKE (CFG_SERVICE,
+		MESSAGE_REQ_EXEC_CFG_KILLNODE);
+	req_exec_cfg_killnode.nodeid = req_lib_cfg_killnode->nodeid;
+	marshall_to_mar_name_t(&req_exec_cfg_killnode.reason, &req_lib_cfg_killnode->reason);
+
+	iovec.iov_base = (char *)&req_exec_cfg_killnode;
+	iovec.iov_len = sizeof (struct req_exec_cfg_killnode);
+
+	res = api->totem_mcast (&iovec, 1, TOTEM_SAFE);
+
+	res_lib_cfg_killnode.header.size = sizeof(struct res_lib_cfg_killnode);
+	res_lib_cfg_killnode.header.id = MESSAGE_RES_CFG_KILLNODE;
+	res_lib_cfg_killnode.header.error = SA_AIS_OK;
+
+	api->ipc_conn_send_response(conn, &res_lib_cfg_killnode,
+				    sizeof(res_lib_cfg_killnode));
+
+	LEAVE();
+}
+
+
+static void message_handler_req_lib_cfg_tryshutdown (
+	void *conn,
+	void *msg)
+{
+	struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
+	struct req_lib_cfg_tryshutdown *req_lib_cfg_tryshutdown = (struct req_lib_cfg_tryshutdown *)msg;
+	struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
+	struct list_head *iter;
+
+	ENTER();
+
+	if (req_lib_cfg_tryshutdown->flags == CFG_SHUTDOWN_FLAG_IMMEDIATE) {
+
+		/*
+		 * Tell other nodes
+		 */
+		send_shutdown();
+
+		res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
+		res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
+		res_lib_cfg_tryshutdown.header.error = SA_AIS_OK;
+		api->ipc_conn_send_response(conn, &res_lib_cfg_tryshutdown,
+					    sizeof(res_lib_cfg_tryshutdown));
+
+		LEAVE();
+		return;
+	}
+
+	/*
+	 * Shutdown in progress, return an error
+	 */
+	if (shutdown_con) {
+		struct res_lib_cfg_tryshutdown res_lib_cfg_tryshutdown;
+
+		res_lib_cfg_tryshutdown.header.size = sizeof(struct res_lib_cfg_tryshutdown);
+		res_lib_cfg_tryshutdown.header.id = MESSAGE_RES_CFG_TRYSHUTDOWN;
+		res_lib_cfg_tryshutdown.header.error = SA_AIS_ERR_EXIST;
+
+		api->ipc_conn_send_response(conn, &res_lib_cfg_tryshutdown,
+					    sizeof(res_lib_cfg_tryshutdown));
+
+
+		LEAVE();
+
+		return;
+	}
+
+	ci->conn = conn;
+	shutdown_con = (struct cfg_info *)api->ipc_private_data_get (conn);
+	shutdown_flags = req_lib_cfg_tryshutdown->flags;
+	shutdown_yes = 0;
+	shutdown_no = 0;
+
+	/*
+	 * Count the number of listeners
+	 */
+	shutdown_expected = 0;
+
+	for (iter = trackers_list.next; iter != &trackers_list; iter = iter->next) {
+		struct cfg_info *ci = list_entry(iter, struct cfg_info, list);
+		ci->shutdown_reply = SHUTDOWN_REPLY_UNKNOWN;
+		shutdown_expected++;
+	}
+
+	/*
+	 * If no-one is listening for events then we can just go down now
+	 */
+	if (shutdown_expected == 0) {
+		send_shutdown();
+		LEAVE();
+		return;
+	}
+	else {
+		unsigned int cfg_handle;
+		unsigned int find_handle;
+		char *timeout_str;
+		unsigned int shutdown_timeout = DEFAULT_SHUTDOWN_TIMEOUT;
+
+		/*
+		 * Look for a shutdown timeout in objdb
+		 */
+		api->object_find_create(OBJECT_PARENT_HANDLE, "cfg", strlen("cfg"), &find_handle);
+		api->object_find_next(find_handle, &cfg_handle);
+		api->object_find_destroy(find_handle);
+
+		if (cfg_handle) {
+			if ( !api->object_key_get(cfg_handle,
+						  "shutdown_timeout",
+						  strlen("shutdown_timeout"),
+						  (void *)&timeout_str,
+						  NULL)) {
+				shutdown_timeout = atoi(timeout_str);
+			}
+		}
+
+		/*
+		 * Start the timer. If we don't get a full set of replies before this goes
+		 * off we'll cancel the shutdown
+		 */
+		api->timer_add_duration((unsigned long long)shutdown_timeout*1000000000, NULL,
+					shutdown_timer_fn, &shutdown_timer);
+
+		/*
+		 * Tell the users we would like to shut down
+		 */
+		send_test_shutdown(NULL, SA_AIS_OK);
+	}
+
+	/*
+	 * We don't sent a reply to the caller here.
+	 * We send it when we know if we can shut down or not
+	 */
+
+	LEAVE();
+}
+
+static void message_handler_req_lib_cfg_replytoshutdown (
+	void *conn,
+	void *msg)
+{
+	struct cfg_info *ci = (struct cfg_info *)api->ipc_private_data_get (conn);
+	struct req_lib_cfg_replytoshutdown *req_lib_cfg_replytoshutdown = (struct req_lib_cfg_replytoshutdown *)msg;
+
+	ENTER();
+	if (!shutdown_con) {
+		LEAVE();
+		return;
+	}
+
+	if (req_lib_cfg_replytoshutdown->response) {
+		shutdown_yes++;
+		ci->shutdown_reply = SHUTDOWN_REPLY_YES;
+	}
+	else {
+		shutdown_no++;
+		ci->shutdown_reply = SHUTDOWN_REPLY_NO;
+	}
+	check_shutdown_status();
+	LEAVE();
+}

+ 90 - 4
tools/corosync-cfgtool.c

@@ -38,6 +38,7 @@
 #include <signal.h>
 #include <unistd.h>
 #include <string.h>
+#include <pthread.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/select.h>
@@ -134,9 +135,83 @@ void service_unload_do (char *service, unsigned int version)
 	corosync_cfg_finalize (handle);
 }
 
+void shutdown_callback (corosync_cfg_handle_t cfg_handle, CorosyncCfgShutdownFlagsT flags)
+{
+	printf("shutdown callback called, flags = %d\n",flags);
+
+	corosync_cfg_replyto_shutdown (cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_YES);
+}
+
+void *shutdown_dispatch_thread(void *arg)
+{
+	int res = SA_AIS_OK;
+	corosync_cfg_handle_t *handle = arg;
+
+	while (res == SA_AIS_OK) {
+		res = corosync_cfg_dispatch(*handle, SA_DISPATCH_ALL);
+		if (res != SA_AIS_OK)
+			printf ("Could not dispatch cfg messages: %d\n", res);
+	}
+	return NULL;
+}
+
+void shutdown_do()
+{
+	SaAisErrorT result;
+	corosync_cfg_handle_t handle;
+	CorosyncCfgCallbacksT callbacks;
+	CorosyncCfgStateNotificationT notificationBuffer;
+	pthread_t dispatch_thread;
+
+	printf ("Shutting down corosync\n");
+	callbacks.corosyncCfgShutdownCallback = shutdown_callback;
+
+	result = corosync_cfg_initialize (&handle, &callbacks);
+	if (result != SA_AIS_OK) {
+		printf ("Could not initialize corosync configuration API error %d\n", result);
+		exit (1);
+	}
+
+	pthread_create(&dispatch_thread, NULL, shutdown_dispatch_thread, &handle);
+
+	result = corosync_cfg_state_track (handle,
+					   0,
+					   &notificationBuffer);
+	if (result != SA_AIS_OK) {
+		printf ("Could not start corosync cfg tracking error %d\n", result);
+		exit (1);
+	}
+
+	result = corosync_cfg_try_shutdown (handle, COROSYNC_CFG_SHUTDOWN_FLAG_REQUEST);
+	if (result != SA_AIS_OK) {
+		printf ("Could not shutdown (error = %d)\n", result);
+	}
+
+	corosync_cfg_finalize (handle);
+}
+
+void killnode_do(unsigned int nodeid)
+{
+	SaAisErrorT result;
+	corosync_cfg_handle_t handle;
+
+	printf ("Killing node %d\n", nodeid);
+	result = corosync_cfg_initialize (&handle, NULL);
+	if (result != SA_AIS_OK) {
+		printf ("Could not initialize corosync configuration API error %d\n", result);
+		exit (1);
+	}
+	result = corosync_cfg_kill_node (handle, nodeid, "Killed by corosync-cfgtool");
+	if (result != SA_AIS_OK) {
+		printf ("Could not kill node (error = %d)\n", result);
+	}
+	corosync_cfg_finalize (handle);
+}
+
+
 void usage_do (void)
 {
-	printf ("corosync-cfgtool [-s] [-r] [-l] [-u] [service_name] [-v] [version]\n\n");
+	printf ("corosync-cfgtool [-s] [-r] [-l] [-u] [service_name] [-v] [version] [-k] [nodeid]\n\n");
 	printf ("A tool for displaying and configuring active parameters within corosync.\n");
 	printf ("options:\n");
 	printf ("\t-s\tDisplays the status of the current rings on this node.\n");
@@ -144,12 +219,15 @@ void usage_do (void)
 	printf ("\t\tre-enable redundant ring operation.\n");
 	printf ("\t-l\tLoad a service identified by name.\n");
 	printf ("\t-u\tUnload a service identified by name.\n");
+	printf ("\t-k\tKill a node identified by node id.\n");
+	printf ("\t-h\tShutdown corosync cleanly on this node.\n");
 }
 
 int main (int argc, char *argv[]) {
-	const char *options = "srl:u:v:";
+	const char *options = "srl:u:v:k:h";
 	int opt;
 	int service_load = 0;
+	unsigned int nodeid;
 	int service_unload = 0;
 	char *service = NULL;
 	unsigned int version = 0;
@@ -173,17 +251,25 @@ int main (int argc, char *argv[]) {
 			service_unload = 1;
 			service = strdup (optarg);
 			break;
+		case 'k':
+			nodeid = atoi (optarg);
+			killnode_do(nodeid);
+			break;
+		case 'h':
+			shutdown_do();
+			break;
 		case 'v':
 			version = atoi (optarg);
+			break;
 		}
 	}
 
 	if (service_load) {
 		service_load_do (service, version);
-	} else 
+	} else
 	if (service_unload) {
 		service_unload_do (service, version);
 	}
-		
+
 	return (0);
 }