Просмотр исходного кода

cfg: New API to get extended node/link information

Currently we horribly over-use totempg_ifaces_get() to
retrieve information about knet interfaces. This is an attempt to
improve on that.

All transports are supported (so not only Knet but also UDP(U)).

This patch builds best against the "onwire-upgrade" branch of knet
as that's what sparked my interest in getting more information out.

Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
Christine Caulfield 5 лет назад
Родитель
Сommit
9e7f62d27d

+ 2 - 0
configure.ac

@@ -194,6 +194,8 @@ AC_CHECK_LIB([knet],[knet_handle_enable_access_lists],
 	     [AC_DEFINE_UNQUOTED([HAVE_KNET_ACCESS_LIST], 1, [have knet access list])])
 AC_CHECK_LIB([knet],[knet_handle_crypto_set_config],
 	     [AC_DEFINE_UNQUOTED([HAVE_KNET_CRYPTO_RECONF], 1, [have knet crypto reconfig support])])
+AC_CHECK_LIB([knet],[knet_handle_get_onwire_ver],
+	     [AC_DEFINE_UNQUOTED([HAVE_KNET_ONWIRE_VER], 1, [have knet onwire versioning])])
 LIBS="$OLDLIBS"
 
 # Checks for library functions.

+ 66 - 0
exec/cfg.c

@@ -65,6 +65,7 @@
 #include <corosync/corodefs.h>
 
 #include "totemconfig.h"
+#include "totemknet.h"
 #include "service.h"
 #include "main.h"
 
@@ -141,6 +142,10 @@ static void message_handler_req_lib_cfg_ringstatusget (
 	void *conn,
 	const void *msg);
 
+static void message_handler_req_lib_cfg_nodestatusget (
+	void *conn,
+	const void *msg);
+
 static void message_handler_req_lib_cfg_ringreenable (
 	void *conn,
 	const void *msg);
@@ -213,6 +218,10 @@ static struct corosync_lib_handler cfg_lib_engine[] =
 	{ /* 8 */
 		.lib_handler_fn		= message_handler_req_lib_cfg_reopen_log_files,
 		.flow_control		= CS_LIB_FLOW_CONTROL_NOT_REQUIRED
+	},
+	{ /* 9 */
+		.lib_handler_fn		= message_handler_req_lib_cfg_nodestatusget,
+		.flow_control		= CS_LIB_FLOW_CONTROL_NOT_REQUIRED
 	}
 };
 
@@ -957,6 +966,63 @@ send_response:
 	LEAVE();
 }
 
+
+/*
+ * IPC handler for MESSAGE_REQ_CFG_NODESTATUSGET: look up the status of the
+ * node named in the request via totempg_nodestatus_get() and send it back
+ * in a res_lib_cfg_nodestatusget reply.
+ *
+ * The reply header (id, size, error) is always filled in and the payload is
+ * zeroed up front, so an error reply never carries uninitialised stack data.
+ * Requests whose structure version is not TOTEM_NODE_STATUS_STRUCTURE_VERSION
+ * are answered with CS_ERR_NOT_SUPPORTED.
+ */
+static void message_handler_req_lib_cfg_nodestatusget (
+	void *conn,
+	const void *msg)
+{
+	struct res_lib_cfg_nodestatusget res_lib_cfg_nodestatusget;
+	const struct req_lib_cfg_nodestatusget *req_lib_cfg_nodestatusget = msg;
+	struct totem_node_status node_status;
+	cs_error_t res = CS_OK;
+	int i;
+
+	ENTER();
+
+	/* The whole reply is sent to the client, so never leave any of it unset */
+	memset(&res_lib_cfg_nodestatusget, 0, sizeof(res_lib_cfg_nodestatusget));
+	res_lib_cfg_nodestatusget.header.id = MESSAGE_RES_CFG_NODESTATUSGET;
+	res_lib_cfg_nodestatusget.header.size = sizeof (struct res_lib_cfg_nodestatusget);
+
+	/* Currently only one structure version supported */
+	if (req_lib_cfg_nodestatusget->version == TOTEM_NODE_STATUS_STRUCTURE_VERSION) {
+		memset(&node_status, 0, sizeof(node_status));
+		/*
+		 * NOTE(review): totempg_nodestatus_get() returns a plain int and the
+		 * knet transport can return -1, which is passed to the client as-is.
+		 */
+		res = totempg_nodestatus_get(req_lib_cfg_nodestatusget->nodeid,
+				       &node_status);
+		if (res == CS_OK) {
+			res_lib_cfg_nodestatusget.node_status.nodeid = req_lib_cfg_nodestatusget->nodeid;
+			res_lib_cfg_nodestatusget.node_status.version = node_status.version;
+			res_lib_cfg_nodestatusget.node_status.reachable = node_status.reachable;
+			res_lib_cfg_nodestatusget.node_status.remote = node_status.remote;
+			res_lib_cfg_nodestatusget.node_status.external = node_status.external;
+			res_lib_cfg_nodestatusget.node_status.onwire_min = node_status.onwire_min;
+			res_lib_cfg_nodestatusget.node_status.onwire_max = node_status.onwire_max;
+			res_lib_cfg_nodestatusget.node_status.onwire_ver = node_status.onwire_ver;
+
+			/* Source array has KNET_MAX_LINK entries, destination CFG_MAX_LINKS */
+			for (i = 0; i < KNET_MAX_LINK && i < CFG_MAX_LINKS; i++) {
+				res_lib_cfg_nodestatusget.node_status.link_status[i].enabled = node_status.link_status[i].enabled;
+				res_lib_cfg_nodestatusget.node_status.link_status[i].connected = node_status.link_status[i].connected;
+				res_lib_cfg_nodestatusget.node_status.link_status[i].dynconnected = node_status.link_status[i].dynconnected;
+				res_lib_cfg_nodestatusget.node_status.link_status[i].mtu = node_status.link_status[i].mtu;
+				memcpy(res_lib_cfg_nodestatusget.node_status.link_status[i].src_ipaddr,
+				       node_status.link_status[i].src_ipaddr, CFG_MAX_HOST_LEN);
+				memcpy(res_lib_cfg_nodestatusget.node_status.link_status[i].dst_ipaddr,
+				       node_status.link_status[i].dst_ipaddr, CFG_MAX_HOST_LEN);
+			}
+		}
+	} else {
+		res = CS_ERR_NOT_SUPPORTED;
+	}
+
+	res_lib_cfg_nodestatusget.header.error = res;
+	api->ipc_response_send (
+		conn,
+		&res_lib_cfg_nodestatusget,
+		sizeof (struct res_lib_cfg_nodestatusget));
+
+	LEAVE();
+}
+
+
 static void message_handler_req_lib_cfg_ringreenable (
 	void *conn,
 	const void *msg)

+ 77 - 0
exec/totemknet.c

@@ -488,6 +488,83 @@ static int node_compare(const void *aptr, const void *bptr)
 #define OWN_INDEX_NONE -1
 #endif
 
+/*
+ * Fill *node_status with what knet knows about 'nodeid': host flags
+ * (reachable/remote/external), on-wire protocol versions (only when built
+ * with HAVE_KNET_ONWIRE_VER) and the status of every configured link.
+ *
+ * Link entries are stored at their knet link id, so link_status[n] always
+ * describes link n even when lower-numbered links are missing or skipped.
+ *
+ * Returns CS_ERR_NOT_EXIST when there is no knet handle,
+ * CS_ERR_INVALID_PARAM when node_status is NULL, -1 when the host status,
+ * on-wire version or link list query fails, and otherwise the result of the
+ * last knet_link_get_status() call (0 when no link was queried).
+ */
+int totemknet_nodestatus_get (
+	void *knet_context,
+	unsigned int nodeid,
+	struct totem_node_status *node_status)
+{
+	size_t i;
+	int res = 0;
+	struct knet_link_status link_status;
+	struct totemknet_instance *instance = (struct totemknet_instance *)knet_context;
+	struct knet_host_status knet_host_status;
+	uint8_t link_list[KNET_MAX_LINK];
+	size_t num_links;
+
+	if (!instance->knet_handle) {
+		return CS_ERR_NOT_EXIST; /* Not using knet */
+	}
+
+	if (!node_status) {
+		return CS_ERR_INVALID_PARAM;
+	}
+
+	res = knet_host_get_status(instance->knet_handle,
+				   nodeid,
+				   &knet_host_status);
+	if (res) {
+		knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_handle_get_host_status(%d) failed: %d", nodeid, res);
+		return (-1);
+	}
+	node_status->nodeid = nodeid;
+	node_status->reachable = knet_host_status.reachable;
+	node_status->remote = knet_host_status.remote;
+	node_status->external = knet_host_status.external;
+
+#ifdef HAVE_KNET_ONWIRE_VER
+	res = knet_handle_get_onwire_ver(instance->knet_handle,
+					 nodeid,
+					 &node_status->onwire_min,
+					 &node_status->onwire_max,
+					 &node_status->onwire_ver);
+	if (res) {
+		knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_handle_get_onwire_ver(%d) failed: %d", nodeid, res);
+		return (-1);
+	}
+#endif
+	/* Get link info */
+	res = knet_link_get_link_list(instance->knet_handle,
+				      nodeid, link_list, &num_links);
+	if (res) {
+		knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_link_get_link_list(%d) failed: %d", nodeid, res);
+		return (-1);
+	}
+
+	for (i = 0; i < num_links; i++) {
+		/* link_list[] holds link ids; 'i' is only a position in that list */
+		const uint8_t link_id = link_list[i];
+
+		if (!instance->totem_config->interfaces[link_id].configured) {
+			continue;
+		}
+		res = knet_link_get_status(instance->knet_handle,
+					   nodeid,
+					   link_id,
+					   &link_status,
+					   sizeof(link_status));
+		if (res == 0) {
+			/* Index by link id so callers can print the slot number as the link number */
+			node_status->link_status[link_id].enabled = link_status.enabled;
+			node_status->link_status[link_id].connected = link_status.connected;
+			node_status->link_status[link_id].dynconnected = link_status.dynconnected;
+			node_status->link_status[link_id].mtu = link_status.mtu;
+			memcpy(node_status->link_status[link_id].src_ipaddr, link_status.src_ipaddr, KNET_MAX_HOST_LEN);
+			memcpy(node_status->link_status[link_id].dst_ipaddr, link_status.dst_ipaddr, KNET_MAX_HOST_LEN);
+		} else {
+			knet_log_printf (LOGSYS_LEVEL_WARNING, "knet_link_get_link_status(%d, %d) failed: %d", nodeid, link_id, res);
+		}
+	}
+	return res;
+}
+
+
+
 int totemknet_ifaces_get (void *knet_context,
 	char ***status,
 	unsigned int *iface_count)

+ 3 - 0
exec/totemknet.h

@@ -102,6 +102,9 @@ extern int totemknet_finalize (void *knet_context);
 
 extern void totemknet_net_mtu_adjust (void *knet_context, struct totem_config *totem_config);
 
+extern int totemknet_nodestatus_get (void *knet_context, unsigned int nodeid,
+				     struct totem_node_status *node_status);
+
 extern int totemknet_ifaces_get (void *net_context,
 	char ***status,
 	unsigned int *iface_count);

+ 21 - 0
exec/totemnet.c

@@ -115,6 +115,11 @@ struct transport {
 		char ***status,
 		unsigned int *iface_count);
 
+	int (*nodestatus_get) (
+		void *transport_context,
+		unsigned int nodeid,
+		struct totem_node_status *node_status);
+
 	int (*token_target_set) (
 		void *transport_context,
 		unsigned int nodeid);
@@ -179,6 +184,7 @@ struct transport transport_entries[] = {
 		.finalize = totemudp_finalize,
 		.net_mtu_adjust = totemudp_net_mtu_adjust,
 		.ifaces_get = totemudp_ifaces_get,
+		.nodestatus_get = totemudp_nodestatus_get,
 		.token_target_set = totemudp_token_target_set,
 		.crypto_set = totemudp_crypto_set,
 		.recv_mcast_empty = totemudp_recv_mcast_empty,
@@ -203,6 +209,7 @@ struct transport transport_entries[] = {
 		.finalize = totemudpu_finalize,
 		.net_mtu_adjust = totemudpu_net_mtu_adjust,
 		.ifaces_get = totemudpu_ifaces_get,
+		.nodestatus_get = totemudpu_nodestatus_get,
 		.token_target_set = totemudpu_token_target_set,
 		.crypto_set = totemudpu_crypto_set,
 		.recv_mcast_empty = totemudpu_recv_mcast_empty,
@@ -227,6 +234,7 @@ struct transport transport_entries[] = {
 		.finalize = totemknet_finalize,
 		.net_mtu_adjust = totemknet_net_mtu_adjust,
 		.ifaces_get = totemknet_ifaces_get,
+		.nodestatus_get = totemknet_nodestatus_get,
 		.token_target_set = totemknet_token_target_set,
 		.crypto_set = totemknet_crypto_set,
 		.recv_mcast_empty = totemknet_recv_mcast_empty,
@@ -473,6 +481,19 @@ int totemnet_iface_set (void *net_context,
 	return (res);
 }
 
+/*
+ * Forward a node status query to the transport selected for this totemnet
+ * instance and return the transport's result unchanged.
+ */
+int totemnet_nodestatus_get (
+	void *net_context,
+	unsigned int nodeid,
+	struct totem_node_status *node_status)
+{
+	struct totemnet_instance *instance = (struct totemnet_instance *)net_context;
+	int res;
+
+	/* Kept as a signed int: transports may return negative values such as -1 */
+	res = instance->transport->nodestatus_get (instance->transport_context, nodeid, node_status);
+
+	return (res);
+}
+
 int totemnet_ifaces_get (
 	void *net_context,
 	char ***status,

+ 5 - 0
exec/totemnet.h

@@ -125,6 +125,11 @@ extern void totemnet_stats_clear (void *net_context);
 
 extern const char *totemnet_iface_print (void *net_context);
 
+extern int totemnet_nodestatus_get (
+	void *net_context,
+	unsigned int nodeid,
+	struct totem_node_status *node_status);
+
 extern int totemnet_ifaces_get (
 	void *net_context,
 	char ***status,

+ 7 - 0
exec/totempg.c

@@ -1447,6 +1447,13 @@ int totempg_iface_set (
 	return (res);
 }
 
+/*
+ * Clear *node_status, then let totemsrp (and the layers below it) fill in
+ * the status of 'nodeid'. The totemsrp result is returned unchanged.
+ */
+int totempg_nodestatus_get (unsigned int nodeid,
+			    struct totem_node_status *node_status)
+{
+	int srp_res;
+
+	/* Start from all-zero so fields a transport does not set read as 0 */
+	memset(node_status, 0, sizeof(*node_status));
+
+	srp_res = totemsrp_nodestatus_get (totemsrp_context, nodeid, node_status);
+	return srp_res;
+}
+
 int totempg_ifaces_get (
 	unsigned int nodeid,
 	unsigned int *interface_id,

+ 21 - 0
exec/totemsrp.c

@@ -1039,6 +1039,27 @@ void totemsrp_finalize (
 	free (instance);
 }
 
+/*
+ * Stamp the structure version, mark the node reachable if it appears in
+ * my_proc_list, then hand over to totemnet for the transport-level details.
+ * Returns whatever totemnet_nodestatus_get() returns.
+ */
+int totemsrp_nodestatus_get (
+	void *srp_context,
+	unsigned int nodeid,
+	struct totem_node_status *node_status)
+{
+	struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
+	int entry;
+
+	node_status->version = TOTEM_NODE_STATUS_STRUCTURE_VERSION;
+
+	/* Fill in 'reachable' here as the lower level UDP[u] layers don't know */
+	for (entry = 0; entry < instance->my_proc_list_entries; entry++) {
+		if (instance->my_proc_list[entry].nodeid != nodeid) {
+			continue;
+		}
+		node_status->reachable = 1;
+		/* One match is enough; further matches would store the same value */
+		break;
+	}
+
+	return totemnet_nodestatus_get(instance->totemnet_context, nodeid, node_status);
+}
+
+
 /*
  * Return configured interfaces. interfaces is array of totem_ip addresses allocated by caller,
  * with interaces_size number of items. iface_count is final number of interfaces filled by this

+ 3 - 0
exec/totemsrp.h

@@ -101,6 +101,9 @@ void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int v
 
 extern void totemsrp_net_mtu_adjust (struct totem_config *totem_config);
 
+extern int totemsrp_nodestatus_get (void *srp_context, unsigned int nodeid,
+				    struct totem_node_status *node_status);
+
 extern int totemsrp_ifaces_get (
 	void *srp_context,
 	unsigned int nodeid,

+ 29 - 0
exec/totemudp.c

@@ -1334,6 +1334,35 @@ extern int totemudp_iface_check (void *udp_context)
 	return (res);
 }
 
+/*
+ * UDP transport: report the single link (link_status[0]) for 'nodeid'.
+ *
+ * Only members found in instance->member_list are filled in; if the node is
+ * not in the list *node_status is left untouched. 'connected' mirrors the
+ * 'reachable' flag already stored in *node_status by the caller. Only
+ * src_ipaddr is set; dst_ipaddr is not touched. Always returns 0.
+ */
+int totemudp_nodestatus_get (void *udp_context, unsigned int nodeid,
+			     struct totem_node_status *node_status)
+{
+	struct totemudp_instance *instance = (struct totemudp_instance *)udp_context;
+	struct qb_list_head *list;
+	struct totemudp_member *member;
+
+	qb_list_for_each(list, &(instance->member_list)) {
+		member = qb_list_entry (list,
+			struct totemudp_member,
+			list);
+
+		if (member->member.nodeid == nodeid) {
+			node_status->nodeid = nodeid;
+			/* reachable is filled in by totemsrp */
+			if (instance->netif_bind_state == BIND_STATE_REGULAR) {
+				node_status->link_status[0].enabled = 1;
+			} else {
+				node_status->link_status[0].enabled = 0;
+			}
+			node_status->link_status[0].connected = node_status->reachable;
+			node_status->link_status[0].mtu = instance->totem_config->net_mtu;
+			strncpy(node_status->link_status[0].src_ipaddr, totemip_print(&member->member), KNET_MAX_HOST_LEN-1);
+			/* strncpy does not terminate on truncation; don't rely on the caller's memset */
+			node_status->link_status[0].src_ipaddr[KNET_MAX_HOST_LEN-1] = '\0';
+		}
+	}
+	return (0);
+}
+
 int totemudp_ifaces_get (
 	void *net_context,
 	char ***status,

+ 3 - 0
exec/totemudp.h

@@ -92,6 +92,9 @@ extern int totemudp_mcast_noflush_send (
 	const void *msg,
 	unsigned int msg_len);
 
+extern int totemudp_nodestatus_get (void *net_context, unsigned int nodeid,
+				    struct totem_node_status *node_status);
+
 extern int totemudp_ifaces_get (void *net_context,
 	char ***status,
 	unsigned int *iface_count);

+ 28 - 0
exec/totemudpu.c

@@ -793,6 +793,34 @@ static int totemudpu_build_sockets_ip (
 	return 0;
 }
 
+/*
+ * UDPU transport: report the single link (link_status[0]) for 'nodeid'.
+ *
+ * Only members found in instance->member_list are filled in; if the node is
+ * not in the list *node_status is left untouched. 'connected' mirrors the
+ * 'reachable' flag already stored in *node_status by the caller. Only
+ * src_ipaddr is set; dst_ipaddr is not touched. Always returns 0.
+ */
+int totemudpu_nodestatus_get (void *udpu_context, unsigned int nodeid,
+			      struct totem_node_status *node_status)
+{
+	struct totemudpu_instance *instance = (struct totemudpu_instance *)udpu_context;
+	struct qb_list_head *list;
+	struct totemudpu_member *member;
+
+	qb_list_for_each(list, &(instance->member_list)) {
+		member = qb_list_entry (list,
+			struct totemudpu_member,
+			list);
+
+		if (member->member.nodeid == nodeid) {
+			node_status->nodeid = nodeid;
+			/* reachable is filled in by totemsrp */
+			if (instance->netif_bind_state == BIND_STATE_REGULAR) {
+				node_status->link_status[0].enabled = 1;
+			} else {
+				node_status->link_status[0].enabled = 0;
+			}
+			node_status->link_status[0].connected = node_status->reachable;
+			node_status->link_status[0].mtu = instance->totem_config->net_mtu;
+			strncpy(node_status->link_status[0].src_ipaddr, totemip_print(&member->member), KNET_MAX_HOST_LEN-1);
+			/* strncpy does not terminate on truncation; don't rely on the caller's memset */
+			node_status->link_status[0].src_ipaddr[KNET_MAX_HOST_LEN-1] = '\0';
+		}
+	}
+	return (0);
+}
+
 int totemudpu_ifaces_get (
 	void *net_context,
 	char ***status,

+ 3 - 0
exec/totemudpu.h

@@ -92,6 +92,9 @@ extern int totemudpu_mcast_noflush_send (
 	const void *msg,
 	unsigned int msg_len);
 
+extern int totemudpu_nodestatus_get (void *net_context, unsigned int nodeid,
+				    struct totem_node_status *node_status);
+
 extern int totemudpu_ifaces_get (void *net_context,
 	char ***status,
 	unsigned int *iface_count);

+ 37 - 0
include/corosync/cfg.h

@@ -162,6 +162,43 @@ corosync_cfg_ring_status_get (
 	char ***status,
 	unsigned int *interface_count);
 
+#define CFG_NODE_STATUS_STRUCT_VERSION 1
+#define CFG_MAX_HOST_LEN 256
+#define CFG_MAX_LINKS 8
+/*
+ * Status of one link to a node, as returned inside
+ * struct corosync_knet_node_status. An entry that was never filled in is
+ * all zero (enabled == 0).
+ */
+struct corosync_knet_link_status {
+	uint8_t enabled;	        /* link is configured and admin enabled for traffic */
+	uint8_t connected;              /* link is connected for data (local view) */
+	uint8_t dynconnected;	        /* link has been activated by remote dynip */
+	unsigned int mtu;		/* current detected MTU on this link */
+	char src_ipaddr[CFG_MAX_HOST_LEN];	/* local address of the link, as text */
+	char dst_ipaddr[CFG_MAX_HOST_LEN];	/* remote address of the link, as text (not set for UDP/UDPU) */
+};
+
+/*
+ * Per-node status filled in by corosync_cfg_node_status_get().
+ */
+struct corosync_knet_node_status {
+        uint32_t version;	/* structure version, CFG_NODE_STATUS_STRUCT_VERSION */
+	unsigned int nodeid;	/* node this status describes */
+	uint8_t reachable;	/* non-zero if the node is currently reachable */
+	uint8_t remote;		/* copied from knet's host status; 0 for UDP/UDPU */
+	uint8_t external;	/* copied from knet's host status; 0 for UDP/UDPU */
+	uint8_t onwire_min;	/* knet on-wire version: minimum supported */
+	uint8_t onwire_max;	/* knet on-wire version: maximum supported */
+	uint8_t onwire_ver;	/* knet on-wire version: currently in use */
+	struct corosync_knet_link_status link_status[CFG_MAX_LINKS];	/* one entry per link */
+};
+
+/**
+ * @brief corosync_cfg_node_status_get
+ *
+ * Ask corosync for the status of one node, including the status of each of
+ * its links.
+ *
+ * @param cfg_handle handle obtained from corosync_cfg_initialize()
+ * @param nodeid id of the node to query
+ * @param node_status caller-allocated structure that receives the result;
+ *        must not be NULL
+ * @return CS_OK on success, CS_ERR_INVALID_PARAM if node_status is NULL,
+ *         CS_ERR_NOT_SUPPORTED if the structure version returned by corosync
+ *         is not CFG_NODE_STATUS_STRUCT_VERSION, or another cs_error_t on
+ *         handle lookup or IPC failure
+ */
+cs_error_t
+corosync_cfg_node_status_get (
+	corosync_cfg_handle_t cfg_handle,
+	unsigned int nodeid,
+	struct corosync_knet_node_status *node_status);
+
 /**
  * @brief corosync_cfg_kill_node
  * @param cfg_handle

+ 21 - 2
include/corosync/ipc_cfg.h

@@ -59,7 +59,8 @@ enum req_lib_cfg_types {
 	MESSAGE_REQ_CFG_GET_NODE_ADDRS = 5,
 	MESSAGE_REQ_CFG_LOCAL_GET = 6,
 	MESSAGE_REQ_CFG_RELOAD_CONFIG = 7,
-	MESSAGE_REQ_CFG_REOPEN_LOG_FILES = 8
+	MESSAGE_REQ_CFG_REOPEN_LOG_FILES = 8,
+	MESSAGE_REQ_CFG_NODESTATUSGET = 9
 };
 
 /**
@@ -81,7 +82,8 @@ enum res_lib_cfg_types {
 	MESSAGE_RES_CFG_LOCAL_GET = 12,
 	MESSAGE_RES_CFG_REPLYTOSHUTDOWN = 13,
 	MESSAGE_RES_CFG_RELOAD_CONFIG = 14,
-	MESSAGE_RES_CFG_REOPEN_LOG_FILES = 15
+	MESSAGE_RES_CFG_REOPEN_LOG_FILES = 15,
+	MESSAGE_RES_CFG_NODESTATUSGET = 16
 };
 
 /**
@@ -101,6 +103,23 @@ struct res_lib_cfg_ringstatusget {
 	char interface_status[CFG_MAX_INTERFACES][CFG_INTERFACE_STATUS_MAX_LEN] __attribute__((aligned(8)));
 };
 
+/**
+ * @brief The req_lib_cfg_nodestatusget struct
+ *
+ * Request sent by corosync_cfg_node_status_get()
+ * (MESSAGE_REQ_CFG_NODESTATUSGET).
+ */
+struct req_lib_cfg_nodestatusget {
+	struct qb_ipc_request_header header __attribute__((aligned(8)));
+	/* node whose status is requested */
+	unsigned int nodeid __attribute__((aligned(8)));
+	/* node status structure version the client wants in the reply */
+	mar_uint32_t version __attribute__((aligned(8)));
+};
+
+/**
+ * @brief The res_lib_cfg_nodestatusget struct
+ *
+ * Reply to MESSAGE_REQ_CFG_NODESTATUSGET (MESSAGE_RES_CFG_NODESTATUSGET).
+ */
+struct res_lib_cfg_nodestatusget {
+	/* header.error carries the result of the lookup */
+	struct qb_ipc_response_header header __attribute__((aligned(8)));
+	/* status of the requested node */
+	struct corosync_knet_node_status node_status __attribute__((aligned(8)));
+};
+
 /**
  * @brief The req_lib_cfg_ringreenable struct
  */

+ 19 - 0
include/corosync/totem/totem.h

@@ -253,6 +253,25 @@ struct totem_config {
 	    unsigned int nodeid);
 };
 
+/*
+ * Node status returned from the API
+ * Usually the same as the cfg version (struct corosync_knet_node_status),
+ * except that link_status uses knet's own struct knet_link_status and is
+ * sized by KNET_MAX_LINK.
+ */
+#define TOTEM_NODE_STATUS_STRUCTURE_VERSION 1
+struct totem_node_status {
+	uint32_t version; /* Structure version */
+	unsigned int nodeid; /* Node this status describes */
+	uint8_t reachable; /* Set by totemsrp from my_proc_list, or by knet from its host status */
+	uint8_t remote; /* From knet's host status */
+	uint8_t external; /* From knet's host status */
+	uint8_t onwire_min; /* knet on-wire version values; only set when built with HAVE_KNET_ONWIRE_VER */
+	uint8_t onwire_max;
+	uint8_t onwire_ver;
+	struct knet_link_status link_status[KNET_MAX_LINK]; /* Per-link status */
+};
+
+
 #define TOTEM_CONFIGURATION_TYPE
 enum totem_configuration_type {
 	TOTEM_CONFIGURATION_REGULAR,

+ 3 - 0
include/corosync/totem/totempg.h

@@ -146,6 +146,9 @@ extern int totempg_ifaces_get (
 	char ***status,
         unsigned int *iface_count);
 
+extern int totempg_nodestatus_get (unsigned int nodeid,
+				   struct totem_node_status *node_status);
+
 extern void* totempg_get_stats (void);
 
 void totempg_event_signal (enum totem_event_type type, int value);

+ 51 - 1
lib/cfg.c

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002-2005 MontaVista Software, Inc.
- * Copyright (c) 2006-2018 Red Hat, Inc.
+ * Copyright (c) 2006-2020 Red Hat, Inc.
  *
  * All rights reserved.
  *
@@ -367,6 +367,56 @@ exit_handle_put:
 	return (error);
 }
 
+/*
+ * Query corosync for the status of 'nodeid' and copy it into *node_status.
+ *
+ * Returns CS_ERR_INVALID_PARAM if node_status is NULL, the handle lookup or
+ * IPC error if the request could not be made, the error reported by corosync
+ * in the reply header if the lookup failed there, CS_ERR_NOT_SUPPORTED if
+ * the reply carries a structure version other than
+ * CFG_NODE_STATUS_STRUCT_VERSION, and CS_OK otherwise.
+ * *node_status is only written when both the IPC call and the server-side
+ * lookup succeeded.
+ */
+cs_error_t
+corosync_cfg_node_status_get (
+	corosync_cfg_handle_t cfg_handle,
+	unsigned int nodeid,
+	struct corosync_knet_node_status *node_status)
+{
+	struct cfg_inst *cfg_inst;
+	struct req_lib_cfg_nodestatusget req_lib_cfg_nodestatusget;
+	struct res_lib_cfg_nodestatusget res_lib_cfg_nodestatusget;
+	cs_error_t error;
+	struct iovec iov;
+
+	if (!node_status) {
+		return (CS_ERR_INVALID_PARAM);
+	}
+
+	error = hdb_error_to_cs(hdb_handle_get (&cfg_hdb, cfg_handle, (void *)&cfg_inst));
+	if (error != CS_OK) {
+		return (error);
+	}
+
+	req_lib_cfg_nodestatusget.header.size = sizeof (struct req_lib_cfg_nodestatusget);
+	req_lib_cfg_nodestatusget.header.id = MESSAGE_REQ_CFG_NODESTATUSGET;
+	req_lib_cfg_nodestatusget.nodeid = nodeid;
+	req_lib_cfg_nodestatusget.version = CFG_NODE_STATUS_STRUCT_VERSION;
+
+	iov.iov_base = (void *)&req_lib_cfg_nodestatusget;
+	iov.iov_len = sizeof (struct req_lib_cfg_nodestatusget);
+
+	error = qb_to_cs_error (qb_ipcc_sendv_recv(cfg_inst->c,
+		&iov,
+		1,
+		&res_lib_cfg_nodestatusget,
+		sizeof (struct res_lib_cfg_nodestatusget), CS_IPC_TIMEOUT_MS));
+	if (error != CS_OK) {
+		/* Nothing was received: the reply buffer must not be inspected */
+		goto exit_handle_put;
+	}
+
+	/* The IPC round trip worked; now honour the result of the lookup itself */
+	error = res_lib_cfg_nodestatusget.header.error;
+	if (error != CS_OK) {
+		goto exit_handle_put;
+	}
+
+	memcpy(node_status, &res_lib_cfg_nodestatusget.node_status, sizeof(struct corosync_knet_node_status));
+
+	/* corosync sent us something we don't really understand.
+	   - we might need to revisit this in the case of future structure versions */
+	if (res_lib_cfg_nodestatusget.node_status.version != CFG_NODE_STATUS_STRUCT_VERSION) {
+		error = CS_ERR_NOT_SUPPORTED;
+	}
+
+exit_handle_put:
+	(void)hdb_handle_put (&cfg_hdb, cfg_handle);
+
+	return (error);
+}
+
 cs_error_t
 corosync_cfg_kill_node (
 	corosync_cfg_handle_t cfg_handle,

+ 1 - 0
lib/libcfg.versions

@@ -11,6 +11,7 @@ COROSYNC_CFG_0.82 {
 		corosync_cfg_track;
 		corosync_cfg_track_stop;
 		corosync_cfg_ring_status_get;
+		corosync_cfg_node_status_get;
 		corosync_cfg_ring_reenable;
 		corosync_cfg_service_load;
 		corosync_cfg_service_unload;

+ 1 - 1
lib/libcfg.verso

@@ -1 +1 @@
-7.1.0
+7.2.0

+ 30 - 4
man/corosync-cfgtool.8

@@ -35,7 +35,7 @@
 .SH "NAME"
 corosync-cfgtool \- An administrative tool for corosync.
 .SH "SYNOPSIS"
-.B corosync\-cfgtool [[\-i IP_address] [\-b] \-s] [\-R] [\-L] [\-k nodeid] [\-a nodeid] [\-h] [\-H]
+.B corosync\-cfgtool [[\-i IP_address] [\-b] \-s] [\-n] [\-R] [\-L] [\-k nodeid] [\-a nodeid] [\-h] [\-H]
 .SH "DESCRIPTION"
 .B corosync\-cfgtool
 A tool for displaying and configuring active parameters within corosync.
@@ -48,7 +48,7 @@ Finds only information about the specified interface IP address or link id with
 Displays the status of the current links on this node for UDP/UDPU, with extended status
 for KNET.
 After each link, the nodes on that link are displayed in order with their status,
-for example there are 3 nodes with KNET transportation:
+for example there are 3 nodes with KNET transport:
 
 LINK ID 0
         addr    = 192.168.100.80
@@ -58,14 +58,14 @@ LINK ID 0
                 nodeid  3:      connected
 .TP
 .B -b
-Displays the brief status of the current links on this node (KNET only) when used
+Displays the brief status of the current links on this node when used
 with "-s". If any interfaces are faulty, 1 is returned by the binary. If all interfaces
 are active 0 is returned to the shell.
 After each link, the nodes on that link are displayed in order with their status
 encoded into a single digit, or characters 'n', 'd' and '?' with special meaning.
 1=link enabled, 2=link connected, So a 3 in a node position indicates that the
 link is both enabled and connected. Status represented by character 'n' is used for
-localhost link. Character '?' means that Crosync was unable to get status of link from knet (log
+localhost link. Character '?' means that Corosync was unable to get status of link from knet (log
 should contain more information). Character 'd' shouldn't appear and it means that Corosync
 was unable to configure a link and it is result of some error which should have been logged.
 
@@ -75,6 +75,32 @@ LINK ID 0
         addr    = 192.168.100.80
         status  = n33
 .TP
+.B -n
+Displays the status of the current nodes in the system with their link status(es).
+.P
+.nf
+Local node ID 1, transport knet
+nodeid: 2 reachable   onwire (min/max/cur): 0, 1, 1
+   LINK: 0 (192.168.1.101->192.168.1.102)  enabled connected mtu: 1397
+   LINK: 1 (192.168.4.1->192.168.4.2)  enabled mtu: 469
+   LINK: 2 (192.168.9.1->192.168.9.2)  enabled mtu: 469
+.fi
+.P
+Only reachable nodes are displayed so "reachable" should always be there.
+.br
+'onwire' versions are the knet on-wire versions that are supported/in use (where appropriate).
+.br
+IP addresses are the local and remote IP addresses (for UDP[U] only the local IP address is shown)
+.br
+enabled - means the link has been brought up
+.br
+connected - means that the link is connected to the remote node
+.br
+dynconnected - is not currently implemented
+.br
+mtu - shows the size of data packets. Should be the link packet size less a small amount
+for protocol overheads and encryption
+.TP
 .B -R
 Tell all instances of corosync in this cluster to reload corosync.conf.
 

+ 133 - 130
tools/corosync-cfgtool.c

@@ -71,6 +71,7 @@
 enum user_action {
 	ACTION_NOOP=0,
 	ACTION_LINKSTATUS_GET,
+	ACTION_NODESTATUS_GET,
 	ACTION_RELOAD_CONFIG,
 	ACTION_REOPEN_LOG_FILES,
 	ACTION_SHUTDOW,
@@ -89,35 +90,35 @@ static int node_compare(const void *aptr, const void *bptr)
 }
 
 static int
-linkstatusget_do (char *interface_name, int brief)
+nodestatusget_do (enum user_action action, int brief)
 {
 	cs_error_t result;
 	corosync_cfg_handle_t handle;
 	cmap_handle_t cmap_handle;
-	unsigned int interface_count;
-	char **interface_names;
-	char **interface_status;
-	uint32_t nodeid_list[KNET_MAX_HOST];
 	char iter_key[CMAP_KEYNAME_MAXLEN];
-	unsigned int i;
 	cmap_iter_handle_t iter;
+	unsigned int local_nodeid;
+	unsigned int local_nodeid_index=0;
+	unsigned int other_nodeid_index=0;
 	unsigned int nodeid;
 	int nodeid_match_guard;
 	cmap_value_types_t type;
 	size_t value_len;
-	int rc = EXIT_SUCCESS;
-	int len, s = 0, t;
-	char stat_ch;
 	char *str;
-	totem_transport_t transport_number = TOTEM_TRANSPORT_KNET;
-	int no_match = 1;
+	char *transport_str = NULL;
+	uint32_t nodeid_list[KNET_MAX_HOST];
+	int s = 0;
+	int rc = EXIT_SUCCESS;
+	int transport_number = TOTEM_TRANSPORT_KNET;
+	int i,j;
+	struct corosync_knet_node_status node_status;
 
-	printf ("Printing link status.\n");
 	result = corosync_cfg_initialize (&handle, NULL);
 	if (result != CS_OK) {
 		fprintf (stderr, "Could not initialize corosync configuration API error %d\n", result);
 		exit (EXIT_FAILURE);
 	}
+
 	result = cmap_initialize (&cmap_handle);
 	if (result != CS_OK) {
 		fprintf (stderr, "Could not initialize corosync cmap API error %d\n", result);
@@ -132,7 +133,19 @@ linkstatusget_do (char *interface_name, int brief)
 		if (strcmp (str, "udp") == 0) {
 			transport_number = TOTEM_TRANSPORT_UDP;
 		}
-		free(str);
+		transport_str = str;
+	}
+	if (!transport_str) {
+		transport_str = strdup("knet"); /* It's the default */
+	}
+
+	result = corosync_cfg_local_get(handle, &local_nodeid);
+	if (result != CS_OK) {
+		fprintf (stderr, "Could not get the local node id, the error is: %d\n", result);
+		free(transport_str);
+		cmap_finalize(cmap_handle);
+		corosync_cfg_finalize(handle);
+		return EXIT_FAILURE;
 	}
 
 	/* Get a list of nodes. We do it this way rather than using votequorum as cfgtool
@@ -141,6 +154,9 @@ linkstatusget_do (char *interface_name, int brief)
 	result = cmap_iter_init(cmap_handle, "nodelist.node.", &iter);
 	if (result != CS_OK) {
 		fprintf (stderr, "Could not get nodelist from cmap. error %d\n", result);
+		free(transport_str);
+		cmap_finalize(cmap_handle);
+		corosync_cfg_finalize(handle);
 		exit (EXIT_FAILURE);
 	}
 
@@ -154,140 +170,120 @@ linkstatusget_do (char *interface_name, int brief)
 			continue;
 		}
 		if (cmap_get_uint32(cmap_handle, iter_key, &nodeid) == CS_OK) {
+			if (nodeid == local_nodeid) {
+				local_nodeid_index = s;
+			} else {
+				/* Bit of an odd one this. but local node only uses one link (of course, to itself)
+				   so if we want to know which links are active across the cluster we need to look
+				   at another node (any other) node's link list */
+				other_nodeid_index = s;
+			}
 			nodeid_list[s++] = nodeid;
 		}
 	}
-
-	/* totemknet returns nodes in nodeid order - even though it doesn't tell us
-	   what the nodeid is. So sort our node list and we can then look up
-	   knet node pos to get an actual nodeid.
-	   Yep, I really should have totally rewritten the cfg interface for this.
-	*/
+	/* It's nice to have these in nodeid order */
 	qsort(nodeid_list, s, sizeof(uint32_t), node_compare);
 
-	result = corosync_cfg_local_get(handle, &nodeid);
-	if (result != CS_OK) {
-		fprintf (stderr, "Could not get the local node id, the error is: %d\n", result);
+	cmap_finalize(cmap_handle);
+
+	printf ("Local node ID " CS_PRI_NODE_ID ", transport %s\n", local_nodeid, transport_str);
+
+        /* If node status requested then do print node-based info */
+	if (action == ACTION_NODESTATUS_GET) {
+		for (i=0; i<s; i++) {
+			result = corosync_cfg_node_status_get(handle, nodeid_list[i], &node_status);
+			if (result == CS_OK) {
+				/* Only display node info if it is reachable (and not us) */
+				if (node_status.reachable && node_status.nodeid != local_nodeid) {
+					printf("nodeid: %d", node_status.nodeid);
+					printf(" reachable");
+					if (node_status.remote) {
+						printf(" remote");
+					}
+					if (node_status.external) {
+						printf(" external");
+					}
+#ifdef HAVE_KNET_ONWIRE_VER
+					if (transport_number == TOTEM_TRANSPORT_KNET) {
+						printf("   onwire (min/max/cur): %d, %d, %d",
+						       node_status.onwire_min,
+						       node_status.onwire_max,
+						       node_status.onwire_ver);
+					}
+#endif
+					printf("\n");
+					for (j=0; j<CFG_MAX_LINKS; j++) {
+						if (node_status.link_status[j].enabled) {
+							printf("   LINK: %d", j);
+							printf(" (%s%s%s)",
+							       node_status.link_status[j].src_ipaddr,
+							       transport_number==TOTEM_TRANSPORT_KNET?"->":"",
+							       node_status.link_status[j].dst_ipaddr);
+							if (node_status.link_status[j].enabled) {
+								printf(" enabled");
+							}
+							if (node_status.link_status[j].connected) {
+								printf(" connected");
+							}
+							if (node_status.link_status[j].dynconnected) {
+								printf(" dynconnected");
+							}
+							printf(" mtu: %d\n", node_status.link_status[j].mtu);
+						}
+					}
+					printf("\n");
+				}
+			}
+		}
 	}
+	/* Print in link order */
 	else {
-		printf ("Local node ID " CS_PRI_NODE_ID "\n", nodeid);
-	}
+		struct corosync_knet_node_status node_info[s];
+		memset(node_info, 0, sizeof(node_info));
 
-	result = corosync_cfg_ring_status_get (handle,
-				&interface_names,
-				&interface_status,
-				&interface_count);
-	if (result != CS_OK) {
-		fprintf (stderr, "Could not get the link status, the error is: %d\n", result);
-	} else {
-		for (i = 0; i < interface_count; i++) {
-			char *cur_iface_name_space = strchr(interface_names[i], ' ');
-			int show_current_iface;
-
-			s = 0;
-			/*
-			 * Interface_name is "<linkid> <IP address>"
-			 * separate them out
-			 */
-			if (!cur_iface_name_space) {
-				continue;
-			}
-			*cur_iface_name_space = '\0';
-
-			show_current_iface = 1;
-			if (interface_name != NULL && interface_name[0] != '\0' &&
-			    strcmp(interface_name, interface_names[i]) != 0 &&
-			    strcmp(interface_name, cur_iface_name_space + 1) != 0) {
-				show_current_iface = 0;
+		for (i=0; i<s; i++) {
+			result = corosync_cfg_node_status_get(handle, nodeid_list[i], &node_info[i]);
+			if (result != CS_OK) {
+				fprintf (stderr, "Could not get the node status for nodeid %d, the error is: %d\n", nodeid_list[i], result);
 			}
+		}
 
-			if (show_current_iface) {
-				no_match = 0;
-				printf ("LINK ID %s\n", interface_names[i]);
-				printf ("\taddr\t= %s\n", cur_iface_name_space + 1);
-				/*
-				 * UDP(U) interface_status is always OK and doesn't contain
-				 * detailed information (only knet does).
-				 */
-				if ((!brief) && (transport_number == TOTEM_TRANSPORT_KNET)) {
-					len = strlen(interface_status[i]);
-					printf ("\tstatus:\n");
-					while (s < len) {
-						nodeid = nodeid_list[s];
-						printf("\t\tnodeid %2d:\t", nodeid);
-						stat_ch = interface_status[i][s];
-
-						/* Set return code to 1 if status is not localhost or connected. */
-						if (rc == EXIT_SUCCESS) {
-							if ((stat_ch != 'n') && (stat_ch != '3')) {
-								rc = EXIT_FAILURE;
-							}
+		for (i=0; i<CFG_MAX_LINKS; i++) {
+			if (node_info[other_nodeid_index].link_status[i].enabled) {
+				printf("LINK ID %d\n", i);
+				printf("\taddr\t= %s\n", node_info[other_nodeid_index].link_status[i].src_ipaddr);
+				if (brief) {
+					printf("\tstatus\t= ");
+					for (j=0; j<s; j++) {
+						char status = (node_info[j].link_status[i].enabled |
+							       (node_info[j].link_status[i].connected << 1)) + '0';
+						if (status == '0') {
+							status = 'n';
 						}
-
-						if (stat_ch >= '0' && stat_ch <= '9') {
-							t = stat_ch - '0';
-
-							/*
-							 * bit 0 - enabled
-							 * bit 1 - connected
-							 * bit 2 - dynconnected
-							 */
-							if (t & 0x2) {
+						printf("%c", status);
+					}
+					printf("\n");
+				} else {
+					printf("\tstatus:\n");
+					for (j=0; j<s; j++) {
+						printf("\t\tnodeid: %3d:\t", node_info[j].nodeid);
+						if (j == local_nodeid_index) {
+							printf("localhost");
+						} else {
+							if (node_info[j].link_status[i].connected) {
 								printf("connected");
 							} else {
 								printf("disconnected");
 							}
-
-							if (!(t & 0x1)) {
-								printf(" (not enabled)");
-							}
-							printf("\n");
-						} else if (stat_ch == 'n') {
-							printf("localhost\n");
-						} else if (stat_ch == '?') {
-							printf("knet error\n");
-						} else if (stat_ch == 'd') {
-							printf("config error\n");
-						} else {
-							printf("can't decode status character '%c'\n", stat_ch);
-						}
-						s++;
-					}
-				} else {
-					printf ("\tstatus\t= %s\n", interface_status[i]);
-
-					/* Set return code to 1 if status is not localhost or connected. */
-					if ((rc == EXIT_SUCCESS) && (transport_number == TOTEM_TRANSPORT_KNET)) {
-						len = strlen(interface_status[i]);
-						while (s < len) {
-							stat_ch = interface_status[i][s];
-							if ((stat_ch != 'n') && (stat_ch != '3')) {
-								rc = EXIT_FAILURE;
-								break;
-							}
-							s++;
 						}
+						printf("\n");
 					}
 				}
 			}
 		}
-
-		/* No match for value of -i option */
-		if (no_match) {
-			rc = EXIT_FAILURE;
-			fprintf(stderr, "Can't match any IP address or link id\n");
-		}
-
-		for (i = 0; i < interface_count; i++) {
-			free(interface_status[i]);
-			free(interface_names[i]);
-		}
-		free(interface_status);
-		free(interface_names);
 	}
-
-	(void)cmap_finalize (cmap_handle);
-	(void)corosync_cfg_finalize (handle);
+	free(transport_str);
+	corosync_cfg_finalize(handle);
 	return rc;
 }
 
@@ -445,8 +441,9 @@ static void usage_do (void)
 	printf ("A tool for displaying and configuring active parameters within corosync.\n");
 	printf ("options:\n");
 	printf ("\t-i\tFinds only information about the specified interface IP address or link id when used with -s..\n");
-	printf ("\t-s\tDisplays the status of the current links on this node(UDP/UDPU), with extended status for KNET.\n");
-	printf ("\t-b\tDisplays the brief status of the current links on this node when used with -s.(KNET only)\n");
+	printf ("\t-s\tDisplays the status of the current links on this node.\n");
+	printf ("\t-n\tDisplays the status of the connected nodes and their links.\n");
+	printf ("\t-b\tDisplays the brief status of the current links on this node when used with -s.\n");
 	printf ("\t-R\tTell all instances of corosync in this cluster to reload corosync.conf.\n");
 	printf ("\t-L\tTell corosync to reopen all logging files.\n");
 	printf ("\t-k\tKill a node identified by node id.\n");
@@ -456,7 +453,7 @@ static void usage_do (void)
 }
 
 int main (int argc, char *argv[]) {
-	const char *options = "i:sbrRLk:a:hH";
+	const char *options = "i:snbrRLk:a:hH";
 	int opt;
 	unsigned int nodeid = 0;
 	char interface_name[128] = "";
@@ -474,6 +471,9 @@ int main (int argc, char *argv[]) {
 		case 's':
 			action = ACTION_LINKSTATUS_GET;
 			break;
+		case 'n':
+			action = ACTION_NODESTATUS_GET;
+			break;
 		case 'b':
 			brief = 1;
 			break;
@@ -512,7 +512,10 @@ int main (int argc, char *argv[]) {
 	}
 	switch(action) {
 	case ACTION_LINKSTATUS_GET:
-		rc = linkstatusget_do(interface_name, brief);
+		rc = nodestatusget_do(action, brief);
+		break;
+	case ACTION_NODESTATUS_GET:
+		rc = nodestatusget_do(action, brief);
 		break;
 	case ACTION_RELOAD_CONFIG:
 		rc = reload_config_do();