Răsfoiți Sursa

main: Add support for libcgroup

When corosync is started in an environment where it ends up in a cgroup
without a properly set rt_runtime_us, it's impossible to get RT priority.

The already implemented workaround is to use a higher non-RT priority.

This patch implements another solution. It moves corosync into the root cpu
cgroup. The root cpu cgroup hopefully has enough RT budget.

Another solution was mentioned on the ML
https://lists.freedesktop.org/archives/systemd-devel/2017-July/039353.html
but it would mean generating some "random" values.

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
Jan Friesse 8 ani în urmă
părinte
comite
c56086c701
5 a modificat fișierele cu 131 adăugiri și 3 ștergeri
  1. 11 0
      configure.ac
  2. 7 0
      corosync.spec.in
  3. 5 0
      exec/Makefile.am
  4. 103 2
      exec/main.c
  5. 5 1
      man/corosync.8

+ 11 - 0
configure.ac

@@ -416,6 +416,10 @@ AC_ARG_ENABLE([qnetd],
 	[  --enable-qnetd                  : Quorum Net Daemon support ],,
 	[  --enable-qnetd                  : Quorum Net Daemon support ],,
 	[ enable_qnetd="no" ])
 	[ enable_qnetd="no" ])
 AM_CONDITIONAL(BUILD_QNETD, test x$enable_qnetd = xyes)
 AM_CONDITIONAL(BUILD_QNETD, test x$enable_qnetd = xyes)
+AC_ARG_ENABLE([libcgroup],
+	[  --enable-libcgroup                  : Enable libcgroup support ],,
+	[ enable_libcgroup="no" ])
+AM_CONDITIONAL(ENABLE_LIBCGROUP, test x$enable_libcgroup = xyes)
 
 
 # *FLAGS handling goes here
 # *FLAGS handling goes here
 
 
@@ -548,6 +552,13 @@ if test "x${enable_snmp}" = xyes; then
 fi
 fi
 AM_CONDITIONAL(BUILD_SNMP, test "${do_snmp}" = "1")
 AM_CONDITIONAL(BUILD_SNMP, test "${do_snmp}" = "1")
 
 
+if test "x${enable_libcgroup}" = xyes; then
+    PKG_CHECK_MODULES([libcgroup], [libcgroup])
+    AC_DEFINE_UNQUOTED([HAVE_LIBCGROUP], 1, [have libcgroup])
+    PACKAGE_FEATURES="$PACKAGE_FEATURES libcgroup"
+    WITH_LIST="$WITH_LIST --with libcgroup"
+fi
+
 # extra warnings
 # extra warnings
 EXTRA_WARNINGS=""
 EXTRA_WARNINGS=""
 
 

+ 7 - 0
corosync.spec.in

@@ -17,6 +17,7 @@
 %bcond_with runautogen
 %bcond_with runautogen
 %bcond_with qdevices
 %bcond_with qdevices
 %bcond_with qnetd
 %bcond_with qnetd
+%bcond_with libcgroup
 
 
 %global gitver %{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}
 %global gitver %{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}
 %global gittarver %{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}
 %global gittarver %{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}
@@ -74,6 +75,9 @@ Requires: nss-tools
 %if %{with qnetd}
 %if %{with qnetd}
 BuildRequires: sed
 BuildRequires: sed
 %endif
 %endif
+%if %{with libcgroup}
+BuildRequires: libcgroup-devel
+%endif
 
 
 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 
 
@@ -124,6 +128,9 @@ export rdmacm_LIBS=-lrdmacm \
 %endif
 %endif
 %if %{with qnetd}
 %if %{with qnetd}
 	--enable-qnetd \
 	--enable-qnetd \
+%endif
+%if %{with libcgroup}
+	--enable-libcgroup \
 %endif
 %endif
 	--with-initddir=%{_initrddir} \
 	--with-initddir=%{_initrddir} \
 	--with-systemddir=%{_unitdir} \
 	--with-systemddir=%{_unitdir} \

+ 5 - 0
exec/Makefile.am

@@ -78,5 +78,10 @@ corosync_LDADD		= libtotem_pg.la ../common_lib/libcorosync_common.la \
 
 
 corosync_DEPENDENCIES	= libtotem_pg.la ../common_lib/libcorosync_common.la
 corosync_DEPENDENCIES	= libtotem_pg.la ../common_lib/libcorosync_common.la
 
 
+if ENABLE_LIBCGROUP
+corosync_CFLAGS		+= $(libcgroup_CFLAGS)
+corosync_LDADD		+= $(libcgroup_LIBS)
+endif
+
 lint:
 lint:
 	-splint $(LINT_FLAGS) $(CPPFLAGS) $(CFLAGS) *.c
 	-splint $(LINT_FLAGS) $(CPPFLAGS) $(CFLAGS) *.c

+ 103 - 2
exec/main.c

@@ -110,6 +110,10 @@
 #include <corosync/logsys.h>
 #include <corosync/logsys.h>
 #include <corosync/icmap.h>
 #include <corosync/icmap.h>
 
 
+#ifdef HAVE_LIBCGROUP
+#include <libcgroup.h>
+#endif
+
 #include "quorum.h"
 #include "quorum.h"
 #include "totemsrp.h"
 #include "totemsrp.h"
 #include "logconfig.h"
 #include "logconfig.h"
@@ -1134,12 +1138,95 @@ error_close:
 	return (err);
 	return (err);
 }
 }
 
 
+/*
+ * Try to move the corosync process into the root ("/") cgroup of the "cpu"
+ * controller using libcgroup.
+ *
+ * Returns 0 when the process already is in, or was successfully moved to,
+ * the root cpu cgroup. Returns -1 on any failure (a warning is logged) and
+ * also when corosync is built without HAVE_LIBCGROUP.
+ */
+static int corosync_move_to_root_cgroup(void) {
+	int res = -1;
+#ifdef HAVE_LIBCGROUP
+	int cg_ret;
+	struct cgroup *root_cgroup = NULL;
+	struct cgroup_controller *root_cpu_cgroup_controller = NULL;
+	/*
+	 * Path string is allocated by libcgroup and owned by the caller, so it
+	 * has to be released with free() on every exit path.
+	 */
+	char *current_cgroup_path = NULL;
+
+	cg_ret = cgroup_init();
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Unable to initialize libcgroup: %s ",
+		    cgroup_strerror(cg_ret));
+
+		goto exit_res;
+	}
+
+	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
+		    cgroup_strerror(cg_ret));
+
+		goto exit_res;
+	}
+
+	/*
+	 * Nothing to do when the process already lives in the root cgroup.
+	 */
+	if (strcmp(current_cgroup_path, "/") == 0) {
+		log_printf(LOGSYS_LEVEL_DEBUG, "Corosync is already in root cgroup path");
+
+		res = 0;
+		goto exit_res;
+	}
+
+	root_cgroup = cgroup_new_cgroup("/");
+	if (root_cgroup == NULL) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup");
+
+		goto exit_res;
+	}
+
+	root_cpu_cgroup_controller = cgroup_add_controller(root_cgroup, "cpu");
+	if (root_cpu_cgroup_controller == NULL) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't create root cgroup cpu controller");
+
+		goto exit_res;
+	}
+
+	cg_ret = cgroup_attach_task(root_cgroup);
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't attach task to root cgroup: %s ",
+		    cgroup_strerror(cg_ret));
+
+		goto exit_res;
+	}
+
+	/*
+	 * Release the path obtained before the move. The lookup below stores a
+	 * freshly allocated string and would otherwise leak the old one.
+	 */
+	free(current_cgroup_path);
+	current_cgroup_path = NULL;
+
+	/*
+	 * Read the path again to verify that the move really happened.
+	 */
+	cg_ret = cgroup_get_current_controller_path(getpid(), "cpu", &current_cgroup_path);
+	if (cg_ret) {
+		log_printf(LOGSYS_LEVEL_WARNING, "Unable to get current cpu cgroup path: %s ",
+		    cgroup_strerror(cg_ret));
+
+		goto exit_res;
+	}
+
+	if (strcmp(current_cgroup_path, "/") == 0) {
+		log_printf(LOGSYS_LEVEL_NOTICE, "Corosync sucesfully moved to root cgroup");
+		res = 0;
+	} else {
+		log_printf(LOGSYS_LEVEL_WARNING, "Can't move Corosync to root cgroup");
+	}
+
+exit_res:
+	if (root_cgroup != NULL) {
+		cgroup_free(&root_cgroup);
+	}
+
+	/*
+	 * free(NULL) is a no-op, so this is safe on paths where no path was
+	 * obtained.
+	 */
+	free(current_cgroup_path);
+
+	/*
+	 * libcgroup doesn't define anything like cgroup_fini, so there is no way
+	 * to clean its cache. Such a function has to be called here once the
+	 * libcgroup authors decide to implement it.
+	 */
+
+#endif
+	 return (res);
+}
+
+
 int main (int argc, char **argv, char **envp)
 int main (int argc, char **argv, char **envp)
 {
 {
 	const char *error_string;
 	const char *error_string;
 	struct totem_config totem_config;
 	struct totem_config totem_config;
 	int res, ch;
 	int res, ch;
-	int background, sched_rr, prio, testonly;
+	int background, sched_rr, prio, testonly, move_to_root_cgroup;
 	struct stat stat_out;
 	struct stat stat_out;
 	enum e_corosync_done flock_err;
 	enum e_corosync_done flock_err;
 	uint64_t totem_config_warnings;
 	uint64_t totem_config_warnings;
@@ -1153,8 +1240,9 @@ int main (int argc, char **argv, char **envp)
 	sched_rr = 1;
 	sched_rr = 1;
 	prio = 0;
 	prio = 0;
 	testonly = 0;
 	testonly = 0;
+	move_to_root_cgroup = 1;
 
 
-	while ((ch = getopt (argc, argv, "fP:prtv")) != EOF) {
+	while ((ch = getopt (argc, argv, "fP:pRrtv")) != EOF) {
 
 
 		switch (ch) {
 		switch (ch) {
 			case 'f':
 			case 'f':
@@ -1179,6 +1267,9 @@ int main (int argc, char **argv, char **envp)
 					prio = tmpli;
 					prio = tmpli;
 				}
 				}
 				break;
 				break;
+			case 'R':
+				move_to_root_cgroup = 0;
+				break;
 			case 'r':
 			case 'r':
 				sched_rr = 1;
 				sched_rr = 1;
 				break;
 				break;
@@ -1198,6 +1289,7 @@ int main (int argc, char **argv, char **envp)
 					"        -f     : Start application in foreground.\n"\
 					"        -f     : Start application in foreground.\n"\
 					"        -p     : Do not set realtime scheduling.\n"\
 					"        -p     : Do not set realtime scheduling.\n"\
 					"        -r     : Set round robin realtime scheduling (default).\n"\
 					"        -r     : Set round robin realtime scheduling (default).\n"\
+					"        -R     : Do not try move corosync to root cpu cgroup (valid when built with libcgroup)\n" \
 					"        -P num : Set priority of process (no effect when -r is used)\n"\
 					"        -P num : Set priority of process (no effect when -r is used)\n"\
 					"        -t     : Test configuration and exit.\n"\
 					"        -t     : Test configuration and exit.\n"\
 					"        -v     : Display version and SVN revision of Corosync and exit.\n");
 					"        -v     : Display version and SVN revision of Corosync and exit.\n");
@@ -1312,6 +1404,15 @@ int main (int argc, char **argv, char **envp)
 		corosync_exit_error (COROSYNC_DONE_EXIT);
 		corosync_exit_error (COROSYNC_DONE_EXIT);
 	}
 	}
 
 
+
+	/*
+	 * Try to move corosync into root cpu cgroup. Failure is not fatal and
+	 * error is deliberately ignored.
+	 */
+	if (move_to_root_cgroup) {
+		(void)corosync_move_to_root_cgroup();
+	}
+
 	/*
 	/*
 	 * Set round robin realtime scheduling with priority 99
 	 * Set round robin realtime scheduling with priority 99
 	 */
 	 */

+ 5 - 1
man/corosync.8

@@ -35,7 +35,7 @@
 .SH NAME
 .SH NAME
 corosync \- The Corosync Cluster Engine.
 corosync \- The Corosync Cluster Engine.
 .SH SYNOPSIS
 .SH SYNOPSIS
-.B "corosync [\-f] [\-P num] [\-p] [\-r] [\-t] [\-v]"
+.B "corosync [\-f] [\-P num] [\-p] [\-r] [\-R] [\-t] [\-v]"
 .SH DESCRIPTION
 .SH DESCRIPTION
 .B corosync
 .B corosync
 Corosync provides clustering infrastructure such as membership, messaging and quorum.
 Corosync provides clustering infrastructure such as membership, messaging and quorum.
@@ -62,6 +62,10 @@ meaning maximal / minimal priority (so minimal / maximal nice value).
 Set round robin realtime scheduling with maximal priority (default). When setting
 Set round robin realtime scheduling with maximal priority (default). When setting
 of scheduler fails, fallback to set maximal priority.
 of scheduler fails, fallback to set maximal priority.
 .TP
 .TP
+.B -R
+Do not try to move Corosync to root cpu cgroup. This feature is available only
+for corosync with libcgroup enabled during the build.
+.TP
 .B -t
 .B -t
 Test configuration and then exit.
 Test configuration and then exit.
 .TP
 .TP