corosync-qdevice.8 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. .\"/*
  2. .\" * Copyright (C) 2016-2019 Red Hat, Inc.
  3. .\" *
  4. .\" * All rights reserved.
  5. .\" *
  6. .\" * Author: Jan Friesse <jfriesse@redhat.com>
  7. .\" *
  8. .\" * This software licensed under BSD license, the text of which follows:
  9. .\" *
  10. .\" * Redistribution and use in source and binary forms, with or without
  11. .\" * modification, are permitted provided that the following conditions are met:
  12. .\" *
  13. .\" * - Redistributions of source code must retain the above copyright notice,
  14. .\" * this list of conditions and the following disclaimer.
  15. .\" * - Redistributions in binary form must reproduce the above copyright notice,
  16. .\" * this list of conditions and the following disclaimer in the documentation
  17. .\" * and/or other materials provided with the distribution.
  18. .\" * - Neither the name of Red Hat, Inc. nor the names of its
  19. .\" * contributors may be used to endorse or promote products derived from this
  20. .\" * software without specific prior written permission.
  21. .\" *
  22. .\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. .\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. .\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. .\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. .\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. .\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. .\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. .\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. .\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. .\" * THE POSSIBILITY OF SUCH DAMAGE.
  33. .\" */
  34. .TH COROSYNC-QDEVICE 8 2019-08-12
  35. .SH NAME
  36. corosync-qdevice \- QDevice daemon
  37. .SH SYNOPSIS
  38. .B "corosync-qdevice [-dfh] [-S option=value[,option2=value2,...]]"
  39. .SH DESCRIPTION
  40. .B corosync-qdevice
  41. is a daemon running on each node of a cluster. It provides a configured
  42. number of votes to the
  43. quorum subsystem based on a third-party arbitrator's decision. Its primary use
  44. is to allow a cluster to sustain more node failures than standard quorum rules allow.
  45. It is recommended for clusters with an even number of nodes and highly recommended
  46. for 2 node clusters.
  47. .SH OPTIONS
  48. .TP
  49. .B -d
  50. Forcefully turn on debug information without the need to change corosync.conf.
  51. For bumping syslog messages priority to info, use this parameter twice.
  52. .TP
  53. .B -f
  54. Do not daemonize, run in the foreground.
  55. .TP
  56. .B -h
  57. Show short help text.
  58. .TP
  59. .B -S
  60. Set advanced settings described in its own section below. This option
  61. shouldn't be generally used because most of the options are
  62. not safe to change.
  63. .SH CONFIGURATION
  64. .B corosync-qdevice
  65. reads its configuration from corosync.conf file.
  66. The main configuration is within
  67. .B quorum.device
  68. sub-key. Each model also has its own configuration within a
  69. similarly named sub-key.
  70. .TP
  71. .B model
  72. Specifies the model to be used. This parameter is required.
  73. .B corosync-qdevice
  74. is modular and is able to support multiple different models. The model basically
  75. defines what type of arbitrator is used. Currently only
  76. .I net
  77. is supported.
  78. .TP
  79. .B timeout
  80. Specifies how often
  81. .B corosync-qdevice
  82. should call the votequorum_qdevice_poll function. It is also used by the
  83. .I net
  84. model to adjust
  85. its heartbeat timeout. It is recommended that you don't change this value.
  86. Default is
  87. .IR 10000 .
  88. .TP
  89. .B sync_timeout
  90. Specifies how often
  91. .B corosync-qdevice
  92. should call the votequorum_qdevice_poll function during a sync phase. It is recommended that you don't change this value.
  93. Default is
  94. .IR 30000 .
  95. .TP
  96. .B votes
  97. The number of votes provided to the cluster by qdevice. Default is (number_of_nodes - 1) or generally
  98. sum(votes_per_node) - 1.
  99. .PP
  100. .B quorum.device.heuristics
  101. subkey holds the configuration of the heuristics. Heuristics are a set of commands executed locally on
  102. startup, cluster membership change, successful connect to
  103. .B corosync-qnetd
  104. and optionally also at regular times. Commands are executed in parallel.
  105. When all commands finish successfully
  106. (their return error code is zero) on time,
  107. heuristics have passed, otherwise they have failed. The heuristics result is sent to
  108. .B corosync-qnetd
  109. and there it's used in calculations to determine which partition should be quorate.
  110. .TP
  111. .B timeout
  112. Specifies the maximum time, in milliseconds, that
  113. .B corosync-qdevice
  114. waits till the heuristics commands finish. If some command doesn't finish before the timeout, it's
  115. killed and heuristics fail. This timeout is used for heuristics executed at regular times.
  116. Default value is half of the
  117. .BR quorum.device.timeout ", so"
  118. .IR 5000 .
  119. .TP
  120. .B sync_timeout
  121. Similar to quorum.device.heuristics.timeout but used during membership changes. Default
  122. value is half of the
  123. .BR quorum.device.sync_timeout ", so"
  124. .IR 15000 .
  125. .TP
  126. .B interval
  127. Specifies the interval between two regular heuristics executions. Default value is
  128. 3 *
  129. .BR quorum.device.timeout ", so"
  130. .IR 30000 .
  131. .TP
  132. .B mode
  133. Can be one of
  134. .IR on ", " sync " or " off
  135. and specifies mode of operation of heuristics. Default is
  136. .IR off ,
  137. which means heuristics are disabled. When
  138. .I sync
  139. is set, heuristics are executed only during startup, membership change and when connection
  140. to
  141. .B corosync-qnetd
  142. is established. When heuristics should be running also on regular basis, this option
  143. should be set to
  144. .I on
  145. value.
  146. .TP
  147. .B exec_NAME
  148. defines executables.
  149. .I NAME
  150. can be an arbitrary valid cmap key name string and it has no special meaning.
  151. The value of this variable must contain a command to execute. The value is parsed (split)
  152. into arguments similarly as Bourne shell would do. Quoting is possible by
  153. using backslash and double quotes.
  154. .PP
  155. .B quorum.device.net
  156. subkey holds the configuration for
  157. .B model
  158. .IR net .
  159. .TP
  160. .B tls
  161. Can be one of
  162. .IR on ", " off " or " required
  163. and specifies if tls should be used.
  164. .I on
  165. means a connection with TLS is attempted first, but if the server doesn't advertise TLS support
  166. then non-TLS will be used.
  167. .I off
  168. means TLS is not required and it's not even tried. This mode is the
  169. only one which doesn't need a properly initialized NSS database.
  170. .I required
  171. means TLS is required and if the server doesn't support TLS, qdevice will
  172. exit with error message. Default is
  173. .IR on .
  174. .TP
  175. .B host
  176. Specifies the IP address or host name of the qnetd server to be used. This parameter
  177. is required.
  178. .TP
  179. .B port
  180. Specifies TCP port of qnetd server. Default is
  181. .IR 5403 .
  182. .TP
  183. .B algorithm
  184. Decision algorithm. Can be one of the
  185. .I ffsplit
  186. or
  187. .IR lms .
  188. (actually there are also
  189. .I test
  190. and
  191. .IR 2nodelms ,
  192. both of which are mainly for developers and shouldn't be used for production clusters).
  193. For a description of what each algorithm means and how the algorithms differ see their
  194. individual sections.
  195. Default value is
  196. .IR ffsplit .
  197. .TP
  198. .B tie_breaker
  199. can be one of
  200. .IR lowest ", " highest
  201. or valid_node_id (number) values. It's used as a fallback if qdevice has to decide between two or more
  202. equal partitions.
  203. .I lowest
  204. means the partition with the lowest node id is chosen.
  205. .I highest
  206. means the partition with the highest node id is chosen. And valid_node_id means that the partition
  207. containing the node with the given node id is chosen.
  208. Default is
  209. .IR lowest .
  210. .TP
  211. .B connect_timeout
  212. Timeout when
  213. .B corosync-qdevice
  214. is trying to connect to
  215. .B corosync-qnetd
  216. host. Default is 0.8 *
  217. .BR quorum.device.timeout .
  218. .TP
  219. .B force_ip_version
  220. can be one of
  221. .I 0|4|6
  222. and forces the software to use the given IP version.
  223. .I 0
  224. (default value) means IPv6 is preferred and IPv4 should be used as a fallback.
  225. .PP
  226. Logging configuration is within the
  227. .B logging
  228. directive.
  229. .B corosync-qdevice
  230. parses and supports only
  231. .BR debug
  232. option. The
  233. .B logger_subsys
  234. sub-directive can be also used if
  235. .B subsys
  236. is set to
  237. .IR QDEVICE .
  238. .PP
  239. For
  240. .B corosync-qdevice
  241. to work correctly, the
  242. .B nodelist
  243. directive has to be used and properly configured. Also the
  244. .I net
  245. model requires that
  246. .B totem.cluster_name
  247. option is set.
  248. .SH MODEL NET TLS CONFIGURATION
  249. For
  250. .B model
  251. .I net
  252. to work using TLS, it's necessary to create the NSS database, import Qnetd
  253. CA certificate, and get/distribute a valid client certificate.
  254. If pcs is used (recommended) the following steps are not needed because pcs does them automatically.
  255. .B corosync-qdevice-net-certutil
  256. is the tool to perform required actions semi-automatically. Please consult the help output of
  257. it and its man page. For a first time configuration it may make sense to start with the
  258. .B -Q
  259. option.
  260. If TLS is not required just edit corosync.conf file and set
  261. .B quorum.device.net.tls
  262. to
  263. .IR off .
  264. Depending on configuration of NSS (stored in nss.config file usually in
  265. /etc/crypto-policies/back-ends/ directory) disabled ciphers or too short keys
  266. may be rejected. The proper solution is to regenerate NSS databases for both
  267. .B corosync-qnetd
  268. and
  269. .B corosync-qdevice
  270. daemons. As a quick workaround it's also possible to set environment variable
  271. .I NSS_IGNORE_SYSTEM_POLICY=1
  272. before running
  273. .B corosync-qdevice
  274. daemon.
  275. When NSS is updated it may also be necessary to upgrade the database into the new format. There is no
  276. consensus on the recommended way, but the following command seems to work just fine (if qdevice
  277. sysconfdir is set to /etc):
  278. .nf
  279. # certutil -N -d /etc/corosync/qdevice/net/nssdb -f /etc/corosync/qdevice/net/nssdb/pwdfile.txt
  280. .fi
  281. .SH MODEL NET ALGORITHMS
  282. Algorithms are used to change behavior of how
  283. .B corosync-qnetd
  284. provides votes to a given node/partition. Currently there are two algorithms supported.
  285. .TP
  286. .B ffsplit
  287. This one makes sense only for clusters with an even number of nodes. It provides exactly one
  288. vote to the partition with the highest number of active nodes. If there are two exactly
  289. similar partitions,
  290. it provides its vote to the partition with higher score. The score is computed
  291. as (number_of_connected_nodes +
  292. number_of_connected_nodes_with_passed_heuristics - number_of_connected_nodes_with_failed_heuristics).
  293. If the scores are equal, the vote is provided to the partition with the most clients connected to the qnetd
  294. server. If this number is also equal, then the tie_breaker is used. It is able to transition
  295. its vote if the currently active partition becomes partitioned and a non-active partition
  296. still has at least 50% of the active nodes. Because of this, a vote is not provided
  297. if the qnetd connection is not active.
  298. To use this algorithm it's required to set the number of votes per node to 1 (default)
  299. and the qdevice number of votes has to be also 1. This is achieved by setting
  300. .B quorum.device.votes
  301. key in corosync.conf file to 1.
  302. .TP
  303. .B lms
  304. Last-man-standing. If the node is the only one left in the cluster that can see the
  305. qnetd server then we return a vote.
  306. If more than one node can see the qnetd server but some nodes can't
  307. see each other then the cluster is divided up into 'partitions' based on
  308. their ring_id and this algorithm returns a vote to the partition with highest
  309. heuristics score (computed the same way as for the
  310. .B ffsplit
  311. algorithm), or if there is more than 1 partition with equal scores,
  312. the largest active partition or,
  313. if there is more than 1 equal partition, the partition that contains the tie_breaker
  314. node (lowest, highest, etc). For LMS to work, the number
  315. of qdevice votes has to be set to default (so just delete
  316. .B quorum.device.votes
  317. key from corosync.conf).
  318. .SH ADVANCED SETTINGS
  319. Set by using
  320. .B -S
  321. option. (The default value is shown in parentheses.) Options
  322. beginning with
  323. .B net_
  324. prefix are specific to
  325. .B model
  326. .IR net .
  327. .TP
  328. .B lock_file
  329. Lock file location. (/var/run/corosync-qdevice/corosync-qdevice.pid)
  330. .TP
  331. .B local_socket_file
  332. Internal IPC socket file location. (/var/run/corosync-qdevice/corosync-qdevice.sock)
  333. .TP
  334. .B local_socket_backlog
  335. Parameter passed to listen syscall. (10)
  336. .TP
  337. .B max_cs_try_again
  338. How many times to retry the call to a corosync function which has returned CS_ERR_TRY_AGAIN. (10)
  339. .TP
  340. .B votequorum_device_name
  341. Name used for qdevice registration. (Qdevice)
  342. .TP
  343. .B ipc_max_clients
  344. Maximum allowed simultaneous IPC clients. (10)
  345. .TP
  346. .B ipc_max_receive_size
  347. Maximum size of a message received by IPC client. (4096)
  348. .TP
  349. .B ipc_max_send_size
  350. Maximum size of a message allowed to be sent to an IPC client. (65536)
  351. .TP
  352. .B master_wins
  353. Force enable/disable master wins. (default is model)
  354. .TP
  355. .B heuristics_ipc_max_send_buffers
  356. Maximum number of heuristics worker send buffers. (128)
  357. .TP
  358. .B heuristics_ipc_max_send_receive_size
  359. Maximum size of a message allowed to be sent to, or received from, a heuristics worker. (4096)
  360. .TP
  361. .B heuristics_min_timeout
  362. Minimum heuristics timeout accepted by client in ms. (1000)
  363. .TP
  364. .B heuristics_max_timeout
  365. Maximum heuristics timeout accepted by client in ms. (120000)
  366. .TP
  367. .B heuristics_min_interval
  368. Minimum heuristics interval accepted by client in ms. (1000)
  369. .TP
  370. .B heuristics_max_interval
  371. Maximum heuristics interval accepted by client in ms. (3600000)
  372. .TP
  373. .B heuristics_max_execs
  374. Maximum number of exec_ commands. (32)
  375. .TP
  376. .B heuristics_use_execvp
  377. Use execvp instead of execv for executing commands. (off)
  378. .TP
  379. .B heuristics_max_processes
  380. Maximum number of processes running at one time. (160)
  381. .TP
  382. .B heuristics_kill_list_interval
  383. Interval, in ms, at which status is gathered and a signal is eventually sent
  384. to processes which didn't finish on time. (5000)
  385. .TP
  386. .B net_nss_db_dir
  387. NSS database directory. (/etc/corosync/qdevice/net/nssdb)
  388. .TP
  389. .B net_initial_msg_receive_size
  390. Initial (used during connection parameters negotiation)
  391. maximum size of the receive buffer for message (maximum
  392. allowed message size received from qnetd). (32768)
  393. .TP
  394. .B net_initial_msg_send_size
  395. Initial (used during connection parameter negotiation)
  396. maximum size of one send buffer (message) to be sent to server. (32768)
  397. .TP
  398. .B net_min_msg_send_size
  399. Minimum required size of one send buffer (message) to be sent to server. (32768)
  400. .TP
  401. .B net_max_msg_receive_size
  402. Maximum allowed size of receive buffer for a message sent by server. (16777216)
  403. .TP
  404. .B net_max_send_buffers
  405. Maximum number of send buffers. (10)
  406. .TP
  407. .B net_nss_qnetd_cn
  408. Canonical name of qnetd server certificate. (Qnetd Server)
  409. .TP
  410. .B net_nss_client_cert_nickname
  411. NSS nickname of qdevice client certificate. (Cluster Cert)
  412. .TP
  413. .B net_heartbeat_interval_min
  414. Minimum heartbeat timeout accepted by client in ms. (1000)
  415. .TP
  416. .B net_heartbeat_interval_max
  417. Maximum heartbeat timeout accepted by client in ms. (120000)
  418. .TP
  419. .B net_min_connect_timeout
  420. Minimum connection timeout accepted by client in ms. (1000)
  421. .TP
  422. .B net_max_connect_timeout
  423. Maximum connection timeout accepted by client in ms. (120000)
  424. .TP
  425. .B net_test_algorithm_enabled
  426. Enable test algorithm. (if built with --enable-debug on, otherwise off)
  427. .SH EXAMPLE
  428. Define qdevice with
  429. .I net
  430. model connecting to qnetd running on qnetd.example.org host, using
  431. .I ffsplit
  432. algorithm.
  433. Heuristics is set to
  434. .I sync
  435. mode and executes two commands.
  436. .nf
  437. quorum {
  438. provider: corosync_votequorum
  439. device {
  440. votes: 1
  441. model: net
  442. net {
  443. tls: on
  444. host: qnetd.example.org
  445. algorithm: ffsplit
  446. }
  447. heuristics {
  448. mode: sync
  449. exec_ping: /bin/ping -q -c 1 "www.example.org"
  450. exec_test_txt_exists: /usr/bin/test -f /tmp/test.txt
  451. }
  452. }
  453. .fi
  454. .SH SEE ALSO
  455. .BR corosync-qdevice-tool (8)
  456. .BR corosync-qdevice-net-certutil (8)
  457. .BR corosync-qnetd (8)
  458. .BR corosync.conf (5)
  459. .BR votequorum_qdevice_poll (3)
  460. .SH AUTHOR
  461. Jan Friesse
  462. .PP