corosync-qdevice-net.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241
  1. /*
  2. * Copyright (c) 2015 Red Hat, Inc.
  3. *
  4. * All rights reserved.
  5. *
  6. * Author: Jan Friesse (jfriesse@redhat.com)
  7. *
  8. * This software licensed under BSD license, the text of which follows:
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions are met:
  12. *
  13. * - Redistributions of source code must retain the above copyright notice,
  14. * this list of conditions and the following disclaimer.
  15. * - Redistributions in binary form must reproduce the above copyright notice,
  16. * this list of conditions and the following disclaimer in the documentation
  17. * and/or other materials provided with the distribution.
  18. * - Neither the name of the Red Hat, Inc. nor the names of its
  19. * contributors may be used to endorse or promote products derived from this
  20. * software without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. * THE POSSIBILITY OF SUCH DAMAGE.
  33. */
  34. #include <config.h>
  35. #include <stdio.h>
  36. #include <nss.h>
  37. #include <secerr.h>
  38. #include <sslerr.h>
  39. #include <pk11func.h>
  40. #include <certt.h>
  41. #include <ssl.h>
  42. #include <prio.h>
  43. #include <prnetdb.h>
  44. #include <prerror.h>
  45. #include <prinit.h>
  46. #include <getopt.h>
  47. #include <err.h>
  48. #include <keyhi.h>
  49. /*
  50. * Needed for creating nspr handle from unix fd
  51. */
  52. #include <private/pprio.h>
  53. #include <cmap.h>
  54. #include <votequorum.h>
  55. #include "qnetd-defines.h"
  56. #include "dynar.h"
  57. #include "nss-sock.h"
  58. #include "tlv.h"
  59. #include "msg.h"
  60. #include "msgio.h"
  61. #include "qnetd-log.h"
  62. #include "timer-list.h"
  /*
   * Location of the NSS database, built by string concatenation with
   * COROSYSCONFDIR. NOTE(review): presumably handed to NSS initialization
   * further down in the file - confirm.
   */
  63. #define NSS_DB_DIR COROSYSCONFDIR "/qdevice-net/nssdb"
  64. /*
  65. * It's usually not a good idea to change following defines
  66. */
  /* Message buffer sizing: 1 << 15 is 32 KiB, 1 << 24 is 16 MiB. */
  67. #define QDEVICE_NET_INITIAL_MSG_RECEIVE_SIZE (1 << 15)
  68. #define QDEVICE_NET_INITIAL_MSG_SEND_SIZE (1 << 15)
  69. #define QDEVICE_NET_MIN_MSG_SEND_SIZE QDEVICE_NET_INITIAL_MSG_SEND_SIZE
  70. #define QDEVICE_NET_MAX_MSG_RECEIVE_SIZE (1 << 24)
  /* Server name passed to nss_sock_start_ssl_as_client when TLS is started. */
  71. #define QNETD_NSS_SERVER_CN "Qnetd Server"
  /* Nickname handed to the client auth data callback as its argument. */
  72. #define QDEVICE_NET_NSS_CLIENT_CERT_NICKNAME "Cluster Cert"
  73. #define QDEVICE_NET_VOTEQUORUM_DEVICE_NAME "QdeviceNet"
  /* The qdevice-net logging names are plain aliases of the qnetd logging API. */
  74. #define qdevice_net_log qnetd_log
  75. #define qdevice_net_log_nss qnetd_log_nss
  76. #define qdevice_net_log_init qnetd_log_init
  77. #define qdevice_net_log_close qnetd_log_close
  78. #define qdevice_net_log_set_debug qnetd_log_set_debug
  79. #define QDEVICE_NET_LOG_TARGET_STDERR QNETD_LOG_TARGET_STDERR
  80. #define QDEVICE_NET_LOG_TARGET_SYSLOG QNETD_LOG_TARGET_SYSLOG
  /* NOTE(review): looks like a retry limit for corosync calls returning "try again" - confirm at the use site. */
  81. #define MAX_CS_TRY_AGAIN 10
  /*
   * Connection handshake state. Each value names the event the client is
   * currently waiting for; the message handlers compare against and advance
   * this state.
   */
  82. enum qdevice_net_state {
  /* Required by the preinit reply handler before it accepts a preinit reply. */
  83. QDEVICE_NET_STATE_WAITING_PREINIT_REPLY,
  /* Set once a starttls message is scheduled; the write-finished hook then starts TLS. */
  84. QDEVICE_NET_STATE_WAITING_STARTTLS_BEING_SENT,
  /* Set after the init message is scheduled for sending. */
  85. QDEVICE_NET_STATE_WAITING_INIT_REPLY,
  /* Set after the set option message is scheduled for sending. */
  86. QDEVICE_NET_STATE_WAITING_SET_OPTION_REPLY,
  87. };
  /*
   * State of one client connection to the server: the socket, message
   * buffers with their send/receive progress, protocol sequence numbers,
   * configuration values, timers and the corosync (cmap/votequorum) handles.
   */
  88. struct qdevice_net_instance {
  /* Server connection; replaced by the TLS descriptor once TLS is started. */
  89. PRFileDesc *socket;
  /* Buffer size limits copied from the qdevice_net_instance_init arguments. */
  90. size_t initial_send_size;
  91. size_t initial_receive_size;
  92. size_t max_receive_size;
  93. size_t min_send_size;
  /* Incoming message, regular outgoing message and outgoing echo request. */
  94. struct dynar receive_buffer;
  95. struct dynar send_buffer;
  96. struct dynar echo_request_send_buffer;
  /* Non-zero while the corresponding buffer still has to be written / is being skipped. */
  97. int sending_msg;
  98. int skipping_msg;
  99. int sending_echo_request_msg;
  /* Progress counters passed to msgio_read / msgio_write. */
  100. size_t msg_already_received_bytes;
  101. size_t msg_already_sent_bytes;
  102. size_t echo_request_msg_already_sent_bytes;
  103. enum qdevice_net_state state;
  /* Sequence number of the last request sent; replies must carry the same number. */
  104. uint32_t expected_msg_seq_num;
  /* Sequence number of the last echo request sent and of the last echo reply received. */
  105. uint32_t echo_request_expected_msg_seq_num;
  106. uint32_t echo_reply_received_msg_seq_num;
  /* Client side TLS policy, compared with the server policy from the preinit reply. */
  107. enum tlv_tls_supported tls_supported;
  108. int using_tls;
  109. uint32_t node_id;
  110. uint32_t heartbeat_interval; /* Heartbeat interval during normal operation */
  111. uint32_t sync_heartbeat_interval; /* Heartbeat interval during corosync sync */
  112. const char *host_addr;
  113. uint16_t host_port;
  114. const char *cluster_name;
  115. enum tlv_decision_algorithm_type decision_algorithm;
  /* Timers expired from qdevice_net_poll; echo_request_timer is the heartbeat entry. */
  116. struct timer_list main_timer_list;
  117. struct timer_list_entry *echo_request_timer;
  /* Set to 1 by socket handlers or timer callbacks to make qdevice_net_poll return -1. */
  118. int schedule_disconnect;
  119. cmap_handle_t cmap_handle;
  120. votequorum_handle_t votequorum_handle;
  /* Polled for read together with the socket; readiness triggers votequorum_dispatch. */
  121. PRFileDesc *votequorum_poll_fd;
  122. };
  /* NOTE(review): presumably the ring id from the most recent votequorum notification - not referenced in the visible part of the file, confirm. */
  123. static votequorum_ring_id_t global_last_received_ring_id;
  124. static void
  125. err_nss(void) {
  126. errx(1, "nss error %d: %s", PR_GetError(), PR_ErrorToString(PR_GetError(), PR_LANGUAGE_I_DEFAULT));
  127. }
  128. static SECStatus
  129. qdevice_net_nss_bad_cert_hook(void *arg, PRFileDesc *fd) {
  130. if (PR_GetError() == SEC_ERROR_EXPIRED_CERTIFICATE ||
  131. PR_GetError() == SEC_ERROR_EXPIRED_ISSUER_CERTIFICATE ||
  132. PR_GetError() == SEC_ERROR_CRL_EXPIRED ||
  133. PR_GetError() == SEC_ERROR_KRL_EXPIRED ||
  134. PR_GetError() == SSL_ERROR_EXPIRED_CERT_ALERT) {
  135. qdevice_net_log(LOG_WARNING, "Server certificate is expired.");
  136. return (SECSuccess);
  137. }
  138. qdevice_net_log_nss(LOG_ERR, "Server certificate verification failure.");
  139. return (SECFailure);
  140. }
  141. static SECStatus
  142. qdevice_net_nss_get_client_auth_data(void *arg, PRFileDesc *sock, struct CERTDistNamesStr *caNames,
  143. struct CERTCertificateStr **pRetCert, struct SECKEYPrivateKeyStr **pRetKey)
  144. {
  145. qdevice_net_log(LOG_DEBUG, "Sending client auth data.");
  146. return (NSS_GetClientAuthData(arg, sock, caNames, pRetCert, pRetKey));
  147. }
  148. static int
  149. qdevice_net_schedule_send(struct qdevice_net_instance *instance)
  150. {
  151. if (instance->sending_msg) {
  152. /*
  153. * Msg is already scheduled for send
  154. */
  155. return (-1);
  156. }
  157. instance->msg_already_sent_bytes = 0;
  158. instance->sending_msg = 1;
  159. return (0);
  160. }
  161. static int
  162. qdevice_net_schedule_echo_request_send(struct qdevice_net_instance *instance)
  163. {
  164. if (instance->sending_echo_request_msg) {
  165. qdevice_net_log(LOG_ERR, "Can't schedule send of echo request msg, because "
  166. "previous message wasn't yet sent. Disconnecting from server.");
  167. return (-1);
  168. }
  169. if (instance->echo_reply_received_msg_seq_num != instance->echo_request_expected_msg_seq_num) {
  170. qdevice_net_log(LOG_ERR, "Server didn't send echo reply message on time. "
  171. "Disconnecting from server.");
  172. return (-1);
  173. }
  174. instance->echo_request_expected_msg_seq_num++;
  175. if (msg_create_echo_request(&instance->echo_request_send_buffer, 1, instance->echo_request_expected_msg_seq_num) == -1) {
  176. qdevice_net_log(LOG_ERR, "Can't allocate send buffer for echo request msg");
  177. return (-1);
  178. }
  179. instance->echo_request_msg_already_sent_bytes = 0;
  180. instance->sending_echo_request_msg = 1;
  181. return (0);
  182. }
  /*
   * Log a human readable description of a non-zero msg_decode result.
   * Codes -1 to -4 have dedicated messages, anything else is reported as an
   * unknown error.
   */
  static void
  qdevice_net_log_msg_decode_error(int ret)
  {
      int priority;
      const char *text;

      if (ret == -1) {
          priority = LOG_WARNING;
          text = "Received message with option with invalid length";
      } else if (ret == -2) {
          priority = LOG_CRIT;
          text = "Can't allocate memory";
      } else if (ret == -3) {
          priority = LOG_WARNING;
          text = "Received inconsistent msg (tlv len > msg size)";
      } else if (ret == -4) {
          priority = LOG_ERR;
          text = "Received message with option with invalid value";
      } else {
          priority = LOG_ERR;
          text = "Unknown error occured when decoding message";
      }

      qdevice_net_log(priority, "%s", text);
  }
  204. /*
  205. * -1 - Incompatible tls combination
  206. * 0 - Don't use TLS
  207. * 1 - Use TLS
  208. */
  209. static int
  210. qdevice_net_check_tls_compatibility(enum tlv_tls_supported server_tls, enum tlv_tls_supported client_tls)
  211. {
  212. int res;
  213. res = -1;
  214. switch (server_tls) {
  215. case TLV_TLS_UNSUPPORTED:
  216. switch (client_tls) {
  217. case TLV_TLS_UNSUPPORTED: res = 0; break;
  218. case TLV_TLS_SUPPORTED: res = 0; break;
  219. case TLV_TLS_REQUIRED: res = -1; break;
  220. }
  221. break;
  222. case TLV_TLS_SUPPORTED:
  223. switch (client_tls) {
  224. case TLV_TLS_UNSUPPORTED: res = 0; break;
  225. case TLV_TLS_SUPPORTED: res = 1; break;
  226. case TLV_TLS_REQUIRED: res = 1; break;
  227. }
  228. break;
  229. case TLV_TLS_REQUIRED:
  230. switch (client_tls) {
  231. case TLV_TLS_UNSUPPORTED: res = -1; break;
  232. case TLV_TLS_SUPPORTED: res = 1; break;
  233. case TLV_TLS_REQUIRED: res = 1; break;
  234. }
  235. break;
  236. }
  237. return (res);
  238. }
  /*
   * Handler for a preinit message received from the server. The client never
   * expects this message, so it is logged and -1 (disconnect) is returned.
   */
  static int
  qdevice_net_msg_received_preinit(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  {
      (void)instance;
      (void)msg;

      qdevice_net_log(LOG_ERR, "Received unexpected preinit message. Disconnecting from server");

      return (-1);
  }
  245. static int
  246. qdevice_net_msg_check_seq_number(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  247. {
  248. if (!msg->seq_number_set || msg->seq_number != instance->expected_msg_seq_num) {
  249. qdevice_net_log(LOG_ERR, "Received message doesn't contain seq_number or it's not expected one.");
  250. return (-1);
  251. }
  252. return (0);
  253. }
  254. static int
  255. qdevice_net_msg_check_echo_reply_seq_number(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  256. {
  257. if (!msg->seq_number_set) {
  258. qdevice_net_log(LOG_ERR, "Received echo reply message doesn't contain seq_number.");
  259. return (-1);
  260. }
  261. if (msg->seq_number != instance->echo_request_expected_msg_seq_num) {
  262. qdevice_net_log(LOG_ERR, "Server doesn't replied in expected time. Closing connection");
  263. return (-1);
  264. }
  265. return (0);
  266. }
  267. static int
  268. qdevice_net_send_init(struct qdevice_net_instance *instance)
  269. {
  270. enum msg_type *supported_msgs;
  271. size_t no_supported_msgs;
  272. enum tlv_opt_type *supported_opts;
  273. size_t no_supported_opts;
  274. tlv_get_supported_options(&supported_opts, &no_supported_opts);
  275. msg_get_supported_messages(&supported_msgs, &no_supported_msgs);
  276. instance->expected_msg_seq_num++;
  277. if (msg_create_init(&instance->send_buffer, 1, instance->expected_msg_seq_num,
  278. supported_msgs, no_supported_msgs, supported_opts, no_supported_opts,
  279. instance->node_id) == 0) {
  280. qdevice_net_log(LOG_ERR, "Can't allocate send buffer for init msg");
  281. return (-1);
  282. }
  283. if (qdevice_net_schedule_send(instance) != 0) {
  284. qdevice_net_log(LOG_ERR, "Can't schedule send of init msg");
  285. return (-1);
  286. }
  287. instance->state = QDEVICE_NET_STATE_WAITING_INIT_REPLY;
  288. return (0);
  289. }
  290. static int
  291. qdevice_net_msg_received_preinit_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  292. {
  293. int res;
  294. if (instance->state != QDEVICE_NET_STATE_WAITING_PREINIT_REPLY) {
  295. qdevice_net_log(LOG_ERR, "Received unexpected preinit reply message. Disconnecting from server");
  296. return (-1);
  297. }
  298. if (qdevice_net_msg_check_seq_number(instance, msg) != 0) {
  299. return (-1);
  300. }
  301. /*
  302. * Check TLS support
  303. */
  304. if (!msg->tls_supported_set || !msg->tls_client_cert_required_set) {
  305. qdevice_net_log(LOG_ERR, "Required tls_supported or tls_client_cert_required option is unset");
  306. return (-1);
  307. }
  308. res = qdevice_net_check_tls_compatibility(msg->tls_supported, instance->tls_supported);
  309. if (res == -1) {
  310. qdevice_net_log(LOG_ERR, "Incompatible tls configuration (server %u client %u)",
  311. msg->tls_supported, instance->tls_supported);
  312. return (-1);
  313. } else if (res == 1) {
  314. /*
  315. * Start TLS
  316. */
  317. instance->expected_msg_seq_num++;
  318. if (msg_create_starttls(&instance->send_buffer, 1, instance->expected_msg_seq_num) == 0) {
  319. qdevice_net_log(LOG_ERR, "Can't allocate send buffer for starttls msg");
  320. return (-1);
  321. }
  322. if (qdevice_net_schedule_send(instance) != 0) {
  323. qdevice_net_log(LOG_ERR, "Can't schedule send of starttls msg");
  324. return (-1);
  325. }
  326. instance->state = QDEVICE_NET_STATE_WAITING_STARTTLS_BEING_SENT;
  327. } else if (res == 0) {
  328. if (qdevice_net_send_init(instance) != 0) {
  329. return (-1);
  330. }
  331. }
  332. return (0);
  333. }
  334. static int
  335. qdevice_net_msg_received_init_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  336. {
  337. size_t zi;
  338. int res;
  339. if (instance->state != QDEVICE_NET_STATE_WAITING_INIT_REPLY) {
  340. qdevice_net_log(LOG_ERR, "Received unexpected init reply message. Disconnecting from server");
  341. return (-1);
  342. }
  343. if (qdevice_net_msg_check_seq_number(instance, msg) != 0) {
  344. return (-1);
  345. }
  346. if (!msg->server_maximum_request_size_set || !msg->server_maximum_reply_size_set) {
  347. qdevice_net_log(LOG_ERR, "Required maximum_request_size or maximum_reply_size option is unset");
  348. return (-1);
  349. }
  350. if (msg->supported_messages == NULL || msg->supported_options == NULL) {
  351. qdevice_net_log(LOG_ERR, "Required supported messages or supported options option is unset");
  352. return (-1);
  353. }
  354. if (msg->supported_decision_algorithms == NULL) {
  355. qdevice_net_log(LOG_ERR, "Required supported decision algorithms option is unset");
  356. return (-1);
  357. }
  358. if (msg->server_maximum_request_size < instance->min_send_size) {
  359. qdevice_net_log(LOG_ERR,
  360. "Server accepts maximum %zu bytes message but this client minimum is %zu bytes.",
  361. msg->server_maximum_request_size, instance->min_send_size);
  362. return (-1);
  363. }
  364. if (msg->server_maximum_reply_size > instance->max_receive_size) {
  365. qdevice_net_log(LOG_ERR,
  366. "Server may send message up to %zu bytes message but this client maximum is %zu bytes.",
  367. msg->server_maximum_reply_size, instance->max_receive_size);
  368. return (-1);
  369. }
  370. /*
  371. * Change buffer sizes
  372. */
  373. dynar_set_max_size(&instance->receive_buffer, msg->server_maximum_reply_size);
  374. dynar_set_max_size(&instance->send_buffer, msg->server_maximum_request_size);
  375. dynar_set_max_size(&instance->echo_request_send_buffer, msg->server_maximum_request_size);
  376. /*
  377. * Check if server supports decision algorithm we need
  378. */
  379. res = 0;
  380. for (zi = 0; zi < msg->no_supported_decision_algorithms && !res; zi++) {
  381. if (msg->supported_decision_algorithms[zi] == instance->decision_algorithm) {
  382. res = 1;
  383. }
  384. }
  385. if (!res) {
  386. qdevice_net_log(LOG_ERR, "Server doesn't support required decision algorithm");
  387. return (-1);
  388. }
  389. /*
  390. * Send set options message
  391. */
  392. instance->expected_msg_seq_num++;
  393. if (msg_create_set_option(&instance->send_buffer, 1, instance->expected_msg_seq_num,
  394. 1, instance->decision_algorithm, 1, instance->heartbeat_interval) == 0) {
  395. qdevice_net_log(LOG_ERR, "Can't allocate send buffer for set option msg");
  396. return (-1);
  397. }
  398. if (qdevice_net_schedule_send(instance) != 0) {
  399. qdevice_net_log(LOG_ERR, "Can't schedule send of set option msg");
  400. return (-1);
  401. }
  402. instance->state = QDEVICE_NET_STATE_WAITING_SET_OPTION_REPLY;
  403. return (0);
  404. }
  /*
   * Handler for a starttls message received from the server. The client never
   * expects this message, so it is logged and -1 (disconnect) is returned.
   */
  static int
  qdevice_net_msg_received_stattls(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  {
      (void)instance;
      (void)msg;

      qdevice_net_log(LOG_ERR, "Received unexpected starttls message. Disconnecting from server");

      return (-1);
  }
  411. static int
  412. qdevice_net_msg_received_server_error(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  413. {
  414. if (!msg->reply_error_code_set) {
  415. qdevice_net_log(LOG_ERR, "Received server error without error code set. Disconnecting from server");
  416. } else {
  417. qdevice_net_log(LOG_ERR, "Received server error %"PRIu16". Disconnecting from server",
  418. msg->reply_error_code);
  419. }
  420. return (-1);
  421. }
  /*
   * Handler for a set option message received from the server. The client
   * never expects this message, so it is logged and -1 (disconnect) is
   * returned.
   */
  static int
  qdevice_net_msg_received_set_option(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  {
      (void)instance;
      (void)msg;

      qdevice_net_log(LOG_ERR, "Received unexpected set option message. Disconnecting from server");

      return (-1);
  }
  428. static int
  429. qdevice_net_timer_send_heartbeat(void *data1, void *data2)
  430. {
  431. struct qdevice_net_instance *instance;
  432. instance = (struct qdevice_net_instance *)data1;
  433. if (qdevice_net_schedule_echo_request_send(instance) == -1) {
  434. instance->schedule_disconnect = 1;
  435. return (0);
  436. }
  437. /*
  438. * Schedule this function callback again
  439. */
  440. return (-1);
  441. }
  442. static int
  443. qdevice_net_msg_received_set_option_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  444. {
  445. if (qdevice_net_msg_check_seq_number(instance, msg) != 0) {
  446. return (-1);
  447. }
  448. if (!msg->decision_algorithm_set || !msg->heartbeat_interval_set) {
  449. qdevice_net_log(LOG_ERR, "Received set option reply message without required options. "
  450. "Disconnecting from server");
  451. }
  452. if (msg->decision_algorithm != instance->decision_algorithm ||
  453. msg->heartbeat_interval != instance->heartbeat_interval) {
  454. qdevice_net_log(LOG_ERR, "Server doesn't accept sent decision algorithm or heartbeat interval.");
  455. return (-1);
  456. }
  457. /*
  458. * Server accepted heartbeat interval -> schedule regular sending of echo request
  459. */
  460. if (instance->heartbeat_interval > 0) {
  461. instance->echo_request_timer = timer_list_add(&instance->main_timer_list, instance->heartbeat_interval,
  462. qdevice_net_timer_send_heartbeat, (void *)instance, NULL);
  463. if (instance->echo_request_timer == NULL) {
  464. qdevice_net_log(LOG_ERR, "Can't schedule regular sending of heartbeat.");
  465. return (-1);
  466. }
  467. }
  468. return (0);
  469. }
  /*
   * Handler for an echo request received from the server. The client never
   * expects this message, so it is logged and -1 (disconnect) is returned.
   */
  static int
  qdevice_net_msg_received_echo_request(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  {
      (void)instance;
      (void)msg;

      qdevice_net_log(LOG_ERR, "Received unexpected echo request message. Disconnecting from server");

      return (-1);
  }
  476. static int
  477. qdevice_net_msg_received_echo_reply(struct qdevice_net_instance *instance, const struct msg_decoded *msg)
  478. {
  479. if (qdevice_net_msg_check_echo_reply_seq_number(instance, msg) != 0) {
  480. return (-1);
  481. }
  482. instance->echo_reply_received_msg_seq_num = msg->seq_number;
  483. return (0);
  484. }
  485. static int
  486. qdevice_net_msg_received(struct qdevice_net_instance *instance)
  487. {
  488. struct msg_decoded msg;
  489. int res;
  490. int ret_val;
  491. msg_decoded_init(&msg);
  492. res = msg_decode(&instance->receive_buffer, &msg);
  493. if (res != 0) {
  494. /*
  495. * Error occurred. Disconnect.
  496. */
  497. qdevice_net_log_msg_decode_error(res);
  498. qdevice_net_log(LOG_ERR, "Disconnecting from server");
  499. return (-1);
  500. }
  501. ret_val = 0;
  502. switch (msg.type) {
  503. case MSG_TYPE_PREINIT:
  504. ret_val = qdevice_net_msg_received_preinit(instance, &msg);
  505. break;
  506. case MSG_TYPE_PREINIT_REPLY:
  507. ret_val = qdevice_net_msg_received_preinit_reply(instance, &msg);
  508. break;
  509. case MSG_TYPE_STARTTLS:
  510. ret_val = qdevice_net_msg_received_stattls(instance, &msg);
  511. break;
  512. case MSG_TYPE_SERVER_ERROR:
  513. ret_val = qdevice_net_msg_received_server_error(instance, &msg);
  514. break;
  515. case MSG_TYPE_INIT_REPLY:
  516. ret_val = qdevice_net_msg_received_init_reply(instance, &msg);
  517. break;
  518. case MSG_TYPE_SET_OPTION:
  519. ret_val = qdevice_net_msg_received_set_option(instance, &msg);
  520. break;
  521. case MSG_TYPE_SET_OPTION_REPLY:
  522. ret_val = qdevice_net_msg_received_set_option_reply(instance, &msg);
  523. break;
  524. case MSG_TYPE_ECHO_REQUEST:
  525. ret_val = qdevice_net_msg_received_echo_request(instance, &msg);
  526. break;
  527. case MSG_TYPE_ECHO_REPLY:
  528. ret_val = qdevice_net_msg_received_echo_reply(instance, &msg);
  529. break;
  530. default:
  531. qdevice_net_log(LOG_ERR, "Received unsupported message %u. Disconnecting from server", msg.type);
  532. ret_val = -1;
  533. break;
  534. }
  535. msg_decoded_destroy(&msg);
  536. return (ret_val);
  537. }
  538. /*
  539. * -1 means end of connection (EOF) or some other unhandled error. 0 = success
  540. */
  541. static int
  542. qdevice_net_socket_read(struct qdevice_net_instance *instance)
  543. {
  544. int res;
  545. int ret_val;
  546. int orig_skipping_msg;
  547. orig_skipping_msg = instance->skipping_msg;
  548. res = msgio_read(instance->socket, &instance->receive_buffer, &instance->msg_already_received_bytes,
  549. &instance->skipping_msg);
  550. if (!orig_skipping_msg && instance->skipping_msg) {
  551. qdevice_net_log(LOG_DEBUG, "msgio_read set skipping_msg");
  552. }
  553. ret_val = 0;
  554. switch (res) {
  555. case 0:
  556. /*
  557. * Partial read
  558. */
  559. break;
  560. case -1:
  561. qdevice_net_log(LOG_DEBUG, "Server closed connection");
  562. ret_val = -1;
  563. break;
  564. case -2:
  565. qdevice_net_log_nss(LOG_ERR, "Unhandled error when reading from server. Disconnecting from server");
  566. ret_val = -1;
  567. break;
  568. case -3:
  569. qdevice_net_log(LOG_ERR, "Can't store message header from server. Disconnecting from server");
  570. ret_val = -1;
  571. break;
  572. case -4:
  573. qdevice_net_log(LOG_ERR, "Can't store message from server. Disconnecting from server");
  574. ret_val = -1;
  575. break;
  576. case -5:
  577. qdevice_net_log(LOG_WARNING, "Server sent unsupported msg type %u. Disconnecting from server",
  578. msg_get_type(&instance->receive_buffer));
  579. ret_val = -1;
  580. break;
  581. case -6:
  582. qdevice_net_log(LOG_WARNING,
  583. "Server wants to send too long message %u bytes. Disconnecting from server",
  584. msg_get_len(&instance->receive_buffer));
  585. ret_val = -1;
  586. break;
  587. case 1:
  588. /*
  589. * Full message received / skipped
  590. */
  591. if (!instance->skipping_msg) {
  592. if (qdevice_net_msg_received(instance) == -1) {
  593. ret_val = -1;
  594. }
  595. } else {
  596. errx(1, "net_socket_read in skipping msg state");
  597. }
  598. instance->skipping_msg = 0;
  599. instance->msg_already_received_bytes = 0;
  600. dynar_clean(&instance->receive_buffer);
  601. break;
  602. default:
  603. errx(1, "qdevice_net_socket_read unhandled error %d", res);
  604. break;
  605. }
  606. return (ret_val);
  607. }
  608. static int
  609. qdevice_net_socket_write_finished(struct qdevice_net_instance *instance)
  610. {
  611. PRFileDesc *new_pr_fd;
  612. if (instance->state == QDEVICE_NET_STATE_WAITING_STARTTLS_BEING_SENT) {
  613. /*
  614. * StartTLS sent to server. Begin with TLS handshake
  615. */
  616. if ((new_pr_fd = nss_sock_start_ssl_as_client(instance->socket, QNETD_NSS_SERVER_CN,
  617. qdevice_net_nss_bad_cert_hook,
  618. qdevice_net_nss_get_client_auth_data, (void *)QDEVICE_NET_NSS_CLIENT_CERT_NICKNAME,
  619. 0, NULL)) == NULL) {
  620. qdevice_net_log_nss(LOG_ERR, "Can't start TLS");
  621. return (-1);
  622. }
  623. /*
  624. * And send init msg
  625. */
  626. if (qdevice_net_send_init(instance) != 0) {
  627. return (-1);
  628. }
  629. instance->socket = new_pr_fd;
  630. }
  631. return (0);
  632. }
  633. static int
  634. qdevice_net_socket_write(struct qdevice_net_instance *instance)
  635. {
  636. int res;
  637. int send_echo_request;
  638. /*
  639. * Echo request has extra buffer and special processing. Messages other then echo request
  640. * has higher priority, but if echo request send was not completed
  641. * it's necesary to complete it.
  642. */
  643. send_echo_request = !(instance->sending_msg && instance->echo_request_msg_already_sent_bytes == 0);
  644. if (!send_echo_request) {
  645. res = msgio_write(instance->socket, &instance->send_buffer, &instance->msg_already_sent_bytes);
  646. } else {
  647. res = msgio_write(instance->socket, &instance->echo_request_send_buffer,
  648. &instance->echo_request_msg_already_sent_bytes);
  649. }
  650. if (res == 1) {
  651. if (!send_echo_request) {
  652. instance->sending_msg = 0;
  653. if (qdevice_net_socket_write_finished(instance) == -1) {
  654. return (-1);
  655. }
  656. } else {
  657. instance->sending_echo_request_msg = 0;
  658. }
  659. }
  660. if (res == -1) {
  661. qdevice_net_log_nss(LOG_CRIT, "PR_Send returned 0");
  662. return (-1);
  663. }
  664. if (res == -2) {
  665. qdevice_net_log_nss(LOG_ERR, "Unhandled error when sending message to server");
  666. return (-1);
  667. }
  668. return (0);
  669. }
  /* Number of descriptors polled by qdevice_net_poll and their indexes in the poll array. */
  670. #define QDEVICE_NET_POLL_NO_FDS 2
  671. #define QDEVICE_NET_POLL_SOCKET 0
  672. #define QDEVICE_NET_POLL_VOTEQUORUM 1
  673. static int
  674. qdevice_net_poll(struct qdevice_net_instance *instance)
  675. {
  676. PRPollDesc pfds[QDEVICE_NET_POLL_NO_FDS];
  677. PRInt32 poll_res;
  678. int i;
  679. pfds[QDEVICE_NET_POLL_SOCKET].fd = instance->socket;
  680. pfds[QDEVICE_NET_POLL_SOCKET].in_flags = PR_POLL_READ;
  681. if (instance->sending_msg || instance->sending_echo_request_msg) {
  682. pfds[QDEVICE_NET_POLL_SOCKET].in_flags |= PR_POLL_WRITE;
  683. }
  684. pfds[QDEVICE_NET_POLL_VOTEQUORUM].fd = instance->votequorum_poll_fd;
  685. pfds[QDEVICE_NET_POLL_VOTEQUORUM].in_flags = PR_POLL_READ;
  686. instance->schedule_disconnect = 0;
  687. if ((poll_res = PR_Poll(pfds, QDEVICE_NET_POLL_NO_FDS,
  688. timer_list_time_to_expire(&instance->main_timer_list))) > 0) {
  689. for (i = 0; i < QDEVICE_NET_POLL_NO_FDS; i++) {
  690. if (pfds[i].out_flags & PR_POLL_READ) {
  691. switch (i) {
  692. case QDEVICE_NET_POLL_SOCKET:
  693. if (qdevice_net_socket_read(instance) == -1) {
  694. instance->schedule_disconnect = 1;
  695. }
  696. break;
  697. case QDEVICE_NET_POLL_VOTEQUORUM:
  698. if (votequorum_dispatch(instance->votequorum_handle, CS_DISPATCH_ALL) != CS_OK) {
  699. errx(1, "Can't dispatch votequorum messages");
  700. }
  701. break;
  702. default:
  703. errx(1, "Unhandled read poll descriptor %u", i);
  704. break;
  705. }
  706. }
  707. if (!instance->schedule_disconnect && pfds[i].out_flags & PR_POLL_WRITE) {
  708. switch (i) {
  709. case QDEVICE_NET_POLL_SOCKET:
  710. if (qdevice_net_socket_write(instance) == -1) {
  711. instance->schedule_disconnect = 1;
  712. }
  713. break;
  714. default:
  715. errx(1, "Unhandled write poll descriptor %u", i);
  716. break;
  717. }
  718. }
  719. if (!instance->schedule_disconnect &&
  720. pfds[i].out_flags & (PR_POLL_ERR|PR_POLL_NVAL|PR_POLL_HUP|PR_POLL_EXCEPT)) {
  721. switch (i) {
  722. case QDEVICE_NET_POLL_SOCKET:
  723. qdevice_net_log(LOG_CRIT, "POLL_ERR (%u) on main socket", pfds[i].out_flags);
  724. return (-1);
  725. break;
  726. default:
  727. errx(1, "Unhandled poll err on descriptor %u", i);
  728. break;
  729. }
  730. }
  731. }
  732. }
  733. if (!instance->schedule_disconnect) {
  734. timer_list_expire(&instance->main_timer_list);
  735. }
  736. if (instance->schedule_disconnect) {
  737. /*
  738. * Schedule disconnect can be set by this function or by some timer_list callback
  739. */
  740. return (-1);
  741. }
  742. return (0);
  743. }
  744. static int
  745. qdevice_net_instance_init(struct qdevice_net_instance *instance, size_t initial_receive_size,
  746. size_t initial_send_size, size_t min_send_size, size_t max_receive_size, enum tlv_tls_supported tls_supported,
  747. uint32_t node_id, enum tlv_decision_algorithm_type decision_algorithm, uint32_t heartbeat_interval,
  748. const char *host_addr, uint16_t host_port, const char *cluster_name)
  749. {
  750. memset(instance, 0, sizeof(*instance));
  751. instance->initial_receive_size = initial_receive_size;
  752. instance->initial_send_size = initial_send_size;
  753. instance->min_send_size = min_send_size;
  754. instance->max_receive_size = max_receive_size;
  755. instance->node_id = node_id;
  756. instance->decision_algorithm = decision_algorithm;
  757. instance->heartbeat_interval = heartbeat_interval;
  758. instance->host_addr = host_addr;
  759. instance->host_port = host_port;
  760. instance->cluster_name = cluster_name;
  761. dynar_init(&instance->receive_buffer, initial_receive_size);
  762. dynar_init(&instance->send_buffer, initial_send_size);
  763. dynar_init(&instance->echo_request_send_buffer, initial_send_size);
  764. timer_list_init(&instance->main_timer_list);
  765. instance->tls_supported = tls_supported;
  766. return (0);
  767. }
  768. static int
  769. qdevice_net_instance_destroy(struct qdevice_net_instance *instance)
  770. {
  771. timer_list_free(&instance->main_timer_list);
  772. dynar_destroy(&instance->receive_buffer);
  773. dynar_destroy(&instance->send_buffer);
  774. dynar_destroy(&instance->echo_request_send_buffer);
  775. /*
  776. * Close cmap and votequorum connections
  777. */
  778. if (votequorum_qdevice_unregister(instance->votequorum_handle, QDEVICE_NET_VOTEQUORUM_DEVICE_NAME) != CS_OK) {
  779. qdevice_net_log_nss(LOG_WARNING, "Unable to unregister votequorum device");
  780. }
  781. votequorum_finalize(instance->votequorum_handle);
  782. cmap_finalize(instance->cmap_handle);
  783. return (0);
  784. }
  785. static void
  786. qdevice_net_init_cmap(cmap_handle_t *handle)
  787. {
  788. cs_error_t res;
  789. int no_retries;
  790. no_retries = 0;
  791. while ((res = cmap_initialize(handle)) == CS_ERR_TRY_AGAIN && no_retries++ < MAX_CS_TRY_AGAIN) {
  792. sleep(1);
  793. }
  794. if (res != CS_OK) {
  795. errx(1, "Failed to initialize the cmap API. Error %s", cs_strerror(res));
  796. }
  797. }
  /*
   * Parse a boolean written as a word or digit. The comparison ignores case.
   *
   * Returns 1 for "yes", "on" or "1", 0 for "no", "off" or "0" and -1 for
   * any other string. str must not be NULL.
   */
  static int
  qdevice_net_parse_bool_str(const char *str)
  {
      static const char *const true_words[] = { "yes", "on", "1" };
      static const char *const false_words[] = { "no", "off", "0" };
      size_t idx;

      for (idx = 0; idx < sizeof(true_words) / sizeof(true_words[0]); idx++) {
          if (strcasecmp(str, true_words[idx]) == 0) {
              return (1);
          }
      }

      for (idx = 0; idx < sizeof(false_words) / sizeof(false_words[0]); idx++) {
          if (strcasecmp(str, false_words[idx]) == 0) {
              return (0);
          }
      }

      return (-1);
  }
  812. static void
  813. qdevice_net_instance_init_from_cmap(struct qdevice_net_instance *instance, cmap_handle_t cmap_handle)
  814. {
  815. uint32_t node_id;
  816. enum tlv_tls_supported tls_supported;
  817. int i;
  818. char *str;
  819. enum tlv_decision_algorithm_type decision_algorithm;
  820. uint32_t heartbeat_interval;
  821. uint32_t sync_heartbeat_interval;
  822. char *host_addr;
  823. int host_port;
  824. char *ep;
  825. char *cluster_name;
  826. /*
  827. * Check if provider is net
  828. */
  829. if (cmap_get_string(cmap_handle, "quorum.device.model", &str) != CS_OK) {
  830. errx(1, "Can't read quorum.device.model cmap key.");
  831. }
  832. if (strcmp(str, "net") != 0) {
  833. free(str);
  834. errx(1, "Configured device model is not net. This qdevice provider is only for net.");
  835. }
  836. free(str);
  837. /*
  838. * Get nodeid
  839. */
  840. if (cmap_get_uint32(cmap_handle, "runtime.votequorum.this_node_id", &node_id) != CS_OK) {
  841. errx(1, "Unable to retrive this node nodeid.");
  842. }
  843. /*
  844. * Check tls
  845. */
  846. if (cmap_get_string(cmap_handle, "quorum.device.net.tls", &str) == CS_OK) {
  847. if ((i = qdevice_net_parse_bool_str(str)) == -1) {
  848. free(str);
  849. errx(1, "quorum.device.net.tls value is not valid.");
  850. }
  851. if (i == 1) {
  852. tls_supported = TLV_TLS_SUPPORTED;
  853. } else {
  854. tls_supported = TLV_TLS_UNSUPPORTED;
  855. }
  856. free(str);
  857. }
  858. /*
  859. * Host
  860. */
  861. if (cmap_get_string(cmap_handle, "quorum.device.net.host", &str) != CS_OK) {
  862. free(str);
  863. errx(1, "Qdevice net daemon address is not defined (quorum.device.net.host)");
  864. }
  865. host_addr = str;
  866. if (cmap_get_string(cmap_handle, "quorum.device.net.port", &str) == CS_OK) {
  867. host_port = strtol(str, &ep, 10);
  868. free(str);
  869. if (host_port <= 0 || host_port > ((uint16_t)~0) || *ep != '\0') {
  870. errx(1, "quorum.device.net.port must be in range 0-65535");
  871. }
  872. } else {
  873. host_port = QNETD_DEFAULT_HOST_PORT;
  874. }
  875. /*
  876. * Cluster name
  877. */
  878. if (cmap_get_string(cmap_handle, "totem.cluster_name", &str) != CS_OK) {
  879. errx(1, "Cluster name (totem.cluster_name) has to be defined.");
  880. }
  881. cluster_name = str;
  882. /*
  883. * Configure timeouts
  884. */
  885. if (cmap_get_uint32(cmap_handle, "quorum.device.timeout", &heartbeat_interval) != CS_OK) {
  886. heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
  887. }
  888. heartbeat_interval = heartbeat_interval * 0.8;
  889. if (cmap_get_uint32(cmap_handle, "quorum.device.sync_timeout", &sync_heartbeat_interval) != CS_OK) {
  890. sync_heartbeat_interval = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
  891. }
  892. sync_heartbeat_interval = sync_heartbeat_interval * 0.8;
  893. /*
  894. * Choose decision algorithm
  895. */
  896. decision_algorithm = TLV_DECISION_ALGORITHM_TYPE_TEST;
  897. /*
  898. * Really initialize instance
  899. */
  900. if (qdevice_net_instance_init(instance,
  901. QDEVICE_NET_INITIAL_MSG_RECEIVE_SIZE, QDEVICE_NET_INITIAL_MSG_SEND_SIZE,
  902. QDEVICE_NET_MIN_MSG_SEND_SIZE, QDEVICE_NET_MAX_MSG_RECEIVE_SIZE,
  903. tls_supported, node_id, decision_algorithm,
  904. heartbeat_interval,
  905. host_addr, host_port, cluster_name) == -1) {
  906. errx(1, "Can't initialize qdevice-net");
  907. }
  908. instance->cmap_handle = cmap_handle;
  909. }
  910. static void qdevice_net_votequorum_notification(votequorum_handle_t votequorum_handle,
  911. uint64_t context, uint32_t quorate,
  912. votequorum_ring_id_t ring_id, uint32_t node_list_entries, votequorum_node_t node_list[])
  913. {
  914. memcpy(&global_last_received_ring_id, &ring_id, sizeof(ring_id));
  915. }
  916. static void
  917. qdevice_net_init_votequorum(struct qdevice_net_instance *instance)
  918. {
  919. votequorum_callbacks_t votequorum_callbacks;
  920. votequorum_handle_t votequorum_handle;
  921. cs_error_t res;
  922. int no_retries;
  923. int fd;
  924. memset(&votequorum_callbacks, 0, sizeof(votequorum_callbacks));
  925. votequorum_callbacks.votequorum_notify_fn = qdevice_net_votequorum_notification;
  926. no_retries = 0;
  927. while ((res = votequorum_initialize(&votequorum_handle, &votequorum_callbacks)) == CS_ERR_TRY_AGAIN &&
  928. no_retries++ < MAX_CS_TRY_AGAIN) {
  929. sleep(1);
  930. }
  931. if (res != CS_OK) {
  932. errx(1, "Failed to initialize the votequorum API. Error %s", cs_strerror(res));
  933. }
  934. if ((res = votequorum_trackstart(votequorum_handle, 0, CS_TRACK_CHANGES)) != CS_OK) {
  935. errx(1, "Can't start tracking votequorum changes. Error %s", cs_strerror(res));
  936. }
  937. if ((res = votequorum_qdevice_register(votequorum_handle, QDEVICE_NET_VOTEQUORUM_DEVICE_NAME)) != CS_OK) {
  938. errx(1, "Can't register votequorum device. Error %s", cs_strerror(res));
  939. }
  940. instance->votequorum_handle = votequorum_handle;
  941. votequorum_fd_get(votequorum_handle, &fd);
  942. if ((instance->votequorum_poll_fd = PR_CreateSocketPollFd(fd)) == NULL) {
  943. err_nss();
  944. }
  945. }
  946. int
  947. main(void)
  948. {
  949. struct qdevice_net_instance instance;
  950. cmap_handle_t cmap_handle;
  951. /*
  952. * Init
  953. */
  954. qdevice_net_init_cmap(&cmap_handle);
  955. qdevice_net_instance_init_from_cmap(&instance, cmap_handle);
  956. qdevice_net_log_init(QDEVICE_NET_LOG_TARGET_STDERR);
  957. qdevice_net_log_set_debug(1);
  958. if (nss_sock_init_nss((instance.tls_supported != TLV_TLS_UNSUPPORTED ? (char *)NSS_DB_DIR : NULL)) != 0) {
  959. err_nss();
  960. }
  961. /*
  962. * Try to connect to qnetd host
  963. */
  964. instance.socket = nss_sock_create_client_socket(instance.host_addr, instance.host_port, PR_AF_UNSPEC, 100);
  965. if (instance.socket == NULL) {
  966. err_nss();
  967. }
  968. if (nss_sock_set_nonblocking(instance.socket) != 0) {
  969. err_nss();
  970. }
  971. qdevice_net_init_votequorum(&instance);
  972. /*
  973. * Create and schedule send of preinit message to qnetd
  974. */
  975. instance.expected_msg_seq_num = 1;
  976. if (msg_create_preinit(&instance.send_buffer, instance.cluster_name, 1, instance.expected_msg_seq_num) == 0) {
  977. errx(1, "Can't allocate buffer");
  978. }
  979. if (qdevice_net_schedule_send(&instance) != 0) {
  980. errx(1, "Can't schedule send of preinit msg");
  981. }
  982. instance.state = QDEVICE_NET_STATE_WAITING_PREINIT_REPLY;
  983. /*
  984. * Main loop
  985. */
  986. while (qdevice_net_poll(&instance) == 0) {
  987. }
  988. /*
  989. * Cleanup
  990. */
  991. if (PR_Close(instance.socket) != PR_SUCCESS) {
  992. err_nss();
  993. }
  994. qdevice_net_instance_destroy(&instance);
  995. SSL_ClearSessionCache();
  996. if (NSS_Shutdown() != SECSuccess) {
  997. err_nss();
  998. }
  999. PR_Cleanup();
  1000. qdevice_net_log_close();
  1001. return (0);
  1002. }