amfnode.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783
  1. /** @file amfnode.c
  2. *
  3. * Copyright (c) 2006 Ericsson AB.
  4. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  5. * - Constructors/destructors
  6. * - Serializers/deserializers
  7. *
  8. * All rights reserved.
  9. *
  10. *
  11. * This software licensed under BSD license, the text of which follows:
  12. *
  13. * Redistribution and use in source and binary forms, with or without
  14. * modification, are permitted provided that the following conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright notice,
  17. * this list of conditions and the following disclaimer.
  18. * - Redistributions in binary form must reproduce the above copyright notice,
  19. * this list of conditions and the following disclaimer in the documentation
  20. * and/or other materials provided with the distribution.
  21. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  22. * contributors may be used to endorse or promote products derived from this
  23. * software without specific prior written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  26. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  29. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  32. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  33. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  35. * THE POSSIBILITY OF SUCH DAMAGE.
  36. *
  37. * AMF Node Class Implementation
  38. *
  39. * This file contains functions for handling AMF nodes. It can be
  40. * viewed as the implementation of the AMF Node class (called NODE)
  41. * as described in SAI-Overview-B.02.01. The SA Forum specification
  42. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  43. * and is referred to as 'the spec' below.
  44. *
  45. * The functions in this file are responsible for:
  46. * - controlling the instantiation of the SUs hosted on current node and
  47. * controlling the assigning of workload to them when a node joins the
  48. * cluster (cluster start is controlled by the Cluster Class)
  49. * - controlling node level recovery and repair functions
  50. * - implementing error escalation level 2 and 3 (paragraph 3.12.2.2 and
  51. * 3.12.2.3 in the spec)
  52. * - handling run time attributes of the AMF NODE; cached
  53. * attributes are stored as variables and sent to the IMM service (future)
  54. * upon the changes described in the specification
  55. *
  56. * The node class contains the following state machines:
  57. * - administrative state machine (ADSM)
  58. * - operational state machine (OPSM)
  59. * - availability control state machine (ACSM)
  60. *
  61. * The administrative state machine will be implemented in the future.
  62. *
  63. * The operational state machine is primarily used to report status of the
  64. * node.
  65. *
  66. * The availability control state machine is used for control purposes.
  67. * ACSM contains three states of which two are composite.
  68. * Being a composite state means that the state contains substates.
  69. * ACSM states are:
  70. * - REPAIR_NEEDED
  71. * - ESCALLATION_LEVEL (LEVEL_0, LEVEL_2 and LEVEL_3)
  72. * - MANAGING_HOSTED_SERVICE_UNITS (
  73. * . FAILING_FAST (REBOOTING_NODE and ACTIVATING_STANDBY_NODE)
  74. * . FAILING_GRACEFULLY (SWITCHING_OVER, FAILING_OVER and REBOOTING_NODE)
  75. * . LEAVING_SPONTANEOUSLY (FAILING_OVER and
  76. * WAITING_FOR_NODE_TO_JOIN)
  77. * . JOINING (STARTING_SERVICE_UNITS and ASSIGNING_STANDBY_WORKLOAD)
  78. *
  79. * REPAIR_NEEDED indicates the node needs a manual repair and this state will be
  80. * maintained until the administrative command REPAIRED is entered (implemented
  81. * in the future)
  82. *
  83. * ESCALLATION_LEVEL is a kind of idle state where no actions are performed;
  84. * it is used only to remember the escalation level. Substate LEVEL_0 indicates
  85. * no escalation. LEVEL_2 indicates that so many component restarts have been
  86. * executed recently that a new component restart request will escalate
  87. * to a service unit restart action. Node will request a service unit restart
  88. * from SU.
  89. * LEVEL_3 will be entered if either too many service unit restarts have
  90. * been made or a component failover recovery action is requested. On level 3
  91. * the recovery action performed is service unit failover (paragraph 3.12.1.3).
  92. *
  93. * FAILING_FAST state executes a node re-boot and waits for the node to join
  94. * the cluster again.
  95. *
  96. * FAILING_GRACEFULLY state requests all SGs which have SUs hosted on current
  97. * node to switch or failover according to the procedures described in
  98. * paragraphs 3.12.1.3 before re-boot is executed. Then the confirmation is
  99. * awaited from all concerned SGs and finally a node re-boot is executed as
  100. * the repair action (see paragraph 2.12.1.4).
  101. *
  102. * LEAVING_SPONTANEOUSLY state handles the spontaneous leave of a node.
  103. *
  104. * JOINING state handles the start of a node in all cases except cluster start,
  105. * which is handled by the CLUSTER class.
  106. *
  107. * 1. Node Availability Control State Machine
  108. * ==========================================
  109. *
  110. * 1.1 State Transition Table
  111. *
  112. * State: Event: Action: New state:
  113. * ============================================================================
  114. * ESCALATION_LEVEL_0 node_sync_ready A6 JOINING_STARTING_APPLS
  115. * ESCALATION_LEVEL_0 node_leave A9,A8 LEAVING_SP_FAILING_OVER
  116. * JOINING_STARTING_APPLS appl_started [C4] A7 JOINING_ASSIGNING_WL
  117. * JOINING_ASSIGNING_WL appl_assigned [C5] ESCALATION_LEVEL_0
  118. * LEAVING_SP_FAILING_OVER sg_failed_over [C1] LEAVING_SP_WAIT_FOR_JOIN
  119. * LEAVING_SP_WAIT_FOR_JOIN node_sync_ready A6 JOINING_STARTING_APPLS
  120. *
  121. * 1.2 State Description
  122. * =====================
  123. * ESCALATION_LEVEL_0 - Node is synchronized and idle.
  124. * JOINING_STARTING_APPLS - JOINING_STARTING_APPLICATIONS
  125. * Node has ordered all applications to start its SUs
  126. * hosted on current node and is now waiting for them
  127. * to acknowledge that they have started.
  128. *
  129. * JOINING_ASSIGNING_WL - JOINING_ASSIGNING_WORKLOAD
  130. * Node has ordered all applications to assign workload
  131. * to all its SUs which currently have no workload and
  132. * is now waiting for the applications to acknowledge.
  133. *
  134. * LEAVING_SP_FAILING_OVER - LEAVING_SPONTANEOUSLY_FAILING_OVER
  135. * Node has received an event telling that this node
  136. * has left the cluster and has ordered all service
  137. * groups to failover those of its SUs that were
  138. * hosted on current node.
  139. *
  140. * LEAVING_SP_WAIT_FOR_JOIN - LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN
  141. * Node is waiting for current node to join again.
  142. *
  143. * 1.3 Actions
  144. * ===========
  145. * A1 -
  146. * A2 -
  147. * A3 -
  148. * A4 -
  149. * A5 -
  150. * A6 - [foreach application in cluster]start application
  151. * A7 - [foreach application in cluster]assign workload to application
  152. * A8 - [foreach application in cluster]
  153. * [foreach SG in application ]failover node
  154. * A9 - [foreach application in cluster]
  155. * [foreach SG in application ]
  156. * [foreach SU in SG where the SU is hosted on current node]
  157. * [foreach comp in such an SU]indicate that the node has left the cluster
  158. *
  159. * 1.4 Guards
  160. * ==========
  161. * C1 - All SG availability control state machines (ACSM) == IDLE
  162. * C2 -
  163. * C3 -
  164. * C4 - No applications are in ACSM state == STARTING_SGS
  165. * C5 - All applications have ACSM state == WORKLOAD_ASSIGNED
  166. */
  167. #include <stdlib.h>
  168. #include <assert.h>
  169. #include <unistd.h>
  170. #include "amf.h"
  171. #include "util.h"
  172. #include "print.h"
  173. #include "main.h"
  174. /******************************************************************************
  175. * Internal (static) utility functions
  176. *****************************************************************************/
  177. static void node_acsm_enter_leaving_spontaneously(struct amf_node *node)
  178. {
  179. ENTER("'%s'", node->name.value);
  180. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED;
  181. node->nodeid = 0;
  182. }
  183. static void node_acsm_enter_failing_over (struct amf_node *node)
  184. {
  185. struct amf_application *app;
  186. struct amf_sg *sg;
  187. struct amf_su *su;
  188. struct amf_comp *component = NULL;
  189. ENTER("'%s'", node->name.value);
  190. node->acsm_state = NODE_ACSM_LEAVING_SPONTANEOUSLY_FAILING_OVER;
  191. /*
  192. * Indicate to each component object in the model that current
  193. * node has left the cluster
  194. */
  195. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  196. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  197. for (su = sg->su_head; su != NULL; su = su->next) {
  198. if (name_match(&node->name, &su->saAmfSUHostedByNode)) {
  199. for (component = su->comp_head; component != NULL;
  200. component = component->next) {
  201. amf_comp_node_left(component);
  202. }
  203. }
  204. }
  205. }
  206. }
  207. /*
  208. * Let all service groups with service units hosted on current node failover
  209. * its workload
  210. */
  211. for (app = amf_cluster->application_head; app != NULL; app =
  212. app->next) {
  213. for (sg = app->sg_head; sg != NULL; sg =
  214. sg->next) {
  215. amf_sg_failover_node_req(sg, node);
  216. }
  217. }
  218. }
  219. /**
  220. *
  221. * @param node
  222. */
  223. static void failover_all_sg_on_node (amf_node_t *node)
  224. {
  225. amf_application_t *app;
  226. amf_sg_t *sg;
  227. amf_su_t *su;
  228. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  229. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  230. for (su = sg->su_head; su != NULL; su = su->next) {
  231. if (name_match(&su->saAmfSUHostedByNode, &node->name)) {
  232. amf_sg_failover_node_req (sg, node);
  233. break;
  234. }
  235. }
  236. }
  237. }
  238. }
  239. static void node_acsm_enter_failing_gracefully_failing_over (amf_node_t *node)
  240. {
  241. ENTER("");
  242. node->acsm_state = NODE_ACSM_FAILING_GRACEFULLY_FAILING_OVER;
  243. failover_all_sg_on_node (node);
  244. }
  245. static int has_all_sg_on_node_failed_over (amf_node_t *node)
  246. {
  247. amf_application_t *app;
  248. amf_sg_t *sg;
  249. amf_su_t *su;
  250. int has_all_sg_on_node_failed_over = 1;
  251. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  252. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  253. for (su = sg->su_head; su != NULL; su = su->next) {
  254. if (name_match(&su->saAmfSUHostedByNode, &node->name)) {
  255. if (sg->avail_state != SG_AC_Idle) {
  256. has_all_sg_on_node_failed_over = 0;
  257. goto out;
  258. }
  259. break;
  260. }
  261. }
  262. }
  263. }
  264. out:
  265. return has_all_sg_on_node_failed_over;
  266. }
  267. static void repair_node (amf_node_t *node)
  268. {
  269. ENTER("");
  270. char hostname[256];
  271. gethostname (hostname, 256);
  272. if (!strcmp (hostname, (const char*)node->saAmfNodeClmNode.value)) {
  273. /* TODO if(saAmfAutoRepair == SA_TRUE) */
  274. #ifdef DEBUG
  275. exit (0);
  276. #else
  277. system ("reboot");
  278. #endif
  279. }
  280. }
  281. static void enter_failing_gracefully_rebooting_node (amf_node_t *node)
  282. {
  283. ENTER("");
  284. node->acsm_state = NODE_ACSM_FAILING_GRACEFULLY_REBOOTING_NODE;
  285. repair_node (node);
  286. }
  287. static void node_acsm_enter_idle (amf_node_t *node)
  288. {
  289. ENTER ("history_state=%d",node->history_state);
  290. node->acsm_state = node->history_state;
  291. }
  292. /**
  293. *
  294. * @param node
  295. * @param app
  296. */
  297. static void node_acsm_enter_joining_assigning_workload (struct amf_node *node,
  298. struct amf_application *app)
  299. {
  300. log_printf(LOG_NOTICE,
  301. "Node=%s: all applications started, assigning workload.",
  302. node->name.value);
  303. ENTER("");
  304. node->acsm_state = NODE_ACSM_JOINING_ASSIGNING_WORKLOAD;
  305. for (app = app->cluster->application_head; app != NULL;
  306. app = app->next) {
  307. amf_application_assign_workload (app, node);
  308. }
  309. }
  310. /******************************************************************************
  311. * Event methods
  312. *****************************************************************************/
  313. /**
  314. * This event indicates that a node has unexpectedly left the cluster. Node
  315. * leave event is obtained from amf_confchg_fn.
  316. *
  317. * @param node
  318. */
  319. void amf_node_leave (struct amf_node *node)
  320. {
  321. assert (node != NULL);
  322. ENTER("'%s', CLM node '%s'", node->name.value,
  323. node->saAmfNodeClmNode.value);
  324. switch (node->acsm_state) {
  325. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
  326. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
  327. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
  328. node_acsm_enter_leaving_spontaneously(node);
  329. node_acsm_enter_failing_over (node);
  330. break;
  331. case NODE_ACSM_REPAIR_NEEDED:
  332. break;
  333. case NODE_ACSM_FAILING_GRACEFULLY_REBOOTING_NODE:
  334. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
  335. node_acsm_enter_idle (node);
  336. break;
  337. default:
  338. log_printf (LOG_LEVEL_ERROR, "amf_node_leave called in state = %d"
  339. " (should have been deferred)", node->acsm_state);
  340. openais_exit_error (AIS_DONE_FATAL_ERR);
  341. break;
  342. }
  343. }
  344. /**
  345. *
  346. * @param node
  347. */
  348. void amf_node_failover (struct amf_node *node)
  349. {
  350. assert (node != NULL);
  351. ENTER("'%s', CLM node '%s'", node->name.value,
  352. node->saAmfNodeClmNode.value);
  353. switch (node->acsm_state) {
  354. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
  355. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
  356. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
  357. node_acsm_enter_failing_gracefully_failing_over (node);
  358. break;
  359. case NODE_ACSM_REPAIR_NEEDED:
  360. break;
  361. default:
  362. log_printf (LOG_LEVEL_ERROR, "amf_node_leave()called in state = %d"
  363. " (should have been deferred)", node->acsm_state);
  364. openais_exit_error (AIS_DONE_FATAL_ERR);
  365. break;
  366. }
  367. }
  368. /**
  369. *
  370. * @param node
  371. */
  372. void amf_node_switchover (struct amf_node *node)
  373. {
  374. }
  375. /**
  376. *
  377. * @param node
  378. */
  379. void amf_node_failfast (struct amf_node *node)
  380. {
  381. }
  382. /**
  383. *
  384. * @param node
  385. * @param comp
  386. */
  387. void amf_node_comp_restart_req (struct amf_node *node, struct amf_comp *comp)
  388. {
  389. amf_su_t *su = comp->su;
  390. ENTER("");
  391. switch (node->acsm_state) {
  392. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
  393. node->acsm_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_2;
  394. amf_node_comp_restart_req (node, comp);
  395. break;
  396. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
  397. if (su->saAmfSURestartCount >= su->sg->saAmfSGSuRestartMax) {
  398. SaNameT dn;
  399. node->acsm_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_3;
  400. amf_comp_operational_state_set (comp, SA_AMF_OPERATIONAL_DISABLED);
  401. amf_su_operational_state_set (su, SA_AMF_OPERATIONAL_DISABLED);
  402. amf_comp_dn_make (comp, &dn);
  403. log_printf (LOG_NOTICE, "Error detected for '%s', recovery "
  404. "action:\n\t\tSU failover", dn.value);
  405. amf_sg_failover_su_req (su->sg, su, node);
  406. } else {
  407. amf_su_restart (su);
  408. }
  409. break;
  410. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
  411. if (su->su_failover_cnt < node->saAmfNodeSuFailoverMax) {
  412. SaNameT dn;
  413. amf_comp_operational_state_set (comp, SA_AMF_OPERATIONAL_DISABLED);
  414. amf_su_operational_state_set (su, SA_AMF_OPERATIONAL_DISABLED);
  415. amf_comp_dn_make (comp, &dn);
  416. log_printf (LOG_NOTICE, "Error detected for '%s', recovery "
  417. "action:\n\t\tSU failover", dn.value);
  418. amf_sg_failover_su_req (su->sg, su, node);
  419. return;
  420. } else {
  421. node->history_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
  422. amf_node_failover (node);
  423. }
  424. break;
  425. default:
  426. dprintf("%d",node->acsm_state);
  427. assert (0);
  428. break;
  429. }
  430. }
  431. /**
  432. *
  433. * @param node
  434. */
  435. void amf_node_comp_failover_req (amf_node_t *node, amf_comp_t *comp)
  436. {
  437. ENTER("");
  438. switch (node->acsm_state) {
  439. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
  440. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
  441. if (comp->su->saAmfSUFailover) {
  442. /* SU failover */
  443. amf_sg_failover_su_req (comp->su->sg,comp->su, node);
  444. } else {
  445. /* TODO: component failover */
  446. assert (0);
  447. }
  448. break;
  449. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
  450. if (comp->su->su_failover_cnt < node->saAmfNodeSuFailoverMax) {
  451. if (comp->su->saAmfSUFailover) {
  452. /* SU failover */
  453. amf_sg_failover_su_req (comp->su->sg,comp->su, node);
  454. } else {
  455. /* TODO: component failover */
  456. assert (0);
  457. }
  458. } else {
  459. node->history_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
  460. amf_node_failover (node);
  461. }
  462. break;
  463. default:
  464. dprintf("%d",node->acsm_state);
  465. assert (0);
  466. break;
  467. }
  468. }
  469. /**
  470. * This event indicates that current node has joined and its cluster model has
  471. * been synchronized with the other nodes cluster models.
  472. *
  473. * @param node
  474. */
  475. void amf_node_sync_ready (struct amf_node *node)
  476. {
  477. struct amf_application *app;
  478. assert (node != NULL);
  479. log_printf(LOG_NOTICE, "Node=%s: sync ready, starting hosted SUs.",
  480. node->name.value);
  481. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
  482. switch (node->acsm_state) {
  483. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_0:
  484. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_2:
  485. case NODE_ACSM_IDLE_ESCALLATION_LEVEL_3:
  486. case NODE_ACSM_LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN:
  487. node->acsm_state = NODE_ACSM_JOINING_STARTING_APPLICATIONS;
  488. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  489. amf_application_start (app, node);
  490. }
  491. break;
  492. case NODE_ACSM_REPAIR_NEEDED:
  493. break;
  494. default:
  495. log_printf (LOG_LEVEL_ERROR, "amf_node_sync_ready() was called in "
  496. "state = %d (should have been deferred)",
  497. node->acsm_state);
  498. openais_exit_error (AIS_DONE_FATAL_ERR);
  499. break;
  500. }
  501. }
  502. /******************************************************************************
  503. * Event response methods
  504. *****************************************************************************/
  505. /**
  506. * This event indicates that an application has started. Started in this context
  507. * means that none of its contained service units is in an -ING state with other
  508. * words successfully instantiated, instantiation has failed or instantiation
  509. * was not possible (due to the node on which the SU was to be hosted is not
  510. * operational).
  511. *
  512. * @param node
  513. * @param application which has been started
  514. */
  515. void amf_node_application_started (struct amf_node *node,
  516. struct amf_application *app)
  517. {
  518. assert (node != NULL && app != NULL );
  519. ENTER ("Node=%s: application '%s' started", node->name.value,
  520. app->name.value);
  521. switch (node->acsm_state) {
  522. case NODE_ACSM_JOINING_STARTING_APPLICATIONS:
  523. if (amf_cluster_applications_started_with_no_starting_sgs(
  524. app->cluster)) {
  525. node_acsm_enter_joining_assigning_workload(node, app);
  526. }
  527. break;
  528. default:
  529. log_printf (LOG_LEVEL_ERROR, "amf_node_application_started()"
  530. "called in state = %d (unexpected !!)", node->acsm_state);
  531. openais_exit_error (AIS_DONE_FATAL_ERR);
  532. break;
  533. }
  534. }
  535. /**
  536. * This event indicates that an application has been assigned workload.
  537. *
  538. * @param node
  539. * @param application which has been assigned workload
  540. */
  541. void amf_node_application_workload_assigned (struct amf_node *node,
  542. struct amf_application *app)
  543. {
  544. assert (node != NULL && app != NULL );
  545. ENTER ("Node=%s: application '%s' started", node->name.value,
  546. app->name.value);
  547. switch (node->acsm_state) {
  548. case NODE_ACSM_JOINING_ASSIGNING_WORKLOAD:
  549. if (amf_cluster_applications_assigned (amf_cluster)) {
  550. log_printf(LOG_NOTICE, "Node=%s: all workload assigned",
  551. node->name.value);
  552. node_acsm_enter_idle (node);
  553. }
  554. break;
  555. default:
  556. log_printf (LOG_LEVEL_ERROR, "amf_node_application_workload_assigned()"
  557. "called in state = %d (unexpected !!)", node->acsm_state);
  558. openais_exit_error (AIS_DONE_FATAL_ERR);
  559. break;
  560. }
  561. }
  562. /**
  563. * This event indicates that an SG has failed over its workload after a node
  564. * failure.
  565. *
  566. * @param node
  567. * @param sg_in SG which is now ready with its failover
  568. */
  569. void amf_node_sg_failed_over (struct amf_node *node, struct amf_sg *sg_in)
  570. {
  571. assert (node != NULL);
  572. ENTER ("Node=%s: SG '%s' started %d", node->name.value,
  573. sg_in->name.value,node->acsm_state);
  574. switch (node->acsm_state) {
  575. case NODE_ACSM_LEAVING_SPONTANEOUSLY_FAILING_OVER:
  576. if (has_all_sg_on_node_failed_over (node)) { /*C2*/
  577. node->acsm_state =
  578. NODE_ACSM_LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN;
  579. }
  580. break;
  581. case NODE_ACSM_LEAVING_SPONTANEOUSLY_WAITING_FOR_NODE_TO_JOIN:
  582. /* Accept reports of failed over sg that has completed. */
  583. break;
  584. case NODE_ACSM_FAILING_GRACEFULLY_FAILING_OVER:
  585. if (has_all_sg_on_node_failed_over (node)) { /*C2*/
  586. enter_failing_gracefully_rebooting_node (node);
  587. }
  588. break;
  589. default:
  590. log_printf (LOG_LEVEL_ERROR, "amf_node_sg_failed_over()"
  591. "called in state = %d (unexpected !!)", node->acsm_state);
  592. openais_exit_error (AIS_DONE_FATAL_ERR);
  593. break;
  594. }
  595. }
  596. /******************************************************************************
  597. * General methods
  598. *****************************************************************************/
  599. void amf_node_init (void)
  600. {
  601. log_init ("AMF");
  602. }
  603. /**
  604. * Node constructor
  605. * @param loc
  606. * @param cluster
  607. * @param node
  608. */
  609. struct amf_node *amf_node_new (struct amf_cluster *cluster, char *name) {
  610. struct amf_node *node = amf_calloc (1, sizeof (struct amf_node));
  611. setSaNameT (&node->name, name);
  612. node->saAmfNodeAdminState = SA_AMF_ADMIN_UNLOCKED;
  613. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
  614. node->saAmfNodeAutoRepair = SA_TRUE;
  615. node->saAmfNodeSuFailOverProb = -1;
  616. node->saAmfNodeSuFailoverMax = ~0;
  617. node->cluster = cluster;
  618. node->next = cluster->node_head;
  619. cluster->node_head = node;
  620. node->acsm_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
  621. node->history_state = NODE_ACSM_IDLE_ESCALLATION_LEVEL_0;
  622. return node;
  623. }
  624. void *amf_node_serialize (struct amf_node *node, int *len)
  625. {
  626. char *buf = NULL;
  627. int offset = 0, size = 0;
  628. TRACE8 ("%s", node->name.value);
  629. buf = amf_serialize_SaNameT (buf, &size, &offset, &node->name);
  630. buf = amf_serialize_SaNameT (buf, &size, &offset, &node->saAmfNodeClmNode);
  631. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  632. node->saAmfNodeSuFailOverProb);
  633. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  634. node->saAmfNodeSuFailoverMax);
  635. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  636. node->saAmfNodeAutoRepair);
  637. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  638. node->saAmfNodeRebootOnInstantiationFailure);
  639. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  640. node->saAmfNodeRebootOnTerminationFailure);
  641. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  642. node->saAmfNodeAdminState);
  643. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  644. node->saAmfNodeOperState);
  645. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  646. node->nodeid);
  647. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  648. node->acsm_state);
  649. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  650. node->history_state);
  651. *len = offset;
  652. return buf;
  653. }
  654. struct amf_node *amf_node_deserialize (struct amf_cluster *cluster, char *buf) {
  655. char *tmp = buf;
  656. struct amf_node *node = amf_node_new (cluster, "");
  657. tmp = amf_deserialize_SaNameT (tmp, &node->name);
  658. tmp = amf_deserialize_SaNameT (tmp, &node->saAmfNodeClmNode);
  659. tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeSuFailOverProb);
  660. tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeSuFailoverMax);
  661. tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeAutoRepair);
  662. tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeRebootOnInstantiationFailure);
  663. tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeRebootOnTerminationFailure);
  664. tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeAdminState);
  665. tmp = amf_deserialize_SaUint32T (tmp, &node->saAmfNodeOperState);
  666. tmp = amf_deserialize_SaUint32T (tmp, &node->nodeid);
  667. tmp = amf_deserialize_SaUint32T (tmp, &node->acsm_state);
  668. tmp = amf_deserialize_SaUint32T (tmp, &node->history_state);
  669. return node;
  670. }
  671. struct amf_node *amf_node_find (SaNameT *name) {
  672. struct amf_node *node;
  673. assert (name != NULL && amf_cluster != NULL);
  674. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  675. if (name_match (&node->name, name)) {
  676. return node;
  677. }
  678. }
  679. dprintf ("node %s not found in configuration!", name->value);
  680. return NULL;
  681. }
  682. struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid) {
  683. struct amf_node *node;
  684. assert (amf_cluster != NULL);
  685. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  686. if (node->nodeid == nodeid) {
  687. return node;
  688. }
  689. }
  690. dprintf ("node %u not found in configuration!", nodeid);
  691. return NULL;
  692. }
  693. struct amf_node *amf_node_find_by_hostname (const char *hostname) {
  694. struct amf_node *node;
  695. assert (hostname != NULL && amf_cluster != NULL);
  696. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  697. if (strcmp ((char*)node->saAmfNodeClmNode.value, hostname) == 0) {
  698. return node;
  699. }
  700. }
  701. dprintf ("node %s not found in configuration!", hostname);
  702. return NULL;
  703. }