amfsg.c 46 KB


  1. /** @file amfsg.c
  2. *
  3. * Copyright (c) 2002-2006 MontaVista Software, Inc.
  4. * Author: Steven Dake (sdake@mvista.com)
  5. *
  6. * Copyright (c) 2006 Ericsson AB.
  7. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  8. * - Introduced AMF B.02 information model
  9. * - Use DN in API and multicast messages
  10. * - (Re-)Introduction of event based multicast messages
  11. * - Refactoring of code into several AMF files
  12. * - Component/SU restart, SU failover
  13. * - Constructors/destructors
  14. * - Serializers/deserializers
  15. *
  16. * All rights reserved.
  17. *
  18. *
  19. * This software licensed under BSD license, the text of which follows:
  20. *
  21. * Redistribution and use in source and binary forms, with or without
  22. * modification, are permitted provided that the following conditions are met:
  23. *
  24. * - Redistributions of source code must retain the above copyright notice,
  25. * this list of conditions and the following disclaimer.
  26. * - Redistributions in binary form must reproduce the above copyright notice,
  27. * this list of conditions and the following disclaimer in the documentation
  28. * and/or other materials provided with the distribution.
  29. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  30. * contributors may be used to endorse or promote products derived from this
  31. * software without specific prior written permission.
  32. *
  33. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  34. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  35. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  36. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  37. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  38. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  39. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  40. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  41. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  42. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  43. * THE POSSIBILITY OF SUCH DAMAGE.
  44. *
  45. * AMF Service Group Class Implementation
  46. *
  47. * This file contains functions for handling AMF-service groups(SGs). It can be
  48. * viewed as the implementation of the AMF Service Group class (called SG)
  49. * as described in SAI-Overview-B.02.01. The SA Forum specification
  50. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  51. * and is referred to as 'the spec' below.
  52. *
  53. * The functions in this file are responsible for:
  54. * -on request start the service group by instantiating the contained SUs
  55. * -on request assign the service instances it protects to the in-service
  56. * service units it contains respecting as many as possible of the configured
  57. * requirements for the group
  58. * -create and delete an SI-assignment object for each relation between
  59. * an SI and an SU
  60. * -order each contained SU to create and delete CSI-assignments
  61. * -request the Service Instance class (SI) to execute the transfer of the
  62. * HA-state set/remove requests to each component involved
  63. * -fully control the execution of component failover and SU failover
  64. * -on request control the execution of the initial steps of node switchover
  65. * and node failover
  66. * -fully handle the auto adjust procedure
  67. *
  68. * Currently only the 'n+m' redundancy model is implemented. It is the
  69. * ambition to identify n+m specific variables and functions and add the suffix
  70. * '_nplusm' to them so that they can be easily recognized.
  71. *
  72. * When SG is requested to assign workload to all SUs or all SUs hosted on
  73. * a specific node, a procedure containing several steps is executed:
  74. * <1> An algorithm is executed which assigns SIs to SUs respecting the rules
  75. * that have been configured for SG. The algorithm also has to consider
  76. * if assignments between some SIs and SUs already exist. The scope of this
  77. * algorithm is to create SI-assignments and set up requested HA-state for
  78. * each assignment but not to transfer those HA-states to the components.
  79. * <2> All SI-assignments with a requested HA state == ACTIVE are transferred
  80. * to the components concerned before any STANDBY assignments are
  81. * transferred. All components have to acknowledge the setting of the
  82. * ACTIVE HA state before the transfer of any STANDBY assignment is
  83. * initiated.
  84. * <3> All active assignments can not be transferred at the same time to the
  85. * different components because the rules for dependencies between SI and
  86. * SI application wide and CSI and CSI within one SI, has to be respected.
  87. *
  88. * SG is fully responsible for step <1> but not fully responsible for handling
  89. * step <2> and <3>. However, SG uses an attribute called 'dependency level'
  90. * when requested to assign workload. This parameter refers to an integer that
  91. * has been calculated initially for each SI. The 'dependency level' indicates
  92. * to which extent an SI depends on other SIs such that an SI that depends on
  93. * no other SI is on dependency_level == 1, an SI that depends only on an SI on
  94. * dependency_level == 1 is on dependency-level == 2.
  95. * An SI that depends on several SIs gets a
  96. * dependency_level that is one unit higher than the SI with the highest
  97. * dependency_level it depends on. When SG is requested to assign the workload
  98. * on a certain dependency level, it requests all SI objects on that level to
  99. * activate (all) SI-assignments that during step <1> has been requested to
  100. * assume the active HA state.
  101. *
  102. * SG contains the following state machines:
  103. * - administrative state machine (ADSM) (NOT IN THIS RELEASE)
  104. * - availability control state machine (ACSM)
  105. *
  106. * The availability control state machine contains two states and one of them
  107. * is composite. Being a composite state means that it contains substates.
  108. * The states are:
  109. * - IDLE (non composite state)
  110. * - MANAGING_SG (composite state)
  111. * MANAGING_SG is entered at several different events which have in common
  112. * the need to set up or change the assignment of SIs to SUs. Only one such
  113. * event can be handled at the time. If new events occur while one event is
  114. * being handled then the new event is saved and will be handled after the
  115. * handling of the first event is ready (return to IDLE state has been done).
  116. * MANAGING_SG handles the following events:
  117. * - start (requests SG to order SU to instantiate all SUs in SG and waits
  118. * for SU to indicate presence state change reports from the SUs and
  119. * finally responds 'started' to the requester)
  120. * - assign (requests SG to assign SIs to SUs according to pre-configured
  121. * rules (if not already done) and transfer the HA state of
  122. * the SIs on the requested SI dependency level. Then SG waits for
  123. * confirmation that the HA state has been successfully set and
  124. * finally responds 'assigned' to the requester)
  125. * - auto_adjust (this event indicates that the auto-adjust probation timer has
  126. * expired and that SG should evaluate current assignments of
  127. * SIs to SUs and if needed remove current assignments and
  128. * create new according to what is specified in paragraph
  129. * 3.7.1.2)
  130. * - failover_comp (requests SG to failover a specific component according to
  131. * the procedure described in paragraph 3.12.1.3)
  132. * - failover_su (requests SG to failover a specific SU according to the
  133. * procedure described in paragraph 3.12.1.3 and 3.12.1.4)
  134. * - switchover_node (requests SG to execute the recovery actions described
  135. * in 3.12.1.3 and respond to the requester when recovery
  136. * is completed)
  137. * - failover_node (requests SG to execute the recovery actions described
  138. * in 3.12.1.3 and respond to the requester when recovery is
  139. * completed)
  140. *
  141. */
  142. #include <stdlib.h>
  143. #include <errno.h>
  144. #include "amf.h"
  145. #include "print.h"
  146. #include "main.h"
  147. #include "util.h"
  /*
   * Forward declarations of state entry functions and SI callbacks that are
   * referenced before they are defined further down in this file.
   */
  static void acsm_enter_activating_standby (struct amf_sg *sg);
  static void delete_si_assignments_in_scope (struct amf_sg *sg);
  static void acsm_enter_repairing_su (struct amf_sg *sg);
  static void standby_su_activated_cbfn (struct amf_si_assignment *si_assignment,
      int result);
  static void dependent_si_deactivated_cbfn (struct amf_si_assignment *si_assignment,
      int result);

  /*
   * Printable names of the recovery types. The table is indexed with
   * sg->recovery_scope.recovery_type when recovery actions are logged.
   */
  static const char *sg_recovery_type_text[] = {
      "Unknown", "FailoverSU", "FailoverNode"
  };
  160. static void return_to_idle (struct amf_sg *sg)
  161. {
  162. SaNameT dn;
  163. ENTER ("sg: %s state: %d", sg->name.value,sg->avail_state);
  164. sg->avail_state = SG_AC_Idle;
  165. if (sg->recovery_scope.recovery_type != 0) {
  166. switch (sg->recovery_scope.recovery_type) {
  167. case SG_RT_FailoverSU:
  168. assert (sg->recovery_scope.sus[0] != NULL);
  169. amf_su_dn_make (sg->recovery_scope.sus[0], &dn);
  170. log_printf (
  171. LOG_NOTICE, "'%s' %s recovery action finished",
  172. dn.value,
  173. sg_recovery_type_text[sg->recovery_scope.recovery_type]);
  174. break;
  175. case SG_RT_FailoverNode:
  176. log_printf (
  177. LOG_NOTICE, "'%s for %s' recovery action finished",
  178. sg_recovery_type_text[sg->recovery_scope.recovery_type],
  179. sg->name.value);
  180. break;
  181. default:
  182. log_printf (
  183. LOG_NOTICE, "'%s' recovery action finished",
  184. sg_recovery_type_text[0]);
  185. }
  186. }
  187. if (sg->recovery_scope.sus != NULL) {
  188. free ((void *)sg->recovery_scope.sus);
  189. }
  190. if (sg->recovery_scope.sis != NULL) {
  191. free ((void *)sg->recovery_scope.sis);
  192. }
  193. memset (&sg->recovery_scope, 0, sizeof (struct sg_recovery_scope));
  194. sg->node_to_start = NULL;
  195. }
  196. static int su_instantiated_count (struct amf_sg *sg)
  197. {
  198. int cnt = 0;
  199. struct amf_su *su;
  200. for (su = sg->su_head; su != NULL; su = su->next) {
  201. if (su->saAmfSUPresenceState == SA_AMF_PRESENCE_INSTANTIATED)
  202. cnt++;
  203. }
  204. return cnt;
  205. }
  206. static int has_any_su_in_scope_active_workload (struct amf_sg *sg)
  207. {
  208. struct amf_su **sus= sg->recovery_scope.sus;
  209. struct amf_si_assignment *si_assignment;
  210. while (*sus != NULL) {
  211. si_assignment = amf_su_get_next_si_assignment (*sus, NULL);
  212. while (si_assignment != NULL) {
  213. if (si_assignment->saAmfSISUHAState != SA_AMF_HA_ACTIVE) {
  214. break;
  215. }
  216. si_assignment = amf_su_get_next_si_assignment (
  217. *sus, si_assignment);
  218. }
  219. if (si_assignment != NULL) {
  220. break;
  221. }
  222. sus++;
  223. }
  224. return(*sus == NULL);
  225. }
  226. static int is_standby_for_non_active_si_in_scope (struct amf_sg *sg)
  227. {
  228. struct amf_si **sis= sg->recovery_scope.sis;
  229. struct amf_si_assignment *si_assignment;
  230. /*
  231. * Check if there is any si in the scope which has no active assignment
  232. * and at least one standby assignment.
  233. */
  234. while (*sis != NULL) {
  235. si_assignment = (*sis)->assigned_sis;
  236. while (si_assignment != NULL) {
  237. if (si_assignment->saAmfSISUHAState == SA_AMF_HA_ACTIVE) {
  238. break;
  239. }
  240. si_assignment = si_assignment->next;
  241. }
  242. if (si_assignment == NULL) {
  243. /* There is no ACTIVE assignment ..*/
  244. si_assignment = (*sis)->assigned_sis;
  245. while (si_assignment != NULL) {
  246. if (si_assignment->saAmfSISUHAState == SA_AMF_HA_STANDBY) {
  247. break;
  248. }
  249. si_assignment = si_assignment->next;
  250. }
  251. if (si_assignment != NULL) {
  252. /* .. and one STANDBY assignment*/
  253. break;
  254. }
  255. }
  256. sis++;
  257. }
  258. return(*sis != NULL);
  259. }
  260. static void acsm_enter_terminating_suspected (struct amf_sg *sg)
  261. {
  262. struct amf_su **sus= sg->recovery_scope.sus;
  263. sg->avail_state = SG_AC_TerminatingSuspected;
  264. /*
  265. * Terminate suspected SU(s)
  266. */
  267. while (*sus != 0) {
  268. amf_su_terminate (*sus);
  269. sus++;
  270. }
  271. }
  272. /**
  273. * Callback function used by SI when there is no dependent SI to
  274. * deactivate.
  275. * @param sg
  276. */
  277. static void dependent_si_deactivated_cbfn2 (struct amf_sg *sg)
  278. {
  279. struct amf_su **sus = sg->recovery_scope.sus;
  280. ENTER("'%s'", sg->name.value);
  281. /* Select next state depending on if some SU in the scope is
  282. * needs to be terminated.
  283. */
  284. while (*sus != NULL) {
  285. ENTER("SU %s pr_state='%d'",(*sus)->name.value,
  286. (*sus)->saAmfSUPresenceState);
  287. if (((*sus)->saAmfSUPresenceState ==
  288. SA_AMF_PRESENCE_UNINSTANTIATED) ||
  289. ((*sus)->saAmfSUPresenceState ==
  290. SA_AMF_PRESENCE_TERMINATION_FAILED) ||
  291. ((*sus)->saAmfSUPresenceState ==
  292. SA_AMF_PRESENCE_INSTANTIATION_FAILED)) {
  293. sus++;
  294. continue;
  295. }
  296. break;
  297. }
  298. if (*sus != NULL) {
  299. acsm_enter_terminating_suspected (sg);
  300. } else {
  301. delete_si_assignments_in_scope(sg);
  302. acsm_enter_activating_standby (sg);
  303. }
  304. }
  /**
   * Timer callback that forwards to dependent_si_deactivated_cbfn2.
   * @param sg the SG, passed as the opaque timer argument
   */
  static void timer_function_dependent_si_deactivated2 (void *sg)
  {
      struct amf_sg *group = sg;

      ENTER ("");
      dependent_si_deactivated_cbfn2 (group);
  }
  310. static struct amf_si *si_get_dependent (struct amf_si *si)
  311. {
  312. struct amf_si *tmp_si = NULL;
  313. ENTER("'%p'",si->depends_on);
  314. if (si->depends_on != NULL) {
  315. if (si->depends_on->name.length < SA_MAX_NAME_LENGTH) {
  316. si->depends_on->name.value[si->depends_on->name.length] = '\0';
  317. }
  318. SaNameT res_arr[2];
  319. int is_match;
  320. is_match = sa_amf_grep ((char*)si->depends_on->name.value,
  321. "safDepend=.*,safSi=(.*),safApp=.*",
  322. 2, res_arr);
  323. if (is_match) {
  324. tmp_si = amf_si_find (si->application, (char*)res_arr[1].value);
  325. } else {
  326. log_printf (LOG_LEVEL_ERROR, "distinguished name for "
  327. "amf_si_depedency failed\n");
  328. openais_exit_error (AIS_DONE_FATAL_ERR);
  329. }
  330. }
  331. return tmp_si;
  332. }
  333. struct amf_si *amf_dependent_get_next (struct amf_si *si,
  334. struct amf_si *si_iter)
  335. {
  336. struct amf_si *tmp_si;
  337. struct amf_application *application;
  338. ENTER("");
  339. if (si_iter == NULL) {
  340. assert(amf_cluster != NULL);
  341. application = amf_cluster->application_head;
  342. assert(application != NULL);
  343. tmp_si = application->si_head;
  344. } else {
  345. tmp_si = si_iter->next;
  346. if (tmp_si == NULL) {
  347. application = si->application->next;
  348. if (application == NULL) {
  349. goto out;
  350. }
  351. }
  352. }
  353. for (; tmp_si != NULL; tmp_si = tmp_si->next) {
  354. struct amf_si *depends_on_si = si_get_dependent (tmp_si);
  355. while (depends_on_si != NULL) {
  356. if (depends_on_si == si) {
  357. goto out;
  358. }
  359. depends_on_si = depends_on_si->next;
  360. }
  361. }
  362. out:
  363. return tmp_si;
  364. }
  365. static void acsm_enter_deactivating_dependent_workload (struct amf_sg *sg)
  366. {
  367. struct amf_si **sis= sg->recovery_scope.sis;
  368. struct amf_si_assignment *si_assignment;
  369. int callback_pending = 0;
  370. sg->avail_state = SG_AC_DeactivatingDependantWorkload;
  371. ENTER("'%s'",sg->name.value);
  372. /*
  373. * For each SI in the recovery scope, find all active assignments
  374. * and request them to be deactivated.
  375. */
  376. while (*sis != NULL) {
  377. struct amf_si *dependent_si;
  378. struct amf_si *si = *sis;
  379. si_assignment = si->assigned_sis;
  380. dependent_si = amf_dependent_get_next (si, NULL);
  381. while (dependent_si != NULL) {
  382. si_assignment = dependent_si->assigned_sis;
  383. while (si_assignment != NULL) {
  384. if (si_assignment->saAmfSISUHAState == SA_AMF_HA_ACTIVE) {
  385. si_assignment->requested_ha_state = SA_AMF_HA_QUIESCED;
  386. callback_pending = 1;
  387. amf_si_ha_state_assume (
  388. si_assignment, dependent_si_deactivated_cbfn);
  389. }
  390. si_assignment = si_assignment->next;
  391. }
  392. dependent_si = amf_dependent_get_next (si, dependent_si);
  393. }
  394. sis++;
  395. }
  396. if (callback_pending == 0) {
  397. poll_timer_handle handle;
  398. ENTER("");
  399. poll_timer_add (aisexec_poll_handle, 0, sg,
  400. timer_function_dependent_si_deactivated2, &handle);
  401. }
  402. }
  403. /**
  404. * Enter function for state SG_AC_ActivatingStandby. It activates
  405. * one STANDBY assignment for each SI in the recovery scope.
  406. * @param sg
  407. */
  408. static void acsm_enter_activating_standby (struct amf_sg *sg)
  409. {
  410. struct amf_si **sis= sg->recovery_scope.sis;
  411. struct amf_si_assignment *si_assignment;
  412. int is_no_standby_activated = 1;
  413. ENTER("'%s'",sg->name.value);
  414. sg->avail_state = SG_AC_ActivatingStandby;
  415. /*
  416. * For each SI in the recovery scope, find one standby
  417. * SI assignment and activate it.
  418. */
  419. while (*sis != NULL) {
  420. si_assignment = (*sis)->assigned_sis;
  421. while (si_assignment != NULL) {
  422. if (si_assignment->saAmfSISUHAState == SA_AMF_HA_STANDBY) {
  423. si_assignment->requested_ha_state = SA_AMF_HA_ACTIVE;
  424. amf_si_ha_state_assume (
  425. si_assignment, standby_su_activated_cbfn);
  426. is_no_standby_activated = 0;
  427. break;
  428. }
  429. si_assignment = si_assignment->next;
  430. }
  431. sis++;
  432. }
  433. if (is_no_standby_activated) {
  434. sg->avail_state = SG_AC_AssigningStandbyToSpare;
  435. acsm_enter_repairing_su (sg);
  436. }
  437. }
  438. static void acsm_enter_repairing_su (struct amf_sg *sg)
  439. {
  440. struct amf_su **sus= sg->recovery_scope.sus;
  441. ENTER("'%s'",sg->name.value);
  442. sg->avail_state = SG_AC_ReparingSu;
  443. int is_any_su_instantiated = 0;
  444. /*
  445. * Instantiate SUs in current recovery scope until the configured
  446. * preference is fulfiled.
  447. */
  448. while (*sus != NULL) {
  449. if (su_instantiated_count ((*sus)->sg) <
  450. (*sus)->sg->saAmfSGNumPrefInserviceSUs) {
  451. struct amf_node *node = amf_node_find(&((*sus)->saAmfSUHostedByNode));
  452. if (node == NULL) {
  453. log_printf (LOG_LEVEL_ERROR, "no node to hosted on su found"
  454. "amf_si_depedency failed\n");
  455. openais_exit_error (AIS_DONE_FATAL_ERR);
  456. }
  457. if (node->saAmfNodeOperState == SA_AMF_OPERATIONAL_ENABLED) {
  458. is_any_su_instantiated = 1;
  459. amf_su_instantiate ((*sus));
  460. } else {
  461. return_to_idle (sg);
  462. }
  463. }
  464. sus++;
  465. }
  466. if (is_any_su_instantiated == 0) {
  467. return_to_idle (sg);
  468. }
  469. }
  470. /**
  471. * Checks if the si pointed out is already in the scope.
  472. * @param sg
  473. * @param si
  474. */
  475. static int is_si_in_scope(struct amf_sg *sg, struct amf_si *si)
  476. {
  477. struct amf_si **tmp_sis= sg->recovery_scope.sis;
  478. while (*tmp_sis != NULL) {
  479. if (*tmp_sis == si) {
  480. break;
  481. }
  482. tmp_sis++;
  483. }
  484. return(*tmp_sis == si);
  485. }
  486. /**
  487. * Adds the si pointed out to the scope.
  488. * @param sg
  489. * @param si
  490. */
  491. static void add_si_to_scope ( struct amf_sg *sg, struct amf_si *si)
  492. {
  493. int number_of_si = 2; /* It shall be at least two */
  494. struct amf_si **tmp_sis= sg->recovery_scope.sis;
  495. ENTER ("'%s'", si->name.value);
  496. while (*tmp_sis != NULL) {
  497. number_of_si++;
  498. tmp_sis++;
  499. }
  500. sg->recovery_scope.sis = (struct amf_si **)
  501. realloc((void *)sg->recovery_scope.sis,
  502. sizeof (struct amf_si *)*number_of_si);
  503. assert (sg->recovery_scope.sis != NULL);
  504. tmp_sis= sg->recovery_scope.sis;
  505. while (*tmp_sis != NULL) {
  506. tmp_sis++;
  507. }
  508. *tmp_sis = si;
  509. *(++tmp_sis) = NULL;
  510. }
  511. /**
  512. * Adds the ssu pointed out to the scope.
  513. * @param sg
  514. * @param su
  515. */
  516. static void add_su_to_scope (struct amf_sg *sg, struct amf_su *su)
  517. {
  518. int number_of_su = 2; /* It shall be at least two */
  519. struct amf_su **tmp_sus= sg->recovery_scope.sus;
  520. ENTER ("'%s'", su->name.value);
  521. while (*tmp_sus != NULL) {
  522. number_of_su++;
  523. tmp_sus++;
  524. }
  525. sg->recovery_scope.sus = (struct amf_su **)
  526. realloc((void *)sg->recovery_scope.sus,
  527. sizeof (struct amf_su *)*number_of_su);
  528. assert (sg->recovery_scope.sus != NULL);
  529. tmp_sus= sg->recovery_scope.sus;
  530. while (*tmp_sus != NULL) {
  531. tmp_sus++;
  532. }
  533. *tmp_sus = su;
  534. *(++tmp_sus) = NULL;
  535. }
  536. /**
  537. * Set recovery scope for failover SU.
  538. * @param sg
  539. * @param su
  540. */
  541. static void set_scope_for_failover_su (struct amf_sg *sg, struct amf_su *su)
  542. {
  543. struct amf_si_assignment *si_assignment;
  544. struct amf_si **sis;
  545. struct amf_su **sus;
  546. SaNameT dn;
  547. sg->recovery_scope.recovery_type = SG_RT_FailoverSU;
  548. sg->recovery_scope.comp = NULL;
  549. sg->recovery_scope.sus = (struct amf_su **)
  550. calloc (2, sizeof (struct amf_su *));
  551. sg->recovery_scope.sis = (struct amf_si **)
  552. calloc (1, sizeof (struct amf_si *));
  553. assert ((sg->recovery_scope.sus != NULL) &&
  554. (sg->recovery_scope.sis != NULL));
  555. sg->recovery_scope.sus[0] = su;
  556. amf_su_dn_make (sg->recovery_scope.sus[0], &dn);
  557. log_printf (
  558. LOG_NOTICE, "'%s' for %s recovery action started",
  559. sg_recovery_type_text[sg->recovery_scope.recovery_type],
  560. dn.value);
  561. si_assignment = amf_su_get_next_si_assignment (su, NULL);
  562. while (si_assignment != NULL) {
  563. if (is_si_in_scope(sg, si_assignment->si) == 0) {
  564. add_si_to_scope(sg,si_assignment->si );
  565. }
  566. si_assignment = amf_su_get_next_si_assignment (su, si_assignment);
  567. }
  568. sus = sg->recovery_scope.sus;
  569. dprintf("The following sus are within the scope:\n");
  570. while (*sus != NULL) {
  571. dprintf("%s\n", (*sus)->name.value);
  572. sus++;
  573. }
  574. sis= sg->recovery_scope.sis;
  575. dprintf("The following sis are within the scope:\n");
  576. while (*sis != NULL) {
  577. dprintf("%s\n", (*sis)->name.value);
  578. sis++;
  579. }
  580. }
  581. static void set_scope_for_failover_node (struct amf_sg *sg, struct amf_node *node)
  582. {
  583. struct amf_si_assignment *si_assignment;
  584. struct amf_si **sis;
  585. struct amf_su **sus;
  586. struct amf_su *su;
  587. ENTER ("'%s'", node->name.value);
  588. sg->recovery_scope.recovery_type = SG_RT_FailoverNode;
  589. sg->recovery_scope.comp = NULL;
  590. sg->recovery_scope.sus = (struct amf_su **)
  591. calloc (1, sizeof (struct amf_su *));
  592. sg->recovery_scope.sis = (struct amf_si **)
  593. calloc (1, sizeof (struct amf_si *));
  594. log_printf (
  595. LOG_NOTICE, "'%s' for node %s recovery action started",
  596. sg_recovery_type_text[sg->recovery_scope.recovery_type],
  597. node->name.value);
  598. assert ((sg->recovery_scope.sus != NULL) &&
  599. (sg->recovery_scope.sis != NULL));
  600. for (su = sg->su_head; su != NULL; su = su->next) {
  601. if (name_match (&node->name, &su->saAmfSUHostedByNode)) {
  602. add_su_to_scope (sg, su);
  603. }
  604. }
  605. sus = sg->recovery_scope.sus;
  606. while (*sus != 0) {
  607. su = *sus;
  608. si_assignment = amf_su_get_next_si_assignment (su, NULL);
  609. while (si_assignment != NULL) {
  610. if (is_si_in_scope(sg, si_assignment->si) == 0) {
  611. add_si_to_scope(sg, si_assignment->si );
  612. }
  613. si_assignment = amf_su_get_next_si_assignment (su, si_assignment);
  614. }
  615. sus++;
  616. }
  617. sus = sg->recovery_scope.sus;
  618. dprintf("The following sus are within the scope:\n");
  619. while (*sus != NULL) {
  620. dprintf("%s\n", (*sus)->name.value);
  621. sus++;
  622. }
  623. sis = sg->recovery_scope.sis;
  624. dprintf("The following sis are within the scope:\n");
  625. while (*sis != NULL) {
  626. dprintf("%s\n", (*sis)->name.value);
  627. sis++;
  628. }
  629. }
  630. /**
  631. * Delete all SI assignments and all CSI assignments
  632. * by requesting all contained components.
  633. * @param su
  634. */
  635. static void delete_si_assignments (struct amf_su *su)
  636. {
  637. struct amf_csi *csi;
  638. struct amf_si *si;
  639. struct amf_si_assignment *si_assignment;
  640. struct amf_si_assignment **prev;
  641. ENTER ("'%s'", su->name.value);
  642. for (si = su->sg->application->si_head; si != NULL; si = si->next) {
  643. prev = &si->assigned_sis;
  644. if (!name_match (&si->saAmfSIProtectedbySG, &su->sg->name)) {
  645. continue;
  646. }
  647. for (csi = si->csi_head; csi != NULL; csi = csi->next) {
  648. amf_csi_delete_assignments (csi, su);
  649. }
  650. for (si_assignment = si->assigned_sis; si_assignment != NULL;
  651. si_assignment = si_assignment->next) {
  652. if (si_assignment->su == su) {
  653. struct amf_si_assignment *tmp = si_assignment;
  654. *prev = si_assignment->next;
  655. dprintf ("SI assignment %s unlinked", tmp->name.value);
  656. free (tmp);
  657. } else {
  658. prev = &si_assignment->next;
  659. }
  660. }
  661. }
  662. }
  663. /**
  664. * Delete all SI assignments and all CSI assignments in current
  665. * recovery scope.
  666. * @param sg
  667. */
  668. static void delete_si_assignments_in_scope (struct amf_sg *sg)
  669. {
  670. struct amf_su **sus= sg->recovery_scope.sus;
  671. while (*sus != NULL) {
  672. delete_si_assignments (*sus);
  673. sus++;
  674. }
  675. }
  676. /**
  677. * Callback function used by SI when an SI has been deactivated.
  678. * @param si_assignment
  679. * @param result
  680. */
  681. static void dependent_si_deactivated_cbfn (
  682. struct amf_si_assignment *si_assignment, int result)
  683. {
  684. struct amf_sg *sg = si_assignment->su->sg;
  685. struct amf_su **sus = sg->recovery_scope.sus;
  686. struct amf_su *su;
  687. ENTER ("'%s', %d", si_assignment->si->name.value, result);
  688. /*
  689. * If all SI assignments for all SUs in the SG are not pending,
  690. * goto next state (TerminatingSuspected).
  691. */
  692. for (su = sg->su_head ; su != NULL; su = su->next) {
  693. struct amf_si_assignment *si_assignment;
  694. si_assignment = amf_su_get_next_si_assignment(su, NULL);
  695. while (si_assignment != NULL) {
  696. if (si_assignment->saAmfSISUHAState !=
  697. si_assignment->requested_ha_state) {
  698. goto still_wating;
  699. }
  700. si_assignment = amf_su_get_next_si_assignment(su, si_assignment);
  701. }
  702. }
  703. still_wating:
  704. if (su == NULL) {
  705. sus = si_assignment->su->sg->recovery_scope.sus;
  706. /* Select next state depending on if some SU in the scope is
  707. * needs to be terminated.
  708. */
  709. while (*sus != NULL) {
  710. if (((*sus)->saAmfSUPresenceState !=
  711. SA_AMF_PRESENCE_UNINSTANTIATED) &&
  712. ((*sus)->saAmfSUPresenceState !=
  713. SA_AMF_PRESENCE_TERMINATION_FAILED) &&
  714. ((*sus)->saAmfSUPresenceState !=
  715. SA_AMF_PRESENCE_INSTANTIATION_FAILED)) {
  716. break;
  717. }
  718. sus++;
  719. }
  720. if (*sus != NULL) {
  721. acsm_enter_terminating_suspected (sg);
  722. } else {
  723. delete_si_assignments_in_scope(sg);
  724. acsm_enter_activating_standby (sg);
  725. }
  726. }
  727. LEAVE("");
  728. }
  729. static void standby_su_activated_cbfn (
  730. struct amf_si_assignment *si_assignment, int result)
  731. {
  732. struct amf_su **sus= si_assignment->su->sg->recovery_scope.sus;
  733. struct amf_si **sis= si_assignment->su->sg->recovery_scope.sis;
  734. ENTER ("'%s', %d", si_assignment->si->name.value, result);
  735. /*
  736. * If all SI assignments for all SIs in the scope are activated, goto next
  737. * state.
  738. */
  739. while (*sis != NULL) {
  740. if ((*sis)->assigned_sis != NULL &&
  741. (*sis)->assigned_sis->saAmfSISUHAState != SA_AMF_HA_ACTIVE) {
  742. break;
  743. }
  744. sis++;
  745. }
  746. if (*sis == NULL) {
  747. /*
  748. * TODO: create SI assignment to spare and assign them
  749. */
  750. (*sus)->sg->avail_state = SG_AC_AssigningStandbyToSpare;
  751. acsm_enter_repairing_su ((*sus)->sg);
  752. }
  753. }
  754. static void assign_si_assumed_cbfn (
  755. struct amf_si_assignment *si_assignment, int result)
  756. {
  757. struct amf_si_assignment *tmp_si_assignment;
  758. struct amf_si *si;
  759. struct amf_sg *sg = si_assignment->su->sg;
  760. int si_assignment_cnt = 0;
  761. int confirmed_assignments = 0;
  762. ENTER ("'%s', %d", si_assignment->si->name.value, result);
  763. /*
  764. * Report to application when all SIs that this SG protects
  765. * has been assigned or go back to idle state if not cluster
  766. * start.
  767. */
  768. for (si = sg->application->si_head; si != NULL; si = si->next) {
  769. if (name_match (&si->saAmfSIProtectedbySG, &sg->name)) {
  770. for (tmp_si_assignment = si->assigned_sis;
  771. tmp_si_assignment != NULL;
  772. tmp_si_assignment = tmp_si_assignment->next) {
  773. si_assignment_cnt++;
  774. if (tmp_si_assignment->requested_ha_state ==
  775. tmp_si_assignment->saAmfSISUHAState) {
  776. confirmed_assignments++;
  777. }
  778. }
  779. }
  780. }
  781. assert (confirmed_assignments != 0);
  782. switch (sg->avail_state) {
  783. case SG_AC_AssigningOnRequest:
  784. if (si_assignment_cnt == confirmed_assignments) {
  785. return_to_idle (sg);
  786. amf_application_sg_assigned (sg->application, sg);
  787. } else {
  788. dprintf ("%d, %d", si_assignment_cnt, confirmed_assignments);
  789. }
  790. break;
  791. case SG_AC_AssigningStandBy:
  792. {
  793. if (si_assignment_cnt == confirmed_assignments) {
  794. return_to_idle (sg);
  795. }
  796. break;
  797. }
  798. default:
  799. dprintf ("%d, %d, %d", sg->avail_state, si_assignment_cnt,
  800. confirmed_assignments);
  801. amf_runtime_attributes_print (amf_cluster);
  802. assert (0);
  803. }
  804. }
  /**
   * Integer division rounded up to the nearest integer (ceiling).
   *
   * C division truncates toward zero, so the quotient only has to be
   * incremented when there is a remainder and the exact result is positive,
   * i.e. when both operands have the same sign. For non-negative 'a' and
   * positive 'b' the result is the same as before this correction.
   * @param a dividend
   * @param b divisor, must not be 0
   *
   * @return int ceiling of a / b
   */
  static inline int div_round (int a, int b)
  {
      int res;

      assert (b != 0);

      res = a / b;
      if (((a % b) != 0) && ((a < 0) == (b < 0))) {
          res++;
      }

      return res;
  }
  814. static int all_su_has_presence_state (
  815. struct amf_sg *sg, struct amf_node *node_to_start,
  816. SaAmfPresenceStateT state)
  817. {
  818. struct amf_su *su;
  819. int all_set = 1;
  820. for (su = sg->su_head; su != NULL; su = su->next) {
  821. if (su->saAmfSUPresenceState != state) {
  822. if (node_to_start == NULL) {
  823. all_set = 0;
  824. break;
  825. } else {
  826. if (name_match(&node_to_start->name,
  827. &su->saAmfSUHostedByNode)) {
  828. all_set = 0;
  829. break;
  830. }
  831. }
  832. }
  833. }
  834. return all_set;
  835. }
  836. static int all_su_in_scope_has_presence_state (
  837. struct amf_sg *sg, SaAmfPresenceStateT state)
  838. {
  839. struct amf_su **sus= sg->recovery_scope.sus;
  840. while (*sus != NULL) {
  841. if ((*sus)->saAmfSUPresenceState != state) {
  842. break;
  843. }
  844. sus++;
  845. }
  846. return(*sus == NULL);
  847. }
  848. /**
  849. * Get number of SIs protected by the specified SG.
  850. * @param sg
  851. *
  852. * @return int
  853. */
  854. static int sg_si_count_get (struct amf_sg *sg)
  855. {
  856. struct amf_si *si;
  857. int cnt = 0;
  858. for (si = sg->application->si_head; si != NULL; si = si->next) {
  859. if (name_match (&si->saAmfSIProtectedbySG, &sg->name)) {
  860. cnt += 1;
  861. }
  862. }
  863. return(cnt);
  864. }
  865. int amf_si_get_saAmfSINumReqActiveAssignments(struct amf_si *si)
  866. {
  867. struct amf_si_assignment *si_assignment = si->assigned_sis;
  868. int number_of_req_active_assignments = 0;
  869. for (; si_assignment != NULL; si_assignment = si_assignment->next) {
  870. if (si_assignment->requested_ha_state == SA_AMF_HA_ACTIVE) {
  871. number_of_req_active_assignments++;
  872. }
  873. }
  874. return number_of_req_active_assignments;
  875. }
  876. int amf_si_get_saAmfSINumReqStandbyAssignments(struct amf_si *si)
  877. {
  878. struct amf_si_assignment *si_assignment = si->assigned_sis;
  879. int number_of_req_active_assignments = 0;
  880. for (; si_assignment != NULL; si_assignment = si_assignment->next) {
  881. if (si_assignment->requested_ha_state == SA_AMF_HA_STANDBY) {
  882. number_of_req_active_assignments++;
  883. }
  884. }
  885. return number_of_req_active_assignments;
  886. }
  887. static int sg_assign_nm_active (struct amf_sg *sg, int su_active_assign)
  888. {
  889. struct amf_su *su;
  890. struct amf_si *si;
  891. int assigned = 0;
  892. int assign_to_su = 0;
  893. int total_assigned = 0;
  894. int si_left;
  895. int si_total;
  896. int su_left_to_assign = su_active_assign;
  897. si_total = sg_si_count_get (sg);
  898. si_left = si_total;
  899. assign_to_su = div_round (si_left, su_active_assign);
  900. if (assign_to_su > sg->saAmfSGMaxActiveSIsperSUs) {
  901. assign_to_su = sg->saAmfSGMaxActiveSIsperSUs;
  902. }
  903. su = sg->su_head;
  904. while (su != NULL && su_left_to_assign > 0) {
  905. if (amf_su_get_saAmfSUReadinessState (su) !=
  906. SA_AMF_READINESS_IN_SERVICE ||
  907. amf_su_get_saAmfSUNumCurrActiveSIs (su) ==
  908. assign_to_su ||
  909. amf_su_get_saAmfSUNumCurrStandbySIs (su) > 0) {
  910. su = su->next;
  911. continue; /* Not in service */
  912. }
  913. si = sg->application->si_head;
  914. assigned = 0;
  915. assign_to_su = div_round (si_left, su_left_to_assign);
  916. if (assign_to_su > sg->saAmfSGMaxActiveSIsperSUs) {
  917. assign_to_su = sg->saAmfSGMaxActiveSIsperSUs;
  918. }
  919. while (si != NULL) {
  920. if (name_match (&si->saAmfSIProtectedbySG, &sg->name) &&
  921. assigned < assign_to_su &&
  922. amf_si_get_saAmfSINumReqActiveAssignments(si) == 0) {
  923. assigned += 1;
  924. total_assigned += 1;
  925. amf_su_assign_si (su, si, SA_AMF_HA_ACTIVE);
  926. }
  927. si = si->next;
  928. }
  929. su = su->next;
  930. su_left_to_assign -= 1;
  931. si_left -= assigned;
  932. dprintf (" su_left_to_assign =%d, si_left=%d\n",
  933. su_left_to_assign, si_left);
  934. }
  935. assert (total_assigned <= si_total);
  936. if (total_assigned == 0) {
  937. dprintf ("Info: No SIs assigned");
  938. }
  939. LEAVE();
  940. return total_assigned;
  941. }
  942. static int sg_assign_nm_standby (struct amf_sg *sg, int su_standby_assign)
  943. {
  944. struct amf_su *su;
  945. struct amf_si *si;
  946. int assigned = 0;
  947. int assign_to_su = 0;
  948. int total_assigned = 0;
  949. int si_left;
  950. int si_total;
  951. int su_left_to_assign = su_standby_assign;
  952. ENTER ("'%s'", sg->name.value);
  953. if (su_standby_assign == 0) {
  954. return 0;
  955. }
  956. si_total = sg_si_count_get (sg);
  957. si_left = si_total;
  958. assign_to_su = div_round (si_left, su_standby_assign);
  959. if (assign_to_su > sg->saAmfSGMaxStandbySIsperSUs) {
  960. assign_to_su = sg->saAmfSGMaxStandbySIsperSUs;
  961. }
  962. su = sg->su_head;
  963. while (su != NULL && su_left_to_assign > 0) {
  964. if (amf_su_get_saAmfSUReadinessState (su) !=
  965. SA_AMF_READINESS_IN_SERVICE ||
  966. amf_su_get_saAmfSUNumCurrActiveSIs (su) > 0 ||
  967. amf_su_get_saAmfSUNumCurrStandbySIs (su) ==
  968. assign_to_su) {
  969. su = su->next;
  970. continue; /* Not available for assignment */
  971. }
  972. si = sg->application->si_head;
  973. assigned = 0;
  974. assign_to_su = div_round (si_left, su_left_to_assign);
  975. if (assign_to_su > sg->saAmfSGMaxStandbySIsperSUs) {
  976. assign_to_su = sg->saAmfSGMaxStandbySIsperSUs;
  977. }
  978. while (si != NULL) {
  979. if (name_match (&si->saAmfSIProtectedbySG, &sg->name) &&
  980. assigned < assign_to_su &&
  981. amf_si_get_saAmfSINumReqStandbyAssignments (si) == 0) {
  982. assigned += 1;
  983. total_assigned += 1;
  984. amf_su_assign_si (su, si, SA_AMF_HA_STANDBY);
  985. }
  986. si = si->next;
  987. }
  988. su_left_to_assign -= 1;
  989. si_left -= assigned;
  990. dprintf (" su_left_to_assign =%d, si_left=%d\n",
  991. su_left_to_assign, si_left);
  992. su = su->next;
  993. }
  994. assert (total_assigned <= si_total);
  995. if (total_assigned == 0) {
  996. dprintf ("Info: No SIs assigned!");
  997. }
  998. return total_assigned;
  999. }
  1000. static int su_inservice_count_get (struct amf_sg *sg)
  1001. {
  1002. struct amf_su *su;
  1003. int answer = 0;
  1004. for (su = sg->su_head; su != NULL; su = su->next) {
  1005. if (amf_su_get_saAmfSUReadinessState (su) ==
  1006. SA_AMF_READINESS_IN_SERVICE) {
  1007. answer += 1;
  1008. }
  1009. }
  1010. return(answer);
  1011. }
  1012. /**
  1013. * TODO: dependency_level not used, hard coded
  1014. * @param sg
  1015. * @param dependency_level
  1016. */
  1017. static int assign_si (struct amf_sg *sg, int dependency_level)
  1018. {
  1019. int active_sus_needed = 0;
  1020. int standby_sus_needed = 0;
  1021. int inservice_count;
  1022. int su_active_assign;
  1023. int su_standby_assign;
  1024. int su_spare_assign;
  1025. int assigned = 0;
  1026. ENTER ("'%s'", sg->name.value);
  1027. /**
  1028. * Phase 1: Calculate assignments and create all runtime objects in
  1029. * information model. Do not do the actual assignment, done in
  1030. * phase 2.
  1031. */
  1032. /**
  1033. * Calculate number of SUs to assign to active or standby state
  1034. */
  1035. inservice_count = su_inservice_count_get (sg);
  1036. if (sg->saAmfSGNumPrefActiveSUs > 0) {
  1037. active_sus_needed = div_round (
  1038. sg_si_count_get (sg),
  1039. sg->saAmfSGMaxActiveSIsperSUs);
  1040. } else {
  1041. log_printf (LOG_LEVEL_ERROR, "ERROR: saAmfSGNumPrefActiveSUs == 0 !!");
  1042. openais_exit_error (AIS_DONE_FATAL_ERR);
  1043. }
  1044. if (sg->saAmfSGNumPrefStandbySUs > 0) {
  1045. standby_sus_needed = div_round (
  1046. sg_si_count_get (sg),
  1047. sg->saAmfSGMaxStandbySIsperSUs);
  1048. } else {
  1049. log_printf (LOG_LEVEL_ERROR, "ERROR: saAmfSGNumPrefStandbySUs == 0 !!");
  1050. openais_exit_error (AIS_DONE_FATAL_ERR);
  1051. }
  1052. dprintf ("(inservice=%d) (active_sus_needed=%d) (standby_sus_needed=%d)"
  1053. "\n",
  1054. inservice_count, active_sus_needed, standby_sus_needed);
  1055. /* Determine number of active and standby service units
  1056. * to assign based upon reduction procedure
  1057. */
  1058. if ((inservice_count < active_sus_needed)) {
  1059. dprintf ("assignment VI - partial assignment with SIs drop outs\n");
  1060. su_active_assign = inservice_count;
  1061. su_standby_assign = 0;
  1062. su_spare_assign = 0;
  1063. } else
  1064. if ((inservice_count < active_sus_needed + standby_sus_needed)) {
  1065. dprintf ("assignment V - partial assignment with reduction of"
  1066. " standby units\n");
  1067. su_active_assign = active_sus_needed;
  1068. su_standby_assign = inservice_count - active_sus_needed;
  1069. su_spare_assign = 0;
  1070. } else
  1071. if ((inservice_count < sg->saAmfSGNumPrefActiveSUs + standby_sus_needed)) {
  1072. dprintf ("IV: full assignment with reduction of active service"
  1073. " units\n");
  1074. su_active_assign = inservice_count - standby_sus_needed;
  1075. su_standby_assign = standby_sus_needed;
  1076. su_spare_assign = 0;
  1077. } else
  1078. if ((inservice_count <
  1079. sg->saAmfSGNumPrefActiveSUs + sg->saAmfSGNumPrefStandbySUs)) {
  1080. dprintf ("III: full assignment with reduction of standby service"
  1081. " units\n");
  1082. su_active_assign = sg->saAmfSGNumPrefActiveSUs;
  1083. su_standby_assign = inservice_count - sg->saAmfSGNumPrefActiveSUs;
  1084. su_spare_assign = 0;
  1085. } else
  1086. if ((inservice_count ==
  1087. sg->saAmfSGNumPrefActiveSUs + sg->saAmfSGNumPrefStandbySUs)) {
  1088. if (sg->saAmfSGNumPrefInserviceSUs > inservice_count) {
  1089. dprintf ("II: full assignment with spare reduction\n");
  1090. } else {
  1091. dprintf ("II: full assignment without spares\n");
  1092. }
  1093. su_active_assign = sg->saAmfSGNumPrefActiveSUs;
  1094. su_standby_assign = sg->saAmfSGNumPrefStandbySUs;
  1095. su_spare_assign = 0;
  1096. } else {
  1097. dprintf ("I: full assignment with spares\n");
  1098. su_active_assign = sg->saAmfSGNumPrefActiveSUs;
  1099. su_standby_assign = sg->saAmfSGNumPrefStandbySUs;
  1100. su_spare_assign = inservice_count -
  1101. sg->saAmfSGNumPrefActiveSUs - sg->saAmfSGNumPrefStandbySUs;
  1102. }
  1103. dprintf ("(inservice=%d) (assigning active=%d) (assigning standby=%d)"
  1104. " (assigning spares=%d)\n",
  1105. inservice_count, su_active_assign, su_standby_assign, su_spare_assign);
  1106. if (inservice_count > 0) {
  1107. assigned = sg_assign_nm_active (sg, su_active_assign);
  1108. assigned += sg_assign_nm_standby (sg, su_standby_assign);
  1109. #if 0
  1110. assert (assigned > 0);
  1111. #endif
  1112. sg->saAmfSGNumCurrAssignedSUs = inservice_count;
  1113. /**
  1114. * Phase 2: do the actual assignment to the component
  1115. * TODO: first do active, then standby
  1116. */
  1117. {
  1118. struct amf_si *si;
  1119. struct amf_si_assignment *si_assignment;
  1120. for (si = sg->application->si_head; si != NULL; si = si->next) {
  1121. if (name_match (&si->saAmfSIProtectedbySG, &sg->name)) {
  1122. for (si_assignment = si->assigned_sis;
  1123. si_assignment != NULL;
  1124. si_assignment = si_assignment->next) {
  1125. if (si_assignment->requested_ha_state !=
  1126. si_assignment->saAmfSISUHAState) {
  1127. amf_si_ha_state_assume (
  1128. si_assignment, assign_si_assumed_cbfn);
  1129. }
  1130. }
  1131. }
  1132. }
  1133. }
  1134. }
  1135. LEAVE ("'%s'", sg->name.value);
  1136. return assigned;
  1137. }
  1138. void amf_sg_assign_si (struct amf_sg *sg, int dependency_level)
  1139. {
  1140. sg->avail_state = SG_AC_AssigningOnRequest;
  1141. if (assign_si (sg, dependency_level) == 0) {
  1142. return_to_idle (sg);
  1143. amf_application_sg_assigned (sg->application, sg);
  1144. }
  1145. }
  1146. void amf_sg_failover_node_req (
  1147. struct amf_sg *sg, struct amf_node *node)
  1148. {
  1149. ENTER("'%s, %s'",node->name.value, sg->name.value);
  1150. /*
  1151. * TODO: Defer all new events. Workaround is to exit.
  1152. */
  1153. if (sg->avail_state != SG_AC_Idle) {
  1154. log_printf (LOG_LEVEL_ERROR, "To handle multiple simultaneous SG"
  1155. " recovery actions is not implemented yet:"
  1156. " SG '%s', NODE '%s', avail_state %d",
  1157. sg->name.value, node->name.value, sg->avail_state);
  1158. openais_exit_error (AIS_DONE_FATAL_ERR);
  1159. return;
  1160. }
  1161. set_scope_for_failover_node(sg, node);
  1162. if (has_any_su_in_scope_active_workload (sg)) {
  1163. acsm_enter_deactivating_dependent_workload (sg);
  1164. } else {
  1165. struct amf_su **sus = sg->recovery_scope.sus;
  1166. /* Select next state depending on if some SU in the scope is
  1167. * needs to be terminated.
  1168. */
  1169. while (*sus != NULL) {
  1170. ENTER("SU %s pr_state='%d'",(*sus)->name.value,
  1171. (*sus)->saAmfSUPresenceState);
  1172. if (((*sus)->saAmfSUPresenceState ==
  1173. SA_AMF_PRESENCE_UNINSTANTIATED) ||
  1174. ((*sus)->saAmfSUPresenceState ==
  1175. SA_AMF_PRESENCE_TERMINATION_FAILED) ||
  1176. ((*sus)->saAmfSUPresenceState ==
  1177. SA_AMF_PRESENCE_INSTANTIATION_FAILED)) {
  1178. sus++;
  1179. continue;
  1180. }
  1181. break;
  1182. }
  1183. if (*sus != NULL) {
  1184. acsm_enter_terminating_suspected (sg);
  1185. } else {
  1186. delete_si_assignments_in_scope (sg);
  1187. return_to_idle (sg);
  1188. }
  1189. }
  1190. }
  1191. void amf_sg_start (struct amf_sg *sg, struct amf_node *node)
  1192. {
  1193. struct amf_su *su;
  1194. sg_avail_control_state_t old_avail_state = sg->avail_state;
  1195. int instantiated_sus = 0;
  1196. ENTER ("'%s'", sg->name.value);
  1197. sg->node_to_start = node;
  1198. sg->avail_state = SG_AC_InstantiatingServiceUnits;
  1199. for (su = sg->su_head; su != NULL; su = su->next) {
  1200. if (node == NULL) {
  1201. /* Cluster start */
  1202. amf_su_instantiate (su);
  1203. instantiated_sus++;
  1204. } else {
  1205. /* Node start, match if SU is hosted on the specified node*/
  1206. if (name_match (&node->name, &su->saAmfSUHostedByNode)) {
  1207. amf_su_instantiate (su);
  1208. instantiated_sus++;
  1209. }
  1210. }
  1211. }
  1212. if (instantiated_sus == 0) {
  1213. sg->avail_state = old_avail_state;
  1214. }
  1215. }
  1216. void amf_sg_su_state_changed (struct amf_sg *sg,
  1217. struct amf_su *su, SaAmfStateT type, int state)
  1218. {
  1219. ENTER ("'%s' SU '%s' state %s",
  1220. sg->name.value, su->name.value, amf_presence_state(state));
  1221. if (type == SA_AMF_PRESENCE_STATE) {
  1222. if (state == SA_AMF_PRESENCE_INSTANTIATED) {
  1223. if (sg->avail_state == SG_AC_InstantiatingServiceUnits) {
  1224. if (all_su_has_presence_state(sg, sg->node_to_start,
  1225. SA_AMF_PRESENCE_INSTANTIATED)) {
  1226. su->sg->avail_state = SG_AC_Idle;
  1227. amf_application_sg_started (
  1228. sg->application, sg, this_amf_node);
  1229. }
  1230. } else if (sg->avail_state == SG_AC_ReparingSu) {
  1231. if (all_su_in_scope_has_presence_state(su->sg,
  1232. SA_AMF_PRESENCE_INSTANTIATED)) {
  1233. su->sg->avail_state = SG_AC_AssigningStandBy;
  1234. if (assign_si (sg, 0) == 0) {
  1235. return_to_idle (sg);
  1236. }
  1237. } else {
  1238. dprintf ("avail-state: %u", sg->avail_state);
  1239. assert (0);
  1240. }
  1241. } else {
  1242. assert (0);
  1243. }
  1244. } else if (state == SA_AMF_PRESENCE_UNINSTANTIATED) {
  1245. if (sg->avail_state == SG_AC_TerminatingSuspected) {
  1246. if (all_su_in_scope_has_presence_state (sg, state)) {
  1247. delete_si_assignments_in_scope (sg);
  1248. if (is_standby_for_non_active_si_in_scope (sg)) {
  1249. acsm_enter_activating_standby (sg);
  1250. } else {
  1251. /*
  1252. * TODO: create SI assignment to spare and assign them
  1253. */
  1254. sg->avail_state = SG_AC_AssigningStandbyToSpare;
  1255. acsm_enter_repairing_su (sg);
  1256. }
  1257. }
  1258. } else {
  1259. assert (0);
  1260. }
  1261. } else {
  1262. assert (0);
  1263. }
  1264. } else {
  1265. assert (0);
  1266. }
  1267. }
  1268. void amf_sg_init (void)
  1269. {
  1270. log_init ("AMF");
  1271. }
  1272. void amf_sg_failover_su_req (
  1273. struct amf_sg *sg, struct amf_su *su, struct amf_node *node)
  1274. {
  1275. ENTER ("");
  1276. /*
  1277. * TODO: Defer all new events. Workaround is to exit.
  1278. */
  1279. if (sg->avail_state != SG_AC_Idle) {
  1280. log_printf (LOG_LEVEL_ERROR, "To handle multiple simultaneous SG"
  1281. " recovery actions is not implemented yet:"
  1282. " SG '%s', SU '%s', avail_state %d",
  1283. sg->name.value, su->name.value, sg->avail_state);
  1284. openais_exit_error (AIS_DONE_FATAL_ERR);
  1285. return;
  1286. }
  1287. set_scope_for_failover_su (sg, su);
  1288. if (has_any_su_in_scope_active_workload (sg)) {
  1289. acsm_enter_deactivating_dependent_workload (sg);
  1290. } else {
  1291. acsm_enter_terminating_suspected (sg);
  1292. }
  1293. }
  1294. /**
  1295. * Constructor for SG objects. Adds SG to the list owned by
  1296. * the specified application. Always returns a valid SG
  1297. * object, out-of-memory problems are handled here. Default
  1298. * values are initialized.
  1299. * @param sg
  1300. * @param name
  1301. *
  1302. * @return struct amf_sg*
  1303. */
  1304. struct amf_sg *amf_sg_new (struct amf_application *app, char *name)
  1305. {
  1306. struct amf_sg *sg = calloc (1, sizeof (struct amf_sg));
  1307. if (sg == NULL) {
  1308. openais_exit_error (AIS_DONE_OUT_OF_MEMORY);
  1309. }
  1310. sg->next = app->sg_head;
  1311. app->sg_head = sg;
  1312. sg->saAmfSGAdminState = SA_AMF_ADMIN_UNLOCKED;
  1313. sg->saAmfSGNumPrefActiveSUs = 1;
  1314. sg->saAmfSGNumPrefStandbySUs = 1;
  1315. sg->saAmfSGNumPrefInserviceSUs = ~0;
  1316. sg->saAmfSGNumPrefAssignedSUs = ~0;
  1317. sg->saAmfSGCompRestartProb = -1;
  1318. sg->saAmfSGCompRestartMax = ~0;
  1319. sg->saAmfSGSuRestartProb = -1;
  1320. sg->saAmfSGSuRestartMax = ~0;
  1321. sg->saAmfSGAutoAdjustProb = -1;
  1322. sg->saAmfSGAutoRepair = SA_TRUE;
  1323. sg->application = app;
  1324. setSaNameT (&sg->name, name);
  1325. sg->node_to_start = NULL;
  1326. return sg;
  1327. }
  1328. void amf_sg_delete (struct amf_sg *sg)
  1329. {
  1330. struct amf_su *su;
  1331. for (su = sg->su_head; su != NULL;) {
  1332. struct amf_su *tmp = su;
  1333. su = su->next;
  1334. amf_su_delete (tmp);
  1335. }
  1336. free (sg);
  1337. }
  1338. void *amf_sg_serialize (struct amf_sg *sg, int *len)
  1339. {
  1340. char *buf = NULL;
  1341. int offset = 0, size = 0;
  1342. TRACE8 ("%s", sg->name.value);
  1343. buf = amf_serialize_SaNameT (buf, &size, &offset, &sg->name);
  1344. buf = amf_serialize_SaUint32T (buf, &size, &offset, sg->saAmfSGRedundancyModel);
  1345. buf = amf_serialize_SaUint32T (
  1346. buf, &size, &offset, sg->saAmfSGAutoAdjust);
  1347. buf = amf_serialize_SaUint32T (
  1348. buf, &size, &offset, sg->saAmfSGNumPrefActiveSUs);
  1349. buf = amf_serialize_SaUint32T (
  1350. buf, &size, &offset, sg->saAmfSGNumPrefStandbySUs);
  1351. buf = amf_serialize_SaUint32T (
  1352. buf, &size, &offset, sg->saAmfSGNumPrefInserviceSUs);
  1353. buf = amf_serialize_SaUint32T (
  1354. buf, &size, &offset, sg->saAmfSGNumPrefAssignedSUs);
  1355. buf = amf_serialize_SaUint32T (
  1356. buf, &size, &offset, sg->saAmfSGMaxActiveSIsperSUs);
  1357. buf = amf_serialize_SaUint32T (
  1358. buf, &size, &offset, sg->saAmfSGMaxStandbySIsperSUs);
  1359. buf = amf_serialize_SaUint32T (
  1360. buf, &size, &offset, sg->saAmfSGCompRestartProb);
  1361. buf = amf_serialize_SaUint32T (
  1362. buf, &size, &offset, sg->saAmfSGCompRestartMax);
  1363. buf = amf_serialize_SaUint32T (
  1364. buf, &size, &offset, sg->saAmfSGSuRestartProb);
  1365. buf = amf_serialize_SaUint32T (
  1366. buf, &size, &offset, sg->saAmfSGSuRestartMax);
  1367. buf = amf_serialize_SaUint32T (
  1368. buf, &size, &offset, sg->saAmfSGAutoAdjustProb);
  1369. buf = amf_serialize_SaUint32T (
  1370. buf, &size, &offset, sg->saAmfSGAutoRepair);
  1371. buf = amf_serialize_SaUint32T (
  1372. buf, &size, &offset, sg->saAmfSGAdminState);
  1373. buf = amf_serialize_SaUint32T (
  1374. buf, &size, &offset, sg->saAmfSGNumCurrAssignedSUs);
  1375. buf = amf_serialize_SaUint32T (
  1376. buf, &size, &offset, sg->saAmfSGNumCurrNonInstantiatedSpareSUs);
  1377. buf = amf_serialize_SaUint32T (
  1378. buf, &size, &offset, sg->saAmfSGNumCurrInstantiatedSpareSUs);
  1379. buf = amf_serialize_SaStringT (
  1380. buf, &size, &offset, sg->clccli_path);
  1381. buf = amf_serialize_SaUint32T (
  1382. buf, &size, &offset, sg->avail_state);
  1383. *len = offset;
  1384. return buf;
  1385. }
  1386. struct amf_sg *amf_sg_deserialize (
  1387. struct amf_application *app, char *buf, int size)
  1388. {
  1389. char *tmp = buf;
  1390. struct amf_sg *sg;
  1391. sg = amf_sg_new (app, "");
  1392. tmp = amf_deserialize_SaNameT (tmp, &sg->name);
  1393. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGRedundancyModel);
  1394. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGAutoAdjust);
  1395. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGNumPrefActiveSUs);
  1396. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGNumPrefStandbySUs);
  1397. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGNumPrefInserviceSUs);
  1398. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGNumPrefAssignedSUs);
  1399. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGMaxActiveSIsperSUs);
  1400. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGMaxStandbySIsperSUs);
  1401. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGCompRestartProb);
  1402. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGCompRestartMax);
  1403. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGSuRestartProb);
  1404. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGSuRestartMax);
  1405. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGAutoAdjustProb);
  1406. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGAutoRepair);
  1407. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGAdminState);
  1408. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGNumCurrAssignedSUs);
  1409. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGNumCurrNonInstantiatedSpareSUs);
  1410. tmp = amf_deserialize_SaUint32T (tmp, &sg->saAmfSGNumCurrInstantiatedSpareSUs);
  1411. tmp = amf_deserialize_SaStringT (tmp, &sg->clccli_path);
  1412. tmp = amf_deserialize_SaUint32T (tmp, &sg->avail_state);
  1413. return sg;
  1414. }
  1415. struct amf_sg *amf_sg_find (struct amf_application *app, char *name)
  1416. {
  1417. struct amf_sg *sg;
  1418. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  1419. if (sg->name.length == strlen(name) &&
  1420. strncmp (name, (char*)sg->name.value, sg->name.length) == 0) {
  1421. break;
  1422. }
  1423. }
  1424. return sg;
  1425. }