amfsg.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. /** @file amfsg.c
  2. *
  3. * Copyright (c) 2002-2006 MontaVista Software, Inc.
  4. * Author: Steven Dake (sdake@mvista.com)
  5. *
  6. * Copyright (c) 2006 Ericsson AB.
  7. * Author: Hans Feldt
  8. * - Introduced AMF B.02 information model
  9. * - Use DN in API and multicast messages
  10. * - (Re-)Introduction of event based multicast messages
  11. * - Refactoring of code into several AMF files
  12. * Author: Anders Eriksson
  13. *
  14. * All rights reserved.
  15. *
  16. *
  17. * This software licensed under BSD license, the text of which follows:
  18. *
  19. * Redistribution and use in source and binary forms, with or without
  20. * modification, are permitted provided that the following conditions are met:
  21. *
  22. * - Redistributions of source code must retain the above copyright notice,
  23. * this list of conditions and the following disclaimer.
  24. * - Redistributions in binary form must reproduce the above copyright notice,
  25. * this list of conditions and the following disclaimer in the documentation
  26. * and/or other materials provided with the distribution.
  27. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  28. * contributors may be used to endorse or promote products derived from this
  29. * software without specific prior written permission.
  30. *
  31. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  32. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  33. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  34. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  35. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  36. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  37. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  38. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  39. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  41. * THE POSSIBILITY OF SUCH DAMAGE.
  42. *
  43. * AMF Service Group Class Implementation
  44. *
  45. * This file contains functions for handling AMF-service groups(SGs). It can be
  46. * viewed as the implementation of the AMF Service Group class (called SG)
  47. * as described in SAI-Overview-B.02.01. The SA Forum specification
  48. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  49. * and is referred to as 'the spec' below.
  50. *
  51. * The functions in this file are responsible for:
  52. * -on request start the service group by instantiating the contained SUs
  53. * -on request assign the service instances it protects to the in-service
  54. * service units it contains respecting as many as possible of the configured
  55. * requirements for the group
  56. * -create and delete an SI-assignment object for each relation between
  57. * an SI and an SU
  58. * -order each contained SU to create and delete CSI-assignments
  59. * -request the Service Instance class (SI) to execute the transfer of the
  60. * HA-state set/remove requests to each component involved
  61. * -fully control the execution of component failover and SU failover
  62. * -on request control the execution of the initial steps of node switchover
  63. * and node failover
  64. * -fully handle the auto adjust procedure
  65. *
  66. * Currently only the 'n+m' redundancy model is implemented. It is the
  67. * ambition to identify n+m specific variables and functions and add the suffix
  68. * '_nplusm' to them so that they can be easily recognized.
  69. *
  70. * When SG is requested to assign workload to all SUs or all SUs hosted on
  71. * a specific node, a procedure containing several steps is executed:
  72. * <1> An algorithm is executed which assigns SIs to SUs respecting the rules
  73. * that have been configured for SG. The algorithm also has to consider
  74. * if assignments between some SIs and SUs already exist. The scope of this
  75. * algorithm is to create SI-assignments and set up requested HA-state for
  76. * each assignment but not to transfer those HA-states to the components.
  77. * <2> All SI-assignments with a requested HA state == ACTIVE are transferred
  78. * to the components concerned before any STANDBY assignments are
  79. * transferred. All components have to acknowledge the setting of the
  80. * ACTIVE HA state before the transfer of any STANDBY assignment is
  81. * initiated.
  82. * <3> All active assignments can not be transferred at the same time to the
  83. * different components because the rules for dependencies between SI and
  84. * SI application wide and CSI and CSI within one SI, has to be respected.
  85. *
  86. * SG is fully responsible for step <1> but not fully responsible for handling
  87. * step <2> and <3>. However, SG uses an attribute called 'dependency level'
  88. * when requested to assign workload. This parameter refers to an integer that
  89. * has been calculated initially for each SI. The 'dependency level' indicates
  90. * to which extent an SI depends on other SIs such that an SI that depends on
  91. * no other SI is on dependency_level == 1, an SI that depends only on an SI on
  92. * dependency_level == 1 is on dependency_level == 2.
  93. * An SI that depends on several SIs gets a
  94. * dependency_level that is one unit higher than the SI with the highest
  95. * dependency_level it depends on. When SG is requested to assign the workload
  96. * on a certain dependency level, it requests all SI objects on that level to
  97. * activate (all) SI-assignments that during step <1> has been requested to
  98. * assume the active HA state.
  99. *
  100. * SG contains the following state machines:
  101. * - administrative state machine (ADSM) (NOT IN THIS RELEASE)
  102. * - availability control state machine (ACSM)
  103. *
  104. * The availability control state machine contains two states and one of them
  105. * is composite. Being a composite state means that it contains substates.
  106. * The states are:
  107. * - IDLE (non composite state)
  108. * - MANAGING_SG (composite state)
  109. * MANAGING_SG is entered at several different events which have in common
  110. * the need to set up or change the assignment of SIs to SUs. Only one such
  111. * event can be handled at the time. If new events occur while one event is
  112. * being handled then the new event is saved and will be handled after the
  113. * handling of the first event is ready (return to IDLE state has been done).
  114. * MANAGING_SG handles the following events:
  115. * - start (requests SG to order SU to instantiate all SUs in SG and waits
  116. * for SU to indicate presence state change reports from the SUs and
  117. * finally responds 'started' to the requester)
  118. * - assign (requests SG to assign SIs to SUs according to pre-configured
  119. * rules (if not already done) and transfer the HA state of
  120. * the SIs on the requested SI dependency level. Then SG waits for
  121. * confirmation that the HA state has been successfully set and
  122. * finally responds 'assigned' to the requester)
  123. * - auto_adjust (this event indicates that the auto-adjust probation timer has
  124. * expired and that SG should evaluate current assignments of
  125. * SIs to SUs and if needed remove current assignments and
  126. * create new according to what is specified in paragraph
  127. * 3.7.1.2)
  128. * - failover_comp (requests SG to failover a specific component according to
  129. * the procedure described in paragraph 3.12.1.3)
  130. * - failover_su (requests SG to failover a specific SU according to the
  131. * procedure described in paragraph 3.12.1.3 and 3.12.1.4)
  132. * - switchover_node (requests SG to execute the recovery actions described
  133. * in 3.12.1.3 and respond to the requester when recovery
  134. * is completed)
  135. * - failover_node (requests SG to execute the recovery actions described
  136. * in 3.12.1.3 and respond to the requester when recovery is
  137. * completed)
  138. *
  139. */
  140. #include <stdlib.h>
  141. #include <errno.h>
  142. #include "amf.h"
  143. #include "print.h"
  144. #include "main.h"
  145. #include "util.h"
  /**
   * Divide a by b and add one to the truncated quotient whenever the division
   * leaves a remainder. For the non-negative counts this file passes in, that
   * is the quotient rounded up to the next whole number.
   *
   * A zero divisor yields 0 rather than performing the division, which would
   * be undefined behaviour in C.
   *
   * @param a dividend
   * @param b divisor
   * @return a / b, plus one if a is not an exact multiple of b; 0 if b is 0
   */
  static inline int div_round (int a, int b)
  {
      if (b == 0) {
          return 0;
      }
      return (a / b) + (((a % b) != 0) ? 1 : 0);
  }
  154. static int all_su_instantiated(struct amf_sg *sg)
  155. {
  156. struct amf_su *su;
  157. int all_instantiated = 1;
  158. for (su = sg->su_head; su != NULL; su = su->next) {
  159. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED) {
  160. all_instantiated = 0;
  161. break;
  162. }
  163. }
  164. return all_instantiated;
  165. }
  166. static int application_si_count_get (struct amf_application *app)
  167. {
  168. struct amf_si *si;
  169. int answer = 0;
  170. for (si = app->si_head; si != NULL; si = si->next) {
  171. answer += 1;
  172. }
  173. return (answer);
  174. }
  175. static void sg_assign_nm_active (struct amf_sg *sg, int su_units_assign)
  176. {
  177. struct amf_su *unit;
  178. struct amf_si *si;
  179. int assigned = 0;
  180. int assign_per_su = 0;
  181. int total_assigned = 0;
  182. assign_per_su = application_si_count_get (sg->application);
  183. assign_per_su = div_round (assign_per_su, su_units_assign);
  184. if (assign_per_su > sg->saAmfSGMaxActiveSIsperSUs) {
  185. assign_per_su = sg->saAmfSGMaxActiveSIsperSUs;
  186. }
  187. si = sg->application->si_head;
  188. unit = sg->su_head;
  189. while (unit != NULL) {
  190. if (unit->saAmfSUReadinessState != SA_AMF_READINESS_IN_SERVICE ||
  191. unit->saAmfSUNumCurrActiveSIs == sg->saAmfSGMaxActiveSIsperSUs ||
  192. unit->saAmfSUNumCurrStandbySIs > 0) {
  193. unit = unit->next;
  194. continue; /* Not in service */
  195. }
  196. assigned = 0;
  197. while (si != NULL &&
  198. assigned < assign_per_su &&
  199. total_assigned < application_si_count_get (sg->application)) {
  200. assigned += 1;
  201. total_assigned += 1;
  202. amf_su_assign_si (unit, si, SA_AMF_HA_ACTIVE);
  203. si = si->next;
  204. }
  205. unit = unit->next;
  206. }
  207. if (total_assigned == 0) {
  208. dprintf ("Error: No SIs assigned!");
  209. }
  210. }
  211. static void sg_assign_nm_standby (struct amf_sg *sg, int units_assign_standby)
  212. {
  213. struct amf_su *unit;
  214. struct amf_si *si;
  215. int assigned = 0;
  216. int assign_per_su = 0;
  217. int total_assigned = 0;
  218. if (units_assign_standby == 0) {
  219. return;
  220. }
  221. assign_per_su = application_si_count_get (sg->application);
  222. assign_per_su = div_round (assign_per_su, units_assign_standby);
  223. if (assign_per_su > sg->saAmfSGMaxStandbySIsperSUs) {
  224. assign_per_su = sg->saAmfSGMaxStandbySIsperSUs;
  225. }
  226. si = sg->application->si_head;
  227. unit = sg->su_head;
  228. while (unit != NULL) {
  229. if (unit->saAmfSUReadinessState != SA_AMF_READINESS_IN_SERVICE ||
  230. unit->saAmfSUNumCurrActiveSIs > 0 ||
  231. unit->saAmfSUNumCurrStandbySIs == sg->saAmfSGMaxStandbySIsperSUs) {
  232. unit = unit->next;
  233. continue; /* Not available for assignment */
  234. }
  235. assigned = 0;
  236. while (si != NULL && assigned < assign_per_su) {
  237. assigned += 1;
  238. total_assigned += 1;
  239. amf_su_assign_si (unit, si, SA_AMF_HA_STANDBY);
  240. si = si->next;
  241. }
  242. unit = unit->next;
  243. }
  244. if (total_assigned == 0) {
  245. dprintf ("Error: No SIs assigned!");
  246. }
  247. }
  #if 0
  /*
   * Compiled out. Logs every in-service SU whose requested HA state is
   * neither ACTIVE nor STANDBY as being assigned SPARE; no state is changed.
   */
  static void assign_nm_spare (struct amf_sg *sg)
  {
      struct amf_su *su = sg->su_head;

      while (su != NULL) {
          int in_service =
              (su->saAmfSUReadinessState == SA_AMF_READINESS_IN_SERVICE);
          int has_role =
              (su->requested_ha_state == SA_AMF_HA_ACTIVE ||
               su->requested_ha_state == SA_AMF_HA_STANDBY);

          if (in_service && !has_role) {
              dprintf ("Assigning to SU %s with SPARE\n",
                  getSaNameT (&su->name));
          }
          su = su->next;
      }
  }
  #endif
  262. static int su_inservice_count_get (struct amf_sg *sg)
  263. {
  264. struct amf_su *unit;
  265. int answer = 0;
  266. for (unit = sg->su_head; unit != NULL; unit = unit->next) {
  267. if (unit->saAmfSUReadinessState == SA_AMF_READINESS_IN_SERVICE) {
  268. answer += 1;
  269. }
  270. }
  271. return (answer);
  272. }
  /**
   * Callback handed to amf_si_activate; currently a stub that ignores both
   * arguments.
   *
   * @param si     SI the callback is invoked for (unused)
   * @param result result passed by the caller (unused)
   */
  static void si_activated_callback (struct amf_si *si, int result)
  {
      /* TODO: not implemented yet... */
      (void)si;
      (void)result;
  }
  279. /**
  280. * TODO: dependency_level not used, hard coded
  281. * @param sg
  282. * @param dependency_level
  283. */
  284. void amf_sg_assign_si (struct amf_sg *sg, int dependency_level)
  285. {
  286. int active_sus_needed;
  287. int standby_sus_needed;
  288. int inservice_count;
  289. int units_for_standby;
  290. int units_for_active;
  291. int ii_spare;
  292. int su_active_assign;
  293. int su_standby_assign;
  294. int su_spare_assign;
  295. ENTER ("'%s'", sg->name.value);
  296. /**
  297. * Phase 1: Calculate assignments and create all runtime objects in
  298. * information model. Do not do the actual assignment, done in
  299. * phase 2.
  300. */
  301. /**
  302. * Calculate number of SUs to assign to active or standby state
  303. */
  304. inservice_count = (float)su_inservice_count_get (sg);
  305. active_sus_needed = div_round (application_si_count_get (sg->application),
  306. sg->saAmfSGMaxActiveSIsperSUs);
  307. standby_sus_needed = div_round (application_si_count_get (sg->application),
  308. sg->saAmfSGMaxStandbySIsperSUs);
  309. units_for_active = inservice_count - sg->saAmfSGNumPrefStandbySUs;
  310. if (units_for_active < 0) {
  311. units_for_active = 0;
  312. }
  313. units_for_standby = inservice_count - sg->saAmfSGNumPrefActiveSUs;
  314. if (units_for_standby < 0) {
  315. units_for_standby = 0;
  316. }
  317. ii_spare = inservice_count - sg->saAmfSGNumPrefActiveSUs - sg->saAmfSGNumPrefStandbySUs;
  318. if (ii_spare < 0) {
  319. ii_spare = 0;
  320. }
  321. /**
  322. * Determine number of active and standby service units
  323. * to assign based upon reduction procedure
  324. */
  325. if ((inservice_count - active_sus_needed) < 0) {
  326. dprintf ("assignment VI - partial assignment with SIs drop outs\n");
  327. su_active_assign = active_sus_needed;
  328. su_standby_assign = 0;
  329. su_spare_assign = 0;
  330. } else
  331. if ((inservice_count - active_sus_needed - standby_sus_needed) < 0) {
  332. dprintf ("assignment V - partial assignment with reduction of standby units\n");
  333. su_active_assign = active_sus_needed;
  334. if (standby_sus_needed > units_for_standby) {
  335. su_standby_assign = units_for_standby;
  336. } else {
  337. su_standby_assign = standby_sus_needed;
  338. }
  339. su_spare_assign = 0;
  340. } else
  341. if ((sg->saAmfSGMaxStandbySIsperSUs * units_for_standby) <= application_si_count_get (sg->application)) {
  342. dprintf ("IV: full assignment with reduction of active service units\n");
  343. su_active_assign = inservice_count - standby_sus_needed;
  344. su_standby_assign = standby_sus_needed;
  345. su_spare_assign = 0;
  346. } else
  347. if ((sg->saAmfSGMaxActiveSIsperSUs * units_for_active) <= application_si_count_get (sg->application)) {
  348. dprintf ("III: full assignment with reduction of standby service units\n");
  349. su_active_assign = sg->saAmfSGNumPrefActiveSUs;
  350. su_standby_assign = units_for_standby;
  351. su_spare_assign = 0;
  352. } else
  353. if (ii_spare == 0) {
  354. dprintf ("II: full assignment with spare reduction\n");
  355. su_active_assign = sg->saAmfSGNumPrefActiveSUs;
  356. su_standby_assign = sg->saAmfSGNumPrefStandbySUs;
  357. su_spare_assign = 0;
  358. } else {
  359. dprintf ("I: full assignment with spares\n");
  360. su_active_assign = sg->saAmfSGNumPrefActiveSUs;
  361. su_standby_assign = sg->saAmfSGNumPrefStandbySUs;
  362. su_spare_assign = ii_spare;
  363. }
  364. dprintf ("(inservice=%d) (assigning active=%d) (assigning standby=%d) (assigning spares=%d)\n",
  365. inservice_count, su_active_assign, su_standby_assign, su_spare_assign);
  366. sg_assign_nm_active (sg, su_active_assign);
  367. sg_assign_nm_standby (sg, su_standby_assign);
  368. /**
  369. * Phase 2: do the actual assignment to the component
  370. */
  371. {
  372. struct amf_si *si;
  373. for (si = sg->application->si_head; si != NULL; si = si->next) {
  374. if (name_match (&si->saAmfSIProtectedbySG, &sg->name)) {
  375. amf_si_activate (si, si_activated_callback);
  376. }
  377. }
  378. }
  379. LEAVE ("'%s'", sg->name.value);
  380. }
  381. void amf_sg_start (struct amf_sg *sg, struct amf_node *node)
  382. {
  383. struct amf_su *su;
  384. ENTER ("'%s'", sg->name.value);
  385. for (su = sg->su_head; su != NULL; su = su->next) {
  386. amf_su_instantiate (su);
  387. }
  388. }
  389. void amf_sg_su_state_changed (
  390. struct amf_sg *sg, struct amf_su *su, SaAmfStateT type, int state)
  391. {
  392. ENTER ("'%s' SU '%s' state %d", sg->name.value, su->name.value, state);
  393. if (type == SA_AMF_PRESENCE_STATE) {
  394. if (state == SA_AMF_PRESENCE_INSTANTIATED) {
  395. /*
  396. * If all SU presence states are INSTANTIATED, report to SG.
  397. */
  398. if (all_su_instantiated(su->sg)) {
  399. amf_application_sg_started (sg->application, sg, this_amf_node);
  400. }
  401. } else {
  402. assert (0);
  403. }
  404. } else {
  405. assert (0);
  406. }
  407. }
  /**
   * Module initialisation for the SG class: sets up logging under the
   * "AMF" name.
   */
  void amf_sg_init (void)
  {
      log_init (
          "AMF");
  }
  412. void amf_sg_si_activated (struct amf_sg *sg, struct amf_si *si)
  413. {
  414. ENTER ("");
  415. amf_application_sg_assigned (sg->application, sg);
  416. }