amfcluster.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. /** @file amfcluster.c
  2. *
  3. * Copyright (c) 2006 Ericsson AB.
  4. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  5. * - Refactoring of code into several AMF files
  6. * - Constructors/destructors
  7. * - Serializers/deserializers
  8. *
  9. * All rights reserved.
  10. *
  11. *
  12. * This software licensed under BSD license, the text of which follows:
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright notice,
  18. * this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright notice,
  20. * this list of conditions and the following disclaimer in the documentation
  21. * and/or other materials provided with the distribution.
  22. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  23. * contributors may be used to endorse or promote products derived from this
  24. * software without specific prior written permission.
  25. *
  26. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  36. * THE POSSIBILITY OF SUCH DAMAGE.
  37. *
  38. * AMF Cluster Class Implementation
  39. *
  40. * This file contains functions for handling the AMF cluster. It can be
  41. * viewed as the implementation of the AMF Cluster class
  42. * as described in SAI-Overview-B.02.01. The SA Forum specification
  43. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  44. * and is referred to as 'the spec' below.
  45. *
  46. * The functions in this file are responsible for:
  47. * - to start the cluster initially
  48. * - to handle the administrative operation support for the cluster (FUTURE)
  49. *
  50. * The cluster class contains the following state machines:
  51. * - administrative state machine (ADSM)
  52. * - availability control state machine (ACSM)
  53. *
  54. * The administrative state machine will be implemented in the future.
  55. *
  56. * ACSM handles initial start of the cluster. In the future it will also handle
  57. * administrative commands on the cluster as described in paragraph 7.4 of the
  58. * spec. ACSM includes two stable states (UNINSTANTIATED and STARTED) and a
  59. * number of states to control the transition between the stable states.
  60. *
  61. * The cluster is in state UNINSTANTIATED when the cluster starts. (In the
  62. * future this state will also be assumed after the LOCK_INSTANTIATION
  63. * administrative command.)
  64. *
  65. * State STARTED is assumed when the cluster has been initially started and
  66. * will in the future be re-assumed after the administrative command RESTART
  67. * has been executed.
  68. *
  69. * 1. Cluster Availability Control State Machine
  70. * =============================================
  71. *
  72. * 1.1 State Transition Table
  73. *
  74. * State: Event: Action: New state:
  75. * ===========================================================================
  76. * UNINSTANTIATED sync_ready [C1] A2,A1 STARTING_APPS
  77. * STARTING_APPS sync_ready A2,A1 STARTING_APPS
  78. * STARTING_APPS app_started [C3] A7,A3 ASSIGNING_WORKLOAD
  79. * STARTING_APPS local_timer_expired A8 STARTING_APPS
  80. * STARTING_APPS time_out [C2] A7,A3 ASSIGNING_WORKLOAD
  81. * STARTING_APPS time_out A7 WAITING_OVERTIME
  82. * WAITING_OVERTIME sync_ready A4 WAITING_OVERTIME
  83. * WAITING_OVERTIME app_started A3 ASSIGNING_WORKLOAD
  84. * ASSIGNING_WORKLOAD sync_ready A4 ASSIGNING_WORKLOAD
  85. * ASSIGNING_WORKLOAD app_assigned [C4] A6 STARTED
  86. * STARTED sync_ready A8 STARTED
  87. *
  88. * 1.2 State Description
  89. * =====================
  90. * UNINSTANTIATED - No SUs within any SG in any Application is instantiated.
  91. * STARTING_APPLICATIONS - All applications have been requested to start
  92. * their contained SGs, which in its turn has requested
  93. * their contained SUs to instantiate all their
  94. * components. The cluster startup timer is running.
  95. * WAITING_OVERTIME - The cluster startup timer has expired but not all
  96. * applications have yet responded that they have been
  97. * started. The cluster will wait indefinitely for the
  98. * applications to respond. It is correct to do so even when
  99. * the startup timer has expired, because the applications
  100. * will report they are started as soon as there is no
  101. * attempt to instantiate any of its components pending,
  102. * because attempts to instantiate a component can not go on
  103. * forever, see saAmfCompInstantiateTimeout,
  104. * saAmfCompNumMaxInstantiateWithoutDelay and
  105. * saAmfCompNumMaxInstantiateWithDelay.
  106. * ASSIGNING_WORKLOAD - All applications have been requested to assign their
  107. * specified workload to their service units according to
  108. * the redundancy model specified by their SGs.
  109. * STARTED - A best effort has been made to instantiate the components of all
  110. * applications and assign the specified workload as close as possible
  111. * to what is described in the configuration.
  112. *
  113. * 1.3 Actions
  114. * ===========
  115. * A1 - [foreach application in cluster]/start application
  116. * A2 - start cluster startup timer
  117. * A3 - [foreach application in cluster]/assign workload to application
  118. * A4 - defer sync_ready event
  119. * A5 - forward sync_ready to appropriate node object
  120. * A6 - recall deferred event
  121. * A7 - stop node local instance of cluster startup timer
  122. * A8 - multicast 'cluster startup timer time-out' event
  123. *
  124. * 1.4 Guards
  125. * ==========
  126. * C1 - No sg has availability control state == INSTANTIATING_SERVICE_UNITS
  127. * C2 - No application has Availability Control state == STARTING_SGS
  128. * C3 - All SGs are fully instantiated
  129. */
  #include <stdlib.h>
  #include <string.h>
  #include <errno.h>
  #include "print.h"
  #include "amf.h"
  #include "util.h"
  #include "main.h"
  #include "service.h"
  137. /**
  138. * Determine if all applications are started so that all
  139. * SUs is in SA_AMF_PRESENCE_INSTANTIATED prsense state
  140. * @param cluster
  141. *
  142. * @return int
  143. */
  144. static int cluster_applications_started_instantiated (struct amf_cluster *cluster)
  145. {
  146. int all_started = 1;
  147. struct amf_application *app;
  148. struct amf_sg *sg;
  149. struct amf_su *su;
  150. for (app = cluster->application_head; app != NULL; app = app->next) {
  151. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  152. for (su = sg->su_head; su != NULL; su = su->next) {
  153. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED) {
  154. all_started = 0;
  155. goto done;
  156. }
  157. }
  158. }
  159. }
  160. done:
  161. return all_started;
  162. }
  163. static int cluster_applications_are_starting_sgs(struct amf_cluster *cluster)
  164. {
  165. struct amf_application *application = 0;
  166. int is_starting_sgs = 0;
  167. for (application = cluster->application_head; application != NULL;
  168. application = application->next) {
  169. if (application->acsm_state == APP_AC_STARTING_SGS) {
  170. is_starting_sgs = 1;
  171. break;
  172. }
  173. }
  174. return is_starting_sgs;
  175. }
  176. static void acsm_cluster_enter_assigning_workload (struct amf_cluster *cluster)
  177. {
  178. log_printf(LOG_NOTICE,
  179. "Cluster: all applications started, assigning workload.");
  180. cluster->acsm_state = CLUSTER_AC_ASSIGNING_WORKLOAD;
  181. amf_cluster_assign_workload (cluster);
  182. }
  183. static void timer_function_cluster_assign_workload_tmo (void *cluster)
  184. {
  185. struct req_exec_amf_cluster_start_tmo req;
  186. ((struct amf_cluster*)cluster)->timeout_handle = 0;;
  187. ENTER ("");
  188. amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO, &req, sizeof(req));
  189. }
  190. static inline void stop_cluster_startup_timer (struct amf_cluster *cluster)
  191. {
  192. if (cluster->timeout_handle) {
  193. dprintf ("Stop cluster startup timer");
  194. poll_timer_delete (aisexec_poll_handle,
  195. cluster->timeout_handle);
  196. cluster->timeout_handle = 0;
  197. }
  198. }
  199. static void start_cluster_startup_timer (struct amf_cluster *cluster)
  200. {
  201. poll_timer_add (aisexec_poll_handle,
  202. cluster->saAmfClusterStartupTimeout,
  203. cluster,
  204. timer_function_cluster_assign_workload_tmo,
  205. &cluster->timeout_handle);
  206. }
  207. static inline void amf_cluster_enter_starting_applications (
  208. struct amf_cluster *cluster)
  209. {
  210. ENTER ("");
  211. start_cluster_startup_timer (cluster);
  212. amf_cluster->acsm_state = CLUSTER_AC_STARTING_APPLICATIONS;
  213. amf_cluster_start_applications (cluster);
  214. }
  215. static void add_assign_workload_deferred_list (struct amf_cluster *cluster,
  216. struct amf_node *node, amf_cluster_event_t event)
  217. {
  218. cluster_deferredt_t *tmp_deferred_list =
  219. calloc (1, sizeof (cluster_deferredt_t));
  220. tmp_deferred_list->defered_list.next =
  221. (amf_deferred_t*) cluster->deferred_events_head;
  222. cluster->deferred_events_head = tmp_deferred_list;
  223. }
  224. static void defer_assigning_worload_to_node (struct amf_node *node,
  225. amf_cluster_event_t event)
  226. {
  227. add_assign_workload_deferred_list(amf_cluster, node, event);
  228. }
  229. static amf_deferred_t *recall_defered_cluster_events (
  230. struct amf_cluster *cluster)
  231. {
  232. return (amf_deferred_t*) cluster->deferred_events_head;
  233. }
  234. static void acsm_cluster_enter_started (struct amf_cluster *cluster)
  235. {
  236. amf_deferred_t *deferred_events;
  237. amf_cluster->acsm_state = CLUSTER_AC_STARTED;
  238. for (deferred_events = recall_defered_cluster_events (cluster);
  239. deferred_events != NULL;
  240. deferred_events = deferred_events->next){
  241. amf_node_sync_ready (((cluster_deferredt_t*)deferred_events)->node);
  242. }
  243. }
  /**
   * Tell whether no application in the cluster is still starting its SGs.
   *
   * @param cluster cluster to inspect
   *
   * @return int 1 if cluster_applications_are_starting_sgs() reports 0,
   *             0 otherwise
   */
  int amf_cluster_applications_started_with_no_starting_sgs (struct amf_cluster *cluster)
  {
      return cluster_applications_are_starting_sgs (cluster) == 0;
  }
  248. void amf_cluster_start_tmo_event (int is_sync_masterm,
  249. struct amf_cluster *cluster)
  250. {
  251. ENTER ("acsm_state = %d", amf_cluster->acsm_state);
  252. stop_cluster_startup_timer (cluster);
  253. switch (cluster->acsm_state) {
  254. case CLUSTER_AC_STARTING_APPLICATIONS:
  255. if (cluster_applications_are_starting_sgs (cluster)) {
  256. dprintf ("Cluster startup timeout, start waiting over time");
  257. amf_cluster->acsm_state = CLUSTER_AC_WAITING_OVER_TIME;
  258. } else {
  259. dprintf ("Cluster startup timeout, assigning workload");
  260. acsm_cluster_enter_assigning_workload (cluster);
  261. }
  262. break;
  263. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  264. /* ignore cluster startup timer expiration */
  265. case CLUSTER_AC_STARTED:
  266. /* ignore cluster startup timer expiration */
  267. case CLUSTER_AC_WAITING_OVER_TIME:
  268. /* ignore cluster startup timer expiration */
  269. break;
  270. default:
  271. log_printf(LOG_LEVEL_ERROR, "Cluster timout expired in wrong cluster"
  272. " state = %d", cluster->acsm_state);
  273. assert(0);
  274. break;
  275. }
  276. }
  277. /**
  278. * Start all applications in the cluster and start
  279. * the cluster startup timeout.
  280. * @param cluster
  281. * @param app
  282. */
  283. void amf_cluster_start_applications(struct amf_cluster *cluster)
  284. {
  285. struct amf_application *app;
  286. for (app = cluster->application_head; app != NULL; app = app->next) {
  287. amf_application_start (app, NULL);
  288. }
  289. }
  290. void amf_cluster_sync_ready (struct amf_cluster *cluster, struct amf_node *node)
  291. {
  292. log_printf(LOG_NOTICE, "Cluster: starting applications.");
  293. switch (amf_cluster->acsm_state) {
  294. case CLUSTER_AC_UNINSTANTIATED:
  295. if (amf_cluster->saAmfClusterAdminState == SA_AMF_ADMIN_UNLOCKED) {
  296. amf_cluster_enter_starting_applications (cluster);
  297. }
  298. break;
  299. case CLUSTER_AC_STARTING_APPLICATIONS:
  300. amf_cluster_enter_starting_applications(cluster);
  301. break;
  302. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  303. defer_assigning_worload_to_node (node, CLUSTER_SYNC_READY_EV);
  304. log_printf (LOG_LEVEL_ERROR, "Sync ready not implemented in "
  305. "cluster state: %u\n", amf_cluster->acsm_state);
  306. assert (0);
  307. break;
  308. case CLUSTER_AC_WAITING_OVER_TIME:
  309. /* TODO: Defer the implementation of assigning
  310. * workload to those syncronized nodes to CLUSTER_AC_STARTED
  311. * state.
  312. */
  313. defer_assigning_worload_to_node (node, CLUSTER_SYNC_READY_EV);
  314. break;
  315. case CLUSTER_AC_STARTED:
  316. TRACE1 ("Node sync ready sent from cluster in "
  317. "CLUSTER_AC_STARTED state");
  318. amf_node_sync_ready (node);
  319. break;
  320. default:
  321. assert (0);
  322. break;
  323. }
  324. }
  /**
   * Initialise logging for this file by calling log_init() with the
   * name "AMF".
   */
  void amf_cluster_init (void)
  {
      log_init ("AMF");
  }
  329. void amf_cluster_application_started (
  330. struct amf_cluster *cluster, struct amf_application *application)
  331. {
  332. ENTER ("application '%s' started", application->name.value);
  333. switch (cluster->acsm_state) {
  334. case CLUSTER_AC_STARTING_APPLICATIONS:
  335. if (cluster_applications_started_instantiated (cluster)) {
  336. stop_cluster_startup_timer (cluster);
  337. acsm_cluster_enter_assigning_workload (cluster);
  338. }
  339. break;
  340. case CLUSTER_AC_WAITING_OVER_TIME:
  341. if (amf_cluster_applications_started_with_no_starting_sgs (cluster)) {
  342. acsm_cluster_enter_assigning_workload (cluster);
  343. }
  344. break;
  345. default: {
  346. log_printf (LOG_ERR,"Error invalid cluster availability state %d",
  347. cluster->acsm_state);
  348. openais_exit_error(cluster->acsm_state);
  349. break;
  350. }
  351. }
  352. }
  353. struct amf_cluster *amf_cluster_new (void) {
  354. struct amf_cluster *cluster = amf_calloc (1, sizeof (struct amf_cluster));
  355. cluster->saAmfClusterStartupTimeout = -1;
  356. cluster->saAmfClusterAdminState = SA_AMF_ADMIN_UNLOCKED;
  357. cluster->deferred_events_head = 0;
  358. cluster->acsm_state = CLUSTER_AC_UNINSTANTIATED;
  359. return cluster;
  360. }
  361. int amf_cluster_applications_assigned (struct amf_cluster *cluster)
  362. {
  363. struct amf_application *app = 0;
  364. int is_all_application_assigned = 1;
  365. for (app = cluster->application_head; app != NULL; app = app->next) {
  366. if (app->acsm_state != APP_AC_WORKLOAD_ASSIGNED) {
  367. is_all_application_assigned = 0;
  368. break;
  369. }
  370. }
  371. return is_all_application_assigned;
  372. }
  373. void amf_cluster_application_workload_assigned (
  374. struct amf_cluster *cluster, struct amf_application *app)
  375. {
  376. ENTER ("");
  377. switch (cluster->acsm_state) {
  378. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  379. log_printf (LOG_NOTICE, "Cluster: application %s assigned.",
  380. app->name.value);
  381. if (amf_cluster_applications_assigned (cluster)) {
  382. acsm_cluster_enter_started (cluster);
  383. }
  384. break;
  385. default:
  386. assert(0);
  387. break;
  388. }
  389. }
  390. void *amf_cluster_serialize (struct amf_cluster *cluster, int *len)
  391. {
  392. char *buf = NULL;
  393. int offset = 0, size = 0;
  394. TRACE8 ("%s", cluster->name.value);
  395. buf = amf_serialize_SaNameT (buf, &size, &offset, &cluster->name);
  396. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  397. cluster->saAmfClusterStartupTimeout);
  398. buf = amf_serialize_SaNameT (buf, &size, &offset,
  399. &cluster->saAmfClusterClmCluster);
  400. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  401. cluster->saAmfClusterAdminState);
  402. buf = amf_serialize_SaUint32T (buf, &size, &offset, cluster->acsm_state);
  403. *len = offset;
  404. return buf;
  405. }
  406. struct amf_cluster *amf_cluster_deserialize (char *buf) {
  407. char *tmp = buf;
  408. struct amf_cluster *cluster = amf_cluster_new ();
  409. tmp = amf_deserialize_SaNameT (tmp, &cluster->name);
  410. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterStartupTimeout);
  411. tmp = amf_deserialize_SaNameT (tmp, &cluster->saAmfClusterClmCluster);
  412. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterAdminState);
  413. tmp = amf_deserialize_SaUint32T (tmp, &cluster->acsm_state);
  414. return cluster;
  415. }
  416. void amf_cluster_assign_workload (struct amf_cluster *cluster)
  417. {
  418. struct amf_application *app;
  419. ENTER ("");
  420. for (app = cluster->application_head; app != NULL; app = app->next) {
  421. amf_application_assign_workload (app, NULL);
  422. }
  423. }